# PC Top Features

## Imports and Functions

In [1]:
import pandas as pd
import xarray as xr
import os

import sys
import pickle



In [2]:
import warnings

# Silence pandas' PerformanceWarning for the rest of the session.
# NOTE(review): presumably raised while building the very wide predictor tables below — confirm.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

In [3]:
# Move to the project's root path and make the local `functions/` folder importable.
#
# `os.chdir("../")` is relative, so re-running this cell in a live kernel used to
# climb one more directory on every execution. The sentinel below makes the cell
# idempotent: the directory change happens only the first time the cell runs.
if "PROJECT_ROOT" not in globals():
    os.chdir("../")
    PROJECT_ROOT = os.getcwd()
print(os.getcwd())

folder_path = os.path.abspath("functions/")
# Keep the folder at the front of sys.path without stacking duplicates on re-run.
if sys.path[:1] != [folder_path]:
    sys.path.insert(0, folder_path)

c:\Users\marti\Desktop\data\hw_extra


In [4]:
from PredictorsDrivers import (
    PCAPredictors
)

# Preprocess Indices

First, we use only the ONI, PDO, and SAM indices.

In [5]:
def _monthly_table_to_series(df_wide, value_name):
    """Reshape a year-by-month table into one value column indexed by date.

    Parameters
    ----------
    df_wide : pd.DataFrame
        Table with a "Year" column plus twelve columns named "1" .. "12".
    value_name : str
        Name given to the resulting value column (e.g. "PDO").

    Returns
    -------
    pd.DataFrame
        Rows sorted by (Year, Month), indexed by "Date" (day 1 of each month),
        with `value_name` as the only column.
    """
    df_long = df_wide.melt(id_vars=["Year"], var_name="Month", value_name=value_name)
    df_long["Month"] = pd.to_numeric(df_long["Month"])
    df_long = df_long.sort_values(["Year", "Month"])
    df_long["Date"] = pd.to_datetime(df_long[["Year", "Month"]].assign(DAY=1))
    return df_long.set_index("Date").drop(columns=["Year", "Month"])


# --- Read the raw index files (whitespace-separated, no header row) ---
month_columns = [str(i) for i in range(1, 13)]
oni_raw = pd.read_csv("data/indices/oni.txt", sep="   |  | ", header=None, engine="python", names=["Season", "Year", "sst", "ONI"])
pdo_raw = pd.read_csv("data/indices/pdo.dat", sep="  | ", header=None, engine="python", names=["Year"] + month_columns)
sam_raw = pd.read_csv("data/indices/sam.txt", sep="  | ", header=None, engine="python", names=["Year"] + month_columns)

# --- ONI: one row per overlapping 3-month season ---
# Each season is stamped with the month that follows its central month
# (DJF -> 02, JFM -> 03, ..., NDJ -> 01).
season_to_month = {
    "DJF": "02", "JFM": "03", "FMA": "04", "MAM": "05",
    "AMJ": "06", "MJJ": "07", "JJA": "08", "JAS": "09",
    "ASO": "10", "SON": "11", "OND": "12", "NDJ": "01"
}
# NDJ is mapped to January, so its year is shifted forward by one.
# NOTE(review): assumes the file labels NDJ with the year of its Nov/Dec months — confirm.
# Vectorized replacement for the previous row-wise DataFrame.apply.
oni_year = oni_raw["Year"] + (oni_raw["Season"] == "NDJ").astype(int)
oni_month = oni_raw["Season"].map(season_to_month)
df_oni = (
    oni_raw
    .assign(Date=pd.to_datetime(oni_year.astype(str) + "-" + oni_month))
    .loc[:, ["Date", "ONI"]]
    .set_index("Date")
)

# --- PDO and SAM: year x month tables, reshaped with the shared helper ---
df_pdo = _monthly_table_to_series(pdo_raw, "PDO")
df_sam = _monthly_table_to_series(sam_raw, "SAM")

In [6]:
def _within_1972_2022(df):
    """Return the rows of `df` whose index year lies between 1972 and 2022 inclusive."""
    years = df.index.year
    return df[(years >= 1972) & (years <= 2022)]


# Restrict the three index tables to the same 1972-2022 window.
df_pdo, df_oni, df_sam = [_within_1972_2022(df) for df in (df_pdo, df_oni, df_sam)]


# Chile SST

In [7]:
# Region name interpolated into the data/new_features*/{region} paths used by the cells below.
region = "chile"

First we load the saved experiments; afterwards we incorporate the indices into every experiment.

In [8]:
# Load the dataset and the previously saved PCA objects, then build the predictors from them.
# NOTE(review): the variable is called "6means" but the file is "7means_world.nc" — confirm which is right.
ds_6means = xr.load_dataset("data/local_data/7means_world.nc")
name_pcas = "pcas_1972.pkl"
# NOTE: pickle.load can execute arbitrary code; only open pickle files produced by this project.
with open(name_pcas, "rb") as inp:
    pcas = pickle.load(inp)
# The positional 3 is interpreted by PCAPredictors (definition not visible here); this run uses SST as the last variable.
predictors = PCAPredictors(ds_6means, 3, saved_pcas=pcas,total_variables=["SP", "TTR", "U10", "V10", "Z", "SST"])

In [9]:
# Report how many columns the predictor table has.
print(f"Total of different PCAS {len(predictors.df_predictors.columns)}")

Total of different PCAS 4536


In [10]:
# Load the experiments stored in this region's folder, passing the folder's metadata.csv as second argument.
predictors.load_experiments(f"data/new_features/{region}", f"data/new_features/{region}/metadata.csv")

Loaded 60 experiments


In [11]:
# Add the PDO, ONI and SAM series to every loaded experiment and write each result out.
out_dir = f"data/new_features/{region}"
metadata_csv = f"data/new_features/{region}/metadata.csv"
# Iterate over a snapshot of the keys so the loop does not depend on the live mapping.
for exp in list(predictors.experiments.keys()):
    experiment, num = predictors.incorporate_predictor(
        exp,
        [df_pdo["PDO"], df_oni["ONI"], df_sam["SAM"]],
        ["PDO", "ONI", "SAM"],
    )
    predictors.experiment_to_parquet(num, out_dir, metadata_csv)

Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved


# Chile T2M

First we load the saved experiments; afterwards we incorporate the indices into every experiment.

In [12]:
# Drop the previous predictors and PCA objects before loading the T2M set.
del predictors
del pcas
name_pcas = "pcas_t2m.pkl"
# NOTE: pickle.load can execute arbitrary code; only open pickle files produced by this project.
with open(name_pcas, "rb") as inp:
    pcas = pickle.load(inp)
# Same dataset and positional 3 as the SST run, but with T2M as the last variable.
predictors = PCAPredictors(ds_6means, 3, saved_pcas=pcas,total_variables=["SP", "TTR", "U10", "V10", "Z", "T2M"])

In [13]:
# Report how many columns the predictor table has.
print(f"Total of different PCAS {len(predictors.df_predictors.columns)}")

Total of different PCAS 4536


In [14]:
# Load the T2M experiments stored in this region's folder, passing the folder's metadata.csv as second argument.
predictors.load_experiments(f"data/new_features_t2m/{region}", f"data/new_features_t2m/{region}/metadata.csv")

Loaded 60 experiments


In [15]:
# Add the PDO, ONI and SAM series to every loaded T2M experiment and write each result out.
out_dir = f"data/new_features_t2m/{region}"
metadata_csv = f"data/new_features_t2m/{region}/metadata.csv"
# Iterate over a snapshot of the keys so the loop does not depend on the live mapping.
for exp in list(predictors.experiments.keys()):
    experiment, num = predictors.incorporate_predictor(
        exp,
        [df_pdo["PDO"], df_oni["ONI"], df_sam["SAM"]],
        ["PDO", "ONI", "SAM"],
    )
    predictors.experiment_to_parquet(num, out_dir, metadata_csv)

Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved


# California SST

In [16]:
# Switch the region interpolated into the data/new_features*/{region} paths used by the cells below.
region = "california"

First we load the saved experiments; afterwards we incorporate the indices into every experiment.

In [17]:
# Drop the previous predictors and PCA objects before loading the SST set again.
del predictors
del pcas
name_pcas = "pcas_1972.pkl"
# NOTE: pickle.load can execute arbitrary code; only open pickle files produced by this project.
with open(name_pcas, "rb") as inp:
    pcas = pickle.load(inp)
# Same dataset and positional 3 as before, with SST as the last variable.
predictors = PCAPredictors(ds_6means, 3, saved_pcas=pcas,total_variables=["SP", "TTR", "U10", "V10", "Z", "SST"])

In [18]:
# Report how many columns the predictor table has.
print(f"Total of different PCAS {len(predictors.df_predictors.columns)}")

Total of different PCAS 4536


In [19]:
# Load the experiments stored in this region's folder, passing the folder's metadata.csv as second argument.
predictors.load_experiments(f"data/new_features/{region}", f"data/new_features/{region}/metadata.csv")

Loaded 60 experiments


In [20]:
# Add the PDO, ONI and SAM series to every loaded experiment and write each result out.
out_dir = f"data/new_features/{region}"
metadata_csv = f"data/new_features/{region}/metadata.csv"
# Iterate over a snapshot of the keys so the loop does not depend on the live mapping.
for exp in list(predictors.experiments.keys()):
    experiment, num = predictors.incorporate_predictor(
        exp,
        [df_pdo["PDO"], df_oni["ONI"], df_sam["SAM"]],
        ["PDO", "ONI", "SAM"],
    )
    predictors.experiment_to_parquet(num, out_dir, metadata_csv)

Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved


# California T2M

First we load the saved experiments; afterwards we incorporate the indices into every experiment.

In [21]:
# Drop the previous predictors and PCA objects before loading the T2M set.
del predictors
del pcas
name_pcas = "pcas_t2m.pkl"
# NOTE: pickle.load can execute arbitrary code; only open pickle files produced by this project.
with open(name_pcas, "rb") as inp:
    pcas = pickle.load(inp)
# Same dataset and positional 3 as the SST run, but with T2M as the last variable.
predictors = PCAPredictors(ds_6means, 3, saved_pcas=pcas,total_variables=["SP", "TTR", "U10", "V10", "Z", "T2M"])

In [22]:
# Report how many columns the predictor table has.
print(f"Total of different PCAS {len(predictors.df_predictors.columns)}")

Total of different PCAS 4536


In [23]:
# Load the T2M experiments stored in this region's folder, passing the folder's metadata.csv as second argument.
predictors.load_experiments(f"data/new_features_t2m/{region}", f"data/new_features_t2m/{region}/metadata.csv")

Loaded 60 experiments


In [None]:
# Add the PDO, ONI and SAM series to every loaded T2M experiment and write each result out.
out_dir = f"data/new_features_t2m/{region}"
metadata_csv = f"data/new_features_t2m/{region}/metadata.csv"
# Iterate over a snapshot of the keys so the loop does not depend on the live mapping.
for exp in list(predictors.experiments.keys()):
    experiment, num = predictors.incorporate_predictor(
        exp,
        [df_pdo["PDO"], df_oni["ONI"], df_sam["SAM"]],
        ["PDO", "ONI", "SAM"],
    )
    predictors.experiment_to_parquet(num, out_dir, metadata_csv)

Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved
Saved


: 