# PC Yearly Automatization

## Imports and Functions

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import scipy as sp
import os
from itertools import combinations

import sys
import seaborn as sns
import re
import pickle


import cartopy.crs as ccrs
import cartopy.feature as cfeature



In [None]:
# Make the project's root directory the working directory and put its
# `functions/` folder at the front of the import path.

# Only step up one level when `functions/` is not already visible from the
# current directory: an unconditional chdir climbs one directory further
# every time this cell is re-run.
if not os.path.isdir("functions"):
    os.chdir("../")
print(os.getcwd())

folder_path = os.path.abspath("functions/")
# Skip the insert on re-runs so sys.path does not accumulate duplicates.
if folder_path not in sys.path:
    sys.path.insert(0, folder_path)

In [None]:
from PredictorsDrivers import (
    PCAPredictors
)

In [None]:
# Run configuration: the mode count handed to PCAPredictors, and a switch
# choosing between recomputing the PCAs (True) and loading pickled ones (False).
num_modes, generate_pcas = 3, True

# Input dataset, read from the local NetCDF file.
ds_6means = xr.load_dataset("data/local_data/7means_world.nc")

In [None]:
# Build the PCAPredictors object, either computing the PCAs and pickling
# them, or re-using a previously pickled set.

# One cache file shared by both branches.
# NOTE(review): the read branch used to open "pcas_1972_yearly.pkl", a file
# the write branch never produces, so switching generate_pcas to False did
# not load what had just been saved. Confirm no workflow still relies on the
# old file name.
pca_cache_path = "pcas_t2m_yearly.pkl"
pca_variables = ["SP", "TTR", "U10", "V10", "Z", "T2M"]

if generate_pcas:
    predictors = PCAPredictors(
        ds_6means,
        num_modes,
        frequency="yearly",
        total_variables=pca_variables,
    )
    with open(pca_cache_path, "wb") as cache_file:
        pickle.dump(
            predictors.dict_predictors,
            cache_file,
            protocol=pickle.HIGHEST_PROTOCOL,
        )
else:
    # NOTE: unpickling can execute arbitrary code; only load cache files
    # that were produced locally by this notebook.
    with open(pca_cache_path, "rb") as cache_file:
        pcas = pickle.load(cache_file)
    predictors = PCAPredictors(
        ds_6means,
        num_modes,
        frequency="yearly",
        saved_pcas=pcas,
        total_variables=pca_variables,
    )

In [None]:
# Report how many columns the predictors table holds.
n_pcas = len(predictors.df_predictors.columns)
print(f"Total of different PCAS {n_pcas}")

## Chile

In [None]:
# First year kept in the analysis window (also read by later cells).
first_year = 1972

# Load the central-Chile table, parse its "Date" column (year-only values)
# into timestamps and use it as the index.
hwis_chile = (
    pd.read_csv("data/local_data/yearly/HWs_chile_central.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"], format="%Y"))
    .set_index("Date")
)

# Keep only the rows whose year lies in first_year..2022, both inclusive.
chile_years = hwis_chile.index.year
hwis_chile = hwis_chile[(chile_years >= first_year) & (chile_years <= 2022)]


In [None]:
# Experiment grid for Chile: every combination of mode count, variance
# threshold and top-N cut is run and handed to experiment_to_parquet.
# NOTE: `num_modes` is rebound here from the scalar run setting to a list;
# re-running the PCAPredictors construction cell afterwards would pass the
# list instead of the scalar.
top_n = [5, 10, 15, 20, 30]
var_thresh = [None, 0.05, 0.1, 0.15]
num_modes = [1, 2, 3]

chile_out_dir = "data/new_features_t2m/chile"
chile_metadata_path = f"{chile_out_dir}/metadata.csv"

for n_mod in num_modes:
    for thresh in var_thresh:
        for top in top_n:
            # The predictors table is rebuilt before every experiment; this
            # is kept inside the innermost loop because it is not visible
            # here whether top_correlations_predictors modifies it.
            predictors.num_modes = n_mod
            predictors.df_predictors = predictors.set_df_predictors()
            # The first return value gets its own name so the loop variable
            # `top` is no longer overwritten inside its own loop body.
            top_result, n_exp = predictors.top_correlations_predictors(
                hwis_chile, threshold_variance=thresh, top_n=top
            )
            predictors.experiment_to_parquet(n_exp, chile_out_dir, chile_metadata_path)

## California

In [None]:
# Load the California (NOAA) table, parse its "Date" column (year-only
# values) into timestamps and use it as the index.
hwis_cali = (
    pd.read_csv("data/local_data/yearly/HWs_california_NOAA.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"], format="%Y"))
    .set_index("Date")
)

# Keep only the rows whose year lies in first_year..2022, both inclusive.
cali_years = hwis_cali.index.year
hwis_cali = hwis_cali[(cali_years >= first_year) & (cali_years <= 2022)]

In [None]:
# Experiment grid for California: every combination of mode count, variance
# threshold and top-N cut is run and handed to experiment_to_parquet.
# `num_modes`, `var_thresh` and `top_n` are the lists defined for the Chile
# sweep earlier in the notebook.
cali_out_dir = "data/new_features_t2m/california"
cali_metadata_path = f"{cali_out_dir}/metadata.csv"

for n_mod in num_modes:
    for thresh in var_thresh:
        for top in top_n:
            # The predictors table is rebuilt before every experiment; this
            # is kept inside the innermost loop because it is not visible
            # here whether top_correlations_predictors modifies it.
            predictors.num_modes = n_mod
            predictors.df_predictors = predictors.set_df_predictors()
            # The first return value gets its own name so the loop variable
            # `top` is no longer overwritten inside its own loop body.
            top_result, n_exp = predictors.top_correlations_predictors(
                hwis_cali, threshold_variance=thresh, top_n=top
            )
            predictors.experiment_to_parquet(n_exp, cali_out_dir, cali_metadata_path)