# Climate Prediction Results

In [1]:
import pandas as pd
import os
import sys
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor



In [2]:
# Add the folder to the Python path

os.chdir("../")
# change working directory to project's root path
print(os.getcwd())

folder_path = os.path.abspath("functions/") #INPUT_PATH)#'path_to_your_folder')  # Replace with the actual folder path
sys.path.insert(0, folder_path)

from Predictions import (
    get_info_experiment,
    summarize_best_results_by_index,
    plot_average_best_results,
    plot_best_results_per_season,
    PredictionExperiment
)

c:\Users\marti\Desktop\data\hw_extra



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [3]:
def train_single_label_exp(exp_id, season, label_interest, model_exp, name_model, region):
    labels_to_remove = ["HWN", "HWF", "HWD", "HWA", "HWM"]
    labels_to_remove.remove(label_interest)
    display(get_info_experiment(exp_id, metadata_exp_path=metadata_exp_path, metadata_index_path=my_indices_path, extra_indices_path=extra_indices_path))
    data = {season: pd.read_parquet(f"data/climate_features/{region}/predictor_{id}_{season}.parquet")}
    data[season].drop(columns=labels_to_remove, inplace=True)
    experiment_1 = PredictionExperiment(data, [label_interest], [model_exp], [name_model], 5, id)
    experiment_1.execute_experiment()
    experiment_1.get_metrics("r2", stage="TSCV", show=False)
    experiment_1.get_metrics("mape", stage="TSCV", show=False)
    display(experiment_1.results)
    return experiment_1

def display_full(df):
    with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.float_format', '{:,.2f}'.format):  # more options can be specified also
        display(df)

In [4]:
my_indices_path = "data/my_indices/metadata.csv"
extra_indices_path = "data/extra_indices/metadata.csv"

## California

In [5]:
region = "california"
metadata_exp_path = f"data/climate_features/{region}/metadata.csv"
metadata = pd.read_csv(metadata_exp_path)
metadata.rename(columns={"id":"id_data"}, inplace=True)
metadata

Unnamed: 0,id_data,filename,season,indices
0,6e47cb06,predictor_6e47cb06_1.parquet,1,df9a31c5-20a07cea-cfb03125-9169e0dc-0b0bffae-b...
1,6e47cb06,predictor_6e47cb06_2.parquet,2,df9a31c5-20a07cea-cfb03125-9169e0dc-0b0bffae-b...
2,6e47cb06,predictor_6e47cb06_3.parquet,3,df9a31c5-20a07cea-cfb03125-9169e0dc-0b0bffae-b...
3,6e47cb06,predictor_6e47cb06_4.parquet,4,df9a31c5-20a07cea-cfb03125-9169e0dc-0b0bffae-b...
4,6e47cb06,predictor_6e47cb06_5.parquet,5,df9a31c5-20a07cea-cfb03125-9169e0dc-0b0bffae-b...
5,6e47cb06,predictor_6e47cb06_6.parquet,6,df9a31c5-20a07cea-cfb03125-9169e0dc-0b0bffae-b...
6,6e47cb06,predictor_6e47cb06_7.parquet,7,df9a31c5-20a07cea-cfb03125-9169e0dc-0b0bffae-b...
7,6e47cb06,predictor_6e47cb06_8.parquet,8,df9a31c5-20a07cea-cfb03125-9169e0dc-0b0bffae-b...
8,6e47cb06,predictor_6e47cb06_9.parquet,9,df9a31c5-20a07cea-cfb03125-9169e0dc-0b0bffae-b...
9,6e47cb06,predictor_6e47cb06_10.parquet,10,df9a31c5-20a07cea-cfb03125-9169e0dc-0b0bffae-b...


In [6]:
results = pd.read_csv(f"data/climate_results_NN/{region}_results/results.csv")
results

Unnamed: 0,model,season,metric,stage,HWN,HWF,HWD,HWM,HWA,Average,id_data
0,RNN16,1,r2,prediction,0.404952,-0.044195,-0.053677,0.055344,0.309246,0.134334,6e47cb06
1,LSTM16,1,r2,prediction,-0.198438,-0.222593,-0.296213,-0.385324,-0.259515,-0.272417,6e47cb06
2,GRU16,1,r2,prediction,-0.237761,-0.238086,-0.385357,-0.419552,-0.315618,-0.319275,6e47cb06
3,SRNN16,1,r2,prediction,0.087323,0.109975,-0.438949,0.100229,-0.598095,-0.147903,6e47cb06
4,SLSTM16,1,r2,prediction,-0.141627,-0.197139,-0.299687,-0.358860,-0.337403,-0.266943,6e47cb06
...,...,...,...,...,...,...,...,...,...,...,...
1339,SRNN16,12,mae,TSCV,0.502856,0.480650,0.514638,0.551198,0.524057,0.514679,5cb3fa02
1340,SLSTM16,12,mae,TSCV,0.465773,0.437876,0.471648,0.472722,0.448999,0.459404,5cb3fa02
1341,CNNRNN16,12,mae,TSCV,0.572167,0.465668,0.647416,0.447689,0.474824,0.521553,5cb3fa02
1342,CNNLSTM16,12,mae,TSCV,0.522929,0.534274,0.558769,0.565681,0.520581,0.540447,5cb3fa02


In [7]:
# Example usage
summary_r2 = summarize_best_results_by_index(results, metadata, metric="r2", top_n=2, stage="prediction")
summary_mape = summarize_best_results_by_index(results, metadata, metric="mape", top_n=2, stage="prediction")
summary_cv_r2 = summarize_best_results_by_index(results, metadata, metric="r2", top_n=2, stage="CV")
summary_cv_mape = summarize_best_results_by_index(results, metadata, metric="mape", top_n=2, stage="CV")
summary_tscv_r2 = summarize_best_results_by_index(results, metadata, metric="r2", top_n=2, stage="TSCV")
summary_tscv_mape = summarize_best_results_by_index(results, metadata, metric="mape", top_n=2, stage="TSCV")

In [8]:
summary_tscv_r2

Unnamed: 0,model,season,id_data,index,best_value,filename,indices
0,LSTM16,5,5cb3fa02,Average,0.292254,predictor_5cb3fa02_5.parquet,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
1,SLSTM16,5,5cb3fa02,Average,0.26223,predictor_5cb3fa02_5.parquet,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
2,SLSTM16,5,5cb3fa02,HWA,0.300827,predictor_5cb3fa02_5.parquet,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
3,RNN16,5,5cb3fa02,HWA,0.300642,predictor_5cb3fa02_5.parquet,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
4,LSTM16,5,5cb3fa02,HWD,0.379829,predictor_5cb3fa02_5.parquet,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
5,SRNN16,10,5cb3fa02,HWD,0.335776,predictor_5cb3fa02_10.parquet,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
6,LSTM16,5,5cb3fa02,HWF,0.365055,predictor_5cb3fa02_5.parquet,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
7,MLP16,10,5cb3fa02,HWF,0.296915,predictor_5cb3fa02_10.parquet,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
8,SLSTM16,5,5cb3fa02,HWM,0.304191,predictor_5cb3fa02_5.parquet,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
9,LSTM16,2,5cb3fa02,HWM,0.29841,predictor_5cb3fa02_2.parquet,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...


In [9]:
cv_r2_seasons = {}
cv_mape_seasons = {}
r2_seasons = {}
mape_seasons = {}
tscv_r2_seasons = {}
tscv_mape_seasons = {}
for i in range(13):
    results_season = results[results["season"]==i]
    cv_r2_seasons[i] = summarize_best_results_by_index(results_season, metadata, metric="r2",stage="CV" , top_n=1)
    tscv_r2_seasons[i] = summarize_best_results_by_index(results_season, metadata, metric="r2",stage="TSCV" , top_n=1)
    r2_seasons[i] = summarize_best_results_by_index(results_season, metadata, metric="r2",stage="prediction", top_n=1)
    mape_seasons[i] = summarize_best_results_by_index(results_season, metadata, metric="mape",stage="prediction", top_n=1)
    cv_mape_seasons[i] = summarize_best_results_by_index(results_season, metadata, metric="mape",stage="CV", top_n=1)
    tscv_mape_seasons[i] = summarize_best_results_by_index(results_season, metadata, metric="mape",stage="TSCV", top_n=1)

cv_r2_seasons = pd.concat(list(cv_r2_seasons.values()))
cv_mape_seasons = pd.concat(list(cv_mape_seasons.values()))
tscv_r2_seasons = pd.concat(list(tscv_r2_seasons.values()))
tscv_mape_seasons = pd.concat(list(tscv_mape_seasons.values()))
r2_seasons = pd.concat(list(r2_seasons.values()))
mape_seasons = pd.concat(list(mape_seasons.values()))


In [10]:
display_full(tscv_r2_seasons)

Unnamed: 0,model,index,best_value,id_data,filename,season,indices
0,SLSTM16,Average,0.09,5cb3fa02,predictor_5cb3fa02_1.parquet,1,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
1,GRU16,HWA,0.15,5cb3fa02,predictor_5cb3fa02_1.parquet,1,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
2,RNN16,HWD,0.13,5cb3fa02,predictor_5cb3fa02_1.parquet,1,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
3,RNN16,HWF,0.24,5cb3fa02,predictor_5cb3fa02_1.parquet,1,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
4,SRNN16,HWM,0.12,5cb3fa02,predictor_5cb3fa02_1.parquet,1,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
5,CNNRNN16,HWN,0.3,5cb3fa02,predictor_5cb3fa02_1.parquet,1,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
0,LSTM16,Average,0.19,5cb3fa02,predictor_5cb3fa02_2.parquet,2,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
1,CNNRNN16,HWA,0.23,5cb3fa02,predictor_5cb3fa02_2.parquet,2,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
2,CNNRNN16,HWD,0.32,5cb3fa02,predictor_5cb3fa02_2.parquet,2,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...
3,LSTM16,HWF,0.26,5cb3fa02,predictor_5cb3fa02_2.parquet,2,fde0e327-340e2882-f27c56aa-5b9237bf-46fa0cb8-6...


## Chile

In [12]:
region = "chile"
metadata_exp_path = f"data/climate_features/{region}/metadata.csv"
metadata = pd.read_csv(metadata_exp_path)
metadata.rename(columns={"id":"id_data"}, inplace=True)
metadata

Unnamed: 0,id_data,filename,season,indices
0,978f49d7,predictor_978f49d7_1.parquet,1,fde0e327-340e2882-43701738-e306f58b-e601b072-e...
1,978f49d7,predictor_978f49d7_2.parquet,2,fde0e327-340e2882-43701738-e306f58b-e601b072-e...
2,978f49d7,predictor_978f49d7_3.parquet,3,fde0e327-340e2882-43701738-e306f58b-e601b072-e...
3,978f49d7,predictor_978f49d7_4.parquet,4,fde0e327-340e2882-43701738-e306f58b-e601b072-e...
4,978f49d7,predictor_978f49d7_5.parquet,5,fde0e327-340e2882-43701738-e306f58b-e601b072-e...
...,...,...,...,...
247,458d357c,predictor_458d357c_8.parquet,8,32f131d2-69ffcfa8-4af95abb-4a86cb22-52eda853-3...
248,458d357c,predictor_458d357c_9.parquet,9,32f131d2-69ffcfa8-4af95abb-4a86cb22-52eda853-3...
249,458d357c,predictor_458d357c_10.parquet,10,32f131d2-69ffcfa8-4af95abb-4a86cb22-52eda853-3...
250,458d357c,predictor_458d357c_11.parquet,11,32f131d2-69ffcfa8-4af95abb-4a86cb22-52eda853-3...


In [13]:
results = pd.read_csv(f"data/climate_results_NN/{region}_results/results.csv")
results

Unnamed: 0,model,season,metric,stage,HWN,HWF,HWD,HWM,HWA,Average,id_data
0,RNN16,1,r2,prediction,-1.137981,-0.101467,-0.540862,0.004724,-1.245405,-0.604198,978f49d7
1,LSTM16,1,r2,prediction,-0.296615,-0.293727,-0.611602,-0.136574,0.003455,-0.267013,978f49d7
2,GRU16,1,r2,prediction,-0.451169,-0.875157,-0.520603,-0.549754,-0.113211,-0.501979,978f49d7
3,SRNN16,1,r2,prediction,-1.121952,-0.590102,0.306276,-0.561515,-1.116073,-0.616674,978f49d7
4,SLSTM16,1,r2,prediction,-0.134914,-0.415837,-0.338187,0.019838,-0.104378,-0.194695,978f49d7
...,...,...,...,...,...,...,...,...,...,...,...
14107,SRNN16,12,mae,TSCV,0.702118,0.575218,0.704677,0.457367,0.628156,0.613507,458d357c
14108,SLSTM16,12,mae,TSCV,0.732042,0.726959,0.763230,0.589607,0.621670,0.686702,458d357c
14109,CNNRNN16,12,mae,TSCV,0.553936,0.611771,0.560034,0.485108,0.473428,0.536855,458d357c
14110,CNNLSTM16,12,mae,TSCV,0.791487,0.808835,0.817128,0.598808,0.677477,0.738747,458d357c


In [15]:
# Example usage
summary_r2 = summarize_best_results_by_index(results, metadata, metric="r2", top_n=2, stage="prediction")
summary_mape = summarize_best_results_by_index(results, metadata, metric="mape", top_n=1, stage="prediction")
summary_cv_r2 = summarize_best_results_by_index(results, metadata, metric="r2", top_n=1, stage="CV")
summary_cv_mape = summarize_best_results_by_index(results, metadata, metric="mape", top_n=1, stage="CV")
summary_tscv_r2 = summarize_best_results_by_index(results, metadata, metric="r2", top_n=5, stage="TSCV")
summary_tscv_mape = summarize_best_results_by_index(results, metadata, metric="mape", top_n=3, stage="TSCV")
summary_tscv_mae = summarize_best_results_by_index(results, metadata, metric="mae", top_n=3, stage="TSCV")


In [16]:
display_full(summary_tscv_mae)

Unnamed: 0,model,season,id_data,index,best_value,filename,indices
0,GRU16,12,50a3f070,Average,0.49,predictor_50a3f070_12.parquet,fde0e327-340e2882-b91ccd4e-c6184040-c1c59e4d-8...
1,MLP16,12,511854f2,Average,0.51,predictor_511854f2_12.parquet,fde0e327-340e2882-b91ccd4e-c6184040-c1c59e4d-8...
2,CNNLSTM16,12,9bd58418,Average,0.51,predictor_9bd58418_12.parquet,fde0e327-340e2882-b91ccd4e-c6184040-c1c59e4d-8...
3,CNNRNN16,12,9bd58418,HWA,0.41,predictor_9bd58418_12.parquet,fde0e327-340e2882-b91ccd4e-c6184040-c1c59e4d-8...
4,RNN16,12,511854f2,HWA,0.42,predictor_511854f2_12.parquet,fde0e327-340e2882-b91ccd4e-c6184040-c1c59e4d-8...
5,GRU16,12,50a3f070,HWA,0.44,predictor_50a3f070_12.parquet,fde0e327-340e2882-b91ccd4e-c6184040-c1c59e4d-8...
6,CNNRNN16,6,511854f2,HWD,0.44,predictor_511854f2_6.parquet,fde0e327-340e2882-b91ccd4e-c6184040-c1c59e4d-8...
7,CNNRNN16,2,4d17ba1a,HWD,0.47,predictor_4d17ba1a_2.parquet,fde0e327-340e2882-b91ccd4e-c6184040-c1c59e4d-8...
8,CNNRNN16,7,511854f2,HWD,0.48,predictor_511854f2_7.parquet,fde0e327-340e2882-b91ccd4e-c6184040-c1c59e4d-8...
9,SRNN16,6,3832cbd6,HWF,0.45,predictor_3832cbd6_6.parquet,fde0e327-340e2882-13e2f761-aa75d48e-2748fd3a-f...


In [17]:
cv_r2_seasons = {}
cv_mape_seasons = {}
r2_seasons = {}
mape_seasons = {}
tscv_r2_seasons = {}
tscv_mape_seasons = {}
tscv_mae_seasons = {}

for i in range(13):
    results_season = results[results["season"]==i]
    cv_r2_seasons[i] = summarize_best_results_by_index(results_season, metadata, metric="r2",stage="CV" , top_n=1)
    tscv_r2_seasons[i] = summarize_best_results_by_index(results_season, metadata, metric="r2",stage="TSCV" , top_n=1)
    r2_seasons[i] = summarize_best_results_by_index(results_season, metadata, metric="r2",stage="prediction", top_n=1)
    mape_seasons[i] = summarize_best_results_by_index(results_season, metadata, metric="mape",stage="prediction", top_n=1)
    cv_mape_seasons[i] = summarize_best_results_by_index(results_season, metadata, metric="mape",stage="CV", top_n=1)
    tscv_mape_seasons[i] = summarize_best_results_by_index(results_season, metadata, metric="mape",stage="TSCV", top_n=1)
    tscv_mae_seasons[i] = summarize_best_results_by_index(results_season, metadata, metric="mape",stage="TSCV", top_n=1)

cv_r2_seasons = pd.concat(list(cv_r2_seasons.values()))
cv_mape_seasons = pd.concat(list(cv_mape_seasons.values()))
tscv_r2_seasons = pd.concat(list(tscv_r2_seasons.values()))
tscv_mape_seasons = pd.concat(list(tscv_mape_seasons.values()))
tscv_mae_seasons = pd.concat(list(tscv_mae_seasons.values()))

r2_seasons = pd.concat(list(r2_seasons.values()))
mape_seasons = pd.concat(list(mape_seasons.values()))


In [18]:
df = tscv_r2_seasons
display_full(df[df["best_value"] > 0])
# display_full(df)

Unnamed: 0,model,index,best_value,id_data,filename,season,indices
4,RNN16,HWM,0.05,69ae08a8,predictor_69ae08a8_1.parquet,1,fde0e327-340e2882-b91ccd4e-c6184040-c1c59e4d-8...
1,SRNN16,HWA,0.12,3832cbd6,predictor_3832cbd6_2.parquet,2,fde0e327-340e2882-13e2f761-aa75d48e-2748fd3a-f...
2,CNNRNN16,HWD,0.12,4d17ba1a,predictor_4d17ba1a_2.parquet,2,fde0e327-340e2882-b91ccd4e-c6184040-c1c59e4d-8...
4,MLP16,HWM,0.09,d7101242,predictor_d7101242_2.parquet,2,fde0e327-340e2882-13e2f761-aa75d48e-2748fd3a-f...
0,SRNN16,Average,0.16,3832cbd6,predictor_3832cbd6_3.parquet,3,fde0e327-340e2882-13e2f761-aa75d48e-2748fd3a-f...
1,CNNRNN16,HWA,0.3,d7101242,predictor_d7101242_3.parquet,3,fde0e327-340e2882-13e2f761-aa75d48e-2748fd3a-f...
2,SRNN16,HWD,0.39,d7101242,predictor_d7101242_3.parquet,3,fde0e327-340e2882-13e2f761-aa75d48e-2748fd3a-f...
3,SRNN16,HWF,0.25,3df87a13,predictor_3df87a13_3.parquet,3,fde0e327-340e2882-880b8b63-aa75d48e-88249a81-f...
4,SRNN16,HWM,0.32,3832cbd6,predictor_3832cbd6_3.parquet,3,fde0e327-340e2882-13e2f761-aa75d48e-2748fd3a-f...
5,RNN16,HWN,0.18,3832cbd6,predictor_3832cbd6_3.parquet,3,fde0e327-340e2882-13e2f761-aa75d48e-2748fd3a-f...


In [19]:
df = tscv_mae_seasons
display_full(df[["model", "index", "best_value", "id_data", "season"]])
# display_full(df)

Unnamed: 0,model,index,best_value,id_data,season
0,LSTM16,Average,1.08,458d357c,1
1,SLSTM16,HWA,0.73,9bd58418,1
2,CNNRNN16,HWD,1.02,1b939ac5,1
3,LSTM16,HWF,0.75,311dd366,1
4,LSTM16,HWM,0.83,69ae08a8,1
5,RNN16,HWN,0.76,9f8163e4,1
0,SLSTM16,Average,0.96,8c95fd00,2
1,LSTM16,HWA,0.84,b33fc639,2
2,CNNRNN16,HWD,0.67,4d17ba1a,2
3,LSTM16,HWF,0.9,4d17ba1a,2
