# Climate Prediction Results

In [1]:
import pandas as pd
import os
import sys
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor



In [2]:
# Run the notebook from the project's root and make its helper modules importable.

# Move up to the project root only when we are not already there (the root is
# the directory that contains functions/). The guard keeps this cell idempotent:
# re-running it no longer climbs one directory higher each time.
if not os.path.isdir("functions"):
    os.chdir("../")
print(os.getcwd())

# Put the project's functions/ folder at the front of the import path, once.
folder_path = os.path.abspath("functions/")
if folder_path not in sys.path:
    sys.path.insert(0, folder_path)

from Predictions import (
    get_info_experiment,
    summarize_best_results_by_index,
    plot_average_best_results,
    plot_best_results_per_season,
    PredictionExperiment
)

c:\Users\marti\Desktop\data\hw_extra



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [3]:
def train_single_label_exp(exp_id, season, label_interest, model_exp, name_model, region):
    """Run a single-label prediction experiment for one model and return it.

    Displays the experiment info, loads the predictor table for the given
    experiment/season/region, drops every heat-wave label column except
    ``label_interest``, executes a ``PredictionExperiment`` on it and displays
    the experiment's results.

    Reads ``metadata_exp_path``, ``my_indices_path`` and ``extra_indices_path``
    from the notebook's global namespace; they must be defined before calling.

    Parameters
    ----------
    exp_id
        Experiment identifier. Used to look up the experiment info, to build
        the file name ``predictor_{exp_id}_{season}.parquet`` and as the last
        argument of ``PredictionExperiment``.
    season
        Season key; part of the parquet file name and the key of the data dict
        handed to ``PredictionExperiment``.
    label_interest : str
        The label to keep; one of "HWN", "HWF", "HWD", "HWA", "HWM".
    model_exp
        Model object forwarded to ``PredictionExperiment``.
    name_model
        Name forwarded to ``PredictionExperiment`` alongside ``model_exp``.
    region : str
        Sub-folder of ``data/summer_features`` holding the predictor files.

    Returns
    -------
    PredictionExperiment
        The experiment after ``execute_experiment`` and the metric calls.

    Raises
    ------
    ValueError
        If ``label_interest`` is not one of the five known labels.
    """
    all_labels = ["HWN", "HWF", "HWD", "HWA", "HWM"]
    if label_interest not in all_labels:
        raise ValueError(f"label_interest must be one of {all_labels}, got {label_interest!r}")
    labels_to_remove = [label for label in all_labels if label != label_interest]

    display(get_info_experiment(exp_id, metadata_exp_path=metadata_exp_path, metadata_index_path=my_indices_path, extra_indices_path=extra_indices_path))

    # Use the exp_id argument here: the bare name `id` is Python's builtin
    # function, which would be formatted into the path as "<built-in function id>".
    features = pd.read_parquet(f"data/summer_features/{region}/predictor_{exp_id}_{season}.parquet")
    # Non-mutating drop: only the label of interest remains among the label columns.
    data = {season: features.drop(columns=labels_to_remove)}

    # NOTE(review): the positional 5 is presumably the number of CV splits —
    # confirm against PredictionExperiment's signature.
    experiment = PredictionExperiment(data, [label_interest], [model_exp], [name_model], 5, exp_id)
    experiment.execute_experiment()
    experiment.get_metrics("r2", stage="TSCV", show=False)
    experiment.get_metrics("mape", stage="TSCV", show=False)
    display(experiment.results)
    return experiment

def display_full(df):
    """Display ``df`` with no row/column truncation and floats as ``1,234.57``.

    The pandas display options are changed only for the duration of the call.
    """
    full_view_options = (
        'display.max_rows', None,
        'display.max_columns', None,
        'display.float_format', '{:,.2f}'.format,
    )
    with pd.option_context(*full_view_options):
        display(df)

In [4]:
# Metadata tables for the climate indices, as paths relative to the working
# directory. train_single_label_exp reads these globals and forwards them to
# get_info_experiment.
my_indices_path = "data/my_indices/metadata.csv"
extra_indices_path = "data/extra_indices/metadata.csv"

## California

In [5]:
# Select the region and load the metadata of its experiments.
region = "california"
metadata_exp_path = f"data/summer_features/{region}/metadata.csv"
# Rename "id" to "id_data" while loading; chaining avoids mutating the frame in place.
metadata = pd.read_csv(metadata_exp_path).rename(columns={"id": "id_data"})
metadata

Unnamed: 0,id_data,filename,season,indices


In [6]:
# Load the stored results table for the region selected in the previous cell.
results = pd.read_csv(f"data/summer_results/{region}_results/results.csv")
results

Unnamed: 0,model,season,metric,stage,HWN,HWF,HWD,HWM,HWA,Average,id_data


In [7]:
# Summarize the best results (top_n=2) for every metric at each stage.
# One unpacking per stage; the calls run in the order r2, mape, mae.
summary_r2, summary_mape, summary_mae = (
    summarize_best_results_by_index(results, metadata, metric=metric, top_n=2, stage="prediction")
    for metric in ("r2", "mape", "mae")
)
summary_cv_r2, summary_cv_mape, summary_cv_mae = (
    summarize_best_results_by_index(results, metadata, metric=metric, top_n=2, stage="CV")
    for metric in ("r2", "mape", "mae")
)
summary_tscv_r2, summary_tscv_mape, summary_tscv_mae = (
    summarize_best_results_by_index(results, metadata, metric=metric, top_n=2, stage="TSCV")
    for metric in ("r2", "mape", "mae")
)

In [8]:
# Best MAE rows at the "CV" stage for the California results loaded above.
summary_cv_mae

Unnamed: 0,model,index,best_value,id_data,filename,season,indices


## Chile

In [9]:
# Switch to the Chile region and load the metadata of its experiments.
region = "chile"
metadata_exp_path = f"data/summer_features/{region}/metadata.csv"
# Rename "id" to "id_data" while loading; chaining avoids mutating the frame in place.
metadata = pd.read_csv(metadata_exp_path).rename(columns={"id": "id_data"})
metadata

Unnamed: 0,id_data,filename,season,indices
0,6bfb94aa,predictor_6bfb94aa_1.parquet,1,54fdbdb3-2afba2c1-1d705895-6e909044-ef22ff7b-8...
1,d5ec141a,predictor_d5ec141a_1.parquet,1,PDO-ONI-SAM-DMI
2,3c59cc03,predictor_3c59cc03_1.parquet,1,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...
3,cb34c388,predictor_cb34c388_1.parquet,1,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-P...
4,4f14de2d,predictor_4f14de2d_1.parquet,1,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...
5,891b838a,predictor_891b838a_1.parquet,1,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
6,5bcdae55,predictor_5bcdae55_1.parquet,1,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...


In [10]:
# Load the stored results table for the region selected in the previous cell.
results = pd.read_csv(f"data/summer_results/{region}_results/results.csv")
results

Unnamed: 0,model,season,metric,stage,HWN,HWF,HWD,HWM,HWA,Average,id_data
0,Linear,1,r2,prediction,-2.379569,-3.608049,-14.534629,-3.104584,-3.397765,-5.404919,6bfb94aa
1,RF5,1,r2,prediction,-4.483730,-3.379188,-4.334489,-1.154547,-1.173621,-2.905115,6bfb94aa
2,RF10,1,r2,prediction,-3.247421,-1.866671,-3.944252,-1.014608,-0.530522,-2.120695,6bfb94aa
3,SVR-rbf,1,r2,prediction,-2.257008,-3.090433,-3.414520,-0.940737,-0.568364,-2.054213,6bfb94aa
4,SVR-linear,1,r2,prediction,-1.779933,-2.433541,-5.236098,-2.019255,-0.999326,-2.493630,6bfb94aa
...,...,...,...,...,...,...,...,...,...,...,...
1591,LSTM16,1,mae,TSCV,0.914130,0.921879,0.786577,0.935042,0.913900,0.894306,5bcdae55
1592,SLSTM16,1,mae,TSCV,1.021220,1.022790,0.891200,1.027857,0.960046,0.984622,5bcdae55
1593,CNNRNN16,1,mae,TSCV,0.854211,0.898525,0.876345,0.901592,0.803092,0.866753,5bcdae55
1594,MLP16,1,mae,TSCV,0.763855,0.773634,0.781367,0.950876,0.832015,0.820350,5bcdae55


In [11]:
# Summarize the best results (top_n=2) for every metric at each stage.
# One unpacking per stage; the calls run in the order r2, mape, mae.
summary_r2, summary_mape, summary_mae = (
    summarize_best_results_by_index(results, metadata, metric=metric, top_n=2, stage="prediction")
    for metric in ("r2", "mape", "mae")
)
summary_cv_r2, summary_cv_mape, summary_cv_mae = (
    summarize_best_results_by_index(results, metadata, metric=metric, top_n=2, stage="CV")
    for metric in ("r2", "mape", "mae")
)
summary_tscv_r2, summary_tscv_mape, summary_tscv_mae = (
    summarize_best_results_by_index(results, metadata, metric=metric, top_n=2, stage="TSCV")
    for metric in ("r2", "mape", "mae")
)

In [12]:
# Best r2 rows at the "prediction" stage for Chile, shown without truncation.
display_full(summary_r2)

Unnamed: 0,model,season,id_data,index,best_value,training_value,filename,indices
0,SVR-linear,1,5bcdae55,Average,0.16,0.81,predictor_5bcdae55_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
1,SVR-linear,1,3c59cc03,Average,0.16,0.81,predictor_3c59cc03_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...
2,CNNRNN16,1,cb34c388,HWA,0.75,0.88,predictor_cb34c388_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-P...
3,CNNRNN16,1,891b838a,HWA,0.65,0.85,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
4,SVR-linear,1,891b838a,HWD,0.46,0.75,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
5,SVR-linear,1,4f14de2d,HWD,0.46,0.75,predictor_4f14de2d_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...
6,SVR-linear,1,5bcdae55,HWF,0.21,0.84,predictor_5bcdae55_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
7,SVR-linear,1,3c59cc03,HWF,0.21,0.84,predictor_3c59cc03_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...
8,CNNRNN16,1,cb34c388,HWM,0.69,0.87,predictor_cb34c388_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-P...
9,SVR-linear,1,4f14de2d,HWM,0.49,0.73,predictor_4f14de2d_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...


In [13]:
# Best mae rows at the "prediction" stage for Chile, shown without truncation.
display_full(summary_mae)

Unnamed: 0,model,season,id_data,index,best_value,training_value,filename,indices
0,RNN8,1,891b838a,Average,0.44,0.26,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
1,CNNRNN16,1,3c59cc03,Average,0.48,0.17,predictor_3c59cc03_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...
2,CNNRNN16,1,891b838a,HWA,0.4,0.25,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
3,CNNRNN16,1,cb34c388,HWA,0.42,0.2,predictor_cb34c388_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-P...
4,MLP16,1,891b838a,HWD,0.31,0.21,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
5,CNNRNN16,1,3c59cc03,HWD,0.31,0.16,predictor_3c59cc03_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...
6,RNN8,1,891b838a,HWF,0.4,0.19,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
7,CNNRNN16,1,cb34c388,HWF,0.47,0.17,predictor_cb34c388_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-P...
8,CNNRNN16,1,cb34c388,HWM,0.44,0.22,predictor_cb34c388_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-P...
9,RNN8,1,891b838a,HWM,0.45,0.34,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...


In [14]:
# Best mae rows at the "CV" stage for Chile, shown without truncation.
display_full(summary_cv_mae)

Unnamed: 0,model,season,id_data,index,best_value,filename,indices
0,SVR-linear,1,4f14de2d,Average,0.59,predictor_4f14de2d_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...
1,SVR-linear,1,891b838a,Average,0.59,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
2,SVR-linear,1,891b838a,HWA,0.55,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
3,SVR-linear,1,4f14de2d,HWA,0.55,predictor_4f14de2d_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...
4,SVR-linear,1,891b838a,HWD,0.58,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
5,SVR-linear,1,4f14de2d,HWD,0.58,predictor_4f14de2d_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...
6,RNN16,1,891b838a,HWF,0.57,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
7,RNN16,1,4f14de2d,HWF,0.58,predictor_4f14de2d_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...
8,CNNRNN16,1,3c59cc03,HWM,0.64,predictor_3c59cc03_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...
9,LSTM16,1,891b838a,HWM,0.65,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...


In [15]:
# Best mae rows at the "TSCV" stage for Chile, shown without truncation.
display_full(summary_tscv_mae)

Unnamed: 0,model,season,id_data,index,best_value,filename,indices
0,SVR-linear,1,891b838a,Average,0.77,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
1,SVR-linear,1,4f14de2d,Average,0.77,predictor_4f14de2d_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...
2,CNNRNN16,1,891b838a,HWA,0.62,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
3,MLP16,1,891b838a,HWA,0.65,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
4,CNNRNN16,1,891b838a,HWD,0.52,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
5,MLP16,1,891b838a,HWD,0.59,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
6,SVR-linear,1,4f14de2d,HWF,0.73,predictor_4f14de2d_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...
7,SVR-linear,1,891b838a,HWF,0.73,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
8,RNN8,1,891b838a,HWM,0.73,predictor_891b838a_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-7...
9,MLP16,1,3c59cc03,HWM,0.76,predictor_3c59cc03_1.parquet,c8260118-1e8ced6f-26976a3c-e0fba9b4-7e5d8e97-6...
