# California Season Prediction

## Imports and Functions

In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.gaussian_process.kernels import RBF, WhiteKernel
from sklearn.gaussian_process import GaussianProcessRegressor


from sklearn.svm import SVR
import os
import matplotlib.pyplot as plt
import numpy as np
from xgboost import XGBRegressor
import sys

In [3]:
# Move the working directory to the project's root, two levels above the
# directory the kernel starts in, so the relative "functions/" and "data/"
# paths used in the following cells resolve.
# A bare os.chdir("../../") climbs two more levels every time this cell is
# re-run; resolving the root once per kernel session keeps the cell idempotent.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.abspath("../../")
os.chdir(PROJECT_ROOT)
print(os.getcwd())

# Year used to pick the feature folder: data/features/california/from<FIRST_YEAR>/
FIRST_YEAR = 1972

c:\Users\marti\Desktop\data\hw_extra


In [4]:
# Make the project's "functions/" folder importable so the local Predictions
# module can be loaded. The path is resolved against the current working
# directory, so this cell must run after the working directory has been moved
# to the project root.
folder_path = os.path.abspath("functions/")
# Drop any earlier copy before prepending: re-running the cell then neither
# piles up duplicate sys.path entries nor loses the front-of-path priority.
sys.path[:] = [folder_path] + [entry for entry in sys.path if entry != folder_path]

from Predictions import (
    PredictionExperiment,
    PredictionModel,
)

In [5]:
# One feature table per bi-season (indices 1..12), read from the folder that
# matches FIRST_YEAR and keyed by the bi-season index.
data = {
    season: pd.read_csv(
        f"data/features/california/from{FIRST_YEAR}/HWs_cali_features_biseason_{season}.csv"
    )
    for season in range(1, 13)
}

## DJ-JF-FM (bi-seasons 1–3)

In [6]:
# Covariance for the Gaussian-process model: an RBF term plus a white-noise term.
kernel = RBF(length_scale=1.0) + WhiteKernel(noise_level=1)

# Candidate models, one per line. Keep the order in sync with name_regressors
# (the two lists are passed side by side; presumably paired by position).
regressors = [
    LinearRegression(),
    RandomForestRegressor(random_state=42, n_estimators=5),
    RandomForestRegressor(random_state=42, n_estimators=10),
    SVR(kernel='rbf'),
    XGBRegressor(random_state=42, n_estimators=10, learning_rate=0.1),
    XGBRegressor(random_state=42, n_estimators=15, learning_rate=0.1),
    GaussianProcessRegressor(kernel=kernel, random_state=42, n_restarts_optimizer=10),
]

# Display labels for the models above, in the same order.
name_regressors = [
    "Linear",
    "RF5",
    "RF10",
    "SVR-rbf",
    "XGB10",
    "XGB15",
    "GPR",
]

In [7]:
# Subset of the feature tables for bi-seasons 1-3 (the DJ-JF-FM group).
# NOTE(review): the variable says "summer", but DJ-JF-FM is boreal winter for
# California — confirm the intended season before relying on the name.
data_summer = {season: data[season] for season in (1, 2, 3)}

# Target indices handed to the experiment; they show up as the columns of the
# metric tables.
indices_of_interest = [
    "HWN",
    "HWF",
    "HWD",
    "HWM",
    "HWA",
]

In [8]:
# Build the experiment from the season subset, target indices, models and
# model labels defined in the previous cells.
# NOTE(review): the trailing 5 is an unnamed positional argument — presumably
# the number of cross-validation folds; confirm against PredictionExperiment.
experiment_1 = PredictionExperiment(data_summer, indices_of_interest, regressors, name_regressors, 5)

In [9]:
# Run the experiment; the plot_metrics cells that follow display its results.
experiment_1.execute_experiment()

In [10]:
# R2 on the "prediction" split, tabulated per model, season and index.
# NOTE(review): thresh=0.5 presumably marks scores above 0.5 — confirm in plot_metrics.
experiment_1.plot_metrics("r2", "prediction", thresh=0.5)

Unnamed: 0,Model,Season,HWN,HWF,HWD,HWM,HWA,Average
0,Linear,1,0.45,0.31,0.21,0.13,0.15,0.25
1,RF5,1,0.73,0.24,0.2,0.36,0.15,0.34
2,RF10,1,0.71,0.18,0.29,0.26,0.14,0.32
3,SVR-rbf,1,0.43,0.42,0.29,0.36,0.5,0.4
4,XGB10,1,0.5,0.28,0.1,0.01,0.03,0.18
5,XGB15,1,0.58,0.34,0.15,0.04,0.03,0.23
6,GPR,1,0.61,0.28,0.25,0.25,0.29,0.34
7,Linear,2,-0.16,-0.28,-0.34,-0.65,-0.57,-0.4
8,RF5,2,-0.12,-0.63,-0.48,-1.1,-0.96,-0.66
9,RF10,2,-1.04,-0.58,-0.53,-1.45,-1.35,-0.99


In [11]:
# R2 on the "training" split, for comparison with the prediction scores.
# NOTE(review): thresh=0.9 presumably marks scores above 0.9 — confirm in plot_metrics.
experiment_1.plot_metrics("r2", "training", thresh=0.9)

Unnamed: 0,Model,Season,HWN,HWF,HWD,HWM,HWA,Average
0,Linear,1,0.55,0.53,0.52,0.51,0.51,0.52
1,RF5,1,0.9,0.83,0.76,0.82,0.81,0.82
2,RF10,1,0.92,0.86,0.82,0.87,0.88,0.87
3,SVR-rbf,1,0.65,0.44,0.41,0.51,0.44,0.49
4,XGB10,1,0.78,0.73,0.72,0.77,0.76,0.75
5,XGB15,1,0.89,0.85,0.85,0.88,0.87,0.87
6,GPR,1,0.5,0.41,0.4,0.4,0.41,0.42
7,Linear,2,0.69,0.74,0.72,0.65,0.69,0.7
8,RF5,2,0.85,0.9,0.9,0.86,0.89,0.88
9,RF10,2,0.9,0.93,0.93,0.9,0.92,0.92


In [12]:
# MAPE on the "prediction" split.
# NOTE(review): above=False presumably flips the threshold so that values
# below 0.3 are the ones marked (lower error is better) — confirm in plot_metrics.
experiment_1.plot_metrics("mape", "prediction", thresh=0.3, above=False)

Unnamed: 0,Model,Season,HWN,HWF,HWD,HWM,HWA,Average
0,Linear,1,0.79,1.0,0.84,1.74,1.95,1.27
1,RF5,1,0.89,0.53,0.5,0.67,1.41,0.8
2,RF10,1,0.68,0.45,0.55,0.62,1.18,0.7
3,SVR-rbf,1,0.45,0.47,0.39,0.6,0.48,0.48
4,XGB10,1,0.45,0.69,0.69,1.19,1.11,0.83
5,XGB15,1,0.43,0.7,0.7,1.04,0.93,0.76
6,GPR,1,0.47,0.48,0.42,0.89,0.81,0.61
7,Linear,2,0.3,0.25,0.25,0.52,0.59,0.38
8,RF5,2,0.48,0.34,0.54,0.91,0.85,0.62
9,RF10,2,0.56,0.64,0.59,1.02,1.07,0.78


In [13]:
# "cv_r2" metric (presumably cross-validated R2), shown with the default
# split and threshold arguments of plot_metrics.
experiment_1.plot_metrics("cv_r2")

Unnamed: 0,Model,Season,HWN,HWF,HWD,HWM,HWA,Average
0,Linear,1,-0.43,-0.74,-0.97,0.13,0.29,-0.34
1,RF5,1,0.39,-0.85,0.29,-0.38,0.34,-0.04
2,RF10,1,0.44,-0.93,0.37,-0.29,0.34,-0.01
3,SVR-rbf,1,0.26,-0.49,-0.24,-0.14,0.13,-0.1
4,XGB10,1,0.4,-0.12,0.34,-0.19,0.27,0.14
5,XGB15,1,0.4,-0.5,0.36,-0.32,0.35,0.06
6,GPR,1,0.28,-0.16,0.29,-0.02,0.25,0.13
7,Linear,2,-0.03,0.16,-0.64,-0.04,0.21,-0.07
8,RF5,2,0.22,0.33,-0.08,-0.16,0.12,0.08
9,RF10,2,0.17,0.5,-0.15,-0.43,0.13,0.04


## JJ-JA-AS (bi-seasons 7–9)

In [14]:
# Fresh kernel and estimator instances for this section, configured exactly
# like the ones built for the DJ-JF-FM section.
kernel = RBF(length_scale=1.0) + WhiteKernel(noise_level=1)

# Candidate models, one per line. Keep the order in sync with name_regressors
# (the two lists are passed side by side; presumably paired by position).
regressors = [
    LinearRegression(),
    RandomForestRegressor(random_state=42, n_estimators=5),
    RandomForestRegressor(random_state=42, n_estimators=10),
    SVR(kernel='rbf'),
    XGBRegressor(random_state=42, n_estimators=10, learning_rate=0.1),
    XGBRegressor(random_state=42, n_estimators=15, learning_rate=0.1),
    GaussianProcessRegressor(kernel=kernel, random_state=42, n_restarts_optimizer=10),
]

# Display labels for the models above, in the same order.
name_regressors = [
    "Linear",
    "RF5",
    "RF10",
    "SVR-rbf",
    "XGB10",
    "XGB15",
    "GPR",
]

In [15]:
# Subset of the feature tables for bi-seasons 7-9.
# NOTE(review): this rebinds data_summer, which earlier held bi-seasons 1-3;
# a section-specific name would keep the two subsets from being mixed up when
# cells are run out of order.
data_summer = {season: data[season] for season in (7, 8, 9)}

# Target indices handed to the experiment; they show up as the columns of the
# metric tables.
indices_of_interest = [
    "HWN",
    "HWF",
    "HWD",
    "HWM",
    "HWA",
]

In [16]:
# Build the experiment for the bi-season 7-9 subset.
# NOTE(review): this rebinds experiment_1, discarding the DJ-JF-FM experiment;
# once this cell has run, re-running any earlier plot_metrics cell reports
# this section's results instead. A distinct name would avoid that.
# NOTE(review): the trailing 5 is an unnamed positional argument — presumably
# the number of cross-validation folds; confirm against PredictionExperiment.
experiment_1 = PredictionExperiment(data_summer, indices_of_interest, regressors, name_regressors, 5)

In [17]:
# Run the experiment; the plot_metrics cells that follow display its results.
experiment_1.execute_experiment()

In [18]:
# R2 on the "prediction" split, tabulated per model, season and index.
# NOTE(review): thresh=0.5 presumably marks scores above 0.5 — confirm in plot_metrics.
experiment_1.plot_metrics("r2", "prediction", thresh=0.5)

Unnamed: 0,Model,Season,HWN,HWF,HWD,HWM,HWA,Average
0,Linear,7,0.43,0.32,0.41,0.75,0.72,0.53
1,RF5,7,0.47,0.7,0.73,0.23,0.7,0.57
2,RF10,7,0.43,0.65,0.64,0.1,0.56,0.48
3,SVR-rbf,7,0.61,0.57,0.63,0.3,0.56,0.53
4,XGB10,7,0.35,0.43,0.46,0.14,0.31,0.34
5,XGB15,7,0.44,0.44,0.53,0.02,0.44,0.37
6,GPR,7,0.43,0.41,0.4,0.56,0.59,0.48
7,Linear,8,0.1,0.2,-0.56,0.54,0.58,0.17
8,RF5,8,-0.76,0.07,-0.18,-0.3,-0.14,-0.26
9,RF10,8,-0.44,-0.03,-0.28,-0.39,-0.33,-0.29


In [19]:
# R2 on the "training" split, for comparison with the prediction scores.
# NOTE(review): thresh=0.9 presumably marks scores above 0.9 — confirm in plot_metrics.
experiment_1.plot_metrics("r2", "training", thresh=0.9)

Unnamed: 0,Model,Season,HWN,HWF,HWD,HWM,HWA,Average
0,Linear,7,0.52,0.47,0.43,0.5,0.5,0.48
1,RF5,7,0.87,0.86,0.8,0.82,0.8,0.83
2,RF10,7,0.88,0.89,0.88,0.87,0.87,0.88
3,SVR-rbf,7,0.75,0.61,0.56,0.66,0.57,0.63
4,XGB10,7,0.79,0.79,0.77,0.77,0.76,0.78
5,XGB15,7,0.9,0.89,0.88,0.88,0.88,0.89
6,GPR,7,0.46,0.42,0.39,0.41,0.4,0.42
7,Linear,8,0.61,0.64,0.64,0.53,0.54,0.59
8,RF5,8,0.79,0.83,0.8,0.8,0.78,0.8
9,RF10,8,0.86,0.86,0.87,0.83,0.82,0.85


In [22]:
# MAPE on the "prediction" split.
# NOTE(review): above=False presumably flips the threshold so that values
# below 0.3 are the ones marked (lower error is better) — confirm in plot_metrics.
# NOTE(review): this cell's execution count (22) is higher than that of the
# cell after it (21), so the saved outputs come from an out-of-order run;
# restart the kernel and run all cells before trusting them.
experiment_1.plot_metrics("mape", "prediction", thresh=0.3, above=False)

Unnamed: 0,Model,Season,HWN,HWF,HWD,HWM,HWA,Average
0,Linear,7,1.73,1.82,1.34,1.16,1.08,1.42
1,RF5,7,1.68,1.11,0.88,1.56,0.9,1.23
2,RF10,7,1.85,1.15,0.89,2.07,0.99,1.39
3,SVR-rbf,7,1.06,1.04,0.93,1.45,0.91,1.08
4,XGB10,7,1.77,1.71,1.47,1.73,1.47,1.63
5,XGB15,7,1.67,1.56,1.28,1.87,1.18,1.51
6,GPR,7,1.73,1.82,1.55,1.59,1.29,1.59
7,Linear,8,0.46,0.37,0.36,0.22,0.24,0.33
8,RF5,8,0.49,0.22,0.14,0.31,0.3,0.29
9,RF10,8,0.44,0.21,0.16,0.29,0.32,0.28


In [21]:
# "cv_r2" metric (presumably cross-validated R2), shown with the default
# split and threshold arguments of plot_metrics.
experiment_1.plot_metrics("cv_r2")

Unnamed: 0,Model,Season,HWN,HWF,HWD,HWM,HWA,Average
0,Linear,7,-1.27,0.17,-0.35,0.32,0.39,-0.15
1,RF5,7,-0.55,0.19,0.15,0.39,0.6,0.16
2,RF10,7,-0.54,0.24,0.1,0.45,0.53,0.16
3,SVR-rbf,7,-0.88,0.44,0.02,0.43,0.57,0.12
4,XGB10,7,-0.7,0.12,-0.07,0.33,0.51,0.04
5,XGB15,7,-0.78,0.1,-0.06,0.35,0.53,0.03
6,GPR,7,-1.08,0.24,0.09,0.39,0.55,0.04
7,Linear,8,0.17,0.48,0.62,-0.17,0.12,0.25
8,RF5,8,-0.52,-0.14,0.31,-0.15,-0.08,-0.12
9,RF10,8,-0.23,-0.11,0.22,-0.06,-0.19,-0.07
