In [40]:
import os
import datetime as dt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import netCDF4 as nc
import xarray as xr
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.feature_selection import RFE
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from eofs.xarray import Eof
import glob

# This is a local file, make sure you download it!
import utils

# Make sure all required packages are installed, e.g.:
#   pip install scikit-learn

def get_rmse(truth, pred):
    """Return the root-mean-square error between ``truth`` and ``pred``.

    The difference ``truth - pred`` is squared element-wise, averaged over
    all elements with ``np.mean``, and the square root of that average is
    returned.

    Parameters
    ----------
    truth : array-like supporting ``-`` with ``pred``
        Reference values.
    pred : array-like supporting ``-`` with ``truth``
        Predicted values.

    Returns
    -------
    The square root of the mean squared difference.
    """
    residual = truth - pred
    mean_squared_error = np.mean(residual ** 2)
    return np.sqrt(mean_squared_error)

In [28]:
####
# MUST DEFINE YOUR OWN PATH
# Directory that holds the inputs_*.nc files; the default is "./dataset/".
# Keep the trailing slash: other cells build file names by plain string
# concatenation (data_path + "inputs_....nc").
####
data_path = "./dataset/"
# Pass the same directory to the local utils module.
utils.set_data_path(data_path)

In [36]:
# List every inputs*.nc file in data_path except the SSP245 scenario, which is
# held out as the test set.
# glob.glob returns matches in arbitrary (filesystem) order, so the list is
# sorted to make it identical from run to run.
# NOTE(review): train_files_input is not referenced by the other visible cells;
# confirm whether it is still needed.
test_input_file = data_path + "inputs_ssp245.nc"
train_files_input = sorted(
    path
    for path in glob.glob(data_path + "inputs*.nc")
    if path != test_input_file
)

train_files_input

['./dataset/inputs_1pctCO2.nc',
 './dataset/inputs_abrupt-4xCO2.nc',
 './dataset/inputs_ssp126.nc',
 './dataset/inputs_ssp370-lowNTCF.nc',
 './dataset/inputs_hist-GHG.nc',
 './dataset/inputs_historical.nc',
 './dataset/inputs_hist-aer.nc',
 './dataset/inputs_ssp370.nc',
 './dataset/inputs_ssp585.nc']

In [4]:
# Open the historical input dataset; its "CO2" and "CH4" variables are read
# by a later cell.
X_historical = xr.open_dataset(data_path + "inputs_historical.nc")

In [5]:
# Simulation names handed to utils.create_predictor_data and
# utils.create_predictdand_data to build X_train / Y_train.
# NOTE(review): despite the name, these are the *training* simulations
# (the held-out test scenario is "ssp245"). The order is kept as-is because
# the helpers receive this list directly.
test_sets = ["historical", "ssp585", "ssp126", "ssp370", "hist-aer", "hist-GHG"]

In [6]:
# Run the historical CO2 and CH4 arrays through the normalisation helpers in
# the local utils module.
# NOTE(review): test_hist_co2 / test_hist_ch4 are not referenced by any other
# visible cell; confirm whether they are still needed.
test_hist_co2 = utils.normalize_co2(X_historical["CO2"].data)
test_hist_ch4 = utils.normalize_ch4(X_historical["CH4"].data)

In [7]:
# Build the training predictors from the simulations listed in test_sets.
# The helper also returns train_eof, which is passed to utils.get_test_data
# when the test predictors are built.
X_train, train_eof = utils.create_predictor_data(test_sets)

In [9]:
# Build the training targets for the same simulations used for X_train.
# The result is indexed by variable name ("tas", "pr", "pr90",
# "diurnal_temperature_range") in a later cell.
Y_train = utils.create_predictdand_data(test_sets)

In [10]:
# Display the training targets for a quick visual check.
Y_train

In [11]:
# Collapse each training target to one value per sample by taking the plain
# (unweighted) mean over the "lat" and "lon" dimensions, then pull out the
# underlying array with .data.
y_train_tas, y_train_pr, y_train_pr90, y_train_DTR = (
    Y_train[variable].mean(["lat", "lon"]).data
    for variable in ("tas", "pr", "pr90", "diurnal_temperature_range")
)

In [12]:
# Random forest shared by the fitting cells below: 100 trees, a fixed seed
# (50) so repeated fits are reproducible, and n_jobs=-1 to use all cores.
rf = RandomForestRegressor(n_estimators=100, random_state=50, n_jobs=-1)

In [13]:
# Fit the forest on the spatially averaged "tas" target.
# NOTE: rf is fitted again on another target further down, so the tas
# predictions have to be taken before that cell runs.
rf.fit(X_train,y_train_tas)

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [17]:
# Build the held-out SSP245 test predictors (using train_eof returned when the
# training predictors were built) and the matching test targets.
# NOTE(review): create_predictdand_data is given a list for training but a
# bare string here; presumably the helper accepts both, confirm in utils.
X_test = utils.get_test_data("ssp245" ,train_eof)
Y_test = utils.create_predictdand_data("ssp245")

In [18]:
# Predict "tas" for the test set while rf still holds the tas fit; rf is
# refit on another target in a later cell, so this cell must run first.
y_tas_pred = rf.predict(X_test)
# Show the number of predictions as the cell output.
len(y_tas_pred)

86

In [19]:
# Collapse each test target the same way as the training targets: plain
# (unweighted) mean over "lat" and "lon", then the raw array via .data.
Y_test_tas, Y_test_pr, Y_test_pr90, Y_test_DTR = (
    Y_test[variable].mean(["lat", "lon"]).data
    for variable in ("tas", "pr", "pr90", "diurnal_temperature_range")
)

In [37]:
# Refit the shared forest on the "pr" target. scikit-learn's fit() returns the
# estimator itself, so `pr` is just another name for `rf` (not a separate
# model), and after this cell rf no longer holds the tas fit.
rf.fit(X_train, y_train_pr)
pr = rf
y_pr_pred = pr.predict(X_test)
print(get_rmse(Y_test_pr, y_pr_pred))

0.015105975959860851


In [38]:
# Fit a dedicated forest for the "pr90" target and predict with *that* model.
# The previous version predicted through `pr`, which only gave pr90 results
# because every rf.fit() call returns the same shared object; that silently
# depends on the order in which cells were run.
# A fresh estimator built from rf's hyper-parameters (same random_state) gives
# the same forest without overwriting rf.
pr90 = RandomForestRegressor(**rf.get_params()).fit(X_train, y_train_pr90)
y_pr90_pred = pr90.predict(X_test)
print(get_rmse(Y_test_pr90, y_pr90_pred))

0.0399722318323548


In [25]:
# Fit a dedicated forest for the diurnal-temperature-range target and predict
# with *that* model. The previous version predicted through `pr`, which only
# gave DTR results because every rf.fit() call returns the same shared object;
# the result therefore depended on which cell had been executed last.
# A fresh estimator built from rf's hyper-parameters (same random_state) gives
# the same forest without overwriting rf.
DTR = RandomForestRegressor(**rf.get_params()).fit(X_train, y_train_DTR)
y_DTR_pred = DTR.predict(X_test)
print(get_rmse(Y_test_DTR, y_DTR_pred))

0.019705957995763305

In [39]:
# Report the RMSE of the tas predictions (computed before rf was refit on
# another target) against the spatially averaged SSP245 tas values.
print(get_rmse(Y_test_tas,y_tas_pred))

0.2900129960834046
