In [None]:
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import xesmf as xe
import pandas as pd
from flaml import AutoML
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error, mean_absolute_error
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, ZeroPadding2D
import os
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, losses
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras import backend
from sklearn.preprocessing import StandardScaler

In [None]:
def open_monthly_data(ace_path="monthly_activity_ace.csv",
                      ns_path="monthly_activity_num_storms.csv",
                      header_lines=9):
    """Load the monthly ACE and storm-count tables as flat 1-D arrays.

    Both files are read as whitespace-separated text. The first
    ``header_lines`` lines are skipped; on every following line, fields 1..12
    are parsed as floats and appended in file order, so the result is a
    row-major flattening of the table. Field 0 of each line is ignored
    (presumably the year label -- TODO confirm against the data files).

    The number of rows is taken from the ACE file; the storm-count file must
    have at least as many lines.

    Args:
        ace_path: Path of the monthly ACE table.
        ns_path: Path of the monthly number-of-storms table.
        header_lines: Number of leading lines to skip in both files.

    Returns:
        Tuple ``(monthly_ace, monthly_num_storms)`` of 1-D ``np.ndarray``s
        holding 12 values per data line.

    Raises:
        IndexError: If a data line has fewer than 13 fields, or the
            storm-count file is shorter than the ACE file.
        ValueError: If a monthly field cannot be parsed as a float.
    """
    # Context managers guarantee both handles are closed even when a read fails.
    with open(ace_path, "r") as ace_file:
        ace_lines = ace_file.readlines()
    with open(ns_path, "r") as ns_file:
        ns_lines = ns_file.readlines()

    month_columns = range(1, 13)
    monthly_ace = []
    monthly_ns = []
    for row in range(header_lines, len(ace_lines)):
        # Split each line once instead of once per column.
        ace_fields = ace_lines[row].split()
        ns_fields = ns_lines[row].split()
        monthly_ace.extend(float(ace_fields[col]) for col in month_columns)
        monthly_ns.extend(float(ns_fields[col]) for col in month_columns)

    return np.array(monthly_ace), np.array(monthly_ns)

In [None]:
def open_data():
    """Load the factual and counterfactual datasets plus the yearly ACE values.

    Returns:
        Tuple ``(factual, cfl, ace)`` where
        * ``factual`` is the dataset read from ``factual/*.nc``, summed over
          ``expver`` (NaN-ignoring), renamed to ``lat``/``lon``, bilinearly
          regridded onto the cropped ``cfl`` grid and cut to its first 732
          time steps;
        * ``cfl`` is the dataset read from ``eth_cfl/*.nc`` with its ``lon``
          values replaced and then cropped to lat [-60, 60], lon [-80, 20];
        * ``ace`` is a NumPy array of the 'Accumulated Cyclone Energy' column
          of ``yearly_activity.csv`` for rows with ``Year >= 1959``.
    """
    # Read both multi-file datasets.
    factual = xr.open_mfdataset("factual/*.nc")
    cfl = xr.open_mfdataset("eth_cfl/*.nc", join='inner', compat='override')

    # Collapse 'expver' with a NaN-ignoring sum and adopt cfl's axis names.
    factual = (
        factual
        .reduce(np.nansum, dim='expver', keep_attrs=True)
        .rename({"latitude": "lat", "longitude": "lon"})
    )

    # Replace cfl's lon values with a 2.5-step axis over [-180, 180), then crop.
    cfl["lon"] = np.arange(-180, 180, 2.5)
    cfl = cfl.sel(lat=slice(-60, 60), lon=slice(-80, 20))

    # Regrid the factual dataset to the counterfactual's granularity.
    target_grid = xr.Dataset(
        {axis: np.array(cfl[axis]) for axis in ("lat", "lon")}
    )
    factual = xe.Regridder(factual, target_grid, "bilinear")(factual)

    # Keep only the first 732 time steps.
    factual = factual.isel(time=slice(0, 732))

    # Yearly ACE values from 1959 onwards.
    yearly = pd.read_csv("yearly_activity.csv")
    recent = yearly.loc[yearly['Year'] >= 1959]
    ace = np.array(recent['Accumulated Cyclone Energy'])

    return factual, cfl, ace

In [None]:
# Load everything, then turn the factual dataset into a plain NumPy array
# laid out as (time, lat, lon, variable).
factual, cfl, ace = open_data()
factual = (
    factual
    .to_array()
    .transpose("time", "lat", "lon", "variable")
    .to_numpy()
)


In [None]:
# Monthly targets (ACE and storm counts) as flat arrays.
m_factual_ace, m_factual_ns = open_monthly_data()
# `factual` is a NumPy array at this point, so `.shape` matches np.shape().
factual.shape

In [None]:
# Shuffled 60/40 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    factual,
    m_factual_ace,
    test_size=0.4,
    random_state=66,
    shuffle=True,
)

In [None]:
# Positions of the training targets that are exactly zero (as a list of ints).
indices = np.flatnonzero(y_train == 0).tolist()
y_test.shape

In [None]:
# Drop every training sample whose target is exactly 0.
#
# A boolean mask removes whole samples along axis 0 of X_train. The previous
# np.delete(X_train, index) call had no `axis`, so NumPy flattened X_train and
# removed a single scalar, destroying the per-sample layout and leaving
# X_train and y_train with mismatched lengths.
#
# The mask is recomputed from y_train rather than reusing `indices`, so
# re-running this cell is a harmless no-op instead of deleting more rows.
nonzero_target = y_train != 0
X_train = X_train[nonzero_target]
y_train = y_train[nonzero_target]

In [None]:
# Regression CNN: two conv/conv/max-pool stages followed by a dense head that
# ends in a single linear output unit.
model = models.Sequential([
    # Stage 1: 32 filters; the first conv downsamples with stride 2.
    Conv2D(32, kernel_size=7, strides=2, activation='relu', padding='same',
           input_shape=(64, 41, 5)),
    Conv2D(32, kernel_size=7, strides=1, activation='relu', padding='same'),
    MaxPooling2D((2, 2), strides=(2, 2)),
    # Dropout(0.5),  # disabled

    # Stage 2: 64 filters.
    Conv2D(64, kernel_size=7, strides=1, activation='relu', padding='same'),
    Conv2D(64, kernel_size=7, strides=1, activation='relu', padding='same'),
    MaxPooling2D((2, 2), strides=(2, 2)),

    Flatten(),
    # Dropout(0.5),  # disabled

    # Dense head.
    Dense(128, activation='relu', name="FC_1"),
    Dense(64, activation='relu', name="FC_2"),
    # NOTE(review): a softmax hidden layer feeding a linear regression output
    # is unusual -- confirm this bottleneck is intended.
    Dense(5, activation='softmax'),
    Dense(1, activation='linear'),
])
model.summary()

'''
model = models.Sequential()
model.add(Dense(32, activation='relu', input_shape=(64,41,5,)))
model.add(Dense(16, activation='relu'))
model.add(Dense(10, activation='softmax'))
model.add(Dense(1, activation='linear'))
model.summary()
'''

In [None]:
def root_mean_squared_error(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions.

    Computed with Keras backend ops as
    ``sqrt(mean(square(y_pred - y_true)))``; no axis is passed to ``mean``.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values, broadcast-compatible with ``y_true``.

    Returns:
        The RMSE as produced by the backend ops.
    """
    # `backd` was a typo for `backend` and raised NameError on first call.
    return backend.sqrt(backend.mean(backend.square(y_pred - y_true)))

In [None]:
# Train on MSE with plain SGD; also report RMSE and MAE.
model.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    optimizer='sgd',
    metrics=[
        tf.keras.metrics.RootMeanSquaredError(),
        tf.keras.metrics.MeanAbsoluteError(),
    ],
)

In [None]:
# Fit for 200 epochs; the test split is also passed as validation data.
history_notreg = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=200,
    validation_data=(X_test, y_test),
)

In [None]:
# Per-epoch metrics recorded by model.fit. The fit result is bound to
# `history_notreg`; the bare name `history` was never defined and raised
# NameError on a fresh kernel.
history_notreg.history

In [None]:
# Score the trained model on the held-out split.
results = model.evaluate(
    x=X_test,
    y=y_test,
    batch_size=8,
)

In [None]:
# Show the raw evaluate() output; the cells below read index 1 as RMSE and
# index 2 as MAE (index 0 is presumably the loss -- confirm against compile()).
results

In [None]:
# Report the two tracked metrics; sep='' keeps the label and value adjacent.
print('CNN Model 4 - RMSE: ', results[1], sep='')
print('CNN Model 4 - MAE: ', results[2], sep='')