In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from scipy.stats import linregress

# Single seed shared by the data split and the model so the notebook is
# reproducible under "Restart Kernel -> Run All".
SEED = 123

# Seeds Python's `random`, NumPy and TensorFlow in one call; without it the
# weight initialisation and batch shuffling change on every run even though
# the train/val/test split is fixed.
tf.keras.utils.set_random_seed(SEED)

In [None]:
# Disabled alternative: mount Google Drive (only meaningful when running on Colab).
#from google.colab import drive
#drive.mount('/content/drive', force_remount=True)

# Clone the tutorial repository into the current working directory; the
# following cells read their data from the resulting `tutorial_meaveas_nn/` folder.
# NOTE(review): on a re-run git reports that the destination already exists and
# does not clone again.
!git clone https://github.com/marcozanchi97/tutorial_meaveas_nn.git

In [None]:
# Root of the cloned tutorial repository and its data sub-folder, both
# relative to the notebook's working directory.
USER_PATH = 'tutorial_meaveas_nn/'
DATA_PATH = f'{USER_PATH}data/'

In [None]:
# Load the pickled tutorial dataset and display it.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle
# files that come from a trusted source.

data_file = DATA_PATH + 'era5_data_2023_tutorial1.p'
df = pd.read_pickle(data_file)
df

In [None]:
# Split the data into train (60%), validation (15%) and test (25%) sets.

# Step 1: hold out 40% of the rows; the remaining 60% form the training set.
df_train, temp_df = train_test_split(
    df,
    test_size=0.4,
    random_state=SEED,
)

# Step 2: divide the held-out 40% into 3/8 validation and 5/8 test, i.e.
# 15% and 25% of the full dataset respectively.
df_val, df_test = train_test_split(
    temp_df,
    test_size=5/8,
    random_state=SEED,
)

# Sanity check: the three splits together account for every row.
n_split_rows = sum(len(part) for part in (df_train, df_val, df_test))
assert n_split_rows == len(df)

In [None]:
# Standardise (z-score) the selected columns of all three splits using the
# mean and std of the TRAINING split only, so no information from the
# validation/test data leaks into the scaling.
#
# The split frames are modified in place. To keep this cell idempotent, each
# frame is tagged in `DataFrame.attrs` once it has been scaled: re-running the
# cell is then a no-op instead of recomputing the statistics on already
# normalised data (which would overwrite `norm_means` / `norm_stds` with ~0 / ~1
# and break the later conversion back to physical units).
# To normalise again (e.g. after editing the column list), re-run the split
# cell first so that fresh, untagged frames are created.

columns_to_normalize = [
                        'dewpoint_temperature',
                        'temperature',
                        'evaporation',
                        'lai_hv','lai_lv',
                        'soil_temperature',
                        'surface_pressure',
                        'solar_radiation',
                        'cloud_cover',
                        'precipitation',
                        'wind_speed',
                        'wind_direction'
                       ]

# Key used in `DataFrame.attrs` to mark a split as already normalised.
_NORMALIZED_ATTR = 'zscore_normalized'

if not df_train.attrs.get(_NORMALIZED_ATTR, False):
    # Per-column statistics of the training split, kept so that predictions
    # can be mapped back to the original scale later on.
    norm_means = {column: df_train[column].mean() for column in columns_to_normalize}
    norm_stds = {column: df_train[column].std() for column in columns_to_normalize}

    # Apply the same training-set statistics to every split.
    for split in (df_train, df_val, df_test):
        for column in columns_to_normalize:
            split[column] = (split[column] - norm_means[column]) / norm_stds[column]
        split.attrs[_NORMALIZED_ATTR] = True

In [None]:
# Build the (input, output) NumPy arrays for each split.

input_variables = [
    'dewpoint_temperature',
    'evaporation',
    'lai_hv',
    'lai_lv',
    'soil_temperature',
    'surface_pressure',
    'solar_radiation',
    'cloud_cover',
    'precipitation',
    'wind_speed',
    'wind_direction',
]
# Kept as a one-element list so the target arrays are 2-D (one column).
target_variable = ['temperature']


def _to_xy(frame):
    """Return the (inputs, target) arrays extracted from one split frame."""
    return frame[input_variables].to_numpy(), frame[target_variable].to_numpy()


X_train, y_train = _to_xy(df_train)
X_val, y_val = _to_xy(df_val)
X_test, y_test = _to_xy(df_test)

In [None]:
# Show the shapes of the training inputs and targets, one per line.
print(X_train.shape, y_train.shape, sep='\n')

In [None]:
# Build the neural network: a stack of four fully connected hidden layers
# (256 -> 128 -> 64 -> 32 units) followed by a single-unit output layer with
# no activation specified.

hidden_activation = 'leaky_relu'

# Input shape is that of one training example (everything but the batch axis).
input_model = layers.Input(shape=X_train.shape[1:])

fc1 = layers.Dense(units=256, activation=hidden_activation)(input_model)
fc2 = layers.Dense(units=128, activation=hidden_activation)(fc1)
fc3 = layers.Dense(units=64, activation=hidden_activation)(fc2)
fc4 = layers.Dense(units=32, activation=hidden_activation)(fc3)

output = layers.Dense(units=1)(fc4)

model = Model(inputs=input_model, outputs=output)

# Mean squared error is minimised; mean absolute error is tracked as a metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='mse',
    metrics=['mae'],
)

model.summary()

In [None]:
# Train the neural network.
#
# Training runs for at most 100 epochs. Early stopping watches the validation
# loss, gives up after 20 epochs without improvement and restores the weights
# of the best epoch seen.

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
)

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    # `callbacks` is documented as a list of callback instances.
    callbacks=[early_stopping],
)


In [None]:
# Evaluate the trained network on each split (train, validation, test).

for features, targets in ((X_train, y_train), (X_val, y_val)):
    model.evaluate(features, targets)

# Left as the cell's final expression so the test-set result is displayed.
model.evaluate(X_test, y_test)

In [None]:
# Run the trained model on the test inputs; the predictions are on the same
# normalised scale as `y_test`.

preds = model.predict(x=X_test)

In [None]:
# Plot predicted vs. true test-set temperature, with a regression line and
# the squared correlation coefficient of that linear fit.


def _to_original_scale(values, column='temperature'):
    """Flatten `values` and undo the normalisation applied to `column`."""
    return values.reshape(-1) * norm_stds[column] + norm_means[column]


x = _to_original_scale(y_test)
y = _to_original_scale(preds)

# Squared Pearson correlation between true values and predictions.
R2 = linregress(x, y).rvalue ** 2

fig, ax = plt.subplots(1, figsize=(6, 4))
ax.set_aspect(1)
sns.regplot(x=x, y=y, ax=ax, truncate=False, color="#007aff")
ax.set(xlabel='Temperature', ylabel='Prediction')
ax.text(
    0.1, 0.95,
    f"$R^2 = {R2:.2f}$",
    fontsize=14,
    transform=ax.transAxes,
    va="top",
    ha="left",
)
fig.tight_layout()