<a href="https://colab.research.google.com/github/lebe1/ClimateChangeAI/blob/main/MLP-model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import random
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Step 1 - Modeling

In [5]:
# Read the train and test tables from their semicolon-separated CSV files.
# Paths are relative to the notebook's working directory.
df_train, df_test = (
    pd.read_csv(csv_path, sep=';')
    for csv_path in ('scaled_train.csv', 'scaled_test.csv')
)

In [None]:
# Build one-step-shifted feature/target pairs: the feature row at position i
# is paired with the 'prec' value at position i + 1. Features lose their last
# row and targets lose their first row, so both sides keep the same length.
# The pairing is positional; the pandas index labels of x_* and y_* end up
# offset by one.
# NOTE(review): the fit/predict cells visible in this notebook use df_train and
# df_test directly rather than these variables - confirm which is intended.

# Training split
x_train = df_train.drop(columns=['prec']).iloc[:-1]
y_train = df_train['prec'].iloc[1:]

# Test split
x_test = df_test.drop(columns=['prec']).iloc[:-1]
y_test = df_test['prec'].iloc[1:]

In [6]:
# Configure and fit a multi-layer perceptron regressor in one expression
# (fit returns the estimator itself). Inputs are every column except 'prec';
# the target is 'prec'. verbose=True prints the loss for each iteration.
model = MLPRegressor(
    solver='adam',
    activation='relu',
    max_iter=100,
    random_state=1,
    verbose=True,
).fit(df_train.drop(columns='prec'), df_train['prec'])

# Predictions for the test rows, from the same set of input columns
pred = model.predict(df_test.drop(columns='prec'))

Iteration 1, loss = 8.68180935
Iteration 2, loss = 0.51576890
Iteration 3, loss = 0.49501891
Iteration 4, loss = 0.45386019
Iteration 5, loss = 0.42789489




In [None]:
# Root mean squared error (RMSE) between the test targets and the predictions.
# Computed as sqrt(MSE) instead of mean_squared_error(..., squared=False):
# the `squared` keyword was deprecated in scikit-learn 1.4 and later removed,
# while sqrt(MSE) works on every version. The label now says RMSE, which is
# what the value actually is.
rmse = np.sqrt(mean_squared_error(df_test['prec'], pred))
print("Root mean squared error: %.2f" % rmse)

# Coefficient of determination (R^2): 1 is perfect prediction
print('R^2 score: %.2f' % r2_score(df_test['prec'], pred))

# Plot predictions against the actual values as two overlaid series
plt.figure(figsize=(20, 10))
plt.plot(df_test['prec'], label='Actual')
plt.plot(pred, label='Predicted')

# Only show the first 365 positions
# (one year, assuming one row per day - TODO confirm)
plt.xlim(0, 365)

plt.xlabel('Sample index')
plt.ylabel('prec')
plt.title('MLP predictions vs. actual values (test set)')
plt.legend()
plt.show()

# Step 2 - Feature importance

In [None]:
from sklearn.inspection import permutation_importance

# Test-set inputs (every column except 'prec') and the matching target,
# computed once and reused below.
test_features = df_test.drop(columns='prec')
test_target = df_test['prec']

# Permutation importance of each input column for the fitted model:
# 5 shuffles per column, run on all available cores, fixed seed.
perm_importance = permutation_importance(
    model,
    test_features,
    test_target,
    n_repeats=5,
    n_jobs=-1,
    random_state=42,
)

# Order the columns by ascending mean importance
mean_importance = perm_importance.importances_mean
sorted_idx = mean_importance.argsort()
ranked_columns = test_features.columns[sorted_idx]

# Horizontal bar chart of the ranked importances
plt.figure(figsize=(20, 10))
plt.barh(ranked_columns, mean_importance[sorted_idx])
plt.xlabel("Permutation Importance")

# Label each bar with its column name
plt.yticks(ticks=range(len(test_features.columns)), labels=ranked_columns)
plt.show()