In [None]:
##Import required packages
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
import jax.numpy as jnp
import jaxlib
from jax import jit
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
import matplotlib.pyplot as plt

# **Pre-Processing**

In [None]:
# Load the merged 2020 dataset from the working directory.
merged_data = pd.read_csv(
    'fully_merged_2020.csv',
)

In [None]:
# Discard every row that has at least one missing value (pandas' default dropna behaviour, spelled out).
merged_data = merged_data.dropna(axis=0, how='any')

In [None]:
# Show the column names so the positional feature indices used later can be checked.
print(list(merged_data.columns))

['Unnamed: 0', 'date', 'temp', 'pressure', 'salinity', 'lat', 'lon', 'ice_conc', 'height', 'windspeed', 'precipitation']


**Step 1 - set up arrays**

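*   Split the dataframe into two: one for the features (every column except `date` and `salinity`, i.e. temp, pressure, lat, lon, ice_conc, height, windspeed and precipitation, plus the `Unnamed: 0` column) and one for the label (salinity).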
*   Convert to arrays for JAX



In [None]:
# Label: the salinity column. Features: everything except salinity and date.
# NOTE(review): 'Unnamed: 0' (presumably the row index written out with the CSV —
# confirm) is kept as feature column 0. Later cells address columns by position,
# so removing it here would shift those positions.
labels = merged_data['salinity']
features = merged_data.drop(columns=['salinity', 'date'])

# Plain NumPy arrays for the models; X / Y are short aliases of the same arrays.
X = feature_array = features.to_numpy()
Y = label_array = labels.to_numpy()

**Step 2 - get arrays ready for ML models**

* scale each feature to zero mean and unit variance (i.e. standardise it)
* split into train, validation, and test data (0.5 train, 0.25 validation, 0.25 test)

In [None]:
# Split first, then standardise. The scaler is fitted on the training rows only,
# so the mean/variance of the validation and test rows never influence the
# features the models are trained on (fitting on all of X leaked them).
# The same random_state on the same number of rows yields the same row partition
# as splitting the pre-scaled array did: 0.5 train, 0.25 validation, 0.25 test.
x_train_raw, x_temp_raw, y_train, y_temp = train_test_split(X, Y, test_size=0.5, random_state=42)
x_val_raw, x_test_raw, y_val, y_test = train_test_split(x_temp_raw, y_temp, test_size=0.5, random_state=42)

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train_raw)
x_val = scaler.transform(x_val_raw)
x_test = scaler.transform(x_test_raw)

# Names the original cells defined, kept available on the train-fitted scale.
x_scaled = scaler.transform(X)
x_temp = scaler.transform(x_temp_raw)

**Step 3 - Make copies of the training array, each with one feature column shuffled, for feature importance testing**

In [None]:
##Shuffled windspeed column
# Look the column position up by name instead of hard-coding it, so the cell keeps
# shuffling windspeed even if the set or order of feature columns changes.
windspeed_col = features.columns.get_loc('windspeed')
x_train_windspeed = np.copy(x_train)
# sklearn's shuffle returns a permuted copy; the seed makes the permutation repeatable
shuffled_windspeed = shuffle(x_train_windspeed[:, windspeed_col], random_state=42)
x_train_windspeed[:, windspeed_col] = shuffled_windspeed


In [None]:
##Shuffled precip column
# Look the column position up by name instead of hard-coding it, so the cell keeps
# shuffling precipitation even if the set or order of feature columns changes.
precip_col = features.columns.get_loc('precipitation')
x_train_precip = np.copy(x_train)
# sklearn's shuffle returns a permuted copy; the seed makes the permutation repeatable
shuffled_precip = shuffle(x_train_precip[:, precip_col], random_state=42)
x_train_precip[:, precip_col] = shuffled_precip


In [None]:
##Shuffled ice_conc column
# Look the column position up by name instead of hard-coding it, so the cell keeps
# shuffling ice_conc even if the set or order of feature columns changes.
ice_conc_col = features.columns.get_loc('ice_conc')
x_train_ice_conc = np.copy(x_train)
# sklearn's shuffle returns a permuted copy; the seed makes the permutation repeatable
shuffled_ice_conc = shuffle(x_train_ice_conc[:, ice_conc_col], random_state=42)
x_train_ice_conc[:, ice_conc_col] = shuffled_ice_conc



In [None]:
##Shuffled height column
# Look the column position up by name instead of hard-coding it, so the cell keeps
# shuffling height even if the set or order of feature columns changes.
height_col = features.columns.get_loc('height')
x_train_height = np.copy(x_train)
# sklearn's shuffle returns a permuted copy; the seed makes the permutation repeatable
shuffled_height = shuffle(x_train_height[:, height_col], random_state=42)
x_train_height[:, height_col] = shuffled_height



In [None]:
##Shuffled lat column
# Look the column position up by name instead of hard-coding it, so the cell keeps
# shuffling lat even if the set or order of feature columns changes.
lat_col = features.columns.get_loc('lat')
x_train_lat = np.copy(x_train)
# sklearn's shuffle returns a permuted copy; the seed makes the permutation repeatable
shuffled_lat = shuffle(x_train_lat[:, lat_col], random_state=42)
x_train_lat[:, lat_col] = shuffled_lat



In [None]:
##Shuffled lon column
# Look the column position up by name instead of hard-coding it, so the cell keeps
# shuffling lon even if the set or order of feature columns changes.
lon_col = features.columns.get_loc('lon')
x_train_lon = np.copy(x_train)
# sklearn's shuffle returns a permuted copy; the seed makes the permutation repeatable
shuffled_lon = shuffle(x_train_lon[:, lon_col], random_state=42)
x_train_lon[:, lon_col] = shuffled_lon



In [None]:
##Shuffled pressure column
# Look the column position up by name instead of hard-coding it, so the cell keeps
# shuffling pressure even if the set or order of feature columns changes.
pressure_col = features.columns.get_loc('pressure')
x_train_pressure = np.copy(x_train)
# sklearn's shuffle returns a permuted copy; the seed makes the permutation repeatable
shuffled_pressure = shuffle(x_train_pressure[:, pressure_col], random_state=42)
x_train_pressure[:, pressure_col] = shuffled_pressure


In [None]:
##Shuffled temp column
# Look the column position up by name instead of hard-coding it, so the cell keeps
# shuffling temp even if the set or order of feature columns changes.
temp_col = features.columns.get_loc('temp')
x_train_temp = np.copy(x_train)
# sklearn's shuffle returns a permuted copy; the seed makes the permutation repeatable
shuffled_temp = shuffle(x_train_temp[:, temp_col], random_state=42)
x_train_temp[:, temp_col] = shuffled_temp

# **Model 1 - KNN**

In [None]:
##Define the necessary functions

@jit
def euclidean_distance(x1, x2):
  """Return the Euclidean (L2) distance between two arrays of the same shape.

  The squared element-wise differences are summed over every element and the
  square root of that total is returned as a scalar.
  """
  delta = x1 - x2
  squared_total = jnp.sum(delta ** 2)
  return jnp.sqrt(squared_total)

def rmse(y_true, y_pred):
    """Root-mean-squared error between targets and predictions.

    Args:
        y_true: array of reference values.
        y_pred: array of predicted values, combined element-wise with y_true.

    Returns:
        Scalar JAX array: square root of the mean squared difference.
    """
    residuals = y_true - y_pred
    mean_squared = jnp.mean(residuals ** 2)
    return jnp.sqrt(mean_squared)

def knn_predict(x_train, y_train, x_test, k=3):
  """Predict each test row as the mean label of its k nearest training rows.

  Args:
    x_train: 2-D array of training features, one row per sample.
    y_train: 1-D array of training labels aligned with x_train.
    x_test: 2-D array of query features with the same number of columns.
    k: number of neighbours to average; must satisfy 1 <= k <= len(x_train).

  Returns:
    1-D JAX array with one prediction per row of x_test.

  Raises:
    ValueError: if k is outside 1..len(x_train).

  Note:
    All pairwise distances are computed in one broadcast, which needs memory
    proportional to len(x_test) * len(x_train) * n_features.
  """
  x_train = jnp.asarray(x_train)
  y_train = jnp.asarray(y_train)
  x_test = jnp.asarray(x_test)

  n_train = x_train.shape[0]
  if not 1 <= k <= n_train:
    raise ValueError(f"k must be between 1 and the number of training rows ({n_train}), got {k}")

  # Pairwise Euclidean distances via broadcasting: result[i, j] = ||x_test[i] - x_train[j]||
  diffs = x_test[:, None, :] - x_train[None, :, :]
  distances = jnp.sqrt(jnp.sum(diffs ** 2, axis=-1))

  # Partitioning at k - 1 puts the k smallest distances in the first k slots and,
  # unlike kth=k, stays in range when k equals the number of training rows.
  k_nearest_indices = jnp.argpartition(distances, kth=k - 1, axis=1)[:, :k]

  # Average the neighbours' labels row by row
  return jnp.mean(y_train[k_nearest_indices], axis=1)

def batched_knn_predict(x_train, y_train, x_test, k=3, batch_size=32):
  """k-nearest-neighbour regression evaluated over x_test in batches.

  Each batch of test rows is compared against every training row, and the
  labels of the k closest training rows are averaged. Batching bounds the
  size of the intermediate distance matrix.

  Args:
    x_train: 2-D array of training features, one row per sample.
    y_train: 1-D array of training labels aligned with x_train.
    x_test: 2-D array of query features with the same number of columns.
    k: number of neighbours to average; must satisfy 1 <= k <= len(x_train).
    batch_size: number of test rows handled per iteration; must be >= 1.

  Returns:
    1-D JAX array with one prediction per row of x_test (empty if x_test has no rows).

  Raises:
    ValueError: if k is outside 1..len(x_train) or batch_size < 1.
  """
  x_train = jnp.asarray(x_train)
  y_train = jnp.asarray(y_train)
  x_test = jnp.asarray(x_test)

  n_train = x_train.shape[0]
  if not 1 <= k <= n_train:
    raise ValueError(f"k must be between 1 and the number of training rows ({n_train}), got {k}")
  if batch_size < 1:
    raise ValueError(f"batch_size must be at least 1, got {batch_size}")

  predictions = []
  for start in range(0, x_test.shape[0], batch_size):
    batch_x_test = x_test[start:start + batch_size]
    # Pairwise distances for this batch: result[i, j] = ||batch[i] - x_train[j]||
    diffs = batch_x_test[:, None, :] - x_train[None, :, :]
    batch_distances = jnp.sqrt(jnp.sum(diffs ** 2, axis=-1))
    # kth=k - 1 keeps the k smallest in the first k slots and is valid for k == n_train
    k_nearest_indices = jnp.argpartition(batch_distances, kth=k - 1, axis=1)[:, :k]
    predictions.append(jnp.mean(y_train[k_nearest_indices], axis=1))

  # Return only after every batch has been processed (the return used to sit
  # inside the loop, so only the first batch was ever predicted).
  if not predictions:
    return jnp.zeros((0,), dtype=y_train.dtype)
  return jnp.concatenate(predictions, axis=0)

In [None]:
##Need to do hyperparameter tuning to find the best value for k
#involves running the model on validation data, iterating over a range of ks to find the one with lowest RMSE and then using this k on test data

k_values = range(1,5)
rmse_values = []

for k in k_values:
  y_pred = batched_knn_predict(x_train, y_train, x_val, k, batch_size=32)
  error = rmse(y_val, y_pred)
  rmse_values.append(error)

# jnp.argmin does not accept a Python list, so stack the scalar errors into an
# array first; int() turns the result into a plain index for the range/list lookups.
best_k_index = int(jnp.argmin(jnp.asarray(rmse_values)))
best_k = k_values[best_k_index]
knn_val_rmse = rmse_values[best_k_index]
print(f"Best Value for k: {best_k} with lowest RMSE: {knn_val_rmse}")

In [None]:
# Validation predictions and RMSE for the tuned k. A hard-coded k=3 here replaced
# the tuned validation score with that of a possibly different model.
y_pred_val = batched_knn_predict(x_train, y_train, x_val, k=best_k, batch_size=32)
knn_val_rmse = rmse(y_val, y_pred_val)

In [None]:
##Run model on test data
# Score the held-out test split using the k selected on the validation split.
predictions_test = knn_predict(x_train, y_train, x_test, k=best_k)
knn_test_rmse = rmse(y_test, predictions_test)

#print RMSE
print(
    f"KNN Validation RMSE: {knn_val_rmse}",
    f"KNN Test RMSE: {knn_test_rmse}",
    sep="\n",
)

# **Model 2 - SVR**

In [None]:
##Define functions for kernel trick & computing kernel matrix K
@jit
def matern32_kernel(X1, X2, sigma, rho):
  """Matern 3/2 kernel matrix between the rows of X1 and the rows of X2.

  With d the Euclidean distance between a row of X1 and a row of X2, each
  entry is sigma**2 * (1 + sqrt(3) * d / rho) * exp(-sqrt(3) * d / rho).

  Args:
    X1: 2-D array, one point per row.
    X2: 2-D array with the same number of columns as X1.
    sigma: amplitude; the kernel is scaled by sigma**2.
    rho: length scale dividing the distance.

  Returns:
    Array of shape (len(X1), len(X2)).
  """
  # Broadcast to every (row of X1, row of X2) pair, then reduce over the feature axis
  pairwise_diff = X1[:, None, :] - X2[None, :, :]
  d = jnp.sqrt(jnp.sum(pairwise_diff ** 2, axis=2))
  scaled = jnp.sqrt(3) * d / rho
  matern = (1 + scaled) * jnp.exp(-scaled)
  return sigma ** 2 * matern

#Kernel matrix of a point set with itself: entry (i, j) is k(x_i, x_j)
def K(X, sigma, rho):
  """Return the Matern 3/2 kernel matrix of the rows of X against themselves."""
  gram = matern32_kernel(X, X, sigma, rho)
  return gram

In [None]:
##Set up the optimisation problem (using JAX and convert to CVXOPT matrixes)
# NOTE(review): `matrix` is CVXOPT's dense-matrix type. CVXOPT is not imported in
# the notebook's import cell; `from cvxopt import matrix, solvers` has to be added
# there before this cell and the solver cell can run.
# NOTE(review): the problem built here (y_i * y_j * K_ij, 0 <= alpha <= C,
# y^T alpha = 0) is the soft-margin SVM *classification* dual. An epsilon-SVR dual
# has a different form — confirm this is what is intended for a continuous label.
#Parameters
C = 1.0
sigma = 1.0
rho = 1.0

#Compute the kernel matrix under its own name: rebinding `K` to the result
#shadowed the function K and made this cell fail when run a second time
K_train = K(x_train, sigma, rho)

#Form the matrixes required for dual problem
n = x_train.shape[0]
p = jnp.outer(y_train, y_train) * K_train
#The dual maximises sum(alpha) - 1/2 alpha^T P alpha; in the solver's minimisation
#form (1/2 x^T P x + q^T x) the linear term is therefore -1, not +1
q = -jnp.ones(n)

#Converts from JAX array to numpy array to CVXOPT matrix
#(JAX arrays have no .numpy() method; np.asarray does the conversion, in double precision)
p = matrix(np.asarray(p, dtype=np.float64))
q = matrix(np.asarray(q, dtype=np.float64))

#Introduce inequality constraints: -alpha <= 0 and alpha <= C
G = matrix(np.vstack((-np.eye(n), np.eye(n))))
h = matrix(np.hstack((np.zeros(n), np.ones(n) * C)))

#Introduce equality constraints: y^T alpha = 0 (`y` was never defined; the labels are y_train)
A = matrix(np.asarray(y_train, dtype=np.float64).reshape(1, -1), tc='d')
b = matrix(0.0)

In [None]:
##Solve quadratic programming problem -> \min_{x} \frac{1}{2} x^T P x + q^T x
# The quadratic term is bound to lowercase `p` by the set-up cell; `P` was never defined.
# NOTE(review): `solvers` is CVXOPT's solver module and is not imported in the
# notebook's import cell — `from cvxopt import solvers` must be added there.
solution = solvers.qp(p, q, G, h, A, b)
# solution['x'] holds the optimal variables; flatten to a 1-D NumPy vector of alphas
alphas = np.array(solution['x']).flatten()


In [None]:
##Build Model
#convert to jax arrays
x_train_jax, y_train_jax, alphas_jax = (
    jnp.array(arr) for arr in (x_train, y_train, alphas)
)

#Identify support vectors (i.e. training rows whose alpha exceeds a small threshold)
threshold = 1e-5
is_support = alphas_jax > threshold
support_vector_indices = jnp.nonzero(is_support)[0]

# Restrict features, labels and multipliers to the support vectors
x_sv, y_sv, alphas_sv = (
    arr[support_vector_indices] for arr in (x_train_jax, y_train_jax, alphas_jax)
)

#Compute bias term
def compute_bias(x_sv, y_sv, alphas_sv, kernel_func, sigma, rho):
    """Bias term: the mean gap between each support vector's label and its kernel expansion.

    Args:
        x_sv: support-vector features, one row per support vector.
        y_sv: labels of the support vectors.
        alphas_sv: dual coefficients of the support vectors.
        kernel_func: callable (X1, X2, sigma, rho) returning the kernel matrix.
        sigma, rho: kernel parameters passed through to kernel_func.

    Returns:
        Scalar: mean over i of y_sv[i] - sum_j alphas_sv[j] * y_sv[j] * K[i, j].
    """
    gram = kernel_func(x_sv, x_sv, sigma, rho)
    # The 1-D coefficient vector broadcasts along the columns of the kernel matrix
    weighted = alphas_sv * y_sv * gram
    expansion = jnp.sum(weighted, axis=1)
    residuals = y_sv - expansion
    return jnp.mean(residuals)

# Bias from the support vectors selected earlier in this cell (`X_sv` was never defined; the array is `x_sv`).
# NOTE(review): this rebinds `b`, the name the QP set-up cell uses for the
# equality-constraint right-hand side; re-running that cell resets it.
b = compute_bias(x_sv, y_sv, alphas_sv, matern32_kernel, sigma, rho)

#Define prediction function
def predict(x_new, x_sv, y_sv, alphas_sv, b, kernel_func, sigma, rho):
    """Evaluate the kernel expansion at new points.

    Args:
        x_new: points to predict, one row per point.
        x_sv: support-vector features.
        y_sv: labels of the support vectors.
        alphas_sv: dual coefficients of the support vectors.
        b: bias added to every prediction.
        kernel_func: callable (X1, X2, sigma, rho) returning the kernel matrix.
        sigma, rho: kernel parameters passed through to kernel_func.

    Returns:
        Raw decision values, one per row of x_new (no sign is taken).
    """
    dual_weights = alphas_sv * y_sv
    # Rows: new points, columns: support vectors
    kernel_rows = kernel_func(x_new, x_sv, sigma, rho)
    return jnp.dot(dual_weights, kernel_rows.T) + b

In [None]:
##Make predictions for validation & test data
# Same support vectors, bias and kernel parameters for both splits
val_predictions, test_predictions = (
    predict(split, x_sv, y_sv, alphas_sv, b, matern32_kernel, sigma, rho)
    for split in (x_val, x_test)
)
svr_val_rmse = rmse(y_val, val_predictions)
svr_test_rmse = rmse(y_test, test_predictions)

print(
    f"SVR Validation RMSE: {svr_val_rmse}",
    f"SVR Test RMSE: {svr_test_rmse}",
    sep="\n",
)

In [None]:
# scikit-learn support-vector regressor with a linear kernel; remaining settings are library defaults.
svr = SVR(
    kernel='linear',
)

In [None]:
sample_size = int(len(x_train) * 0.5)  # Adjust the 0.5 as necessary for the fraction you want

# Generate random indices from a seeded generator so the subsample (and the
# search fitted on it) is the same on every run
rng = np.random.default_rng(42)
indices = rng.choice(len(x_train), size=sample_size, replace=False)

# Subset the data
x_train_sub = x_train[indices]
y_train_sub = y_train[indices]

In [None]:
# The estimator expects a 1-D target vector
y_train_sub = y_train_sub.ravel()

# Candidate values for the regularisation strength C and the epsilon-tube width
param_distributions = {
    'C': [0.1, 1, 10],
    'epsilon': [0.01, 0.1, 1],
}

# 5-fold cross-validated search scored by negative MSE, using every available core
random_search = RandomizedSearchCV(
    estimator=svr,
    param_distributions=param_distributions,
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=1,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(x_train_sub, y_train_sub)

best_params = random_search.best_params_
print("Best parameters:", best_params)

Fitting 5 folds for each of 9 candidates, totalling 45 fits


# **Model 3 - RF**

In [None]:
##Set up RFRegressor model from SKLearn
# Default hyperparameters with a fixed seed so repeated fits build the same forest
rf_regressor = RandomForestRegressor(
    random_state=42,
)

In [None]:
##Hyperparameter tuning using GridSearch to find the best no. of trees in the forest, max depth of tree, and min no of samples required to split internal node

#Parameter grid of the hyperparameters to be tuned
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5, 10],
)

#Set up the grid search: 5-fold cross-validation, scored by negative MSE, on all cores
grid_search = GridSearchCV(
    estimator=rf_regressor,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=1,
    n_jobs=-1,
)

#Run the grid search
grid_search.fit(x_train, y_train)

#Pull the winning value of each hyperparameter out of the search result
best_n_estimators, best_max_depth, best_min_samples_split = (
    grid_search.best_params_[param_name]
    for param_name in ('n_estimators', 'max_depth', 'min_samples_split')
)

print(
    f"Best No. of Trees: {best_n_estimators}",
    f"Max depth of tree: {best_max_depth}",
    f"Min no of samples required to split internal node: {best_min_samples_split}",
    sep="\n",
)

Fitting 5 folds for each of 27 candidates, totalling 135 fits


  pid = os.fork()


Best No. of Trees: 100
Max depth of tree: 10
Min no of samples required to split internal node: 5


In [None]:
##Run model on validation data and test data
#Build the regressor with the parameters found in tuning and fit it to the training data
rf_regressor = RandomForestRegressor(
    n_estimators=best_n_estimators,
    max_depth=best_max_depth,
    min_samples_split=best_min_samples_split,
    random_state=42,
    verbose=1,
).fit(x_train, y_train)

#Predict on validation data first, then on test data
y_val_pred, y_test_pred = (
    rf_regressor.predict(split) for split in (x_val, x_test)
)

#Validation & test RMSE (NOTE: uses rmse function defined in KNN section)
rf_val_rmse, rf_test_rmse = (
    rmse(y_true, y_hat)
    for y_true, y_hat in ((y_val, y_val_pred), (y_test, y_test_pred))
)

print(
    f"Random Forest Validation RMSE: {rf_val_rmse}",
    f"Random Forest Test RMSE: {rf_test_rmse}",
    sep="\n",
)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   29.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s


Random Forest Validation RMSE: 2.9265762350405566e-05
Random Forest Test RMSE: 9.943395298250834e-07


**Feature Testing**

In [None]:
##Test temp feature
#Refit the tuned forest on the training array with the shuffled temp column;
#validation and test inputs are left unshuffled
rf_regressor = RandomForestRegressor(
    n_estimators=best_n_estimators,
    max_depth=best_max_depth,
    min_samples_split=best_min_samples_split,
    random_state=42,
    verbose=1,
).fit(x_train_temp, y_train)

#Predict on validation data first, then on test data
y_val_pred_temp, y_test_pred_temp = (
    rf_regressor.predict(split) for split in (x_val, x_test)
)

#Validation & test RMSE (NOTE: uses rmse function defined in KNN section)
rf_val_rmse_temp, rf_test_rmse_temp = (
    rmse(y_true, y_hat)
    for y_true, y_hat in ((y_val, y_val_pred_temp), (y_test, y_test_pred_temp))
)

print(
    f"Random Forest with Randomised Temperature Validation RMSE: {rf_val_rmse_temp}",
    f"Random Forest with Randomised Temperature Test RMSE: {rf_test_rmse_temp}",
    sep="\n",
)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   37.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.4s


Random Forest with Randomised Temperature Validation RMSE: 3.706616917042993e-05
Random Forest with Randomised Temperature Test RMSE: 2.2426850136980647e-06


In [None]:
##Test pressure feature
#Refit the tuned forest on the training array with the shuffled pressure column;
#validation and test inputs are left unshuffled
rf_regressor = RandomForestRegressor(
    n_estimators=best_n_estimators,
    max_depth=best_max_depth,
    min_samples_split=best_min_samples_split,
    random_state=42,
    verbose=1,
).fit(x_train_pressure, y_train)

#Predict on validation data first, then on test data
y_val_pred_pressure, y_test_pred_pressure = (
    rf_regressor.predict(split) for split in (x_val, x_test)
)

#Validation & test RMSE (NOTE: uses rmse function defined in KNN section)
rf_val_rmse_pressure, rf_test_rmse_pressure = (
    rmse(y_true, y_hat)
    for y_true, y_hat in ((y_val, y_val_pred_pressure), (y_test, y_test_pred_pressure))
)

print(
    f"Random Forest with Randomised Pressure Validation RMSE: {rf_val_rmse_pressure}",
    f"Random Forest with Randomised Pressure Test RMSE: {rf_test_rmse_pressure}",
    sep="\n",
)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   38.2s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s


Random Forest with Randomised Pressure Validation RMSE: 4.504089883994311e-05
Random Forest with Randomised Pressure Test RMSE: 2.4337655304407235e-06


In [None]:
##Test lat feature
#Refit the tuned forest on the training array with the shuffled lat column;
#validation and test inputs are left unshuffled
rf_regressor = RandomForestRegressor(
    n_estimators=best_n_estimators,
    max_depth=best_max_depth,
    min_samples_split=best_min_samples_split,
    random_state=42,
    verbose=1,
).fit(x_train_lat, y_train)

#Predict on validation data first, then on test data
y_val_pred_lat, y_test_pred_lat = (
    rf_regressor.predict(split) for split in (x_val, x_test)
)

#Validation & test RMSE (NOTE: uses rmse function defined in KNN section)
rf_val_rmse_lat, rf_test_rmse_lat = (
    rmse(y_true, y_hat)
    for y_true, y_hat in ((y_val, y_val_pred_lat), (y_test, y_test_pred_lat))
)

print(
    f"Random Forest with Randomised Lat Validation RMSE: {rf_val_rmse_lat}",
    f"Random Forest with Randomised Lat Test RMSE: {rf_test_rmse_lat}",
    sep="\n",
)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   33.6s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s


Random Forest with Randomised Lat Validation RMSE: 2.5297778847743757e-05
Random Forest with Randomised Lat Test RMSE: 1.286071096728847e-06


In [None]:
##Test lon feature
#Refit the tuned forest on the training array with the shuffled lon column;
#validation and test inputs are left unshuffled
rf_regressor = RandomForestRegressor(
    n_estimators=best_n_estimators,
    max_depth=best_max_depth,
    min_samples_split=best_min_samples_split,
    random_state=42,
    verbose=1,
).fit(x_train_lon, y_train)

#Predict on validation data first, then on test data
y_val_pred_lon, y_test_pred_lon = (
    rf_regressor.predict(split) for split in (x_val, x_test)
)

#Validation & test RMSE (NOTE: uses rmse function defined in KNN section)
rf_val_rmse_lon, rf_test_rmse_lon = (
    rmse(y_true, y_hat)
    for y_true, y_hat in ((y_val, y_val_pred_lon), (y_test, y_test_pred_lon))
)

print(
    f"Random Forest with Randomised Lon Validation RMSE: {rf_val_rmse_lon}",
    f"Random Forest with Randomised Lon Test RMSE: {rf_test_rmse_lon}",
    sep="\n",
)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   32.2s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s


Random Forest with Randomised Lon Validation RMSE: 2.896838850574568e-05
Random Forest with Randomised Lon Test RMSE: 1.872725647444895e-06


In [None]:
##Test height feature
#Refit the tuned forest on the training array with the shuffled height column;
#validation and test inputs are left unshuffled
rf_regressor = RandomForestRegressor(
    n_estimators=best_n_estimators,
    max_depth=best_max_depth,
    min_samples_split=best_min_samples_split,
    random_state=42,
    verbose=1,
).fit(x_train_height, y_train)

#Predict on validation data first, then on test data
y_val_pred_height, y_test_pred_height = (
    rf_regressor.predict(split) for split in (x_val, x_test)
)

#Validation & test RMSE (NOTE: uses rmse function defined in KNN section)
rf_val_rmse_height, rf_test_rmse_height = (
    rmse(y_true, y_hat)
    for y_true, y_hat in ((y_val, y_val_pred_height), (y_test, y_test_pred_height))
)

print(
    f"Random Forest with Randomised Height Validation RMSE: {rf_val_rmse_height}",
    f"Random Forest with Randomised Height Test RMSE: {rf_test_rmse_height}",
    sep="\n",
)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   29.2s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s


Random Forest with Randomised Height Validation RMSE: 2.9265762350405566e-05
Random Forest with Randomised Height Test RMSE: 9.943395298250834e-07


In [None]:
##Test ice_conc feature
#Refit the tuned forest on the training array with the shuffled ice_conc column;
#validation and test inputs are left unshuffled
rf_regressor = RandomForestRegressor(
    n_estimators=best_n_estimators,
    max_depth=best_max_depth,
    min_samples_split=best_min_samples_split,
    random_state=42,
    verbose=1,
).fit(x_train_ice_conc, y_train)

#Predict on validation data first, then on test data
y_val_pred_ice_conc, y_test_pred_ice_conc = (
    rf_regressor.predict(split) for split in (x_val, x_test)
)

#Validation & test RMSE (NOTE: uses rmse function defined in KNN section)
rf_val_rmse_ice_conc, rf_test_rmse_ice_conc = (
    rmse(y_true, y_hat)
    for y_true, y_hat in ((y_val, y_val_pred_ice_conc), (y_test, y_test_pred_ice_conc))
)

print(
    f"Random Forest with Randomised Ice Conc Validation RMSE: {rf_val_rmse_ice_conc}",
    f"Random Forest with Randomised Ice Conc Test RMSE: {rf_test_rmse_ice_conc}",
    sep="\n",
)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   44.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s


Random Forest with Randomised Ice Conc Validation RMSE: 3.099209789070301e-05
Random Forest with Randomised Ice Conc Test RMSE: 7.518740972045634e-07


In [None]:
##Test windspeed feature
#Refit the tuned forest on the training array with the shuffled windspeed column;
#validation and test inputs are left unshuffled
rf_regressor = RandomForestRegressor(
    n_estimators=best_n_estimators,
    max_depth=best_max_depth,
    min_samples_split=best_min_samples_split,
    random_state=42,
    verbose=1,
).fit(x_train_windspeed, y_train)

#Predict on validation data first, then on test data
y_val_pred_windspeed, y_test_pred_windspeed = (
    rf_regressor.predict(split) for split in (x_val, x_test)
)

#Validation & test RMSE (NOTE: uses rmse function defined in KNN section)
rf_val_rmse_windspeed, rf_test_rmse_windspeed = (
    rmse(y_true, y_hat)
    for y_true, y_hat in ((y_val, y_val_pred_windspeed), (y_test, y_test_pred_windspeed))
)

print(
    f"Random Forest with Randomised Windspeed Validation RMSE: {rf_val_rmse_windspeed}",
    f"Random Forest with Randomised Windspeed Test RMSE: {rf_test_rmse_windspeed}",
    sep="\n",
)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   34.2s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s


Random Forest with Randomised Windspeed Validation RMSE: 2.921557825175114e-05
Random Forest with Randomised Windspeed Test RMSE: 1.0460299790793215e-06


In [None]:
##Test precip feature
#Refit the tuned forest on the training array with the shuffled precip column;
#validation and test inputs are left unshuffled
rf_regressor = RandomForestRegressor(
    n_estimators=best_n_estimators,
    max_depth=best_max_depth,
    min_samples_split=best_min_samples_split,
    random_state=42,
    verbose=1,
).fit(x_train_precip, y_train)

#Predict on validation data first, then on test data
y_val_pred_precip, y_test_pred_precip = (
    rf_regressor.predict(split) for split in (x_val, x_test)
)

#Validation & test RMSE (NOTE: uses rmse function defined in KNN section)
rf_val_rmse_precip, rf_test_rmse_precip = (
    rmse(y_true, y_hat)
    for y_true, y_hat in ((y_val, y_val_pred_precip), (y_test, y_test_pred_precip))
)

print(
    f"Random Forest with Randomised Precip Validation RMSE: {rf_val_rmse_precip}",
    f"Random Forest with Randomised Precip Test RMSE: {rf_test_rmse_precip}",
    sep="\n",
)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   33.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s


Random Forest with Randomised Precip Validation RMSE: 2.5808571081142873e-05
Random Forest with Randomised Precip Test RMSE: 1.4759299347133492e-06


In [None]:
##RMSE ratings
# Each value is a ratio: test RMSE with one feature shuffled divided by the
# baseline test RMSE. Values above 1 mean shuffling that feature made the fit worse.
(
    rf_temp_rmse,
    rf_pressure_rmse,
    rf_lat_rmse,
    rf_lon_rmse,
    rf_height_rmse,
    rf_ice_conc_rmse,
    rf_windspeed_rmse,
    rf_precip_rmse,
) = (
    shuffled_rmse / rf_test_rmse
    for shuffled_rmse in (
        rf_test_rmse_temp,
        rf_test_rmse_pressure,
        rf_test_rmse_lat,
        rf_test_rmse_lon,
        rf_test_rmse_height,
        rf_test_rmse_ice_conc,
        rf_test_rmse_windspeed,
        rf_test_rmse_precip,
    )
)

In [None]:
# One ratio per line, in the order temp, pressure, lat, lon, height, ice_conc, windspeed, precip
print(
    rf_temp_rmse,
    rf_pressure_rmse,
    rf_lat_rmse,
    rf_lon_rmse,
    rf_height_rmse,
    rf_ice_conc_rmse,
    rf_windspeed_rmse,
    rf_precip_rmse,
    sep="\n",
)

2.255452
2.4476202
1.2933923
1.8833865
1.0
0.7561543
1.0519847
1.484332


# **Model 4 - LSTM**

https://www.kaggle.com/code/navjindervirdee/lstm-neural-network-from-scratch
https://medium.com/@CallMeTwitch/building-a-neural-network-zoo-from-scratch-the-long-short-term-memory-network-1cec5cf31b7
https://pub.towardsai.net/building-a-lstm-from-scratch-in-python-1dedd89de8fe


In [None]:
# Insert a length-1 axis between samples and features so the arrays are 3-D,
# matching the (1, n_features) input_shape given to the LSTM layer below
x_train_reshaped = np.expand_dims(x_train, axis=1)
x_val_reshaped = np.expand_dims(x_val, axis=1)
x_test_reshaped = np.expand_dims(x_test, axis=1)

# One 50-unit LSTM layer followed by a single-output dense layer
model = Sequential([
    LSTM(50, activation='tanh', input_shape=(1, x_train_reshaped.shape[2])),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train_reshaped, y_train, epochs=100, batch_size=32, verbose=1)

# Predict with the model: validation first, then test
y_val_pred, y_test_pred = (
    model.predict(split) for split in (x_val_reshaped, x_test_reshaped)
)

lstm_val_rmse, lstm_test_rmse = (
    np.sqrt(mean_squared_error(y_true, y_hat))
    for y_true, y_hat in ((y_val, y_val_pred), (y_test, y_test_pred))
)

print(
    f"LSTM Validation RMSE: {lstm_val_rmse}",
    f"LSTM Test RMSE: {lstm_test_rmse}",
    sep="\n",
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

**Feature Testing**

In [None]:
##Test temp feature
# Train on the array with the shuffled temp column; evaluate on unshuffled splits.
# A length-1 axis is inserted so the arrays match the (1, n_features) input_shape.
x_train_temp_reshaped = np.expand_dims(x_train_temp, axis=1)
x_val_reshaped = np.expand_dims(x_val, axis=1)
x_test_reshaped = np.expand_dims(x_test, axis=1)

model = Sequential([
    LSTM(50, activation='tanh', input_shape=(1, x_train_temp_reshaped.shape[2])),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train_temp_reshaped, y_train, epochs=100, batch_size=32, verbose=1)

# Predict with the model: validation first, then test
y_val_temp_pred, y_test_temp_pred = (
    model.predict(split) for split in (x_val_reshaped, x_test_reshaped)
)

lstm_val_rmse_temp, lstm_test_rmse_temp = (
    np.sqrt(mean_squared_error(y_true, y_hat))
    for y_true, y_hat in ((y_val, y_val_temp_pred), (y_test, y_test_temp_pred))
)

print(
    f"LSTM Validation on Randomised Temp RMSE: {lstm_val_rmse_temp}",
    f"LSTM Test on Randomised Temp RMSE: {lstm_test_rmse_temp}",
    sep="\n",
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
##Test pressure feature
# Train on the array with the shuffled pressure column; evaluate on unshuffled splits.
# A length-1 axis is inserted so the arrays match the (1, n_features) input_shape.
x_train_pressure_reshaped = np.expand_dims(x_train_pressure, axis=1)
x_val_reshaped = np.expand_dims(x_val, axis=1)
x_test_reshaped = np.expand_dims(x_test, axis=1)

model = Sequential([
    LSTM(50, activation='tanh', input_shape=(1, x_train_pressure_reshaped.shape[2])),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train_pressure_reshaped, y_train, epochs=100, batch_size=32, verbose=1)

# Predict with the model: validation first, then test
y_val_pressure_pred, y_test_pressure_pred = (
    model.predict(split) for split in (x_val_reshaped, x_test_reshaped)
)

lstm_val_rmse_pressure, lstm_test_rmse_pressure = (
    np.sqrt(mean_squared_error(y_true, y_hat))
    for y_true, y_hat in ((y_val, y_val_pressure_pred), (y_test, y_test_pressure_pred))
)

print(
    f"LSTM Validation on Randomised Pressure RMSE: {lstm_val_rmse_pressure}",
    f"LSTM Test on Randomised Pressure RMSE: {lstm_test_rmse_pressure}",
    sep="\n",
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
##Test lat feature
# Train on the array with the shuffled lat column; evaluate on unshuffled splits.
# A length-1 axis is inserted so the arrays match the (1, n_features) input_shape.
x_train_lat_reshaped = np.expand_dims(x_train_lat, axis=1)
x_val_reshaped = np.expand_dims(x_val, axis=1)
x_test_reshaped = np.expand_dims(x_test, axis=1)

model = Sequential([
    LSTM(50, activation='tanh', input_shape=(1, x_train_lat_reshaped.shape[2])),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train_lat_reshaped, y_train, epochs=100, batch_size=32, verbose=1)

# Predict with the model: validation first, then test
y_val_lat_pred, y_test_lat_pred = (
    model.predict(split) for split in (x_val_reshaped, x_test_reshaped)
)

lstm_val_rmse_lat, lstm_test_rmse_lat = (
    np.sqrt(mean_squared_error(y_true, y_hat))
    for y_true, y_hat in ((y_val, y_val_lat_pred), (y_test, y_test_lat_pred))
)

print(
    f"LSTM Validation on Randomised Lat RMSE: {lstm_val_rmse_lat}",
    f"LSTM Test on Randomised Lat RMSE: {lstm_test_rmse_lat}",
    sep="\n",
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
##Test lon feature
# Train on the array with the shuffled lon column; evaluate on unshuffled splits.
# A length-1 axis is inserted so the arrays match the (1, n_features) input_shape.
x_train_lon_reshaped = np.expand_dims(x_train_lon, axis=1)
x_val_reshaped = np.expand_dims(x_val, axis=1)
x_test_reshaped = np.expand_dims(x_test, axis=1)

model = Sequential([
    LSTM(50, activation='tanh', input_shape=(1, x_train_lon_reshaped.shape[2])),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train_lon_reshaped, y_train, epochs=100, batch_size=32, verbose=1)

# Predict with the model: validation first, then test
y_val_lon_pred, y_test_lon_pred = (
    model.predict(split) for split in (x_val_reshaped, x_test_reshaped)
)

lstm_val_rmse_lon, lstm_test_rmse_lon = (
    np.sqrt(mean_squared_error(y_true, y_hat))
    for y_true, y_hat in ((y_val, y_val_lon_pred), (y_test, y_test_lon_pred))
)

print(
    f"LSTM Validation on Randomised Lon RMSE: {lstm_val_rmse_lon}",
    f"LSTM Test on Randomised Lon RMSE: {lstm_test_rmse_lon}",
    sep="\n",
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
##Test height feature
# Fit a new LSTM on x_train_height (presumably the training set with the height
# column randomised -- built earlier in the notebook) and score it on the
# validation/test splits.
# A length-1 timestep axis is inserted so the arrays are (samples, 1, features).
x_train_height_reshaped = x_train_height[:, np.newaxis, :]
x_val_reshaped = x_val[:, np.newaxis, :]
x_test_reshaped = x_test[:, np.newaxis, :]

model = Sequential([
    LSTM(50, activation='tanh', input_shape=(1, x_train_height_reshaped.shape[2])),
    Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train_height_reshaped, y_train, batch_size=32, epochs=100, verbose=1)

# Predict with the model (validation first, then test)
y_val_height_pred, y_test_height_pred = model.predict(x_val_reshaped), model.predict(x_test_reshaped)

lstm_val_rmse_height = np.sqrt(mean_squared_error(y_true=y_val, y_pred=y_val_height_pred))
lstm_test_rmse_height = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_test_height_pred))

print(f"LSTM Validation on Randomised Height RMSE: {lstm_val_rmse_height}")
print(f"LSTM Test on Randomised Height RMSE: {lstm_test_rmse_height}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
##Test ice_conc feature
# Fit a new LSTM on x_train_ice_conc (presumably the training set with the
# ice_conc column randomised -- built earlier in the notebook) and score it on
# the validation/test splits.
# A length-1 timestep axis is inserted so the arrays are (samples, 1, features).
x_train_ice_conc_reshaped = x_train_ice_conc[:, np.newaxis, :]
x_val_reshaped = x_val[:, np.newaxis, :]
x_test_reshaped = x_test[:, np.newaxis, :]

model = Sequential([
    LSTM(50, activation='tanh', input_shape=(1, x_train_ice_conc_reshaped.shape[2])),
    Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train_ice_conc_reshaped, y_train, batch_size=32, epochs=100, verbose=1)

# Predict with the model (validation first, then test)
y_val_ice_conc_pred, y_test_ice_conc_pred = model.predict(x_val_reshaped), model.predict(x_test_reshaped)

lstm_val_rmse_ice_conc = np.sqrt(mean_squared_error(y_true=y_val, y_pred=y_val_ice_conc_pred))
lstm_test_rmse_ice_conc = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_test_ice_conc_pred))

print(f"LSTM Validation on Randomised Ice Conc RMSE: {lstm_val_rmse_ice_conc}")
print(f"LSTM Test on Randomised Ice Conc RMSE: {lstm_test_rmse_ice_conc}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
##Test windspeed feature
# Fit a new LSTM on x_train_windspeed (presumably the training set with the
# windspeed column randomised -- built earlier in the notebook) and score it on
# the validation/test splits.
# A length-1 timestep axis is inserted so the arrays are (samples, 1, features).
x_train_windspeed_reshaped = x_train_windspeed[:, np.newaxis, :]
x_val_reshaped = x_val[:, np.newaxis, :]
x_test_reshaped = x_test[:, np.newaxis, :]

model = Sequential([
    LSTM(50, activation='tanh', input_shape=(1, x_train_windspeed_reshaped.shape[2])),
    Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train_windspeed_reshaped, y_train, batch_size=32, epochs=100, verbose=1)

# Predict with the model (validation first, then test)
y_val_windspeed_pred, y_test_windspeed_pred = model.predict(x_val_reshaped), model.predict(x_test_reshaped)

lstm_val_rmse_windspeed = np.sqrt(mean_squared_error(y_true=y_val, y_pred=y_val_windspeed_pred))
lstm_test_rmse_windspeed = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_test_windspeed_pred))

print(f"LSTM Validation on Randomised Windspeed RMSE: {lstm_val_rmse_windspeed}")
print(f"LSTM Test on Randomised Windspeed RMSE: {lstm_test_rmse_windspeed}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
##Test precip feature
# Fit a new LSTM on x_train_precip (presumably the training set with the
# precipitation column randomised -- built earlier in the notebook) and score
# it on the validation/test splits.
# A length-1 timestep axis is inserted so the arrays are (samples, 1, features).
x_train_precip_reshaped = x_train_precip[:, np.newaxis, :]
x_val_reshaped = x_val[:, np.newaxis, :]
x_test_reshaped = x_test[:, np.newaxis, :]

model = Sequential([
    LSTM(50, activation='tanh', input_shape=(1, x_train_precip_reshaped.shape[2])),
    Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train_precip_reshaped, y_train, batch_size=32, epochs=100, verbose=1)

# Predict with the model (validation first, then test)
y_val_precip_pred, y_test_precip_pred = model.predict(x_val_reshaped), model.predict(x_test_reshaped)

lstm_val_rmse_precip = np.sqrt(mean_squared_error(y_true=y_val, y_pred=y_val_precip_pred))
lstm_test_rmse_precip = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_test_precip_pred))

print(f"LSTM Validation on Randomised Precip RMSE: {lstm_val_rmse_precip}")
print(f"LSTM Test on Randomised Precip RMSE: {lstm_test_rmse_precip}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Hard-coded LSTM test RMSE values consumed by the "RMSE ratings" cell below:
# the baseline first, then one value per randomised feature.
# NOTE(review): these literals overwrite whatever the LSTM cells above computed
# in this kernel session (e.g. lstm_test_rmse_lon). Presumably they were copied
# from an earlier run so the ratings can be produced without retraining --
# confirm, and refresh them whenever the LSTM experiments are re-run.
lstm_test_rmse = 0.0023513991326912353
lstm_test_rmse_temp = 0.0126046431332642
lstm_test_rmse_pressure = 0.005053670048732205
lstm_test_rmse_lat = 0.0034471933991493315
lstm_test_rmse_lon = 0.007441779995265137
lstm_test_rmse_height = 0.0030425407220521002
lstm_test_rmse_ice_conc = 0.0027644770218497217
lstm_test_rmse_windspeed = 0.004895030846810138
lstm_test_rmse_precip = 0.0033065700595167795

In [None]:
##RMSE ratings
# Each rating is the test RMSE measured with one feature randomised divided by
# the baseline LSTM test RMSE.
(
    lstm_temp_rmse,
    lstm_pressure_rmse,
    lstm_lat_rmse,
    lstm_lon_rmse,
    lstm_height_rmse,
    lstm_ice_conc_rmse,
    lstm_windspeed_rmse,
    lstm_precip_rmse,
) = (
    randomised_rmse / lstm_test_rmse
    for randomised_rmse in (
        lstm_test_rmse_temp,
        lstm_test_rmse_pressure,
        lstm_test_rmse_lat,
        lstm_test_rmse_lon,
        lstm_test_rmse_height,
        lstm_test_rmse_ice_conc,
        lstm_test_rmse_windspeed,
        lstm_test_rmse_precip,
    )
)

In [None]:
# Show the LSTM ratings one per line, in the order: temp, pressure, lat, lon,
# height, ice_conc, windspeed, precip.
print(
    lstm_temp_rmse,
    lstm_pressure_rmse,
    lstm_lat_rmse,
    lstm_lon_rmse,
    lstm_height_rmse,
    lstm_ice_conc_rmse,
    lstm_windspeed_rmse,
    lstm_precip_rmse,
    sep="\n",
)

5.360486426155083
2.14921830091353
1.46601797679662
3.1648306286257037
1.2939278065352673
1.1756732336146218
2.081752425079639
1.4062138637145483


# **Model 5 - MLP**
https://www.kaggle.com/code/vitorgamalemos/multilayer-perceptron-from-scratch

In [None]:
# Create an empty Keras Sequential container for the MLP and bind it to `model`
# (the same name the LSTM cells above used, so it replaces the last LSTM).
model = Sequential()

In [None]:
# Build the MLP: 14 hidden Dense(128, relu) layers followed by a single linear
# output unit for the regression target.
# `model` is rebound to a freshly constructed network rather than extended with
# model.add(), so re-running this cell cannot stack a second set of layers onto
# an existing model.
model = Sequential(
    # Only the first layer declares the input width (one entry per feature column).
    [Dense(128, activation='relu', input_shape=(x_train.shape[1],))]
    + [Dense(128, activation='relu') for _ in range(13)]
    + [Dense(1, activation='linear')]
)

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               1152      
                                                                 
 dense_1 (Dense)             (None, 128)               16512     
                                                                 
 dense_2 (Dense)             (None, 128)               16512     
                                                                 
 dense_3 (Dense)             (None, 128)               16512     
                                                                 
 dense_4 (Dense)             (None, 128)               16512     
                                                                 
 dense_5 (Dense)             (None, 128)               16512     
                                                                 
 dense_6 (Dense)             (None, 128)               1

In [None]:
# Configure training: Adam optimiser minimising mean squared error.
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Train the baseline MLP on the unmodified training set; (x_val, y_val) is
# passed as validation data. The returned History object is kept in
# `training_results`.
training_results = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=64,
    epochs=21,
)

Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21


In [None]:
##Run model
# Baseline MLP: predict on the validation split, then the test split, and
# report the RMSE of each.
y_val_pred, y_test_pred = model.predict(x_val), model.predict(x_test)

mlp_val_rmse = np.sqrt(mean_squared_error(y_true=y_val, y_pred=y_val_pred))
mlp_test_rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_test_pred))

print(f"MLP Validation RMSE: {mlp_val_rmse}")
print(f"MLP Test RMSE: {mlp_test_rmse}")

MLP Validation RMSE: 0.016950407228873195
MLP Test RMSE: 0.016949189685648076


**Feature Testing**

In [None]:
##Test temp feature
# Rebuild and recompile the MLP (same architecture as the baseline: 14 hidden
# Dense(128, relu) layers + linear output) so this experiment starts from newly
# initialised weights, as the LSTM feature tests do. Calling fit() on an
# already-trained model continues training it, which would make this RMSE
# depend on every fit executed before this cell.
model = Sequential(
    [Dense(128, activation='relu', input_shape=(x_train_temp.shape[1],))]
    + [Dense(128, activation='relu') for _ in range(13)]
    + [Dense(1, activation='linear')]
)
model.compile(optimizer='adam', loss='mean_squared_error')

# Train on x_train_temp (presumably the training set with the temp column
# randomised -- built earlier in the notebook).
training_results_temp = model.fit(x_train_temp,
                             y_train,
                             epochs=21,
                             batch_size=64,
                             validation_data=(x_val, y_val))

# Score on the validation/test splits.
y_val_pred_temp = model.predict(x_val)
y_test_pred_temp = model.predict(x_test)

mlp_val_rmse_temp = np.sqrt(mean_squared_error(y_val, y_val_pred_temp))
mlp_test_rmse_temp = np.sqrt(mean_squared_error(y_test, y_test_pred_temp))

print(f"MLP Validation on Randomised Temp RMSE: {mlp_val_rmse_temp}")
print(f"MLP Test on Randomised Temp RMSE: {mlp_test_rmse_temp}")

Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21
MLP Validation on Randomised Temp RMSE: 0.045873789757715125
MLP Test on Randomised Temp RMSE: 0.046101674375602766


In [None]:
##Test pressure feature
# Rebuild and recompile the MLP (same architecture as the baseline: 14 hidden
# Dense(128, relu) layers + linear output) so this experiment starts from newly
# initialised weights, as the LSTM feature tests do. Calling fit() on an
# already-trained model continues training it, which would make this RMSE
# depend on every fit executed before this cell.
model = Sequential(
    [Dense(128, activation='relu', input_shape=(x_train_pressure.shape[1],))]
    + [Dense(128, activation='relu') for _ in range(13)]
    + [Dense(1, activation='linear')]
)
model.compile(optimizer='adam', loss='mean_squared_error')

# Train on x_train_pressure (presumably the training set with the pressure
# column randomised -- built earlier in the notebook).
training_results_pressure = model.fit(x_train_pressure,
                             y_train,
                             epochs=21,
                             batch_size=64,
                             validation_data=(x_val, y_val))

# Score on the validation/test splits.
y_val_pred_pressure = model.predict(x_val)
y_test_pred_pressure = model.predict(x_test)

mlp_val_rmse_pressure = np.sqrt(mean_squared_error(y_val, y_val_pred_pressure))
mlp_test_rmse_pressure = np.sqrt(mean_squared_error(y_test, y_test_pred_pressure))

print(f"MLP Validation on Randomised Pressure RMSE: {mlp_val_rmse_pressure}")
print(f"MLP Test on Randomised Pressure RMSE: {mlp_test_rmse_pressure}")

Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21
MLP Validation on Randomised Pressure RMSE: 0.05493083817861914
MLP Test on Randomised Pressure RMSE: 0.0549060780470108


In [None]:
##Test lat feature
# Rebuild and recompile the MLP (same architecture as the baseline: 14 hidden
# Dense(128, relu) layers + linear output) so this experiment starts from newly
# initialised weights, as the LSTM feature tests do. Calling fit() on an
# already-trained model continues training it, which would make this RMSE
# depend on every fit executed before this cell.
model = Sequential(
    [Dense(128, activation='relu', input_shape=(x_train_lat.shape[1],))]
    + [Dense(128, activation='relu') for _ in range(13)]
    + [Dense(1, activation='linear')]
)
model.compile(optimizer='adam', loss='mean_squared_error')

# Train on x_train_lat (presumably the training set with the lat column
# randomised -- built earlier in the notebook).
training_results_lat = model.fit(x_train_lat,
                             y_train,
                             epochs=21,
                             batch_size=64,
                             validation_data=(x_val, y_val))

# Score on the validation/test splits.
y_val_pred_lat = model.predict(x_val)
y_test_pred_lat = model.predict(x_test)

mlp_val_rmse_lat = np.sqrt(mean_squared_error(y_val, y_val_pred_lat))
mlp_test_rmse_lat = np.sqrt(mean_squared_error(y_test, y_test_pred_lat))

print(f"MLP Validation on Randomised Lat RMSE: {mlp_val_rmse_lat}")
print(f"MLP Test on Randomised Lat RMSE: {mlp_test_rmse_lat}")

Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21
MLP Validation on Randomised Lat RMSE: 0.010631077179936553
MLP Test on Randomised Lat RMSE: 0.010647562778071676


In [None]:
##Test lon feature
# Rebuild and recompile the MLP (same architecture as the baseline: 14 hidden
# Dense(128, relu) layers + linear output) so this experiment starts from newly
# initialised weights, as the LSTM feature tests do. Calling fit() on an
# already-trained model continues training it, which would make this RMSE
# depend on every fit executed before this cell.
model = Sequential(
    [Dense(128, activation='relu', input_shape=(x_train_lon.shape[1],))]
    + [Dense(128, activation='relu') for _ in range(13)]
    + [Dense(1, activation='linear')]
)
model.compile(optimizer='adam', loss='mean_squared_error')

# Train on x_train_lon (presumably the training set with the lon column
# randomised -- built earlier in the notebook).
training_results_lon = model.fit(x_train_lon,
                             y_train,
                             epochs=21,
                             batch_size=64,
                             validation_data=(x_val, y_val))

# Score on the validation/test splits.
y_val_pred_lon = model.predict(x_val)
y_test_pred_lon = model.predict(x_test)

mlp_val_rmse_lon = np.sqrt(mean_squared_error(y_val, y_val_pred_lon))
mlp_test_rmse_lon = np.sqrt(mean_squared_error(y_test, y_test_pred_lon))

print(f"MLP Validation on Randomised Lon RMSE: {mlp_val_rmse_lon}")
print(f"MLP Test on Randomised Lon RMSE: {mlp_test_rmse_lon}")

Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21
MLP Validation on Randomised Lon RMSE: 0.025048729709281636
MLP Test on Randomised Lon RMSE: 0.025053657221873


In [None]:
##Test height feature
# Rebuild and recompile the MLP (same architecture as the baseline: 14 hidden
# Dense(128, relu) layers + linear output) so this experiment starts from newly
# initialised weights, as the LSTM feature tests do. Calling fit() on an
# already-trained model continues training it, which would make this RMSE
# depend on every fit executed before this cell.
model = Sequential(
    [Dense(128, activation='relu', input_shape=(x_train_height.shape[1],))]
    + [Dense(128, activation='relu') for _ in range(13)]
    + [Dense(1, activation='linear')]
)
model.compile(optimizer='adam', loss='mean_squared_error')

# Train on x_train_height (presumably the training set with the height column
# randomised -- built earlier in the notebook).
training_results_height = model.fit(x_train_height,
                             y_train,
                             epochs=21,
                             batch_size=64,
                             validation_data=(x_val, y_val))

# Score on the validation/test splits.
y_val_pred_height = model.predict(x_val)
y_test_pred_height = model.predict(x_test)

mlp_val_rmse_height = np.sqrt(mean_squared_error(y_val, y_val_pred_height))
mlp_test_rmse_height = np.sqrt(mean_squared_error(y_test, y_test_pred_height))

print(f"MLP Validation on Randomised Height RMSE: {mlp_val_rmse_height}")
print(f"MLP Test on Randomised Height RMSE: {mlp_test_rmse_height}")

Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21
MLP Validation on Randomised Height RMSE: 0.0018357519303910381
MLP Test on Randomised Height RMSE: 0.0018383198369107076


In [None]:
##Test ice_conc feature
# Rebuild and recompile the MLP (same architecture as the baseline: 14 hidden
# Dense(128, relu) layers + linear output) so this experiment starts from newly
# initialised weights, as the LSTM feature tests do. Calling fit() on an
# already-trained model continues training it, which would make this RMSE
# depend on every fit executed before this cell.
model = Sequential(
    [Dense(128, activation='relu', input_shape=(x_train_ice_conc.shape[1],))]
    + [Dense(128, activation='relu') for _ in range(13)]
    + [Dense(1, activation='linear')]
)
model.compile(optimizer='adam', loss='mean_squared_error')

# Train on x_train_ice_conc (presumably the training set with the ice_conc
# column randomised -- built earlier in the notebook).
training_results_ice_conc = model.fit(x_train_ice_conc,
                             y_train,
                             epochs=21,
                             batch_size=64,
                             validation_data=(x_val, y_val))

# Score on the validation/test splits.
y_val_pred_ice_conc = model.predict(x_val)
y_test_pred_ice_conc = model.predict(x_test)

mlp_val_rmse_ice_conc = np.sqrt(mean_squared_error(y_val, y_val_pred_ice_conc))
mlp_test_rmse_ice_conc = np.sqrt(mean_squared_error(y_test, y_test_pred_ice_conc))

print(f"MLP Validation on Randomised Ice Conc RMSE: {mlp_val_rmse_ice_conc}")
print(f"MLP Test on Randomised Ice Conc RMSE: {mlp_test_rmse_ice_conc}")

Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21
MLP Validation on Randomised Ice Conc RMSE: 0.006667009677699084
MLP Test on Randomised Ice Conc RMSE: 0.006666484594917919


In [None]:
##Test windspeed feature
# Rebuild and recompile the MLP (same architecture as the baseline: 14 hidden
# Dense(128, relu) layers + linear output) so this experiment starts from newly
# initialised weights, as the LSTM feature tests do. Calling fit() on an
# already-trained model continues training it, which would make this RMSE
# depend on every fit executed before this cell.
model = Sequential(
    [Dense(128, activation='relu', input_shape=(x_train_windspeed.shape[1],))]
    + [Dense(128, activation='relu') for _ in range(13)]
    + [Dense(1, activation='linear')]
)
model.compile(optimizer='adam', loss='mean_squared_error')

# Train on x_train_windspeed (presumably the training set with the windspeed
# column randomised -- built earlier in the notebook).
training_results_windspeed = model.fit(x_train_windspeed,
                             y_train,
                             epochs=21,
                             batch_size=64,
                             validation_data=(x_val, y_val))

# Score on the validation/test splits.
y_val_pred_windspeed = model.predict(x_val)
y_test_pred_windspeed = model.predict(x_test)

mlp_val_rmse_windspeed = np.sqrt(mean_squared_error(y_val, y_val_pred_windspeed))
mlp_test_rmse_windspeed = np.sqrt(mean_squared_error(y_test, y_test_pred_windspeed))

print(f"MLP Validation on Randomised Windspeed RMSE: {mlp_val_rmse_windspeed}")
print(f"MLP Test on Randomised Windspeed RMSE: {mlp_test_rmse_windspeed}")

Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21
MLP Validation on Randomised Windspeed RMSE: 0.010643138408271852
MLP Test on Randomised Windspeed RMSE: 0.010646009687240308


In [None]:
##Test precip feature
# Rebuild and recompile the MLP (same architecture as the baseline: 14 hidden
# Dense(128, relu) layers + linear output) so this experiment starts from newly
# initialised weights, as the LSTM feature tests do. Calling fit() on an
# already-trained model continues training it, which would make this RMSE
# depend on every fit executed before this cell.
model = Sequential(
    [Dense(128, activation='relu', input_shape=(x_train_precip.shape[1],))]
    + [Dense(128, activation='relu') for _ in range(13)]
    + [Dense(1, activation='linear')]
)
model.compile(optimizer='adam', loss='mean_squared_error')

# Train on x_train_precip (presumably the training set with the precipitation
# column randomised -- built earlier in the notebook).
training_results_precip = model.fit(x_train_precip,
                             y_train,
                             epochs=21,
                             batch_size=64,
                             validation_data=(x_val, y_val))

# Score on the validation/test splits.
y_val_pred_precip = model.predict(x_val)
y_test_pred_precip = model.predict(x_test)

mlp_val_rmse_precip = np.sqrt(mean_squared_error(y_val, y_val_pred_precip))
mlp_test_rmse_precip = np.sqrt(mean_squared_error(y_test, y_test_pred_precip))

print(f"MLP Validation on Randomised Precip RMSE: {mlp_val_rmse_precip}")
print(f"MLP Test on Randomised Precip RMSE: {mlp_test_rmse_precip}")

Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21
MLP Validation on Randomised Precip RMSE: 0.005059791185778849
MLP Test on Randomised Precip RMSE: 0.005047179159307311


In [None]:
##RMSE ratings
# Each rating is the test RMSE measured with one feature randomised divided by
# the baseline MLP test RMSE.
(
    mlp_temp_rmse,
    mlp_pressure_rmse,
    mlp_lat_rmse,
    mlp_lon_rmse,
    mlp_height_rmse,
    mlp_ice_conc_rmse,
    mlp_windspeed_rmse,
    mlp_precip_rmse,
) = (
    randomised_rmse / mlp_test_rmse
    for randomised_rmse in (
        mlp_test_rmse_temp,
        mlp_test_rmse_pressure,
        mlp_test_rmse_lat,
        mlp_test_rmse_lon,
        mlp_test_rmse_height,
        mlp_test_rmse_ice_conc,
        mlp_test_rmse_windspeed,
        mlp_test_rmse_precip,
    )
)


In [None]:
# Show the MLP ratings one per line, in the order: temp, pressure, lat, lon,
# height, ice_conc, windspeed, precip.
print(
    mlp_temp_rmse,
    mlp_pressure_rmse,
    mlp_lat_rmse,
    mlp_lon_rmse,
    mlp_height_rmse,
    mlp_ice_conc_rmse,
    mlp_windspeed_rmse,
    mlp_precip_rmse,
    sep="\n",
)

2.719992827423478
3.2394515056671507
0.628204827224727
1.4781625367664317
0.1084606326913272
0.3933217291539809
0.6281131950664839
0.29778291782179234
