In [20]:
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

## Simple MLP Regression

In [21]:
# Read the cleaned bottle dataset (relative path into the CLEAN directory)
bottle_csv_path = '../CLEAN/bottle_clean.csv'
bottle_df = pd.read_csv(bottle_csv_path)

In [22]:
# Columns used as predictors, and the column the model is trained to predict
feature_columns = ['T_degC', 'Depthm', 'O2ml_L', 'STheta', 'O2Sat']
target_column = 'Salnty'

X = bottle_df[feature_columns]
y = bottle_df[target_column]

In [23]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [24]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted scaler to both splits so no test-set information is used.
# Note: X_train and X_test are rebound to their scaled versions here.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [25]:
# Baseline network: two hidden layers (100 and 50 units), capped at 1000
# training iterations, seeded so repeated runs give the same model
mlp = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    max_iter=1000,
    random_state=42,
)

# Fit the baseline on the training split
mlp.fit(X_train, y_train)

In [26]:
# Generate baseline predictions for the held-out test features
y_pred = mlp.predict(X=X_test)

In [27]:
# Score the baseline predictions against the held-out targets
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)  # mean squared error
r2 = r2_score(y_true=y_test, y_pred=y_pred)             # coefficient of determination

# Report both metrics
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

Mean Squared Error: 0.0005392154704440376
R-squared: 0.9974488199493196


## Tuning the model's hyperparameters to improve the metrics

In [28]:
# Candidate hyperparameter values; the search evaluates every combination
options_grid = dict(
    hidden_layer_sizes=[(100, 50), (50, 50), (50, 100, 50)],
    activation=['tanh', 'relu'],
    alpha=[1e-4, 5e-2, 1e-6],
)

# Base estimator that each candidate configuration is applied to
mlpr = MLPRegressor(max_iter=1000, random_state=42)

# Exhaustive search with 5-fold cross-validation on the training split,
# running on all available cores (n_jobs=-1)
grid_search = GridSearchCV(
    estimator=mlpr,
    param_grid=options_grid,
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Show the best-scoring hyperparameter combination
print(grid_search.best_params_)

{'activation': 'tanh', 'alpha': 1e-06, 'hidden_layer_sizes': (50, 50)}


In [29]:
# Predict the held-out test set with the fitted grid search
new_y_pred = grid_search.predict(X=X_test)

# Score the tuned predictions with the same two metrics as the baseline
new_mse = mean_squared_error(y_true=y_test, y_pred=new_y_pred)  # mean squared error
new_r2 = r2_score(y_true=y_test, y_pred=new_y_pred)             # coefficient of determination

# Report both metrics
print(f'Mean Squared Error: {new_mse}')
print(f'R-squared: {new_r2}')

Mean Squared Error: 2.8949836205207898e-05
R-squared: 0.9998630301824679
