MLP for Regression

In [None]:
import itertools

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from torch import nn, optim
from torch.autograd import Variable

# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Load the raw table and work on its underlying NumPy array; the columns
# are split purely by position below.
df = pd.read_csv('/content/Book1.csv')
dataset = df.values

# Columns 0-6 are the model inputs, columns 7-8 are the regression targets.
X = dataset[:, :7]
Y = dataset[:, 7:9]

# Network input/output widths follow from the column counts.
input_dims = X.shape[1]
output_dims = Y.shape[1]


def evaluate_model(model, X_val, Y_val):
    """Score ``model`` on a validation set.

    The model is switched to evaluation mode (and left in that mode), a
    single forward pass is run without gradient tracking, and the
    predictions are compared with ``Y_val`` using scikit-learn metrics.

    Args:
        model: Module called as ``model(X_val)`` to obtain predictions.
        X_val: Validation inputs (a tensor accepted by ``model``).
        Y_val: Validation targets as a tensor.

    Returns:
        A ``(mae, r2, mape)`` tuple: mean absolute error, R^2 score and
        mean absolute percentage error.
    """
    model.eval()
    with torch.no_grad():
        predicted = model(X_val).cpu().detach().numpy()
    # Convert once and reuse the arrays for every metric.
    actual = Y_val.cpu().detach().numpy()

    return (
        mean_absolute_error(actual, predicted),
        r2_score(actual, predicted),
        mean_absolute_percentage_error(actual, predicted),
    )

def train_model(model, criterion, optimizer, X_train, Y_train, epochs):
    """Train ``model`` with full-batch gradient steps.

    Each epoch performs one forward pass over all of ``X_train``, one
    backward pass, and one optimizer step.

    Args:
        model: Module to train; updated in place.
        criterion: Loss callable invoked as ``criterion(output, Y_train)``.
        optimizer: Optimizer constructed over ``model``'s parameters.
        X_train: Training inputs.
        Y_train: Training targets.
        epochs: Number of full-batch updates to perform.

    Returns:
        The same ``model`` object, left in training mode.
    """
    # Make sure dropout (and any other train-only layers) are active even if
    # the model was previously switched to eval mode.
    model.train()

    for _ in range(epochs):
        optimizer.zero_grad()
        output = model(X_train)

        if output.shape != Y_train.shape:
            out_dims = [d for d in output.shape if d != 1]
            target_dims = [d for d in Y_train.shape if d != 1]
            if out_dims == target_dims:
                # Shapes differ only by singleton axes, e.g. (N, 1) vs (N,)
                # or (1, k) vs (k,). Align to the target's shape so the loss
                # is element-wise instead of silently broadcasting
                # (N,) against (N, 1) into an (N, N) comparison.
                output = output.reshape(Y_train.shape)
            else:
                # Genuinely different shapes: fall back to dropping singleton
                # axes and let the criterion decide what to do.
                output = output.squeeze()

        loss = criterion(output, Y_train)
        loss.backward()
        optimizer.step()
    return model

class Model(nn.Module):
    """Fully connected feed-forward network with ReLU and dropout.

    The network has at least one hidden layer: the first maps ``input_dim``
    to ``neurons`` and ``hidden_layers - 1`` further layers keep that width.
    A final linear layer maps ``neurons`` to ``output_dim`` with no
    activation.

    Args:
        input_dim: Number of input features.
        output_dim: Number of output values.
        hidden_layers: Number of hidden layers (values below 1 still yield
            one hidden layer).
        neurons: Width of every hidden layer.
        dropout_rate: Drop probability applied after each hidden activation.
    """

    def __init__(self, input_dim, output_dim, hidden_layers, neurons, dropout_rate):
        super().__init__()

        # Build the hidden stack first so parameter creation order (and thus
        # seeded initialisation) is: hidden layers, then the output layer.
        stack = [nn.Linear(input_dim, neurons)]
        stack.extend(nn.Linear(neurons, neurons) for _ in range(hidden_layers - 1))
        self.hidden_layers = nn.ModuleList(stack)

        self.output_layer = nn.Linear(neurons, output_dim)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        activations = x
        for layer in self.hidden_layers:
            # Linear -> ReLU -> dropout for every hidden layer.
            activations = self.dropout(F.relu(layer(activations)))
        return self.output_layer(activations)

# Hyper-parameter grid: every combination of the values below is evaluated
# with 10-fold cross-validation.
hidden_layers = [2, 3, 4, 5]
neurons = [128, 256, 512, 1024]
dropout_rates = [0.2, 0.3, 0.5]
epochs_list = [100, 200, 300]
optimizers = [optim.SGD, optim.RMSprop, optim.Adam]
learning_rates = [0.01, 0.001, 0.0005]

# A fixed random_state makes the shuffled splits reproducible between runs.
kfold = KFold(n_splits=10, shuffle=True, random_state=42)

# The fold data does not depend on the hyper-parameters, so split, scale and
# move it to the device once. Every configuration is then scored on exactly
# the same folds, which keeps the comparison between configurations fair.
folds = []
for train_index, val_index in kfold.split(X):
    # Fit the scaler on the training rows only so that no statistics from the
    # validation rows leak into training.
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X[train_index])
    X_val = scaler.transform(X[val_index])
    folds.append((
        torch.tensor(X_train, dtype=torch.float32).to(device),
        torch.tensor(Y[train_index], dtype=torch.float32).to(device),
        torch.tensor(X_val, dtype=torch.float32).to(device),
        torch.tensor(Y[val_index], dtype=torch.float32).to(device),
    ))

criterion = nn.SmoothL1Loss()

best_mae, best_r2, best_mape = float('inf'), float('-inf'), float('inf')
# Fold-to-fold spread of the *best* configuration (NaN until one is selected).
best_std_mae = best_std_r2 = best_std_mape = float('nan')
best_params = {}

# itertools.product iterates with the right-most list varying fastest, i.e.
# in the same order as nesting the loops in this argument order.
search_space = itertools.product(
    hidden_layers, neurons, dropout_rates, learning_rates, epochs_list, optimizers
)
for hidden_layer, neuron, dropout_rate, learning_rate, epoch, opt in search_space:
    mae_scores, r2_scores, mape_scores = [], [], []
    for X_train, Y_train, X_val, Y_val in folds:
        # A fresh model and optimizer per fold so nothing carries over.
        model = Model(input_dims, output_dims, hidden_layer, neuron, dropout_rate).to(device)
        optimizer = opt(model.parameters(), lr=learning_rate, weight_decay=0.01)
        model = train_model(model, criterion, optimizer, X_train, Y_train, epochs=epoch)
        mae, r2, mape = evaluate_model(model, X_val, Y_val)
        mae_scores.append(mae)
        r2_scores.append(r2)
        mape_scores.append(mape)

    avg_mae, avg_r2, avg_mape = np.mean(mae_scores), np.mean(r2_scores), np.mean(mape_scores)
    std_mae, std_r2, std_mape = np.std(mae_scores), np.std(r2_scores), np.std(mape_scores)

    # NOTE(review): a configuration replaces the current best only when it
    # improves MAE, R^2 and MAPE simultaneously; a configuration that is much
    # better on one metric but marginally worse on another is rejected.
    # Confirm this is the intended selection rule.
    if avg_mae < best_mae and avg_r2 > best_r2 and avg_mape < best_mape:
        best_mae, best_r2, best_mape = avg_mae, avg_r2, avg_mape
        # Remember the spread of this configuration; the loop-local std_*
        # values are overwritten by every later configuration.
        best_std_mae, best_std_r2, best_std_mape = std_mae, std_r2, std_mape
        best_params = {'hidden_layers': hidden_layer, 'neurons': neuron, 'dropout_rate': dropout_rate, 'learning_rate': learning_rate, 'optimizer': opt, 'epochs': epoch}

print('Best Parameters: ', best_params)
print("Best Mean Absolute Error (MAE): ", best_mae)
print("Best Mean Absolute Percentage Error (MAPE): ", best_mape)
print('Best R^2 Score: ', best_r2)
print('Std of MAE: ', best_std_mae)
print('Std of MAPE: ', best_std_mape)
print('Std of R^2: ', best_std_r2)