In [3]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split

data = pd.read_csv('abalone.data.csv')

categorical_mask = data.dtypes == object
categorical_cols = data.columns[categorical_mask].tolist()
numerical_cols = data.columns[~categorical_mask].tolist()

encoders = {}
data_encoded = data.copy()
for col in categorical_cols:
   le = LabelEncoder()
   data_encoded[col] = le.fit_transform(data[col])
   encoders[col] = (le, OneHotEncoder())
   encoded_cols = encoders[col][1].fit_transform(data_encoded[col].values.reshape(-1, 1)).toarray()
   data_encoded = data_encoded.drop(col, axis=1)
   data_encoded = pd.concat([data_encoded, pd.DataFrame(encoded_cols, columns=[f"{col}_{i}" for i in range(encoded_cols.shape[1])])], axis=1)

scaler = StandardScaler()
data_encoded[numerical_cols] = scaler.fit_transform(data_encoded[numerical_cols])

X = data_encoded.drop('Rings', axis=1)
y = data_encoded['Rings']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_tensor = torch.from_numpy(X_train.values).float()
y_train_tensor = torch.from_numpy(y_train.values).float().unsqueeze(1)
X_test_tensor = torch.from_numpy(X_test.values).float()
y_test_tensor = torch.from_numpy(y_test.values).float().unsqueeze(1)

class AbaloneModel(nn.Module):
   def __init__(self, input_size, hidden_sizes, output_size):
       super(AbaloneModel, self).__init__()
       self.layers = nn.ModuleList([nn.Linear(input_size, hidden_sizes[0])])
       self.layers.extend([nn.Linear(hidden_sizes[i], hidden_sizes[i+1]) for i in range(len(hidden_sizes)-1)])
       self.layers.append(nn.Linear(hidden_sizes[-1], output_size))
       self.relu = nn.ReLU()

   def forward(self, x):
       for i, layer in enumerate(self.layers[:-1]):
           x = self.relu(layer(x))
       x = self.layers[-1](x)
       return x

input_size = X_train_tensor.shape[1]
hidden_sizes = [64, 32]
output_size = 1
learning_rate = 0.01
num_epochs = 100

model = AbaloneModel(input_size, hidden_sizes, output_size)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
   model.train()
   optimizer.zero_grad()
   outputs = model(X_train_tensor)
   loss = criterion(outputs, y_train_tensor)
   loss.backward()
   optimizer.step()

def evaluate(model, X_test, y_test):
   model.eval()
   with torch.no_grad():
       outputs = model(X_test)
       mse = nn.MSELoss()
       loss = mse(outputs, y_test)
   return loss.item()

test_loss = evaluate(model, X_test_tensor, y_test_tensor)
print(f"Test MSE: {test_loss}")


Test MSE: 0.7282235026359558


In [10]:
# Defining a function to train and evaluate the model with different hyperparameters
def train_and_evaluate(learning_rate, batch_size, hidden_sizes, num_epochs=100):
    model = AbaloneModel(input_size, hidden_sizes, output_size)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    for epoch in range(num_epochs):
        model.train()
        for i in range(0, X_train_tensor.shape[0], batch_size):
            inputs = X_train_tensor[i:i+batch_size]
            labels = y_train_tensor[i:i+batch_size]
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    test_loss = evaluate(model, X_test_tensor, y_test_tensor)
    return test_loss

learning_rates = [0.01, 0.05, 0.1, 0.5]
batch_sizes = [16, 32, 64, 128]
hidden_sizes_list = [[32, 16], [64, 32], [128, 64], [256, 128]]

results = []
for lr in learning_rates:
    for bs in batch_sizes:
        for hs in hidden_sizes_list:
            test_loss = train_and_evaluate(lr, bs, hs)
            results.append({"learning_rate": lr, "batch_size": bs, "hidden_sizes": hs, "test_mse": test_loss})

import pandas as pd
results_df = pd.DataFrame(results)
print(results_df)

    learning_rate  batch_size hidden_sizes  test_mse
0            0.01          16     [32, 16]  0.467627
1            0.01          16     [64, 32]  0.474029
2            0.01          16    [128, 64]  0.474439
3            0.01          16   [256, 128]  0.474443
4            0.01          32     [32, 16]  0.483013
..            ...         ...          ...       ...
59           0.50          64   [256, 128]  0.709121
60           0.50         128     [32, 16]  0.516233
61           0.50         128     [64, 32]  0.475749
62           0.50         128    [128, 64]  1.279263
63           0.50         128   [256, 128]  1.279401

[64 rows x 4 columns]


In [8]:
# Adding more layers to the model
class AbaloneModel(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(AbaloneModel, self).__init__()
        layers = []
        layers.append(nn.Linear(input_size, hidden_sizes[0]))
        layers.append(nn.ReLU())
        for i in range(len(hidden_sizes)-1):
            layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i+1]))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(hidden_sizes[-1], output_size))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)


model = AbaloneModel(input_size, [128, 64, 32], output_size)
optimizer = optim.Adagrad(model.parameters(), lr=0.01)
criterion = nn.MSELoss()


num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()


test_loss = evaluate(model, X_test_tensor, y_test_tensor)
print(f"Test MSE (Adagrad): {test_loss}")

sgd_optimizer = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.MSELoss()

num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    sgd_optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    sgd_optimizer.step()

sgd_test_loss = evaluate(model, X_test_tensor, y_test_tensor)
print(f"Test MSE (SGD): {sgd_test_loss}")

Test MSE (Adagrad): 0.4400298595428467
Test MSE (SGD): 0.43521052598953247


In [9]:
# Defining the model with 10-15 hidden layers and Sigmoid activation
class AbaloneModel(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(AbaloneModel, self).__init__()
        layers = []
        layers.append(nn.Linear(input_size, hidden_sizes[0]))
        layers.append(nn.Sigmoid())
        for i in range(len(hidden_sizes)-1):
            layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i+1]))
            layers.append(nn.Sigmoid())
        layers.append(nn.Linear(hidden_sizes[-1], output_size))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

hidden_sizes = [64] * 10  # or [32] * 15
model = AbaloneModel(input_size, hidden_sizes, output_size)
optimizer = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.MSELoss()

num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

test_loss = evaluate(model, X_test_tensor, y_test_tensor)
print(f"Test MSE (Sigmoid, 10-15 hidden layers): {test_loss}")


Test MSE (Sigmoid, 10-15 hidden layers): 1.0418965816497803
