                                      AI VIET NAM – AI COURSE 2024

                                Exercise: Multilayer Perceptrons (MLPs) and

                                         Activation Functions

                                Dinh-Thang Duong, Yen-Linh Vu, Quang-Vinh Dinh

                                        Ngày 12 tháng 11 năm 2024

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [2]:
random_state = 59
np.random.seed(random_state)
if torch.cuda.is_available():
    torch.cude.manual_seed(random_state)

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
data_path = '/kaggle/input/aio-datasets/Auto_MPG_data.csv'
dataset = pd.read_csv(data_path)

In [5]:
dataset

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Europe,Japan,USA
0,18.0,8,307.0,130.0,3504.0,12.0,70,0,0,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,0,0,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,0,0,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,0,0,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,0,0,1
...,...,...,...,...,...,...,...,...,...,...
387,27.0,4,140.0,86.0,2790.0,15.6,82,0,0,1
388,44.0,4,97.0,52.0,2130.0,24.6,82,1,0,0
389,32.0,4,135.0,84.0,2295.0,11.6,82,0,0,1
390,28.0,4,120.0,79.0,2625.0,18.6,82,0,0,1


In [6]:
X = dataset.drop(columns='MPG').values
y = dataset['MPG'].values
print(X.shape, y.shape)

(392, 9) (392,)


In [7]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, 
                                                      test_size=0.2, 
                                                      shuffle=True,
                                                      random_state=random_state,
                                                     )
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, 
                                                      test_size=0.125, 
                                                      shuffle=True,
                                                      random_state=random_state,
                                                    )

In [8]:
normalizer = StandardScaler()
X_train = normalizer.fit_transform(X_train)
X_valid = normalizer.transform(X_valid)
X_test = normalizer.transform(X_test)

In [9]:
X_train = torch.tensor(X_train, dtype=torch.float32)
X_valid = torch.tensor(X_valid, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_valid = torch.tensor(y_valid, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

In [10]:
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, index):
        return self.X[index], self.y[index]

In [11]:
batch_size = 32
train_dataset = CustomDataset(X_train, y_train)
val_dataset = CustomDataset(X_valid, y_valid)
train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True,                         
                         )
val_loader = DataLoader(val_dataset,
                        batch_size=batch_size,
                        shuffle=True,
                       )

In [12]:
class MLP(nn.Module):
    def __init__(self, input_dims, hidden_dims, output_dims):
        super().__init__()
        self.linear1 = nn.Linear(input_dims, hidden_dims)
        self.linear2 = nn.Linear(hidden_dims, hidden_dims)
        self.output = nn.Linear(hidden_dims, output_dims)
        
    def forward(self, x):
        x = self.linear1(x)
        x = F.relu(x)
        x = self.linear2(x)
        x = F.relu(x)
        output = self.output(x)
        return output.squeeze(1)

In [32]:
class LinearRegression(nn.Module):
    def __init__(self, input_dims):
        super().__init__()
        self.linear = nn.Linear(input_dims, 1)
        
    def forward(self, x):
        x = self.linear(x)
        return x.squeeze(1)

In [51]:
class MLP_sigmoid(nn.Module):
    def __init__(self, input_dims, hidden_dims, output_dims):
        super().__init__()
        self.linear1 = nn.Linear(input_dims, hidden_dims)
        self.linear2 = nn.Linear(hidden_dims, hidden_dims)
        self.output = nn.Linear(hidden_dims, output_dims)
        
    def forward(self, x):
        x = self.linear1(x)
        x = F.sigmoid(x)
        x = self.linear2(x)
        x = F.sigmoid(x)
        output = self.output(x)
        return output.squeeze(1)

In [65]:
class MLP_tanh(nn.Module):
    def __init__(self, input_dims, hidden_dims, output_dims):
        super().__init__()
        self.linear1 = nn.Linear(input_dims, hidden_dims)
        self.linear2 = nn.Linear(hidden_dims, hidden_dims)
        self.output = nn.Linear(hidden_dims, output_dims)
        
    def forward(self, x):
        x = self.linear1(x)
        x = F.tanh(x)
        x = self.linear2(x)
        x = F.tanh(x)
        output = self.output(x)
        return output.squeeze(1)

In [66]:
input_dims = X_train.shape[1]
output_dims = 1
hidden_dims = 64

# model = LinearRegression(input_dims)
# model = MLP(input_dims, hidden_dims, output_dims)
# model = MLP_sigmoid(input_dims, hidden_dims, output_dims)
model = MLP_tanh(input_dims, hidden_dims, output_dims)




In [67]:
def r_squared(y_true, y_pred):
    y_true = torch.Tensor(y_true).to(device)
    y_pred = torch.Tensor(y_pred).to(device)
    mean_y = torch.mean(y_true)
    ss_res = torch.sum((y_true - y_pred) ** 2)
    ss_tot = torch.sum((y_true - mean_y) ** 2)
    return 1 - (ss_res / ss_tot)

In [68]:
n_epochs = 100
lr = 0.01
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [69]:
train_losses = []
val_losses = []
train_r2s = []
val_r2s = []

In [70]:
for ep in range(n_epochs):
    train_loss = 0.0
    train_y_pred = []
    train_y_true = []
    val_y_pred = []
    val_y_true = []
    model.train()
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        train_y_pred += output.tolist()
        train_y_true += y_batch.tolist()
    train_loss = train_loss / len(train_loader)
    train_r2 = r_squared(train_y_true, train_y_pred)
    train_losses.append(train_loss)
    train_r2s.append(train_r2)
    # evaluation:
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            output = model(X_batch)
            loss = criterion(output, y_batch)
            val_loss += loss.item()
            val_y_pred += output.tolist()
            val_y_true += y_batch.tolist()
        val_loss = val_loss / len(val_loader)
        val_r2 = r_squared(val_y_true, val_y_pred)     
    val_losses.append(val_loss)
    val_r2s.append(val_r2)
    print(f'EPOCH {ep + 1}: train_loss: {train_loss:.3f} || train_r2: {train_r2:.3f} || valid_loss: {val_loss:.3f} || valid_r2: {val_r2:.3f}')

EPOCH 1: train_loss: 263.972 || train_r2: -3.684 || valid_loss: 20.256 || valid_r2: 0.653
EPOCH 2: train_loss: 28.253 || train_r2: 0.527 || valid_loss: 19.683 || valid_r2: 0.690
EPOCH 3: train_loss: 19.658 || train_r2: 0.676 || valid_loss: 11.269 || valid_r2: 0.836
EPOCH 4: train_loss: 15.033 || train_r2: 0.746 || valid_loss: 8.117 || valid_r2: 0.867
EPOCH 5: train_loss: 11.706 || train_r2: 0.796 || valid_loss: 10.630 || valid_r2: 0.826
EPOCH 6: train_loss: 11.586 || train_r2: 0.806 || valid_loss: 8.421 || valid_r2: 0.868
EPOCH 7: train_loss: 10.905 || train_r2: 0.816 || valid_loss: 6.323 || valid_r2: 0.896
EPOCH 8: train_loss: 9.247 || train_r2: 0.841 || valid_loss: 4.868 || valid_r2: 0.912
EPOCH 9: train_loss: 9.412 || train_r2: 0.838 || valid_loss: 6.764 || valid_r2: 0.905
EPOCH 10: train_loss: 8.427 || train_r2: 0.854 || valid_loss: 7.025 || valid_r2: 0.906
EPOCH 11: train_loss: 8.798 || train_r2: 0.855 || valid_loss: 5.353 || valid_r2: 0.918
EPOCH 12: train_loss: 8.981 || train_r2

In [71]:
model.eval()
with torch.no_grad():
    y_hat = model(X_test)
    test_r2 = r_squared(y_test, y_hat)
    print('Evaluation on validation set:')
    print(f'R2: {test_r2}')

Evaluation on validation set:
R2: 0.9111863374710083


In [40]:
# !pip install torch-summary

Collecting torch-summary
  Downloading torch_summary-1.4.5-py3-none-any.whl.metadata (18 kB)
Downloading torch_summary-1.4.5-py3-none-any.whl (16 kB)
Installing collected packages: torch-summary
Successfully installed torch-summary-1.4.5


In [41]:
# from torchsummary import summary
# summary(model, (1, 9))

Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 1, 1]                10
Total params: 10
Trainable params: 10
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


Layer (type:depth-idx)                   Output Shape              Param #
├─Linear: 1-1                            [-1, 1, 1]                10
Total params: 10
Trainable params: 10
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00