# MLP with PyTorch

## Getting Ready

Run every cell marked "step0" exactly as written.

### Step 0: Libraries and Data

There are two data sets, one for classification (wine) and one for regression (diabetes).

In [3]:
# CodeGrade step0

# Libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.datasets import load_diabetes
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load data
# Two datasets are used: diabetes for the regression part, wine for the classification part
diabetes_data = load_diabetes()
wine_data = load_wine()

Preprocess both data sets.

In [4]:
# CodeGrade step0

# Preprocessing

# Classification

# Feature matrix and integer class labels of the wine dataset
X1 = wine_data.data
y1 = wine_data.target

# Split data (80% train, 20% test); random_state pins the shuffle so the split is repeatable
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1, y1, test_size=0.2, random_state=42
)

# Standardize features: the scaler is fitted on the training split only and the
# same statistics are then applied to the test split
scaler = StandardScaler()
X1_train = scaler.fit_transform(X1_train)
X1_test = scaler.transform(X1_test)

# Convert to PyTorch tensors: float32 features, int64 (long) class-index labels
X1_train_t = torch.tensor(X1_train, dtype=torch.float32)
y1_train_t = torch.tensor(y1_train, dtype=torch.long)
X1_test_t = torch.tensor(X1_test, dtype=torch.float32)
y1_test_t = torch.tensor(y1_test, dtype=torch.long)

# Sanity checks. A bare shape tuple in the middle of a cell is silently discarded
# (only the last expression of a cell is displayed), so the invariants are asserted instead.
assert X1_train_t.shape[0] == y1_train_t.shape[0], "train features/labels row mismatch"
assert X1_test_t.shape[0] == y1_test_t.shape[0], "test features/labels row mismatch"
assert X1_train_t.shape[1] == X1_test_t.shape[1], "train/test feature count mismatch"


# Regression

X2, y2 = diabetes_data.data, diabetes_data.target

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2, y2, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply them to both splits
scaler = StandardScaler().fit(X2_train)
X2_train = scaler.transform(X2_train)
X2_test = scaler.transform(X2_test)

# Wrap everything as float32 tensors; the targets are reshaped into column
# vectors of shape (N, 1)
X2_train_t = torch.tensor(X2_train, dtype=torch.float32)
X2_test_t = torch.tensor(X2_test, dtype=torch.float32)
y2_train_t = torch.tensor(y2_train, dtype=torch.float32).reshape(-1, 1)
y2_test_t = torch.tensor(y2_test, dtype=torch.float32).reshape(-1, 1)


## Part 1: Classification

Define the MLP model for classification

In [5]:
# CodeGrade step0

# Define the MLP model for classification
class WineMLP(nn.Module):
    """Single-hidden-layer perceptron producing one raw score per class.

    Args:
        input_size: number of input features per sample.
        hidden_size: width of the hidden layer.
        num_classes: number of output scores (one per class).
    """

    def __init__(self, input_size=13, hidden_size=16, num_classes=3):
        super().__init__()
        # Layers are created in this order: input -> hidden, then hidden -> output
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return un-normalized class scores (logits) for the batch ``x``."""
        hidden = F.relu(self.fc1(x))
        # No softmax here: CrossEntropyLoss applies it internally
        logits = self.fc2(hidden)
        return logits

### Step 1: Initialize model, loss function, and optimizer

* Let `model` be an instance of `WineMLP`.
* Let `criterion` be the cross-entropy loss from `nn` (`nn.CrossEntropyLoss`).
* Let `optimizer` be the Adam optimizer over `model.parameters()` with the learning rate set to 0.01.

Then return `model, criterion, optimizer`.

In [6]:
# CodeGrade step1

# Initialize model, loss function, and optimizer
model = WineMLP()                  # default layer sizes
criterion = nn.CrossEntropyLoss()  # consumes raw logits and class-index targets
optimizer = optim.Adam(params=model.parameters(), lr=1e-2)

# Last expression of the cell, so all three objects are displayed
(model, criterion, optimizer)


(WineMLP(
   (fc1): Linear(in_features=13, out_features=16, bias=True)
   (fc2): Linear(in_features=16, out_features=3, bias=True)
 ),
 CrossEntropyLoss(),
 Adam (
 Parameter Group 0
     amsgrad: False
     betas: (0.9, 0.999)
     capturable: False
     differentiable: False
     eps: 1e-08
     foreach: None
     fused: None
     lr: 0.01
     maximize: False
     weight_decay: 0
 ))

Let's train the model.
* Note: Do not print the epochs, since this will lead to a CodeGrade error.

(Which would be an epoch fail)

In [9]:
# CodeGrade step0

# Train model
# Pair each feature row with its label and serve them in shuffled mini-batches of 16
train_ds = TensorDataset(X1_train_t, y1_train_t)
train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)

# Training Model
# NOTE(review): `model` and `optimizer` are created in an earlier cell, so re-running
# this cell on its own continues training from the current weights instead of restarting.
# NOTE(review): no random seed is set in the visible cells, so weight initialisation and
# batch shuffling may differ between runs — confirm whether reproducibility matters here.

num_epochs = 50
for epoch in range(num_epochs):
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()               # clear gradients left from the previous batch
        outputs = model(batch_x)            # forward pass: one row of logits per sample
        loss = criterion(outputs, batch_y)  # cross-entropy between logits and true labels
        loss.backward()                     # backpropagate to fill in parameter gradients
        optimizer.step()                    # Adam update of the parameters

# Per-epoch progress printing is kept disabled: the notebook text notes that
# printing the epochs leads to a CodeGrade error.
  #  if (epoch+1) % 10 == 0:
   #     print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")


### Step 2: Model Evaluation


* Set Model to Evaluation Mode:
  * `model.eval()` → switches layers such as dropout and batch normalization from their training behavior to their inference behavior (this model uses neither, but it is good practice).
* Disable Gradient Calculation:
 * `torch.no_grad()` → speeds up evaluation, doesn’t track gradients.
* Forward Pass on Test Set:
 * `outputs_test = model(X1_test_t)` → obtains predictions on test data.
* Predicted Class:
 * `torch.argmax(outputs_test, dim=1)` → selects the index (class) with the highest score for each test instance; the outputs are raw logits, and the largest logit is also the most probable class.
* Calculate Accuracy:
 * Compare predictions (predicted) against ground truth `(y1_test_t)`, take the mean over all test samples.
* Round:
  * `round(accuracy,2)` → just for display, e.g. 0.97 meaning 97% accuracy.



In [14]:
# CodeGrade step2

# Model Evaluation
model.eval()  # inference mode; changes nothing for this layer stack, but is good practice
with torch.no_grad():  # no gradient tracking is needed for evaluation
    outputs_test = model(X1_test_t)                 # logits for every test sample
    predicted = torch.argmax(outputs_test, dim=1)   # class index with the largest logit
    # Fraction of correct predictions, converted to a plain Python float
    accuracy = (predicted == y1_test_t).float().mean().item()

# Display rounded to two decimals, as the step description specifies;
# `accuracy` itself keeps full precision for any later use.
round(accuracy, 2)

# Suspiciously high accuracy, but it is a very small test set

1.0

### Step 3: Sample Prediction

Let the sample index be 17, and then fill in the rest of the code.

In [15]:
# CodeGrade step3

# make a sample prediction
sample_idx = 17  # position of the chosen example within the test split
# Indexing a single row drops the batch axis; unsqueeze(0) restores it as a
# leading dimension of size 1 so the row can be fed to the model as a batch.
sample_input = X1_test_t[sample_idx].unsqueeze(0)

sample_input

tensor([[-1.1002, -1.0909,  0.5361,  1.3149, -1.5373, -0.4549, -0.4218,  0.2501,
         -0.3577, -1.2303,  1.5159,  0.1767, -0.3482]])

### Step 4: Predicted class

In [16]:
# CodeGrade step0

# Predicted classes

# Inference only, so the forward pass runs without gradient tracking
with torch.no_grad():
    sample_output = model(sample_input)

# Index of the largest logit along the class axis, as a plain Python int
predicted_class = sample_output.argmax(dim=1).item()


Use a boolean to determine whether the true label in `y1_test` for the same datum above matches `predicted_class`.

In [17]:
# CodeGrade step4

# Test sample class prediction
# .item() turns the 0-d label tensor into a Python int, so the comparison yields a plain bool
y1_test_t[sample_idx].item() == predicted_class

True

## Part 2: Regression

Define the MLP model for regression

In [18]:
# CodeGrade step0

# MLP Model for Regression
class DiabetesMLP(nn.Module):
    """Single-hidden-layer perceptron that outputs one real value per sample.

    Args:
        input_size: number of input features per sample.
        hidden_size: width of the hidden layer.
    """

    def __init__(self, input_size=10, hidden_size=32):
        super().__init__()
        # Layers are created in this order: input -> hidden, then hidden -> single output
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the predicted target for each row of ``x`` as a column of shape (batch, 1)."""
        hidden = F.relu(self.fc1(x))
        # The output stays linear: a regression target is not squashed by an activation
        prediction = self.fc2(hidden)
        return prediction

### Step 5: Model, loss, optimizer

* Let `model_reg` be an instance of `DiabetesMLP`.
* Let `criterion_reg` be the MSE loss from `nn` (`nn.MSELoss`).
* Let `optimizer_reg` be the Adam optimizer over `model_reg.parameters()` with the learning rate set to 0.01.

Return these three.

In [19]:
# CodeGrade step5

# Initialize model, loss function, and optimizer
model_reg = DiabetesMLP()      # default layer sizes
criterion_reg = nn.MSELoss()   # mean squared error between predictions and targets
optimizer_reg = optim.Adam(params=model_reg.parameters(), lr=1e-2)

# Last expression of the cell, so all three objects are displayed
(model_reg, criterion_reg, optimizer_reg)

(DiabetesMLP(
   (fc1): Linear(in_features=10, out_features=32, bias=True)
   (fc2): Linear(in_features=32, out_features=1, bias=True)
 ),
 MSELoss(),
 Adam (
 Parameter Group 0
     amsgrad: False
     betas: (0.9, 0.999)
     capturable: False
     differentiable: False
     eps: 1e-08
     foreach: None
     fused: None
     lr: 0.01
     maximize: False
     weight_decay: 0
 ))

Training and model evaluation

In [20]:
# CodeGrade step0

# Training the MLP regression model
# Pair each feature row with its target and serve them in shuffled mini-batches of 16
train_ds_reg = TensorDataset(X2_train_t, y2_train_t)
train_loader_reg = DataLoader(train_ds_reg, batch_size=16, shuffle=True)

# NOTE(review): `model_reg` and `optimizer_reg` come from an earlier cell, so re-running
# this cell on its own continues training from the current weights instead of restarting.
# `num_epochs`, `epoch`, `batch_x`, `batch_y`, `outputs` and `loss` reuse the names from
# the classification training cell and overwrite those values.
num_epochs = 50
for epoch in range(num_epochs):
    for batch_x, batch_y in train_loader_reg:
        optimizer_reg.zero_grad()               # clear gradients left from the previous batch
        outputs = model_reg(batch_x)            # forward pass: one predicted value per sample
        loss = criterion_reg(outputs, batch_y)  # mean squared error against the targets
        loss.backward()                         # backpropagate to fill in parameter gradients
        optimizer_reg.step()                    # Adam update of the parameters

# Per-epoch progress printing is kept disabled: the notebook text notes that
# printing the epochs leads to a CodeGrade error.
  #  if (epoch+1) % 10 == 0:
  #      print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Evaluate the model
model_reg.eval()
with torch.no_grad():
    predictions = model_reg(X2_test_t)
    # Mean squared error through the training criterion
    test_mse = criterion_reg(predictions, y2_test_t).item()
    # Mean absolute error through the functional L1 loss (default mean reduction)
    mae_loss = F.l1_loss(predictions, y2_test_t).item()




### Step 6: Metrics

Rounding both to two decimal places, return the boolean result of checking whether `test_mse` equals `mae_loss`.

In [21]:
# CodeGrade step6

# Metrics
# Compare the two error measures after rounding each to two decimals
round(mae_loss, 2) == round(test_mse, 2)


False

### Step 7: Sample Prediction


* Here it’s the first test sample (`sample_idx = 0`).
* Use `.unsqueeze(0)` again to form a single-sample batch.
* Return `sample_input`.



In [22]:
# CodeGrade step7

# Make a sample prediction
sample_idx = 0  # first row of the regression test split
# unsqueeze(0) adds a leading batch dimension of size 1 to the selected row
sample_input = X2_test_t[sample_idx].unsqueeze(0)
sample_input


tensor([[ 0.9488, -0.9422, -0.1682, -0.3549,  2.6316,  2.6492,  0.4295,  0.7110,
          0.6536, -0.1472]])

In [23]:
# CodeGrade step0

model_reg.eval()  # make sure the model is in inference mode before predicting
with torch.no_grad():  # inference only: skip gradient tracking
    # .item() converts the single-element output tensor into a plain Python float
    sample_output = model_reg(sample_input).item()

### Step 8: Sample output

Return `y2_test_t[sample_idx]`

In [24]:
# CodeGrade step8

# Sample output
# Ground-truth target for the selected test row; it is a one-element tensor
# because the targets were reshaped to a column with .view(-1, 1)
y2_test_t[sample_idx]

tensor([219.])

Return `sample_output`

In [25]:
# Sample output
# Model prediction for the same test row, stored as a Python float by the earlier .item() call

sample_output

156.84930419921875