In [23]:
import pandas as pd

# Load the dataset
file_path = "heart.csv"
data = pd.read_csv(file_path)

# Display the first few rows of the dataset to understand its structure
data.head()





Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [24]:
# Check for missing values
missing_values = data.isnull().sum()

# Display missing values in the dataset
missing_values


age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [25]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split the dataset into features (X) and target (y)
X = data.drop(columns=['target'])  # Features
y = data['target']  # Target label

# Standardize the features using StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

# Check the shapes of the resulting splits
X_train.shape, X_test.shape, y_train.shape, y_test.shape


((820, 13), (205, 13), (820,), (205,))

In [27]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Convert the data into PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

# Create DataLoader for batching
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define a function to create an MLP model dynamically
class MLPModel(nn.Module):
    def __init__(self, input_size, hidden_layers):
        super(MLPModel, self).__init__()
        layers = []
        current_size = input_size
        for neurons in hidden_layers:
            layers.append(nn.Linear(current_size, neurons))
            layers.append(nn.ReLU())  # Activation function
            current_size = neurons
        layers.append(nn.Linear(current_size, 2))  # Output layer (binary classification)
        self.model = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.model(x)

# Function to train the model
def train_model(model, train_loader, criterion, optimizer, epochs=20):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}")

# Function to evaluate the model
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
    accuracy = correct / total
    print(f"Accuracy: {accuracy:.4f}")
    return accuracy

# Experiment with 1 hidden layer (16 neurons)
input_size = X_train.shape[1]
hidden_layers = [16]

mlp_model = MLPModel(input_size, hidden_layers)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(mlp_model.parameters(), lr=0.001)

print("Training MLP with 1 hidden layer (16 neurons)...")
train_model(mlp_model, train_loader, criterion, optimizer)
print("Evaluating MLP with 1 hidden layer (16 neurons)...")
accuracy_1_hidden = evaluate_model(mlp_model, test_loader)


Training MLP with 1 hidden layer (16 neurons)...
Epoch 1/20, Loss: 0.6907
Epoch 2/20, Loss: 0.6006
Epoch 3/20, Loss: 0.5343
Epoch 4/20, Loss: 0.4777
Epoch 5/20, Loss: 0.4341
Epoch 6/20, Loss: 0.4029
Epoch 7/20, Loss: 0.3838
Epoch 8/20, Loss: 0.3696
Epoch 9/20, Loss: 0.3605
Epoch 10/20, Loss: 0.3521
Epoch 11/20, Loss: 0.3456
Epoch 12/20, Loss: 0.3375
Epoch 13/20, Loss: 0.3341
Epoch 14/20, Loss: 0.3283
Epoch 15/20, Loss: 0.3263
Epoch 16/20, Loss: 0.3201
Epoch 17/20, Loss: 0.3195
Epoch 18/20, Loss: 0.3132
Epoch 19/20, Loss: 0.3132
Epoch 20/20, Loss: 0.3097
Evaluating MLP with 1 hidden layer (16 neurons)...
Accuracy: 0.8537


In [29]:
# Experiment with 2 hidden layers (16, 32 neurons)
hidden_layers_2 = [16, 32]

mlp_model_2 = MLPModel(input_size, hidden_layers_2)
optimizer_2 = optim.Adam(mlp_model_2.parameters(), lr=0.001)

print("\nTraining MLP with 2 hidden layers (16, 32 neurons)...")
train_model(mlp_model_2, train_loader, criterion, optimizer_2)
print("Evaluating MLP with 2 hidden layers (16, 32 neurons)...")
accuracy_2_hidden = evaluate_model(mlp_model_2, test_loader)

# Experiment with 3 hidden layers (16, 32, 64 neurons)
hidden_layers_3 = [16, 32, 64]

mlp_model_3 = MLPModel(input_size, hidden_layers_3)
optimizer_3 = optim.Adam(mlp_model_3.parameters(), lr=0.001)

print("\nTraining MLP with 3 hidden layers (16, 32, 64 neurons)...")
train_model(mlp_model_3, train_loader, criterion, optimizer_3)
print("Evaluating MLP with 3 hidden layers (16, 32, 64 neurons)...")
accuracy_3_hidden = evaluate_model(mlp_model_3, test_loader)

# Compare accuracies
accuracy_1_hidden, accuracy_2_hidden, accuracy_3_hidden



Training MLP with 2 hidden layers (16, 32 neurons)...
Epoch 1/20, Loss: 0.6722
Epoch 2/20, Loss: 0.6259
Epoch 3/20, Loss: 0.5509
Epoch 4/20, Loss: 0.4560
Epoch 5/20, Loss: 0.3760
Epoch 6/20, Loss: 0.3363
Epoch 7/20, Loss: 0.3166
Epoch 8/20, Loss: 0.3063
Epoch 9/20, Loss: 0.2944
Epoch 10/20, Loss: 0.2865
Epoch 11/20, Loss: 0.2817
Epoch 12/20, Loss: 0.2739
Epoch 13/20, Loss: 0.2679
Epoch 14/20, Loss: 0.2634
Epoch 15/20, Loss: 0.2571
Epoch 16/20, Loss: 0.2520
Epoch 17/20, Loss: 0.2472
Epoch 18/20, Loss: 0.2433
Epoch 19/20, Loss: 0.2335
Epoch 20/20, Loss: 0.2313
Evaluating MLP with 2 hidden layers (16, 32 neurons)...
Accuracy: 0.8732

Training MLP with 3 hidden layers (16, 32, 64 neurons)...
Epoch 1/20, Loss: 0.6512
Epoch 2/20, Loss: 0.5154
Epoch 3/20, Loss: 0.3914
Epoch 4/20, Loss: 0.3629
Epoch 5/20, Loss: 0.3339
Epoch 6/20, Loss: 0.3229
Epoch 7/20, Loss: 0.3091
Epoch 8/20, Loss: 0.2937
Epoch 9/20, Loss: 0.2771
Epoch 10/20, Loss: 0.2678
Epoch 11/20, Loss: 0.2496
Epoch 12/20, Loss: 0.2377

(0.8536585365853658, 0.8731707317073171, 0.9512195121951219)

LINEAR

In [31]:
# Modify the MLPModel class to allow dynamic activation functions
class MLPModelWithActivation(nn.Module):
    def __init__(self, input_size, hidden_layers, activation_fn):
        super(MLPModelWithActivation, self).__init__()
        layers = []
        current_size = input_size
        for neurons in hidden_layers:
            layers.append(nn.Linear(current_size, neurons))
            layers.append(activation_fn())  # Use the dynamic activation function
            current_size = neurons
        layers.append(nn.Linear(current_size, 2))  # Output layer (binary classification)
        self.model = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.model(x)

# Define a function to compare activation functions
activation_functions = {
    "Linear": nn.Identity,  # Linear activation (Identity)
    "Sigmoid": nn.Sigmoid,
    "ReLU": nn.ReLU,
    "Softmax": lambda: nn.Softmax(dim=1),  # Softmax along the output dimension
    "Tanh": nn.Tanh,
}

results = {}

for name, activation_fn in activation_functions.items():
    print(f"\nTraining MLP with {name} activation function...")
    model = MLPModelWithActivation(input_size, [16, 32, 64], activation_fn)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    train_model(model, train_loader, criterion, optimizer, epochs=20)
    print(f"Evaluating MLP with {name} activation function...")
    accuracy = evaluate_model(model, test_loader)
    results[name] = accuracy

# Display the results
results



Training MLP with Linear activation function...
Epoch 1/20, Loss: 0.5665
Epoch 2/20, Loss: 0.4077
Epoch 3/20, Loss: 0.3673
Epoch 4/20, Loss: 0.3619
Epoch 5/20, Loss: 0.3572
Epoch 6/20, Loss: 0.3618
Epoch 7/20, Loss: 0.3630
Epoch 8/20, Loss: 0.3581
Epoch 9/20, Loss: 0.3568
Epoch 10/20, Loss: 0.3573
Epoch 11/20, Loss: 0.3585
Epoch 12/20, Loss: 0.3578
Epoch 13/20, Loss: 0.3565
Epoch 14/20, Loss: 0.3584
Epoch 15/20, Loss: 0.3577
Epoch 16/20, Loss: 0.3575
Epoch 17/20, Loss: 0.3539
Epoch 18/20, Loss: 0.3580
Epoch 19/20, Loss: 0.3584
Epoch 20/20, Loss: 0.3555
Evaluating MLP with Linear activation function...
Accuracy: 0.8098

Training MLP with Sigmoid activation function...
Epoch 1/20, Loss: 0.6969
Epoch 2/20, Loss: 0.6915
Epoch 3/20, Loss: 0.6903
Epoch 4/20, Loss: 0.6852
Epoch 5/20, Loss: 0.6724
Epoch 6/20, Loss: 0.6477
Epoch 7/20, Loss: 0.5884
Epoch 8/20, Loss: 0.5022
Epoch 9/20, Loss: 0.4269
Epoch 10/20, Loss: 0.3887
Epoch 11/20, Loss: 0.3746
Epoch 12/20, Loss: 0.3673
Epoch 13/20, Loss: 0

{'Linear': 0.8097560975609757,
 'Sigmoid': 0.8146341463414634,
 'ReLU': 0.9463414634146341,
 'Softmax': 0.824390243902439,
 'Tanh': 0.8780487804878049}

SIgmoid

In [32]:
# Redefine the necessary imports and classes to ensure the environment is ready
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Redefine the class to allow dynamic activation functions
class MLPModelWithActivation(nn.Module):
    def __init__(self, input_size, hidden_layers, activation_fn):
        super(MLPModelWithActivation, self).__init__()
        layers = []
        current_size = input_size
        for neurons in hidden_layers:
            layers.append(nn.Linear(current_size, neurons))
            layers.append(activation_fn())  # Use the dynamic activation function
            current_size = neurons
        layers.append(nn.Linear(current_size, 2))  # Output layer (binary classification)
        self.model = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.model(x)

# Redefine the training and evaluation functions
def train_model(model, train_loader, criterion, optimizer, epochs=20):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}")

def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
    accuracy = correct / total
    print(f"Accuracy: {accuracy:.4f}")
    return accuracy

# Define activation functions
activation_functions = {
    "Linear": nn.Identity,  # Linear activation (Identity)
    "Sigmoid": nn.Sigmoid,
    "ReLU": nn.ReLU,
    "Softmax": lambda: nn.Softmax(dim=1),  # Softmax along the output dimension
    "Tanh": nn.Tanh,
}

# Prepare to store results
results = {}

# Input size and hidden layers configuration
input_size = X_train.shape[1]
hidden_layers_config = [16, 32, 64]

# Loop through activation functions and train/evaluate models
for name, activation_fn in activation_functions.items():
    print(f"\nTraining MLP with {name} activation function...")
    model = MLPModelWithActivation(input_size, hidden_layers_config, activation_fn)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    train_model(model, train_loader, nn.CrossEntropyLoss(), optimizer, epochs=20)
    print(f"Evaluating MLP with {name} activation function...")
    accuracy = evaluate_model(model, test_loader)
    results[name] = accuracy

# Display the results
results



Training MLP with Linear activation function...
Epoch 1/20, Loss: 0.5676
Epoch 2/20, Loss: 0.3865
Epoch 3/20, Loss: 0.3676
Epoch 4/20, Loss: 0.3612
Epoch 5/20, Loss: 0.3595
Epoch 6/20, Loss: 0.3570
Epoch 7/20, Loss: 0.3662
Epoch 8/20, Loss: 0.3584
Epoch 9/20, Loss: 0.3547
Epoch 10/20, Loss: 0.3564
Epoch 11/20, Loss: 0.3543
Epoch 12/20, Loss: 0.3593
Epoch 13/20, Loss: 0.3593
Epoch 14/20, Loss: 0.3583
Epoch 15/20, Loss: 0.3617
Epoch 16/20, Loss: 0.3558
Epoch 17/20, Loss: 0.3557
Epoch 18/20, Loss: 0.3567
Epoch 19/20, Loss: 0.3546
Epoch 20/20, Loss: 0.3579
Evaluating MLP with Linear activation function...
Accuracy: 0.8244

Training MLP with Sigmoid activation function...
Epoch 1/20, Loss: 0.7015
Epoch 2/20, Loss: 0.6934
Epoch 3/20, Loss: 0.6884
Epoch 4/20, Loss: 0.6798
Epoch 5/20, Loss: 0.6640
Epoch 6/20, Loss: 0.6264
Epoch 7/20, Loss: 0.5590
Epoch 8/20, Loss: 0.4727
Epoch 9/20, Loss: 0.4094
Epoch 10/20, Loss: 0.3835
Epoch 11/20, Loss: 0.3699
Epoch 12/20, Loss: 0.3639
Epoch 13/20, Loss: 0

{'Linear': 0.824390243902439,
 'Sigmoid': 0.8634146341463415,
 'ReLU': 0.9512195121951219,
 'Softmax': 0.8292682926829268,
 'Tanh': 0.8682926829268293}

ReLU

EPOCH

In [33]:
# Define the epochs to test
epoch_values = [1, 10, 25, 50, 100, 250]
epoch_results = {}

# Use the same model configuration with ReLU activation
for epochs in epoch_values:
    print(f"\nTraining MLP with ReLU activation for {epochs} epochs...")
    model = MLPModelWithActivation(input_size, [16, 32, 64], nn.ReLU)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Train the model
    train_model(model, train_loader, nn.CrossEntropyLoss(), optimizer, epochs=epochs)
    print(f"Evaluating MLP with ReLU activation for {epochs} epochs...")
    
    # Evaluate and store the accuracy
    accuracy = evaluate_model(model, test_loader)
    epoch_results[epochs] = accuracy

# Display the results
epoch_results



Training MLP with ReLU activation for 1 epochs...
Epoch 1/1, Loss: 0.6749
Evaluating MLP with ReLU activation for 1 epochs...
Accuracy: 0.7024

Training MLP with ReLU activation for 10 epochs...
Epoch 1/10, Loss: 0.6716
Epoch 2/10, Loss: 0.5805
Epoch 3/10, Loss: 0.4351
Epoch 4/10, Loss: 0.3699
Epoch 5/10, Loss: 0.3438
Epoch 6/10, Loss: 0.3256
Epoch 7/10, Loss: 0.3089
Epoch 8/10, Loss: 0.2952
Epoch 9/10, Loss: 0.2810
Epoch 10/10, Loss: 0.2689
Evaluating MLP with ReLU activation for 10 epochs...
Accuracy: 0.8927

Training MLP with ReLU activation for 25 epochs...
Epoch 1/25, Loss: 0.6799
Epoch 2/25, Loss: 0.6011
Epoch 3/25, Loss: 0.4556
Epoch 4/25, Loss: 0.3725
Epoch 5/25, Loss: 0.3345
Epoch 6/25, Loss: 0.3127
Epoch 7/25, Loss: 0.2954
Epoch 8/25, Loss: 0.2800
Epoch 9/25, Loss: 0.2656
Epoch 10/25, Loss: 0.2508
Epoch 11/25, Loss: 0.2333
Epoch 12/25, Loss: 0.2183
Epoch 13/25, Loss: 0.2063
Epoch 14/25, Loss: 0.1882
Epoch 15/25, Loss: 0.1726
Epoch 16/25, Loss: 0.1554
Epoch 17/25, Loss: 0.141

{1: 0.7024390243902439,
 10: 0.8926829268292683,
 25: 0.9804878048780488,
 50: 1.0,
 100: 1.0,
 250: 1.0}

sds

In [34]:
# Redefine the MLP model with dynamic activation functions
class MLPModelWithActivation(nn.Module):
    def __init__(self, input_size, hidden_layers, activation_fn):
        super(MLPModelWithActivation, self).__init__()
        layers = []
        current_size = input_size
        for neurons in hidden_layers:
            layers.append(nn.Linear(current_size, neurons))
            layers.append(activation_fn())  # Use the dynamic activation function
            current_size = neurons
        layers.append(nn.Linear(current_size, 2))  # Output layer (binary classification)
        self.model = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.model(x)

# Redefine the training and evaluation functions
def train_model(model, train_loader, criterion, optimizer, epochs=20):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}")

def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
    accuracy = correct / total
    print(f"Accuracy: {accuracy:.4f}")
    return accuracy

# Experiment with different epoch values
epoch_values = [1, 10, 25, 50, 100, 250]
epoch_results = {}

# Train and evaluate for each epoch value
for epochs in epoch_values:
    print(f"\nTraining MLP with ReLU activation for {epochs} epochs...")
    model = MLPModelWithActivation(input_size, [16, 32, 64], nn.ReLU)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Train the model
    train_model(model, train_loader, nn.CrossEntropyLoss(), optimizer, epochs=epochs)
    print(f"Evaluating MLP with ReLU activation for {epochs} epochs...")
    
    # Evaluate and store the accuracy
    accuracy = evaluate_model(model, test_loader)
    epoch_results[epochs] = accuracy

# Display the results
epoch_results



Training MLP with ReLU activation for 1 epochs...
Epoch 1/1, Loss: 0.6656
Evaluating MLP with ReLU activation for 1 epochs...
Accuracy: 0.7902

Training MLP with ReLU activation for 10 epochs...
Epoch 1/10, Loss: 0.6712
Epoch 2/10, Loss: 0.5532
Epoch 3/10, Loss: 0.3847
Epoch 4/10, Loss: 0.3421
Epoch 5/10, Loss: 0.3221
Epoch 6/10, Loss: 0.2999
Epoch 7/10, Loss: 0.2900
Epoch 8/10, Loss: 0.2783
Epoch 9/10, Loss: 0.2636
Epoch 10/10, Loss: 0.2496
Evaluating MLP with ReLU activation for 10 epochs...
Accuracy: 0.8683

Training MLP with ReLU activation for 25 epochs...
Epoch 1/25, Loss: 0.6742
Epoch 2/25, Loss: 0.5469
Epoch 3/25, Loss: 0.4020
Epoch 4/25, Loss: 0.3656
Epoch 5/25, Loss: 0.3418
Epoch 6/25, Loss: 0.3273
Epoch 7/25, Loss: 0.3183
Epoch 8/25, Loss: 0.3088
Epoch 9/25, Loss: 0.2966
Epoch 10/25, Loss: 0.2854
Epoch 11/25, Loss: 0.2761
Epoch 12/25, Loss: 0.2659
Epoch 13/25, Loss: 0.2566
Epoch 14/25, Loss: 0.2416
Epoch 15/25, Loss: 0.2266
Epoch 16/25, Loss: 0.2154
Epoch 17/25, Loss: 0.212

{1: 0.7902439024390244,
 10: 0.8682926829268293,
 25: 0.9512195121951219,
 50: 1.0,
 100: 1.0,
 250: 1.0}

learning rate

In [35]:
# Define the learning rates to test
learning_rates = [10, 1, 0.1, 0.01, 0.001, 0.0001]
lr_results = {}

# Train and evaluate for each learning rate
for lr in learning_rates:
    print(f"\nTraining MLP with learning rate {lr}...")
    model = MLPModelWithActivation(input_size, [16, 32, 64], nn.ReLU)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    # Train the model
    train_model(model, train_loader, nn.CrossEntropyLoss(), optimizer, epochs=20)
    print(f"Evaluating MLP with learning rate {lr}...")
    
    # Evaluate and store the accuracy
    accuracy = evaluate_model(model, test_loader)
    lr_results[lr] = accuracy

# Display the results
lr_results



Training MLP with learning rate 10...
Epoch 1/20, Loss: 5238958.3678
Epoch 2/20, Loss: 43164.0615
Epoch 3/20, Loss: 0.7745
Epoch 4/20, Loss: 0.8194
Epoch 5/20, Loss: 0.7642
Epoch 6/20, Loss: 0.7280
Epoch 7/20, Loss: 0.8356
Epoch 8/20, Loss: 1.0800
Epoch 9/20, Loss: 0.9173
Epoch 10/20, Loss: 0.7979
Epoch 11/20, Loss: 0.8378
Epoch 12/20, Loss: 0.8428
Epoch 13/20, Loss: 0.8440
Epoch 14/20, Loss: 0.8834
Epoch 15/20, Loss: 1.1487
Epoch 16/20, Loss: 0.7685
Epoch 17/20, Loss: 0.8320
Epoch 18/20, Loss: 0.9086
Epoch 19/20, Loss: 0.8524
Epoch 20/20, Loss: 0.8273
Evaluating MLP with learning rate 10...
Accuracy: 0.5171

Training MLP with learning rate 1...
Epoch 1/20, Loss: 637.1342
Epoch 2/20, Loss: 16.9691
Epoch 3/20, Loss: 1.3581
Epoch 4/20, Loss: 1.0291
Epoch 5/20, Loss: 0.7654
Epoch 6/20, Loss: 0.6814
Epoch 7/20, Loss: 0.6639
Epoch 8/20, Loss: 35.0437
Epoch 9/20, Loss: 2.4257
Epoch 10/20, Loss: 0.7069
Epoch 11/20, Loss: 0.6973
Epoch 12/20, Loss: 0.6932
Epoch 13/20, Loss: 0.7388
Epoch 14/20,

{10: 0.5170731707317073,
 1: 0.6097560975609756,
 0.1: 0.8878048780487805,
 0.01: 0.9902439024390244,
 0.001: 0.9170731707317074,
 0.0001: 0.8195121951219512}

Batch Size

In [37]:
# Define batch sizes to test
batch_sizes = [16, 32, 64, 128, 256, 512]
batch_results = {}

# Train and evaluate for each batch size
for batch_size in batch_sizes:
    print(f"\nTraining MLP with batch size {batch_size}...")
    
    # Create new DataLoader with the current batch size
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    # Define the model, optimizer, and loss function
    model = MLPModelWithActivation(input_size, [16, 32, 64], nn.ReLU)
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    
    # Train the model
    train_model(model, train_loader, nn.CrossEntropyLoss(), optimizer, epochs=20)
    print(f"Evaluating MLP with batch size {batch_size}...")
    
    # Evaluate and store the accuracy
    accuracy = evaluate_model(model, test_loader)
    batch_results[batch_size] = accuracy

# Display the results
batch_results



Training MLP with batch size 16...
Epoch 1/20, Loss: 0.4724
Epoch 2/20, Loss: 0.3146
Epoch 3/20, Loss: 0.2604
Epoch 4/20, Loss: 0.2278
Epoch 5/20, Loss: 0.1814
Epoch 6/20, Loss: 0.1671
Epoch 7/20, Loss: 0.1339
Epoch 8/20, Loss: 0.0827
Epoch 9/20, Loss: 0.1987
Epoch 10/20, Loss: 0.0871
Epoch 11/20, Loss: 0.0469
Epoch 12/20, Loss: 0.1111
Epoch 13/20, Loss: 0.0918
Epoch 14/20, Loss: 0.0455
Epoch 15/20, Loss: 0.0515
Epoch 16/20, Loss: 0.0630
Epoch 17/20, Loss: 0.0498
Epoch 18/20, Loss: 0.0538
Epoch 19/20, Loss: 0.0387
Epoch 20/20, Loss: 0.0341
Evaluating MLP with batch size 16...
Accuracy: 0.9756

Training MLP with batch size 32...
Epoch 1/20, Loss: 0.4633
Epoch 2/20, Loss: 0.3346
Epoch 3/20, Loss: 0.2800
Epoch 4/20, Loss: 0.2233
Epoch 5/20, Loss: 0.2104
Epoch 6/20, Loss: 0.1895
Epoch 7/20, Loss: 0.1376
Epoch 8/20, Loss: 0.0849
Epoch 9/20, Loss: 0.1118
Epoch 10/20, Loss: 0.1382
Epoch 11/20, Loss: 0.0645
Epoch 12/20, Loss: 0.0431
Epoch 13/20, Loss: 0.0296
Epoch 14/20, Loss: 0.0170
Epoch 15

{16: 0.975609756097561,
 32: 1.0,
 64: 1.0,
 128: 1.0,
 256: 0.9658536585365853,
 512: 0.9170731707317074}