# Project 4: Introduction to Deep Learning
## Name - Mahvash Maghrabi

 ## Task 2 - Experiment with Network Variations

In [212]:
# Importing necessary libraries
import torch
import torchvision
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from torch import nn, optim
from torch.utils.data import DataLoader, random_split
from torchvision.transforms import transforms
import copy
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.metrics import accuracy_score

In [181]:
# Loading the MNIST Fashion dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
trainset = torchvision.datasets.FashionMNIST(root='/Users/mahvashmaghrabi/Desktop/CS6140/MNISTFashion', train=True, download=True, transform=transform)
testset = torchvision.datasets.FashionMNIST(root='/Users/mahvashmaghrabi/Desktop/CS6140/MNISTFashion', train=False, download=True, transform=transform)


In [182]:
# Building the network
# NetEstimator class extends Net class which is a neural network model
class NetEstimator(Net, BaseEstimator):
    def __init__(self, n_conv_layers, conv_filter_size, n_conv_filters, dense_nodes, dropout_rate, pool_filter_size, activation_func, batch_size=32, epochs=10, learning_rate=0.001):
        super(NetEstimator, self).__init__(n_conv_layers, conv_filter_size, n_conv_filters, dense_nodes, dropout_rate, pool_filter_size, activation_func)
        self.batch_size = batch_size
        self.epochs = epochs
        self.learning_rate = learning_rate
# This function trains the neural network using Adam optimizer and cross-entropy loss function    
    def fit(self, X, y=None):
        optimizer = optim.Adam(self.parameters(), lr=self.learning_rate)
        lossfunction = nn.CrossEntropyLoss()
        train_loader = DataLoader(X, batch_size=self.batch_size, shuffle=True)
        
        for epoch in range(self.epochs):
            for i, data in enumerate(train_loader, 0):
                inputs, labels = data
                optimizer.zero_grad()
                outputs = self(inputs)
                loss = lossfunction(outputs, labels)
                loss.backward()
                optimizer.step()
                
# This function makes predictions on the input data using the trained model                                
    def predict(self, X):
        test_loader = DataLoader(X, batch_size=self.batch_size, shuffle=False)
        with torch.no_grad():
            predictions = []
            for inputs in test_loader:
                outputs = self(inputs)
                _, predicted = torch.max(outputs.data, 1)
                predictions.append(predicted.numpy())
            predictions = np.concatenate(predictions)
        return predictions

# This function calculates the accuracy of the model predictions
    def score(self, X, y):
        y_pred = self.predict(X)
        return accuracy_score(y, y_pred)

# This function returns a dictionary of the hyperparameters     
    def get_params(self, deep=True):
        return {
            'n_conv_layers': self.n_conv_layers,
            'conv_filter_size': self.conv_filter_size,
            'n_conv_filters': self.n_conv_filters,
            'dense_nodes': self.dense_nodes,
            'dropout_rate': self.dropout_rate,
            'pool_filter_size': self.pool_filter_size,
            'activation_func': self.activation_func,
            'batch_size': self.batch_size,
            'epochs': self.epochs,
            'learning_rate': self.learning_rate
        }


In [183]:
# Defining the parameter grid for hyperparameter tuning of GridSearch CV
param_grid = {
    'n_conv_layers': [2, 3],
    'conv_filter_size': [3, 5],
    'n_conv_filters': [16, 32],
    'dense_nodes': [128, 256],
    'dropout_rate': [0.1, 0.3],
    'pool_filter_size': [2],
    'activation_func': ['relu', 'sigmoid', 'tanh']
}

In [184]:
# Defining the cross-validation value
cv = 3

In [185]:
# Defining the data loaders for the original dataset
train_loader = DataLoader(trainset, batch_size=100, shuffle=True)
test_loader = DataLoader(testset, batch_size=100, shuffle=True)

# Splitting the training set into training sets and validation sets
train_size = int(0.8 * len(trainset))
val_size = len(trainset) - train_size
trainset, val_dataset = random_split(trainset, [train_size, val_size])

# Defining the data loaders for both the training and validation sets
train_loader = DataLoader(trainset, batch_size=100, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=100, shuffle=True)

In [186]:
# Defining the training function
def train(model, train_loader, val_loader, criterion, optimizer, epochs):
    train_losses = []
    val_losses = []
    for epoch in range(epochs):
        train_loss = 0.0
        val_loss = 0.0
        model.train()
        for data, target in train_loader:
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * data.size(0)
        
        model.eval()
        for data, target in val_loader:
            output = model(data)
            loss = criterion(output, target)
            val_loss += loss.item() * data.size(0)
        
        train_loss = train_loss / len(train_loader.dataset)
        val_loss = val_loss / len(val_loader.dataset)
        
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(epoch+1, train_loss, val_loss))
    
    return train_losses, val_losses

In [187]:
# Defining the test function
def test(model, test_loader):
    model.eval()
    test_acc = 0.0
    for data, target in test_loader:
        output = model(data)
        _, pred = torch.max(output, 1)
        test_acc += accuracy_score(target.numpy(), pred.numpy(), normalize=False)
    
    test_acc = test_acc / len(test_loader.dataset)
    print('Test Accuracy: {:.6f}%'.format(100*test_acc))
    
    return test_acc


In [188]:
# Creating the neural network model
model = Net(n_conv_layers=2, conv_filter_size=3, n_conv_filters=16, dense_nodes=128, dropout_rate=0.1, pool_filter_size=2, activation_func='relu')

In [189]:
# Defining the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [190]:
# Training the model
train_losses, val_losses = train(model, train_loader, val_loader, criterion, optimizer, epochs=10)

Epoch: 1 	Training Loss: 0.637946 	Validation Loss: 0.445476
Epoch: 2 	Training Loss: 0.411614 	Validation Loss: 0.380074
Epoch: 3 	Training Loss: 0.358836 	Validation Loss: 0.346245
Epoch: 4 	Training Loss: 0.329728 	Validation Loss: 0.348336
Epoch: 5 	Training Loss: 0.305989 	Validation Loss: 0.319920
Epoch: 6 	Training Loss: 0.286590 	Validation Loss: 0.313371
Epoch: 7 	Training Loss: 0.275302 	Validation Loss: 0.296243
Epoch: 8 	Training Loss: 0.261209 	Validation Loss: 0.296186
Epoch: 9 	Training Loss: 0.251139 	Validation Loss: 0.290870
Epoch: 10 	Training Loss: 0.238688 	Validation Loss: 0.288106


In [191]:
# Testing the model accuracy
test_acc = test(model, test_loader)
#print('Test Accuracy: {:.6f}'.format(test_acc))

Test Accuracy: 89.810000%


In [192]:
# Defining GridSearchCV
grid_search = GridSearchCV(estimator=NetEstimator(n_conv_layers, conv_filter_size, n_conv_filters, dense_nodes, dropout_rate, pool_filter_size, activation_func),
                           param_grid=param_grid,
                           scoring='accuracy',
                           cv=cv,
                           n_jobs=-1)


In [193]:
# Fitting the Grid Search CV
grid_search.fit(trainset)

288 fits failed out of a total of 288.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
72 fits failed with the following error:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/model_selection/_validation.py", line 678, in _fit_and_score
    estimator.fit(X_train, **fit_params)
  File "/var/folders/qf/z5th9xgd4vsfl5hf0qbyntdr0000gn/T/ipykernel_4425/1980059986.py", line 17, in fit
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/var/folders/qf/z5th9xgd4vsfl5hf0qbyntdr0000gn/T/ipykernel_4425/429576448.py", line 36, in for

GridSearchCV(cv=3,
             estimator=NetEstimator(activation_func='relu', conv_filter_size=3, dense_nodes=64, dropout_rate=0.1, n_conv_filters=64, n_conv_layers=3, pool_filter_size=3),
             n_jobs=-1,
             param_grid={'activation_func': ['relu', 'sigmoid', 'tanh'],
                         'conv_filter_size': [3, 5], 'dense_nodes': [128, 256],
                         'dropout_rate': [0.1, 0.3], 'n_conv_filters': [16, 32],
                         'n_conv_layers': [2, 3], 'pool_filter_size': [2]},
             scoring='accuracy')

In [195]:
# Printing the best hyperparameters
print("Best hyperparameters: ", grid_search.best_params_)

Best hyperparameters:  {'activation_func': 'relu', 'conv_filter_size': 3, 'dense_nodes': 128, 'dropout_rate': 0.1, 'n_conv_filters': 16, 'n_conv_layers': 2, 'pool_filter_size': 2}


In [210]:
# Training the model using the best hyperparameter values from Grid Search CV
# Defining the neural network
model = Net(n_conv_layers=2, conv_filter_size=3, n_conv_filters=16,
            pool_filter_size=2, dense_nodes=128, dropout_rate=0.1,
            activation_func='relu')

# Defining the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the network with number of epochs = 20
num_epochs = 20
best_val_acc = 0.0
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for i, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    train_loss = running_loss / len(train_loader)
    train_acc = 100 * correct / total

# Evaluating the performance
    val_loss, val_acc = test(model, val_loader)


# Saving the best model based on accuracy
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model = copy.deepcopy(model.state_dict())

    print(f"Epoch [{epoch+1}/{num_epochs}] Train Loss: {train_loss:.4f} "
          f"Train Acc: {train_acc:.2f}% Val Loss: {val_loss:.4f} Val Acc: {val_acc:.2f}%")

Epoch [1/20] Train Loss: 0.6557 Train Acc: 76.00% Val Loss: 0.4652 Val Acc: 82.99%
Epoch [2/20] Train Loss: 0.4204 Train Acc: 84.59% Val Loss: 0.3890 Val Acc: 85.70%
Epoch [3/20] Train Loss: 0.3634 Train Acc: 86.75% Val Loss: 0.3506 Val Acc: 86.86%
Epoch [4/20] Train Loss: 0.3283 Train Acc: 87.96% Val Loss: 0.3295 Val Acc: 87.58%
Epoch [5/20] Train Loss: 0.3035 Train Acc: 88.90% Val Loss: 0.3403 Val Acc: 87.07%
Epoch [6/20] Train Loss: 0.2864 Train Acc: 89.54% Val Loss: 0.3173 Val Acc: 88.08%
Epoch [7/20] Train Loss: 0.2697 Train Acc: 90.00% Val Loss: 0.3100 Val Acc: 88.56%
Epoch [8/20] Train Loss: 0.2617 Train Acc: 90.42% Val Loss: 0.3000 Val Acc: 88.78%
Epoch [9/20] Train Loss: 0.2487 Train Acc: 90.75% Val Loss: 0.2884 Val Acc: 89.28%
Epoch [10/20] Train Loss: 0.2358 Train Acc: 91.32% Val Loss: 0.2885 Val Acc: 89.60%
Epoch [11/20] Train Loss: 0.2279 Train Acc: 91.62% Val Loss: 0.2894 Val Acc: 89.33%
Epoch [12/20] Train Loss: 0.2192 Train Acc: 91.82% Val Loss: 0.2977 Val Acc: 89.28%
E

In [213]:
#  Printing the Test Accuracy
model.load_state_dict(best_model)
test_loss, test_acc = test(model, val_loader)
print(f"Test Loss: {test_loss:.4f} Test Accuracy: {test_acc:.2f}%")

Test Loss: 0.2838 Test Accuracy: 90.19%
