<a href="https://colab.research.google.com/github/pradeepfckumar/ANN-Using-Pytorch---MNIST-Fashion-Model-Training/blob/main/ANN_Using_Pytorch_MNIST_Fashion_Model_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This Google Colab notebook implements a basic neural network using PyTorch to classify images from the Fashion MNIST dataset. It covers the following steps:

- Data Loading and Preprocessing: Reads the Fashion-MNIST dataset with pandas, splits it into training and testing sets, and scales the pixel values.
- Custom Dataset and DataLoader: Defines a custom PyTorch Dataset and wraps it in DataLoaders for efficient batched data handling during training.
- Model Definition: Creates a simple feed-forward neural network (mynn) with Linear layers and ReLU activations.
- Training Loop: Trains the model using CrossEntropyLoss as the loss function and SGD as the optimizer, iterating for a specified number of epochs.
- Evaluation: Evaluates the trained model's accuracy on both the training and testing datasets.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, dataloader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

In [None]:
# Load the Fashion-MNIST training CSV into a DataFrame and display it.
csv_path = r'/content/fashion-mnist_train.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59996,1,0,0,0,0,0,0,0,0,0,...,73,0,0,0,0,0,0,0,0,0
59997,8,0,0,0,0,0,0,0,0,0,...,160,162,163,135,94,0,0,0,0,0
59998,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Pick the compute device: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
print(device)

cuda


In [None]:
# Preview the first rows (head() defaults to five) to sanity-check the columns.
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Separate the target column from the feature columns, both as NumPy arrays,
# so they can be handed to train_test_split.

y = df['label'].values
x = df.drop(columns='label').values

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
split = train_test_split(x, y, test_size=0.2, random_state=42)
x_train, x_test, y_train, y_test = split

In [None]:
# Scale the features by dividing by 255 (presumably the maximum raw pixel
# value, which would map inputs into [0, 1] -- confirm against the data).
x_train, x_test = x_train / 255.0, x_test / 255.0

In [None]:
# Creating Custom Dataset Class

class CustomDataset(Dataset):
  """In-memory dataset that pairs each feature row with its label.

  Args:
    features: Array-like of feature rows; stored as a float32 tensor.
    labels: Array-like of class labels; stored as an int64 (long) tensor.
  """

  def __init__(self,features,labels):
    # Convert once up front so per-item access is plain tensor indexing.
    self.features = torch.tensor(features, dtype=torch.float32)
    self.labels = torch.tensor(labels, dtype=torch.long)

  def __len__(self):
    """Return the number of samples, i.e. the first dimension of ``features``."""
    return self.features.size(0)

  def __getitem__(self,index):
    """Return the ``(features, label)`` pair stored at ``index``."""
    sample = self.features[index]
    target = self.labels[index]
    return sample, target

In [None]:
# Wrap the training split in the tensor-backed dataset.
train_dataset = CustomDataset(features=x_train, labels=y_train)

In [None]:
# Wrap the held-out split in the tensor-backed dataset.
test_dataset = CustomDataset(features=x_test, labels=y_test)

In [None]:
# Batch both datasets in groups of 32. Only the training loader shuffles, so
# evaluation on the test loader always visits samples in the same order.
#
# Pinned (page-locked) host memory only speeds up host-to-GPU copies, so it is
# requested only when CUDA is present; asking for it on a CPU-only machine
# makes DataLoader emit a warning and brings no benefit.
use_pinned_memory = torch.cuda.is_available()
train_loader = dataloader.DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=use_pinned_memory)
test_loader = dataloader.DataLoader(test_dataset, batch_size=32, shuffle=False, pin_memory=use_pinned_memory)

In [None]:
# Dropout randomly deactivates a fraction of neurons during training to regularize the model.
# Batch normalization normalizes the activations flowing into a hidden layer.
from torch.nn.modules.linear import Linear
class mynn(nn.Module):
  """Fully connected classifier: num_features -> 128 -> 64 -> 10, ReLU between layers.

  Args:
    num_features: Size of the last dimension of each input sample.
  """

  def __init__(self,num_features):
    super().__init__()

    # The BatchNorm1d / Dropout(p=0.3) layers that could follow each hidden
    # Linear / ReLU are currently disabled; only Linear and ReLU are stacked.
    layers = [
        nn.Linear(num_features,128),
        nn.ReLU(),
        nn.Linear(128,64),
        nn.ReLU(),
        nn.Linear(64,10),
    ]
    self.model = nn.Sequential(*layers)

  def forward(self,x):
    """Return the raw output scores (10 per sample); no softmax is applied here."""
    logits = self.model(x)
    return logits

In [None]:
# Step size handed to the SGD optimizer.
learning_rate: float = 0.1
# Number of full passes over the training data made by the training loop.
epoch: int = 100

In [None]:
# Build the network sized to the training feature width and place it on the chosen device.
model = mynn(x_train.shape[1]).to(device)

# Cross-entropy loss, applied to the network's raw output scores.
criterion = nn.CrossEntropyLoss()

# Plain SGD over all model parameters.
# (Pass weight_decay=1e-4 as well to add L2 regularization.)
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)



In [None]:
from torch._higher_order_ops.invoke_subgraph import OutputMetadata
# Training Loop
#
# Runs `epoch` passes of mini-batch gradient descent over train_loader and
# prints the mean per-batch loss after each pass.

# Make sure training-mode behaviour is active: a previous model.eval() call
# (for example from re-running cells out of order) would otherwise persist.
model.train()

# A dedicated loop variable keeps the `epoch` hyperparameter intact, so the
# configured number of epochs is still used if this code is run again.
for epoch_idx in range(epoch):

  total_epoch_loss = 0

  for batch_features, batch_labels in train_loader:

    # Move the batch to the selected device
    batch_features = batch_features.to(device)
    batch_labels = batch_labels.to(device)

    # forward pass
    output = model(batch_features)

    # loss
    loss = criterion(output, batch_labels)

    # backward: clear gradients left from the previous step, then backpropagate
    optimizer.zero_grad()
    loss.backward()

    # update weights
    optimizer.step()

    # .item() yields a plain Python number, so only the value is accumulated
    total_epoch_loss += loss.item()

  avg_loss = total_epoch_loss/len(train_loader)
  print(f'Epoch: {epoch_idx+1}, loss:{avg_loss}')

In [None]:
# Switch the model to evaluation mode before measuring accuracy.
model.eval()

mynn(
  (model): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.3, inplace=False)
    (8): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [None]:
# Evaluation on Training Data
# Accuracy = correctly classified samples / samples seen, accumulated per batch.

correct = 0
total = 0

# Gradients are not needed for inference
with torch.no_grad():

  for batch_features, batch_labels in train_loader:

    # move the batch to the selected device
    batch_features = batch_features.to(device)
    batch_labels = batch_labels.to(device)

    output = model(batch_features)

    # predicted class = index of the largest score in each row
    predicted = output.argmax(dim=1)

    total += batch_labels.size(0)
    correct += (predicted == batch_labels).sum().item()

  print(correct/total)

0.8657916666666666


In [None]:
# Evaluation on Testing Data
# Accuracy = correctly classified samples / samples seen, accumulated per batch.
correct = 0
total = 0

# Gradients are not needed for inference
with torch.no_grad():

  for batch_features, batch_labels in test_loader:

    # move the batch to the selected device
    batch_features = batch_features.to(device)
    batch_labels = batch_labels.to(device)

    output = model(batch_features)

    # predicted class = index of the largest score in each row
    predicted = output.argmax(dim=1)

    total += batch_labels.size(0)
    correct += (predicted == batch_labels).sum().item()

  print(correct/total)

0.97025
