# Building an ANN using PyTorch

# Import Libraries

In [34]:
import pandas as pd
import numpy as np

In [35]:
from sklearn.model_selection import train_test_split

In [36]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [37]:
import kagglehub
import os

# Import Data

In [38]:
# Fetch the Fashion-MNIST dataset through kagglehub (a cached copy is reused
# when one is available). `path` is the local directory that holds the files.
path = kagglehub.dataset_download("zalando-research/fashionmnist")
print("Path to dataset files:", path)

Using Colab cache for faster access to the 'fashionmnist' dataset.
Path to dataset files: /kaggle/input/fashionmnist


In [39]:
# Load the training CSV from the downloaded dataset directory into a DataFrame.
train_csv = f'{path}/fashion-mnist_train.csv'
df = pd.read_csv(train_csv)

# Show (rows, columns) as a quick sanity check on the load.
df.shape

(60000, 785)

In [40]:
# Preview the first five rows: a `label` column followed by the pixel columns.
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Check `GPU` availability

In [41]:
# Pick the compute device once. It is always a torch.device, so downstream code
# sees the same type on CPU-only and GPU machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Smoke test (GPU only): move a small tensor to the device and report where it landed.
if device.type == "cuda":
  x = torch.rand(2, 3)
  x_cuda = x.to(device)
  print(f"Tensor on: {x_cuda.device}")  # e.g. "Tensor on: cuda:0"

device

Tensor on: cuda:0


device(type='cuda')

# Extract Features and Labels

In [42]:
# Split the frame into the target vector and the pixel matrix, both as NumPy arrays.
y = df['label'].to_numpy()
X = df.drop(columns=['label']).to_numpy()

In [43]:
# Peek at 50 raw pixel values (positions 100-149) of the first image, before scaling.
X[0][100:150]

array([136,  61,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,  88, 201, 228, 225, 255, 115,  62,
       137, 255, 235, 222, 255, 135,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,  47, 252, 234, 238, 224])

# Scale Data

In [44]:
# Rescale pixel intensities from [0, 255] to [0, 1].
# The values are recomputed from `df` rather than from `X` itself, so re-running
# this cell cannot divide the already-scaled data by 255 a second time.
X = df.drop('label', axis=1).values / 255.0

# Guard against double scaling or unexpected raw values.
assert X.min() >= 0.0 and X.max() <= 1.0, "pixel values should lie in [0, 1] after scaling"

X[0][100:150]

array([0.53333333, 0.23921569, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.34509804,
       0.78823529, 0.89411765, 0.88235294, 1.        , 0.45098039,
       0.24313725, 0.5372549 , 1.        , 0.92156863, 0.87058824,
       1.        , 0.52941176, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.18431373, 0.98823529, 0.91764706, 0.93333333, 0.87843137])

# Train Test Split

In [45]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=17,
)

# Shapes of the two feature matrices.
X_train.shape, X_test.shape

((48000, 784), (12000, 784))

# `Dataset` Class

In [46]:
class CustomDataset(Dataset):
  """In-memory dataset that pairs each feature row with its class label.

  Args:
    features: array-like (NumPy array, tensor or nested list) indexed by sample
      along its first dimension; stored as a float32 tensor.
    labels: array-like with one integer class id per sample; stored as an
      int64 tensor, the dtype nn.CrossEntropyLoss expects for class indices.

  Raises:
    ValueError: if `features` and `labels` do not contain the same number of samples.
  """

  def __init__(self, features, labels):
    # as_tensor accepts arrays, tensors and lists, and avoids a copy when the
    # input already has the requested dtype.
    self.features = torch.as_tensor(features, dtype=torch.float32)
    self.labels = torch.as_tensor(labels, dtype=torch.long)

    # Fail early instead of raising an IndexError halfway through an epoch.
    if len(self.features) != len(self.labels):
      raise ValueError(
          f"features and labels must have the same length, "
          f"got {len(self.features)} and {len(self.labels)}"
      )

  def __len__(self):
    """Return the number of samples."""
    return len(self.labels)

  def __getitem__(self, index):
    """Return the `(features, label)` pair stored at `index`."""
    return self.features[index], self.labels[index]


In [47]:
# Wrap each split in a CustomDataset so a DataLoader can batch it.
train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)

# DataLoader

In [48]:
# Training batches: 64 samples each, reshuffled every epoch. pin_memory asks
# the loader to place batches in page-locked host memory.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, pin_memory=True)

In [49]:
# Evaluation batches: accuracy does not depend on sample order, so shuffling is
# turned off. This skips the per-pass permutation and keeps the batch order stable.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=64,
    shuffle=False,
    pin_memory=True,
)

# Custom Model

In [54]:
class MyANN(nn.Module):
  """Fully connected classifier: two hidden blocks followed by a linear output layer.

  Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout. The network
  returns raw logits (no softmax), which is the input nn.CrossEntropyLoss expects.

  Args:
    num_features: size of each flattened input vector.
    num_classes: number of output logits. Defaults to 10, the value that was
      previously hard-coded, so existing `MyANN(num_features)` calls are unchanged.
  """

  def __init__(self, num_features, num_classes=10):
    super().__init__()
    self.model = nn.Sequential(

        # Hidden block 1: num_features -> 128, with the heavier dropout
        nn.Linear(in_features=num_features, out_features=128),
        nn.BatchNorm1d(num_features=128),
        nn.ReLU(),
        nn.Dropout(p=0.4),

        # Hidden block 2: 128 -> 64
        nn.Linear(in_features=128, out_features=64),
        nn.BatchNorm1d(num_features=64),
        nn.ReLU(),
        nn.Dropout(p=0.2),

        # Output layer: 64 -> one logit per class
        nn.Linear(in_features=64, out_features=num_classes)
    )

  def forward(self, features):
    """Return logits of shape (batch, num_classes) for a (batch, num_features) input."""
    return self.model(features)

## Set Learning Rate and Epochs

In [55]:
# Training hyperparameters: number of full passes over the training data,
# and the step size handed to the optimizer.
epochs = 25
learning_rate = 0.01

In [56]:
# Seed PyTorch's RNGs before anything random happens, so weight initialisation
# (and the shuffling and dropout that draw from the same generators) repeat on
# Restart & Run All as far as the backend allows. 17 mirrors the random_state
# used for the train/test split.
torch.manual_seed(17)

# Build the network; the input width is the number of feature columns.
model = MyANN(X_train.shape[1])

# Move the parameters to the selected device (GPU when available).
model = model.to(device)

# Multi-class classification loss; takes raw logits and integer class labels.
ce_loss = nn.CrossEntropyLoss()

# Plain SGD with a small L2 penalty (weight_decay) on the parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate, weight_decay=1e-5)

## Training Pipeline

In [57]:
for epoch in range(epochs):
  # Training mode: dropout is active and BatchNorm normalises with batch statistics.
  model.train()
  losses = []

  for batch_features, batch_labels in train_loader:
    # Put the mini-batch on the same device as the model.
    batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

    # Clear gradients left over from the previous step.
    optimizer.zero_grad()

    # Forward pass to logits, then cross-entropy against the integer labels.
    y_pred = model(batch_features)
    loss = ce_loss(y_pred, batch_labels)

    # Backpropagate and apply one optimizer update.
    loss.backward()
    optimizer.step()

    # Keep the scalar batch loss for the epoch summary.
    losses.append(loss.item())

  # Report the mean of the per-batch losses for this epoch.
  avg_loss = np.mean(losses)
  print(f'Epochs: {epoch+1}, Loss: {avg_loss}')

Epochs: 1, Loss: 0.9597236139376958
Epochs: 2, Loss: 0.6105171763499578
Epochs: 3, Loss: 0.5399086424708366
Epochs: 4, Loss: 0.5108404765923817
Epochs: 5, Loss: 0.48462627492348354
Epochs: 6, Loss: 0.46864605889717736
Epochs: 7, Loss: 0.4503606447776159
Epochs: 8, Loss: 0.43997386038303377
Epochs: 9, Loss: 0.43008324390649794
Epochs: 10, Loss: 0.421126886288325
Epochs: 11, Loss: 0.4084388159314791
Epochs: 12, Loss: 0.4068382988969485
Epochs: 13, Loss: 0.3972141807675362
Epochs: 14, Loss: 0.39543649142980575
Epochs: 15, Loss: 0.38912285035848615
Epochs: 16, Loss: 0.38238993054628373
Epochs: 17, Loss: 0.3755762405395508
Epochs: 18, Loss: 0.3746612903475761
Epochs: 19, Loss: 0.3680612505276998
Epochs: 20, Loss: 0.3664745630423228
Epochs: 21, Loss: 0.36364471447467805
Epochs: 22, Loss: 0.3586209842363993
Epochs: 23, Loss: 0.35487195912996927
Epochs: 24, Loss: 0.35177705629666645
Epochs: 25, Loss: 0.3452080920139948


# Evaluation

## Set `eval` Mode (Crucial)

In [58]:
# Switch to inference mode: dropout is disabled and BatchNorm uses its running
# statistics. eval() returns the module, which is why its structure is displayed.
model.eval()

MyANN(
  (model): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.4, inplace=False)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.2, inplace=False)
    (8): Linear(in_features=64, out_features=10, bias=True)
  )
)

## Eval Code

In [59]:
# Accuracy on the held-out test set.
# Inference mode is set here as well, so this cell gives correct numbers even
# when it is re-run straight after the training cell (which calls model.train()).
model.eval()

total_rows = 0
correct = 0

# Gradients are not needed for evaluation.
with torch.no_grad():
  for features, labels in test_loader:

    # Move the batch to the model's device.
    features = features.to(device)
    labels = labels.to(device)

    # Output from network: [batch, 10]
    y_pred = model(features)

    # Predicted class = index of the largest logit. argmax avoids the throwaway
    # `_` assignment, which would shadow IPython's last-output variable.
    predict = y_pred.argmax(dim=1)

    # Count correct predictions
    correct += (predict == labels).sum().item()
    total_rows += labels.size(0)

avg_accuracy = correct / total_rows
print(f'Average Accuracy: {avg_accuracy:.4f}')


Average Accuracy: 0.8828


In [60]:
# Accuracy on the training set, for comparison with the test-set figure.
# Inference mode is set here as well, so dropout and BatchNorm behave correctly
# even when this cell is re-run straight after the training cell.
model.eval()

total_rows = 0
correct = 0

# Gradients are not needed for evaluation.
with torch.no_grad():
  for features, labels in train_loader:

    # Move the batch to the model's device.
    features = features.to(device)
    labels = labels.to(device)

    # Output from network: [batch, 10]
    y_pred = model(features)

    # Predicted class = index of the largest logit. argmax avoids the throwaway
    # `_` assignment, which would shadow IPython's last-output variable.
    predict = y_pred.argmax(dim=1)

    # Count correct predictions
    correct += (predict == labels).sum().item()
    total_rows += labels.size(0)

avg_accuracy = correct / total_rows
print(f'Average Accuracy: {avg_accuracy:.4f}')

Average Accuracy: 0.9063
