<a href="https://colab.research.google.com/github/quinyang/svhn_dl/blob/yassine/notebooks/cnn_y.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Dependencies and setup**
   - Read the comment at the top of each cell to know whether it should be run in your environment (Colab vs. local)

In [1]:
## Colab only: run after cloning the repo so that its modules can be imported
## from this notebook.
import sys
import os

GIT_PATH = '/content/svhn_dl'
CODE_PATH = '/content/svhn_dl/src'

# Append the repo root and its src/ folder to the import path, in that order,
# skipping any entry that is already present so re-running the cell is harmless.
sys.path.extend(
    location for location in (GIT_PATH, CODE_PATH) if location not in sys.path
)


In [2]:
# DO NOT RUN THIS CELL IF RUNNING LOCALLY
# Run this cell to install the dependencies IN COLAB; if running locally, use the next cell instead
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install numpy matplotlib scipy scikit-learn jupyter

Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting jupyter
  Downloading jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting jupyterlab (from jupyter)
  Downloading jupyterlab-4.5.0-py3-none-any.whl.metadata (16 kB)
Collecting async-lru>=1.0.0 (from jupyterlab->jupyter)
  Downloading async_lru-2.0.5-py3-none-any.whl.metadata (4.5 kB)
Collecting jupyter-lsp>=2.0.0 (from jupyterlab->jupyter)
  Downloading jupyter_lsp-2.3.0-py3-none-any.whl.metadata (1.8 kB)
Collecting jupyterlab-server<3,>=2.28.0 (from jupyterlab->jupyter)
  Downloading jupyterlab_server-2.28.0-py3-none-any.whl.metadata (5.9 kB)
Collecting jedi>=0.16 (from ipython>=7.23.1->ipykernel->jupyter)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting json5>=0.9.0 (from jupyterlab-server<3,>=2.28.0->jupyterlab->jupyter)
  Downloading json5-0.12.1-py3-none-any.whl.metadata (36 kB)
Downloading jupyter-1.1.1-py2.py3-none-any.whl (2.7 kB)
Downloading jupyterlab-4.5.0-py3-none-any

In [3]:
# tqdm provides the progress bars used in the training loops
!pip install tqdm



In [4]:
#!pip install -r ./requirements.txt

In [5]:
# Run to download the SVHN dataset (files that already exist are skipped).
# The files are stored in ./data relative to the notebook's working directory,
# which is the location DATA_PATH points to.
#
# NOTE: the previous `!cd svhn_dl` line was dropped. Every `!` command runs in
# its own subshell, so that directory change never carried over to the lines
# after it. To really work from inside the cloned repo, run `%cd svhn_dl`
# before this cell instead.
!mkdir -p data
# -nc: do not re-download (or create "*.mat.1" duplicates) when the file exists
# -P : directory to save the download into
!wget -nc -P data http://ufldl.stanford.edu/housenumbers/train_32x32.mat
!wget -nc -P data http://ufldl.stanford.edu/housenumbers/test_32x32.mat

--2025-12-13 06:04:39--  http://ufldl.stanford.edu/housenumbers/train_32x32.mat
Resolving ufldl.stanford.edu (ufldl.stanford.edu)... 171.64.68.10
Connecting to ufldl.stanford.edu (ufldl.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 182040794 (174M) [text/plain]
Saving to: ‘train_32x32.mat’


2025-12-13 06:04:47 (22.9 MB/s) - ‘train_32x32.mat’ saved [182040794/182040794]

--2025-12-13 06:04:47--  http://ufldl.stanford.edu/housenumbers/test_32x32.mat
Resolving ufldl.stanford.edu (ufldl.stanford.edu)... 171.64.68.10
Connecting to ufldl.stanford.edu (ufldl.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 64275384 (61M) [text/plain]
Saving to: ‘test_32x32.mat’


2025-12-13 06:04:53 (11.5 MB/s) - ‘test_32x32.mat’ saved [64275384/64275384]



In [6]:
# Path to the data directory (relative to the notebook's working directory).
# It is passed to load_svhn_data further down, which reads train_32x32.mat from it.
DATA_PATH = './data'

In [7]:
# imports

import torch
import torch.nn as nn
from tqdm import tqdm
from data_loader import load_svhn_data
#import sys
#import os

In [8]:
# setting up initial CNN for testing

class SimpleCNN(nn.Module):
    """Baseline CNN: two 5x5 conv/ReLU/max-pool stages and a two-layer classifier.

    The first linear layer expects 64 * 5 * 5 flattened features, which is what
    the two unpadded 5x5 conv + 2x2 pool stages produce for a 32x32 input
    (32 -> 28 -> 14 -> 10 -> 5). The network outputs 10 raw class scores
    (logits) per image.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head
        self.fc1 = nn.Linear(in_features=64 * 5 * 5, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the (batch, 10) logits for a batch of images ``x``."""
        # Stage 1: conv -> ReLU -> 2x2 max-pool
        features = self.conv1(x)
        features = self.relu(features)
        features = self.pool(features)

        # Stage 2: conv -> ReLU -> 2x2 max-pool
        features = self.conv2(features)
        features = self.relu(features)
        features = self.pool(features)

        # Collapse everything except the batch dimension.
        flat = features.view(features.shape[0], -1)

        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)


In [9]:
# Load the SVHN training data from DATA_PATH.
# NOTE(review): load_svhn_data is project code that is not visible here. Judging by
# its log output it returns X shaped (N, 32, 32, 3) and y shaped (N,) -- confirm the
# dtype and any normalisation/label remapping in data_loader.

X, y = load_svhn_data(DATA_PATH)
# Reorder the axes from channels-last (N, H, W, C) to channels-first (N, C, H, W),
# the layout nn.Conv2d expects.
X = torch.from_numpy(X).permute(0, 3, 1, 2)

# Quick look at the labels.
print(y)

Loading train data from ./data/train_32x32.mat...
Loaded 73257 samples.
X shape: (73257, 32, 32, 3)
y shape: (73257,)
[1 9 2 ... 1 6 9]


In [10]:
### Train SimpleCNN and evaluate it on a validation split held out from the training set

class Dataset(torch.utils.data.Dataset):
    """In-memory dataset that pairs each image with its integer class label.

    Args:
        X: Images as a tensor, NumPy array or nested sequence; the first
            dimension indexes the samples. Stored as ``float32``.
        y: One class label per sample, in the same order as ``X``. Stored as
            ``int64``, the target dtype ``nn.CrossEntropyLoss`` expects.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of samples.

    Note:
        ``torch.as_tensor`` is used instead of the legacy ``torch.FloatTensor`` /
        ``torch.LongTensor`` constructors: the legacy constructors reject a
        tensor whose dtype differs from theirs (e.g. ``uint8`` images), whereas
        ``as_tensor`` converts it. When the input already has the requested
        dtype it is reused without copying.
    """

    def __init__(self, X, y):
        self.X = torch.as_tensor(X, dtype=torch.float32)
        self.y = torch.as_tensor(y, dtype=torch.long)
        # Fail early instead of silently dropping samples or hitting an
        # IndexError halfway through an epoch.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}")

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]


# Seed PyTorch's global RNG so that the weight initialisation and the
# DataLoader shuffle order are repeatable after "Restart Kernel -> Run All".
SEED = 42
torch.manual_seed(SEED)

# Train on the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

dataset = Dataset(X, y)

# 80/20 train/validation split. A dedicated seeded generator keeps the same
# samples in each subset on every run.
n_train = int(0.8 * len(dataset))
n_val = len(dataset) - n_train
train, val = torch.utils.data.random_split(
    dataset, [n_train, n_val],
    generator=torch.Generator().manual_seed(SEED))

# batch size 64 since we have a lot of data to work with
train_loader = torch.utils.data.DataLoader(train, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(val, batch_size=64, shuffle=False)

model = SimpleCNN().to(device)
loss_fn = nn.CrossEntropyLoss()
# 0.001 lr for now
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 10 epochs: a fair number since the dataset is fairly large
for epoch in range(10):
    # --- training pass ---
    model.train()
    total_loss = 0

    pbar = tqdm(train_loader, desc=f"Epoch {epoch + 1}")
    for imgs, targets in pbar:
        # The dataset lives in CPU memory; move each batch to the model's device.
        imgs, targets = imgs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()
        batch_loss = loss.item()
        pbar.set_postfix(loss=batch_loss)
        total_loss += batch_loss

    # Mean of the per-batch mean losses over the epoch.
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}, Average Loss: {avg_loss:.4f}")

    # --- validation pass (no gradients, dropout/batch-norm in eval mode) ---
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # The predicted class is the index of the largest logit.
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    accuracy = correct / total
    print(f"Epoch {epoch + 1} accuracy: {accuracy*100:.4f}%")




Epoch 1: 100%|██████████| 916/916 [00:26<00:00, 34.26it/s, loss=0.609]


Epoch 1, Average Loss: 1.3417
Epoch 1 accuracy: 80.5078%


Epoch 2: 100%|██████████| 916/916 [00:27<00:00, 32.76it/s, loss=0.558]


Epoch 2, Average Loss: 0.5693
Epoch 2 accuracy: 84.8690%


Epoch 3: 100%|██████████| 916/916 [00:26<00:00, 33.94it/s, loss=0.42]


Epoch 3, Average Loss: 0.4840
Epoch 3 accuracy: 86.2408%


Epoch 4: 100%|██████████| 916/916 [00:27<00:00, 33.41it/s, loss=0.341]


Epoch 4, Average Loss: 0.4402
Epoch 4 accuracy: 86.1452%


Epoch 5: 100%|██████████| 916/916 [00:26<00:00, 34.26it/s, loss=0.585]


Epoch 5, Average Loss: 0.4102
Epoch 5 accuracy: 87.0939%


Epoch 6: 100%|██████████| 916/916 [00:27<00:00, 33.60it/s, loss=0.291]


Epoch 6, Average Loss: 0.3874
Epoch 6 accuracy: 87.6263%


Epoch 7: 100%|██████████| 916/916 [00:26<00:00, 34.12it/s, loss=0.457]


Epoch 7, Average Loss: 0.3687
Epoch 7 accuracy: 87.9743%


Epoch 8: 100%|██████████| 916/916 [00:27<00:00, 33.37it/s, loss=0.609]


Epoch 8, Average Loss: 0.3513
Epoch 8 accuracy: 88.4316%


Epoch 9: 100%|██████████| 916/916 [00:26<00:00, 34.12it/s, loss=0.27]


Epoch 9, Average Loss: 0.3388
Epoch 9 accuracy: 87.8651%


Epoch 10: 100%|██████████| 916/916 [00:27<00:00, 33.31it/s, loss=0.314]


Epoch 10, Average Loss: 0.3282
Epoch 10 accuracy: 88.8684%


Validation accuracy peaks at about 87–88% (best epoch: 88.9%) and is noisy from epoch to epoch. The training loss improves steadily across epochs, so the model seems to be reaching the capacity of the current architecture: accuracy simply plateaus.

In [11]:
# Deeper CNN (3 conv layers + dropout), trained the same way so its results can be compared with SimpleCNN

class CNN_3_layer(nn.Module):
    """Three-stage CNN with dropout-regularised classifier head.

    Each stage is a padded 3x3 convolution, a ReLU and a 2x2 max-pool, so a
    32x32 input is halved three times (32 -> 16 -> 8 -> 4) and ends up as
    128 * 4 * 4 flattened features. Dropout (p=0.5) is applied both before and
    after the first fully connected layer. The network outputs 10 raw class
    scores (logits) per image.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: padding=1 keeps the spatial size through each conv.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head
        self.fc1 = nn.Linear(in_features=128 * 4 * 4, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Return the (batch, 10) logits for a batch of images ``x``."""
        features = x
        # Three identical conv -> ReLU -> max-pool stages.
        for conv in (self.conv1, self.conv2, self.conv3):
            features = self.pool(self.relu(conv(features)))

        # Collapse everything except the batch dimension.
        flat = features.view(features.shape[0], -1)

        hidden = self.dropout(flat)
        hidden = self.relu(self.fc1(hidden))
        hidden = self.dropout(hidden)
        return self.fc2(hidden)


# Seed PyTorch's global RNG so that the weight initialisation and the
# DataLoader shuffle order of this run are repeatable.
torch.manual_seed(42)

# Train on the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = CNN_3_layer().to(device)
loss_fn = nn.CrossEntropyLoss()
# 0.001 lr for now
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 10 epochs: a fair number since the dataset is fairly large.
# train_loader / val_loader are the loaders built in the SimpleCNN training
# cell, so both models see the same train/validation split.
for epoch in range(10):
    # --- training pass ---
    model.train()
    total_loss = 0

    pbar = tqdm(train_loader, desc=f"Epoch {epoch + 1}")
    for imgs, targets in pbar:
        # The dataset lives in CPU memory; move each batch to the model's device.
        imgs, targets = imgs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()
        batch_loss = loss.item()
        pbar.set_postfix(loss=batch_loss)
        total_loss += batch_loss

    # Mean of the per-batch mean losses over the epoch.
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}, Average Loss: {avg_loss:.4f}")

    # --- validation pass (eval mode switches dropout off) ---
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # The predicted class is the index of the largest logit.
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    accuracy = correct / total
    print(f"Epoch {epoch + 1} accuracy: {accuracy*100:.4f}%")

Epoch 1: 100%|██████████| 916/916 [01:25<00:00, 10.66it/s, loss=0.725]


Epoch 1, Average Loss: 1.6854
Epoch 1 accuracy: 77.9211%


Epoch 2: 100%|██████████| 916/916 [01:26<00:00, 10.54it/s, loss=0.603]


Epoch 2, Average Loss: 0.7645
Epoch 2 accuracy: 85.7562%


Epoch 3: 100%|██████████| 916/916 [01:25<00:00, 10.65it/s, loss=0.638]


Epoch 3, Average Loss: 0.6091
Epoch 3 accuracy: 87.5102%


Epoch 4: 100%|██████████| 916/916 [01:25<00:00, 10.65it/s, loss=0.482]


Epoch 4, Average Loss: 0.5488
Epoch 4 accuracy: 88.5067%


Epoch 5: 100%|██████████| 916/916 [01:25<00:00, 10.71it/s, loss=0.731]


Epoch 5, Average Loss: 0.5104
Epoch 5 accuracy: 88.6432%


Epoch 6: 100%|██████████| 916/916 [01:26<00:00, 10.62it/s, loss=0.538]


Epoch 6, Average Loss: 0.4842
Epoch 6 accuracy: 89.6465%


Epoch 7: 100%|██████████| 916/916 [01:25<00:00, 10.71it/s, loss=0.6]


Epoch 7, Average Loss: 0.4599
Epoch 7 accuracy: 89.9331%


Epoch 8: 100%|██████████| 916/916 [01:25<00:00, 10.71it/s, loss=0.32]


Epoch 8, Average Loss: 0.4444
Epoch 8 accuracy: 90.5064%


Epoch 9: 100%|██████████| 916/916 [01:26<00:00, 10.59it/s, loss=0.683]


Epoch 9, Average Loss: 0.4341
Epoch 9 accuracy: 89.9468%


Epoch 10: 100%|██████████| 916/916 [01:24<00:00, 10.79it/s, loss=0.349]


Epoch 10, Average Loss: 0.4189
Epoch 10 accuracy: 90.5201%


Validation accuracy levels off at about 90% here: better than the previous model (~88%), but there is still room for improvement.