In [2]:
import sys
import os
import scipy.io
import numpy as np

In [3]:
def load_svhn_data(split='train'):
    """
    Loads SVHN cropped digit data from .mat files.
    
    Args:
        data_dir (str): Path to the folder containing .mat files.
        split (str): 'train' or 'test'.
        
    Returns:
        X (np.array): Images of shape (N, 32, 32, 3) -> Normalized [0, 1]
        y (np.array): Labels of shape (N,) -> Corrected so '0' is class 0 (not 10)
    """
    file_path = os.path.join(f'{split}_32x32.mat')
    
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}. Please download it from http://ufldl.stanford.edu/housenumbers/")

    print(f"Loading {split} data from {file_path}...")
    mat_data = scipy.io.loadmat(file_path)
    
    # The .mat file has X shape: (32, 32, 3, N) -> (Height, Width, Channels, Batch)
    # We want standard shape: (N, 32, 32, 3) for visualization/processing
    X = mat_data['X']
    X = np.transpose(X, (3, 0, 1, 2))
    
    # Normalize pixel values to [0, 1] range (Standard for Deep Learning)
    X = X.astype('float32') / 255.0

    # The .mat file has y shape: (N, 1). Flatten it to (N,)
    y = mat_data['y'].flatten()
    
    # FIX LABELS: SVHN labels '0' as 10. We need to map 10 -> 0.
    y[y == 10] = 0
    
    print(f"Loaded {X.shape[0]} samples.")
    print(f"X shape: {X.shape}")
    print(f"y shape: {y.shape}")
    
    return X, y

In [5]:
X_train, Y_train = load_svhn_data()

Loading train data from train_32x32.mat...
Loaded 73257 samples.
X shape: (73257, 32, 32, 3)
y shape: (73257,)


In [6]:
X_test, Y_test = load_svhn_data(split = 'test')

Loading test data from test_32x32.mat...
Loaded 26032 samples.
X shape: (26032, 32, 32, 3)
y shape: (26032,)


In [7]:
X_flat_train = X_train.reshape(X_train.shape[0], -1)

In [8]:
X_flat_test = X_test.reshape(X_test.shape[0], -1)

In [9]:
red_values = X_flat_train[:, range(0, 3072, 3)].astype('float64')
green_values = X_flat_train[:, range(1, 3072, 3)].astype('float64')
blue_values = X_flat_train[:, range(2, 3072, 3)].astype('float64')

In [44]:
red_values

array([[0.12941177, 0.05882353, 0.05882353, ..., 0.43529412, 0.42745098,
        0.40392157],
       [0.32941177, 0.33725491, 0.3019608 , ..., 0.44313726, 0.40784314,
        0.40392157],
       [0.07450981, 0.07843138, 0.09803922, ..., 0.24705882, 0.24313726,
        0.24705882],
       ...,
       [0.36078432, 0.36862746, 0.44705883, ..., 0.8509804 , 0.86666667,
        0.85490197],
       [0.74509805, 0.80392158, 0.86274511, ..., 0.90980393, 0.88627452,
        0.85490197],
       [0.84705883, 0.86666667, 0.88627452, ..., 0.74509805, 0.69803923,
        0.70980394]], shape=(73257, 1024))

In [45]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

class SVHN_CNN:
    def __init__(self, num_classes = 10):
        self.num_classes = num_classes
        
        # Define CNN architecture
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2)  # 32x32 -> 32x32
        self.pool = nn.MaxPool2d(2, 2)                                     # 32x32 -> 16x16
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2) # 16x16 -> 16x16
        self.fc1 = nn.Linear(32*8*8, 128)
        self.fc2 = nn.Linear(128, num_classes)
        
        # Put all layers in a ModuleList for optimizer
        self.model_layers = nn.ModuleList([self.conv1, self.conv2, self.fc1, self.fc2])
        
    def forward(self, X):
        X = F.relu(self.conv1(X))
        X = self.pool(X)
        X = F.relu(self.conv2(X))
        X = self.pool(X)
        X = X.view(X.size(0), -1)
        X = F.relu(self.fc1(X))
        X = self.fc2(X)
        return F.softmax(X, dim=1)
    
    def parameters(self):
        # Collect all parameters for optimizer
        return list(self.conv1.parameters()) + list(self.conv2.parameters()) + \
               list(self.fc1.parameters()) + list(self.fc2.parameters())
    
    def fit(self, X, Y, batch_size=256, epochs=5, lr=0.001, weight_decay=1e-4):
        """Train CNN using Adam optimizer and cross-entropy loss"""
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.device = device
        
        # Convert to torch tensors and reshape to (N,1,32,32)
        Xtorch = torch.tensor(X, dtype=torch.float32).reshape(-1,1,32,32).to(device)
        Ytorch = torch.tensor(Y, dtype=torch.long).to(device)
        
        # Move model layers to device
        for layer in self.model_layers:
            layer.to(device)
        
        optimizer = optim.Adam(self.parameters(), lr=lr, weight_decay=weight_decay)
        criterion = nn.CrossEntropyLoss()
        N = X.shape[0]
        
        for epoch in range(epochs):
            perm = np.random.permutation(N)
            for i in range(0, N, batch_size):
                idx = perm[i:i+batch_size]
                Xb = Xtorch[idx]
                Yb = Ytorch[idx]
                
                optimizer.zero_grad()
                outputs = self.forward(Xb)
                loss = criterion(outputs, Yb)
                loss.backward()
                optimizer.step()
            
            # Optional: print epoch info
            with torch.no_grad():
                preds = self.predict(X)
                acc = (preds == Y).mean()
                print(f"Epoch {epoch+1}/{epochs} - Training accuracy: {acc:.4f}")
    
    def predict(self, X):
        """Return predicted class labels"""
        device = self.device
        Xtorch = torch.tensor(X, dtype=torch.float32).reshape(-1,1,32,32).to(device)
        with torch.no_grad():
            outputs = self.forward(Xtorch)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
        return preds
    
    def probabilities(self, X):
        """Return class probabilities"""
        device = self.device
        Xtorch = torch.tensor(X, dtype=torch.float32).reshape(-1,1,32,32).to(device)
        with torch.no_grad():
            probs = self.forward(Xtorch).cpu().numpy()
        return probs


In [63]:
red_CNN = SVHN_CNN()
red_CNN.fit(red_values, Y_train)

Epoch 1/5 - Training accuracy: 0.3969
Epoch 2/5 - Training accuracy: 0.4816
Epoch 3/5 - Training accuracy: 0.5017
Epoch 4/5 - Training accuracy: 0.5126
Epoch 5/5 - Training accuracy: 0.5132


In [64]:
green_CNN = SVHN_CNN()
green_CNN.fit(green_values, Y_train)

Epoch 1/5 - Training accuracy: 0.4716
Epoch 2/5 - Training accuracy: 0.5655
Epoch 3/5 - Training accuracy: 0.5864
Epoch 4/5 - Training accuracy: 0.5870
Epoch 5/5 - Training accuracy: 0.6458


In [65]:
blue_CNN = SVHN_CNN()
blue_CNN.fit(blue_values, Y_train)

Epoch 1/5 - Training accuracy: 0.4047
Epoch 2/5 - Training accuracy: 0.4843
Epoch 3/5 - Training accuracy: 0.5088
Epoch 4/5 - Training accuracy: 0.5023
Epoch 5/5 - Training accuracy: 0.5120


In [60]:
from sklearn.ensemble import RandomForestClassifier

print("Training Random Forest...")
rf = RandomForestClassifier(criterion = 'entropy', n_estimators=100, max_depth=50, min_samples_leaf = 2, n_jobs=-1, random_state=1234)
rf.fit(X_flat_train, Y_train)

Training Random Forest...


0,1,2
,n_estimators,100
,criterion,'entropy'
,max_depth,50
,min_samples_split,2
,min_samples_leaf,2
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [59]:
y_pred_rf = rf.predict(X_flat_train)
print(np.mean(y_pred_rf == Y_train))

73247.0


In [61]:
y_test_rf = rf.predict(X_flat_test)
print(np.mean(y_test_rf == Y_test))

0.6901505838967424


In [66]:
# determine probabilities
red = red_CNN.predict(red_values)
green = green_CNN.predict(green_values)
print(np.mean(red == Y_train))

0.5131796278853898


In [67]:
blue = blue_CNN.predict(blue_values)

In [68]:
red[0]

np.int64(1)

In [70]:
red_probs = red_CNN.probabilities(red_values)
green_probs = green_CNN.probabilities(green_values)
blue_probs = blue_CNN.probabilities(blue_values)

In [71]:
# predict based on the three, if all distinct, take the one with highest probability
final_predict = []
for i in range(73257):
    if red[i] == blue[i] or red[i] == green[i]:
        final_predict.append(red[i])
    elif blue[i] == green[i]:
        final_predict.append(blue[i])
    else:
        p1 = red_probs[i][red[i]]
        p2 = blue_probs[i][blue[i]]
        p3 = green_probs[i][green[i]]

        if p1 > p2 and p1 > p3:
            final_predict.append(red[i])
        elif p2 > p1 and p2 > p3:
            final_predict.append(blue[i])
        else:
            final_predict.append(green[i])

In [73]:
# training success rate?
float(np.mean(final_predict == Y_train))

0.5425283590646628