In [1]:
from dl import Module, Variable
from dl.modules import Convolution, ReLU, Linear, Flatten, MaxPool
from dl.functions import cross_entropy_loss

# Downloading CIFAR-10
import numpy as np
import os
import requests
import tarfile
import pickle

# Training
from dl.data import BatchLoader, train_val_split
from dl.optimizers import SGD

## Download and extract CIFAR-10.

In [2]:
root = './data'
url = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
filename = 'cifar-10-python.tar.gz'
archive_path = os.path.join(root, filename)
extract_path = os.path.join(root, 'cifar-10-batches-py')

os.makedirs(root, exist_ok=True)

# Download the compressed dataset once.
# The bytes are streamed into a temporary ".part" file that is renamed onto
# archive_path only after the whole transfer succeeded. An interrupted
# download therefore never leaves a truncated archive that the existence
# check below would mistake for a complete one on the next run.
if not os.path.exists(archive_path):
    partial_path = archive_path + '.part'
    try:
        # (connect, read) timeouts in seconds: a stalled connection raises
        # instead of hanging the cell forever.
        with requests.get(url, stream=True, timeout=(10, 60)) as r:
            r.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        os.replace(partial_path, archive_path)
    finally:
        # Only reached with the file still present when the download failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)

# Extract the dataset next to the archive (skipped when the target directory
# already exists).
if not os.path.exists(extract_path):
    # Use the restrictive 'data' extraction filter when this Python provides
    # it, so archive members cannot escape the destination directory; older
    # interpreters fall back to the previous unfiltered behaviour.
    extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}
    with tarfile.open(archive_path, 'r:gz') as tar:
        tar.extractall(path=os.path.dirname(extract_path), **extract_kwargs)

print(f"\nCIFAR-10 is ready at: {extract_path}")


CIFAR-10 is ready at: ./data/cifar-10-batches-py


## Load CIFAR-10 into RAM.

In [3]:
def load_cifar_batch(batch_path):
    """Read one pickled CIFAR-10 batch file and return its images and labels.

    Args:
        batch_path: Path of the pickled batch file to read.

    Returns:
        A tuple ``(X, y)`` where ``X`` is the ``b'data'`` entry reshaped to
        ``(-1, 3, 32, 32)`` and cast to ``float32``, and ``y`` is the
        ``b'labels'`` entry converted to a NumPy array.
    """
    # NOTE: unpickling can execute arbitrary code; only point this at batch
    # files that come from a trusted archive.
    with open(batch_path, 'rb') as batch_file:
        contents = pickle.load(batch_file, encoding='bytes')

    # Each row of the flat data matrix becomes one 3 x 32 x 32 image.
    images = contents[b'data'].reshape(-1, 3, 32, 32).astype(np.float32)
    labels = np.array(contents[b'labels'])
    return images, labels

# Load the five training batch files. load_cifar_batch opens each file
# itself, so no file handle is opened here.
xs = []
ys = []
for i in range(1, 6):
    batch_path = os.path.join(extract_path, f'data_batch_{i}')
    X, y = load_cifar_batch(batch_path)

    xs.append(X)
    ys.append(y)

# Stack the per-file arrays along the first (sample) axis; for the standard
# dataset this is expected to give images of shape (50000, 3, 32, 32).
X_train_full = np.concatenate(xs)
y_train_full = np.concatenate(ys)
X_test, y_test = load_cifar_batch(os.path.join(extract_path, 'test_batch'))

# Normalize: x / 255 maps [0, 255] to [0, 1]; subtracting 0.5 and dividing by
# 0.5 then maps that to [-1, 1].
X_train_full = (X_train_full / 255.0 - 0.5) / 0.5
X_test = (X_test / 255.0 - 0.5) / 0.5

# Set aside a validation set.
# NOTE(review): ratio=0.1 is presumably the validation fraction; confirm against train_val_split.
X_train, y_train, X_val, y_val = train_val_split(X_train_full, y_train_full, ratio=0.1, seed=42)

## Create BatchLoaders.

In [4]:
class Cifar10:
    """Indexable dataset that pairs an image array with a label array."""

    def __init__(self, images, labels):
        """Keep references to the given images and labels (no copy is made)."""
        super().__init__()
        self.images, self.labels = images, labels

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, each wrapped in a ``Variable``."""
        return Variable(self.images[idx]), Variable(self.labels[idx])

    def __len__(self):
        """Return the number of samples: the size of the first axis of ``images``."""
        return self.images.shape[0]

In [5]:
# One loader per data split, keyed by split name; only the training split is
# wired up here.
batchloaders = {
    'train': BatchLoader(Cifar10(X_train, y_train), batch_size=64, shuffle=True),
}

## Define the CNN architecture.

In [6]:
class CNN(Module):
    """Three conv/ReLU/max-pool blocks followed by a two-layer classifier head."""

    def __init__(self):
        """Create the layers in the order ``forward`` applies them."""
        super().__init__()

        # Block 1: 3 -> 32 channels, then 2x2 pooling (32x32 -> 16x16).
        self.conv1 = Convolution(C_in=3, C_out=32, K=3, stride=1, padding=1)
        self.relu1 = ReLU()
        self.pool1 = MaxPool(K=2, stride=2)

        # Block 2: 32 -> 64 channels, then 2x2 pooling (16x16 -> 8x8).
        self.conv2 = Convolution(C_in=32, C_out=64, K=3, stride=1, padding=1)
        self.relu2 = ReLU()
        self.pool2 = MaxPool(K=2, stride=2)

        # Block 3: 64 -> 128 channels, then 2x2 pooling (8x8 -> 4x4).
        self.conv3 = Convolution(C_in=64, C_out=128, K=3, stride=1, padding=1)
        self.relu3 = ReLU()
        self.pool3 = MaxPool(K=2, stride=2)

        # Classifier head: flatten the 128 x 4 x 4 feature maps, one hidden
        # layer of 256 units, then 10 outputs (one per CIFAR-10 class).
        self.flat = Flatten()
        self.fc1 = Linear(128 * 4 * 4, 256)
        self.relu4 = ReLU()
        self.fc2 = Linear(256, 10)

    def forward(self, X):
        """Apply every layer to ``X`` in sequence and return the output of ``fc2``."""
        layers = (
            self.conv1, self.relu1, self.pool1,
            self.conv2, self.relu2, self.pool2,
            self.conv3, self.relu3, self.pool3,
            self.flat,
            self.fc1, self.relu4,
            self.fc2,
        )
        for layer in layers:
            X = layer(X)
        return X

## Train the CNN.

In [7]:
model = CNN()

# Value handed to SGD as its second positional argument.
learning_rate = 0.001
optimizer = SGD(model.parameters(), learning_rate)

In [8]:
# Iterate once over the training loader, taking one optimizer step per
# mini-batch and printing that batch's loss.
for X_batch, y_batch in batchloaders['train']:
    
    # Forward pass: the model output is fed, together with the batch labels,
    # to the cross-entropy loss.
    features = model(X_batch)
    loss = cross_entropy_loss(features, y_batch)

    print(loss.data)

    # Update model parameters: clear the optimizer's gradients, backpropagate
    # from the loss, then apply the update.
    # NOTE(review): presumably clear_grad() resets gradients left over from
    # the previous step; confirm against dl.optimizers.
    optimizer.clear_grad()
    loss.backward()
    optimizer.update_parameters()

3.065007252117097
2.6299327416531906
2.7263070892227765
2.652463272976955
2.8266908278827514
2.4197886126932575
2.547659512805119
2.506274184922236
2.368560455235001
2.4320414109541213
2.3935675471947078
2.250594616087179
2.293514366483207
2.4725810834047968
2.3722562954657893
2.24253641759704
2.304320559349934
2.365328866666954
2.444685288383788
2.291654912405363
2.3951446511777816
2.217830627479435
2.2995342133312895
2.2322434075397135
2.258606528482956
2.3775843401450585
2.3240797440832184
2.2630782386449604
2.313221024890109
2.338662485335516
2.27288847048003
2.248596405124302
2.344184322405588


KeyboardInterrupt: 