In [4]:
import sys
sys.path.append('../data/cifar100/')  
from cifar100_loader import load_cifar100
from models.model import LeNet5 #import the model

In [5]:

DEVICE = 'cuda' # 'cuda' or 'cpu'

BATCH_SIZE = 256     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 1e-3            # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 30      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 20       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10

In [6]:
trainloader, validloader, testloader = load_cifar100(batch_size=32, validation_split=0.1)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data\cifar-100-python.tar.gz


100.0%


Extracting ./data\cifar-100-python.tar.gz to ./data
Files already downloaded and verified


In [7]:
print("Dimension of the training dataset:", len(trainloader.dataset))
print("Dimension of the validation dataset:", len(validloader.dataset))
print("Dimension of the test dataset:", len(testloader.dataset))

Dimension of the training dataset: 45000
Dimension of the validation dataset: 5000
Dimension of the test dataset: 10000


In [8]:
import torch.optim as optim
import torch.nn as nn

model = LeNet5().to(DEVICE) # Create the model

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# Training loop
for epoch in range(NUM_EPOCHS): 
    for data, targets in trainloader:
        data = data.to(DEVICE)        # Move the data to the GPU
        targets = targets.to(DEVICE)  # Move the targets to the GPU

        optimizer.zero_grad()         # Zero the gradients
        outputs = model(data)         # Pass data through the model
        loss = criterion(outputs, targets)  # Compute loss
        loss.backward()               # Backpropagation
        optimizer.step()              # Update model parameters

    print(f'Epoch {epoch+1}, Loss: {loss.item()}')


Epoch 1, Loss: 4.605193138122559
Epoch 2, Loss: 4.605221748352051
Epoch 3, Loss: 4.6051926612854
Epoch 4, Loss: 4.604877948760986
Epoch 5, Loss: 4.605062961578369
Epoch 6, Loss: 4.605139255523682
Epoch 7, Loss: 4.605148792266846
Epoch 8, Loss: 4.6052656173706055
Epoch 9, Loss: 4.605289459228516
Epoch 10, Loss: 4.605056285858154
Epoch 11, Loss: 4.605295658111572
Epoch 12, Loss: 4.605194091796875
Epoch 13, Loss: 4.605334281921387
Epoch 14, Loss: 4.604792594909668
Epoch 15, Loss: 4.60536003112793
Epoch 16, Loss: 4.605284214019775
Epoch 17, Loss: 4.60516357421875
Epoch 18, Loss: 4.605246067047119
Epoch 19, Loss: 4.604886054992676
Epoch 20, Loss: 4.605113983154297


KeyboardInterrupt: 