In [24]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from tqdm.auto import tqdm


In [25]:
transform = transforms.Compose([
    transforms.Resize((227, 227)),  # Resize images to 227x227 as expected by AlexNet
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize to [-1, 1]
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [26]:
device="cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"

In [27]:
next(iter(train_loader))[0].shape

torch.Size([64, 3, 227, 227])

In [36]:
class AlexNet(nn.Module):
    '''Model architecture'''

    def __init__(self,input_shape:int,output_shape:int):
        super().__init__()
        self.block_1=nn.Sequential(
            nn.Conv2d(in_channels=input_shape[0],out_channels=96, kernel_size=11, stride=4,), #padding=2
            nn.ReLU(),
            nn.BatchNorm2d(96),
            #nn.LocalResponseNorm(size=5,k=2,alpha=0.0001,beta=0.75),
            nn.MaxPool2d(kernel_size=3,stride=2),

            nn.Conv2d(in_channels=96,out_channels=256,kernel_size=5,stride=1,padding=2),
            nn.ReLU(),
            nn.BatchNorm2d(256),
            #nn.LocalResponseNorm(size=5,k=2,alpha=0.0001,beta=0.75),
            nn.MaxPool2d(kernel_size=3,stride=2)
        )

        self.block_2=nn.Sequential(
            nn.Conv2d(in_channels=256,out_channels=384,kernel_size=3,stride=1,padding=1),
            nn.ReLU(),

            nn.Conv2d(in_channels=384,out_channels=384,kernel_size=3,stride=1,padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=384,out_channels=256,kernel_size=3,stride=1,padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3,stride=2)



        )
        #flatten
        self.block_3=nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=9216,out_features=4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096,out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096,out_features=output_shape)

        )
    def forward(self,X):
        X=self.block_1(X)
        X=self.block_2(X)
        X=X.view(X.size(0),-1)
        X=self.block_3(X)
        return X

In [37]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
alexnet = AlexNet(input_shape=(3, 32, 32), output_shape=10)  # CIFAR-10 has 10 classes
alexnet.to(device)

AlexNet(
  (block_1): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): ReLU()
    (6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block_2): Sequential(
    (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (block_3): Sequential(
    (0): Dropout(p=0.5, inplac

In [42]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(alexnet.parameters(), lr=0.005,momentum=0.9)
def accuracy_fn(y_true, y_pred):
    """Calculates accuracy between truth labels and predictions."""

    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_pred)) * 100
    return acc

In [47]:
# Import tqdm for progress bar
from tqdm.auto import tqdm

# Set the seed and start the timer
torch.manual_seed(42)
# train_time_start_on_cpu = timer()

# Set the number of epochs (we'll keep this small for faster training times)
epochs = 5

# Create training and testing loop
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n-------")
    ### Training
    train_loss = 0
    # Add a loop to loop through training batches
    for batch, (X, y) in enumerate(train_loader):
        X, y = X.to(device), y.to(device)
        alexnet.train()
        # 1. Forward pass
        y_pred = alexnet(X)

        # 2. Calculate loss (per batch)
        loss = loss_fn(y_pred, y)
        train_loss += loss # accumulatively add up the loss per epoch

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

        # Print out how many samples have been seen
        if batch % 20 == 0:
            print(f"Looked at {batch * len(X)}/{len(train_loader.dataset)} samples")

    # Divide total train loss by length of train dataloader (average loss per batch per epoch)
    train_loss /= len(train_loader)

    ### Testing
    # Setup variables for accumulatively adding up loss and accuracy
    test_loss, test_acc = 0, 0
    alexnet.eval()
    with torch.inference_mode():
        for X, y in test_loader:
            # 1. Forward pass
            X, y = X.to(device), y.to(device)
            test_pred = alexnet(X)

            # 2. Calculate loss (accumulatively)
            test_loss += loss_fn(test_pred, y) # accumulatively add up the loss per epoch
            probabilities = torch.nn.functional.softmax(test_pred, dim=1)
            # 3. Calculate accuracy (preds need to be same as y_true)
            pred=torch.max(probabilities, dim=1)
            test_acc += accuracy_fn(y_true=y, y_pred=pred.indices)

        # Calculations on test metrics need to happen inside torch.inference_mode()
        # Divide total test loss by length of test dataloader (per batch)
        test_loss /= len(test_loader)

        # Divide total accuracy by length of test dataloader (per batch)
        test_acc /= len(test_loader)

    ## Print out what's happening
    print(f"\nTrain loss: {train_loss:.5f} | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%\n")

# Calculate training time
# train_time_end_on_cpu = timer()
# total_train_time_model_0 = print_train_time(start=train_time_start_on_cpu,
#                                            end=train_time_end_on_cpu,
#                                            device=str(next(model_0.parameters()).device))

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 0
-------
Looked at 0/50000 samples
Looked at 1280/50000 samples
Looked at 2560/50000 samples
Looked at 3840/50000 samples
Looked at 5120/50000 samples
Looked at 6400/50000 samples
Looked at 7680/50000 samples
Looked at 8960/50000 samples
Looked at 10240/50000 samples
Looked at 11520/50000 samples
Looked at 12800/50000 samples
Looked at 14080/50000 samples
Looked at 15360/50000 samples
Looked at 16640/50000 samples
Looked at 17920/50000 samples
Looked at 19200/50000 samples
Looked at 20480/50000 samples
Looked at 21760/50000 samples
Looked at 23040/50000 samples
Looked at 24320/50000 samples
Looked at 25600/50000 samples
Looked at 26880/50000 samples
Looked at 28160/50000 samples
Looked at 29440/50000 samples
Looked at 30720/50000 samples
Looked at 32000/50000 samples
Looked at 33280/50000 samples
Looked at 34560/50000 samples
Looked at 35840/50000 samples
Looked at 37120/50000 samples
Looked at 38400/50000 samples
Looked at 39680/50000 samples
Looked at 40960/50000 samples
Look

In [48]:
torch.manual_seed(42)
def eval_model(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               accuracy_fn):
    """Returns a dictionary containing the results of model predicting on data_loader."""


    loss, acc = 0, 0
    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            #target device
            X, y = X.to(device), y.to(device)
            #forward pass
            y_pred = model(X)

            # Accumulate the loss and accuracy values per batch
            loss += loss_fn(y_pred, y)
            acc += accuracy_fn(y_true=y,
                                y_pred=y_pred.argmax(dim=1)) # For accuracy, need the prediction labels (logits -> pred_prob -> pred_labels)

        # Scale loss and acc to find the average loss/acc per batch
        loss /= len(data_loader)
        acc /= len(data_loader)

    return {"model_name": model.__class__.__name__,
            "model_loss": loss.item(),
            "model_acc": acc}

# Calculate model 0 results on test dataset
model_0_results = eval_model(model=alexnet, data_loader=test_loader,
    loss_fn=loss_fn, accuracy_fn=accuracy_fn
)
model_0_results


{'model_name': 'AlexNet',
 'model_loss': 0.5289348363876343,
 'model_acc': 82.5437898089172}