<a href="https://colab.research.google.com/github/sahilmaniyar888/Neural-Network-from-Scratch/blob/main/Pytorch/Transfer_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Transfer Learning and Pretrained Models
### AlexNet on Cats vs Dogs (PyTorch)


## 1) Imports
Import the libraries used for model building, data loading, and training.


In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import alexnet, AlexNet_Weights
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from tqdm import tqdm
import numpy as np

import warnings
warnings.filterwarnings("ignore")


## 2) Dataset and Transforms
Create ImageNet-style transforms, load dataset, and split train/validation.


In [None]:
# Auto path selection for local vs Google Colab
if "COLAB_RELEASE_TAG" in os.environ:
    PATH_TO_DATA = "/content/Neural-Network-from-Scratch/data/dogsvscats/"
else:
    PATH_TO_DATA = "../../data/dogsvscats/"

# Channel-wise normalisation shared by the training and validation pipelines.
normalizer = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
)

# Training pipeline: resize, random horizontal flip (augmentation), tensor
# conversion, normalisation.
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalizer,
])

# Validation pipeline: same preprocessing but WITHOUT the random flip, so the
# validation metrics are deterministic for a given model and split.
val_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalizer,
])

# Two views over the same image folder that differ only in their transform.
# Both are built from the same path, so index i is expected to refer to the
# same image in each view.
dataset = ImageFolder(PATH_TO_DATA, transform=train_transforms)
val_view = ImageFolder(PATH_TO_DATA, transform=val_transforms)

train_samples = int(0.9 * len(dataset))
val_samples = len(dataset) - train_samples

# A single random permutation is cut into disjoint train/validation index
# lists; each subset then reads from the view with the matching transform.
shuffled_indices = torch.randperm(len(dataset)).tolist()
train_dataset = torch.utils.data.Subset(dataset, shuffled_indices[:train_samples])
val_dataset = torch.utils.data.Subset(val_view, shuffled_indices[train_samples:])

print(f"Data Path: {PATH_TO_DATA}")
print(f"Total: {len(dataset)} | Train: {len(train_dataset)} | Val: {len(val_dataset)}")


## 3) Load AlexNet and Inspect
Load architecture and inspect key layers.


In [None]:
# Build the AlexNet architecture without loading any pretrained weights and
# print its layer layout.
model = alexnet(weights=None)
print(model)


In [None]:
# Inspect the module stored at index 1 of the classifier head
# (the first linear layer of the classifier).
print(model.classifier[1])


## 4) Update Output Layer for Cats vs Dogs
Replace final layer to output 2 classes.


In [None]:
# Start from a fresh, untrained AlexNet and swap the last classifier layer
# (index 6) for a linear layer with two outputs, one per class.
model = alexnet(weights=None)
model.classifier[6] = nn.Linear(in_features=4096, out_features=2)
print(model)


## 5) Sanity Check Forward Pass
Pass a random image batch through the model and check the output shape.


In [None]:
# Feed a batch of 16 random 3x224x224 tensors through the model and confirm
# that one row of two scores comes back per input.
rand_data = torch.rand((16, 3, 224, 224))
model_output = model(rand_data)
print(model_output.shape)  # expected: [16, 2]


## 6) Check Model Parameters
View named parameters and total parameter count.


In [None]:
# Walk every named parameter tensor, report its shape and element count, and
# keep a running total for the whole model.
total_parameters = 0
for name, params in model.named_parameters():
    # numel() gives the element count directly, without building a temporary
    # tensor from the shape and reducing it.
    num_params = params.numel()
    print(name, ":", params.shape, "Num Parameters:", num_params)
    total_parameters += num_params

print("-" * 24)
print("Total Parameters in Model", total_parameters)


## 7) Training Utility
Define a reusable train/validate loop.


In [None]:
def _run_epoch(model, device, loss_fn, loader, optimizer, update_weights):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Both metrics are aggregated per sample rather than per batch, so a
    smaller final batch does not get the same weight as a full one.

    Args:
        model: Module called on each image batch.
        device: Device the image and label batches are moved to.
        loss_fn: Callable taking ``(outputs, labels)`` and returning a scalar
            loss. The loss is assumed to be the mean over the batch (the
            default reduction of ``nn.CrossEntropyLoss``); with a different
            reduction the reported epoch loss would need to be reinterpreted.
        loader: Iterable yielding ``(image, label)`` tensor batches.
        optimizer: Optimizer used when ``update_weights`` is true; it is not
            touched otherwise.
        update_weights: If true, gradients are zeroed before each forward
            pass and a backward pass plus optimizer step follows it.

    Returns:
        A ``(mean_loss, accuracy)`` tuple of floats. Both are ``nan`` when
        the loader yields no samples.
    """
    loss_sum = 0.0
    correct = 0
    seen = 0

    for image, label in tqdm(loader):
        image, label = image.to(device), label.to(device)
        if update_weights:
            optimizer.zero_grad()
        out = model(image)

        loss = loss_fn(out, label)

        # Weight the batch loss by the batch size so the epoch value is a
        # per-sample mean.
        samples_in_batch = label.size(0)
        loss_sum += loss.item() * samples_in_batch

        predictions = torch.argmax(out, dim=1)
        correct += (predictions == label).sum().item()
        seen += samples_in_batch

        if update_weights:
            loss.backward()
            optimizer.step()

    if seen == 0:
        # Nothing was processed; report NaN instead of dividing by zero.
        return float("nan"), float("nan")
    return loss_sum / seen, correct / seen


def train(model, device, epochs, optimizer, loss_fn, trainloader, valloader):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Every epoch runs a training pass over ``trainloader`` (in ``train()``
    mode, updating weights through ``optimizer``) followed by a validation
    pass over ``valloader`` (in ``eval()`` mode with gradients disabled).
    The epoch's loss and accuracy for both passes are printed and logged.

    Args:
        model: Module to train; it is updated in place.
        device: Device each batch is moved to before the forward pass.
        epochs: Number of epochs to run; epochs are numbered from 1.
        optimizer: Optimizer stepping the model's trainable parameters.
        loss_fn: Callable taking ``(outputs, labels)`` and returning a scalar
            loss, assumed to be averaged over the batch.
        trainloader: Iterable of ``(image, label)`` training batches.
        valloader: Iterable of ``(image, label)`` validation batches.

    Returns:
        A ``(log_training, model)`` tuple. ``log_training`` maps ``"epoch"``,
        ``"training_loss"``, ``"training_acc"``, ``"validation_loss"`` and
        ``"validation_acc"`` to lists with one entry per epoch.
    """
    log_training = {
        "epoch": [],
        "training_loss": [],
        "training_acc": [],
        "validation_loss": [],
        "validation_acc": [],
    }

    for epoch in range(1, epochs + 1):
        print(f"Starting Epoch {epoch}")

        model.train()
        training_loss_mean, training_acc_mean = _run_epoch(
            model, device, loss_fn, trainloader, optimizer, update_weights=True
        )

        model.eval()
        with torch.no_grad():
            valid_loss_mean, valid_acc_mean = _run_epoch(
                model, device, loss_fn, valloader, optimizer, update_weights=False
            )

        log_training["epoch"].append(epoch)
        log_training["training_loss"].append(training_loss_mean)
        log_training["training_acc"].append(training_acc_mean)
        log_training["validation_loss"].append(valid_loss_mean)
        log_training["validation_acc"].append(valid_acc_mean)

        print("Training Loss:", training_loss_mean)
        print("Training Acc:", training_acc_mean)
        print("Validation Loss:", valid_loss_mean)
        print("Validation Acc:", valid_acc_mean)

    return log_training, model


## 8) Train AlexNet From Scratch
Random initialization, then full training.


In [None]:
# Use the GPU when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"Training on Device {DEVICE}")

# Hyperparameters for the from-scratch run.
epochs = 5
batch_size = 128

# AlexNet without pretrained weights, with a two-output final layer.
model = alexnet()
model.classifier[6] = nn.Linear(4096, 2)
model = model.to(DEVICE)

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Training batches are shuffled; validation batches keep their order.
trainloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
valloader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)

scratch_logs, scratch_model = train(
    model, DEVICE, epochs, optimizer, loss_fn, trainloader, valloader
)


## 9) Load Pretrained AlexNet and Fine-Tune End-to-End
Use ImageNet weights, replace classifier head, train all layers.


In [None]:
# Hyperparameters for the end-to-end fine-tuning run.
epochs = 2
batch_size = 128

# AlexNet initialised from the IMAGENET1K_V1 weights, with the final
# classifier layer replaced by a new two-output linear layer.
model = alexnet(weights=AlexNet_Weights.IMAGENET1K_V1)
model.classifier[6] = nn.Linear(4096, 2)
model = model.to(DEVICE)

# Every parameter of the model is handed to the optimizer.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

trainloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
valloader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)

finetune_all_logs, finetune_all_model = train(
    model, DEVICE, epochs, optimizer, loss_fn, trainloader, valloader
)


## 10) Freeze All Layers Except Final Classifier
Keep pretrained feature extractor fixed and train only `classifier[6]`.


In [None]:
# Pretrained AlexNet with a new two-output final layer.
model = alexnet(weights=AlexNet_Weights.IMAGENET1K_V1)
model.classifier[6] = nn.Linear(4096, 2)

# Turn off gradients for every parameter whose name does not mention
# "classifier.6", leaving only the new final layer trainable.
for name, param in model.named_parameters():
    if "classifier.6" in name:
        continue
    param.requires_grad_(False)

# Quick check: show the first bias parameter and whether it still
# requires gradients.
for name, param in model.named_parameters():
    if "bias" not in name:
        continue
    print(name)
    print("requires_grad:", param.requires_grad)
    break


## 11) Train Only Final Classifier Layer
Optimizer updates only parameters with `requires_grad=True`.


In [None]:
model = model.to(DEVICE)

# Hyperparameters for the head-only run.
epochs = 2
batch_size = 128

# Collect only the parameters that still require gradients and hand just
# those to the optimizer.
trainable_params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = optim.Adam(trainable_params, lr=0.0001)
loss_fn = nn.CrossEntropyLoss()

trainloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
valloader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)

head_only_logs, head_only_model = train(
    model, DEVICE, epochs, optimizer, loss_fn, trainloader, valloader
)


## 12) Summary
- Transfer learning reuses pretrained knowledge for new tasks.
- You can fine-tune all layers or only train the classifier head.
- It usually reduces training time and improves results with less data.
