In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Setup device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Transformations applied on each image
transform = transforms.Compose(
    [
        transforms.ToTensor(),  # Convert images to tensor
        transforms.Normalize((0.5,), (0.5,)),  # Normalize the images
    ]
)

# Load the dataset
train_dataset = datasets.FashionMNIST(
    root="./data", train=True, download=True, transform=transform
)
test_dataset = datasets.FashionMNIST(
    root="./data", train=False, download=True, transform=transform
)

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=1000, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)


# Define the CNN model
class FashionCNN(nn.Module):
    def __init__(self):
        super(FashionCNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(
                1, 64, kernel_size=3, padding=1
            ),  # Input: 1 x 28 x 28, Output: 64 x 28 x 28
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )  # Output: 64 x 14 x 14

        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3),  # Output: 64 x 12 x 12
            nn.ReLU(),
            nn.MaxPool2d(2),
        )  # Output: 64 x 6 x 6

        self.fc1 = nn.Linear(64 * 6 * 6, 128)
        self.fc2 = nn.Linear(128, 10)  # 10 classes

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)  # Flatten the output
        out = self.fc1(out)
        out = self.fc2(out)
        return out


# Instantiate the model, loss function, and optimizer
model = FashionCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


# Function to train the model
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
    print(f"Train Epoch: {epoch} -- Loss: {loss.item():.6f}")


# Training the model
for epoch in range(1, 50):  # Train for 15 epochs
    train(epoch)


# Evaluation
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f"Accuracy of the network on the 10000 test images: {100 * correct / total}%")

In [None]:
from torchsummary import summary

model = FashionCNN().to(
    device
)  # Assuming your model is already defined and moved to the device
summary(model, input_size=(1, 28, 28))  # (Channels, Height, Width)


# Horses or Humans

In [1]:
import urllib.request
import zipfile

url = "https://storage.googleapis.com/learning-datasets/horse-or-human.zip"
file_name = "horse-or-human.zip"
training_dir = "horse-or-human/training/"
urllib.request.urlretrieve(url, file_name)

zip_ref = zipfile.ZipFile(file_name, "r")
zip_ref.extractall(training_dir)
zip_ref.close()


In [2]:
url = "https://storage.googleapis.com/learning-datasets/validation-horse-or-human.zip"
file_name = "validation-horse-or-human.zip"
validation_dir = "horse-or-human/validation/"
urllib.request.urlretrieve(url, file_name)

zip_ref = zipfile.ZipFile(file_name, "r")
zip_ref.extractall(validation_dir)
zip_ref.close()

In [3]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

training_dir = "horse-or-human/training/"
validation_dir = "horse-or-human/validation/"
# Define transformations
train_transform = transforms.Compose(
    [
        transforms.Resize((150, 150)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(20),
        transforms.RandomAffine(
            degrees=0,  # No rotation
            translate=(0.2, 0.2),  # Translate up to 20% vertically and horizontally
            scale=(0.8, 1.2),  # Zoom in or out by 20%
            shear=20,  # Shear by up to 20 degrees
        ),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)


# Load the datasets
train_dataset = datasets.ImageFolder(root=training_dir, transform=train_transform)
val_dataset = datasets.ImageFolder(root=validation_dir, transform=train_transform)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class HorsesHumansCNN(nn.Module):
    def __init__(self):
        super(HorsesHumansCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 18 * 18, 512)
        self.drop = nn.Dropout(0.25)
        self.fc2 = nn.Linear(512, 1)  # Only 1 output neuron for binary classification

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 64 * 18 * 18)
        x = F.relu(self.fc1(x))
        x = self.drop(x)
        x = self.fc2(x)
        x = torch.sigmoid(x)  # Use sigmoid to output probabilities
        return x


In [5]:
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = HorsesHumansCNN().to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


def train_model(num_epochs):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = (
                images.to(device),
                labels.to(device).float(),
            )  # Convert labels to float
            optimizer.zero_grad()
            outputs = model(images).view(-1)  # Flatten outputs to match label shape
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}")

        # Evaluate on training set
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device).float()
                outputs = model(images).view(-1)
                predicted = outputs > 0.5  # Threshold predictions
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            print(f"Training Set Accuracy: {100 * correct / total}%")

        # Evaluate on validation set
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device).float()
                outputs = model(images).view(-1)
                predicted = outputs > 0.5  # Threshold predictions
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            print(f"Validation Set Accuracy: {100 * correct / total}%")


train_model(15)

Epoch 1, Loss: 0.7057890449509476
Training Set Accuracy: 69.4255111976631%
Validation Set Accuracy: 50.0%
Epoch 2, Loss: 0.5696496846097888
Training Set Accuracy: 79.45472249269717%
Validation Set Accuracy: 50.0%
Epoch 3, Loss: 0.46444773041840753
Training Set Accuracy: 72.15189873417721%
Validation Set Accuracy: 50.0%
Epoch 4, Loss: 0.4191161019332481
Training Set Accuracy: 84.12852969814995%
Validation Set Accuracy: 53.515625%
Epoch 5, Loss: 0.34964124858379364
Training Set Accuracy: 86.6601752677702%
Validation Set Accuracy: 68.359375%
Epoch 6, Loss: 0.26823490180752496
Training Set Accuracy: 86.56280428432328%
Validation Set Accuracy: 64.84375%
Epoch 7, Loss: 0.20637338676235892
Training Set Accuracy: 91.91820837390458%
Validation Set Accuracy: 69.53125%
Epoch 8, Loss: 0.21157201729489095
Training Set Accuracy: 93.1840311587147%
Validation Set Accuracy: 60.15625%
Epoch 9, Loss: 0.17945410830504965
Training Set Accuracy: 96.39727361246348%
Validation Set Accuracy: 80.078125%
Epoch 1

In [6]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device).float()
        outputs = model(images).view(-1)
        predicted = outputs > 0.5  # Threshold predictions
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        print(outputs)
        print(labels)
    print(f"Validation Accuracy: {100 * correct / total}%")

tensor([9.7736e-01, 9.9994e-01, 9.1021e-01, 9.8722e-01, 9.9984e-01, 7.7831e-01,
        9.6096e-01, 8.3773e-02, 9.8293e-01, 9.9950e-01, 5.9853e-01, 9.8388e-01,
        9.9975e-01, 9.9974e-01, 9.9774e-01, 9.7574e-01, 9.9871e-01, 4.4554e-01,
        4.8558e-01, 9.9176e-01, 9.0314e-01, 9.0461e-01, 8.9361e-01, 6.1158e-01,
        9.7448e-01, 9.9946e-01, 8.6108e-02, 9.9983e-01, 9.9262e-01, 9.9147e-01,
        3.2320e-04, 2.8685e-02])
tensor([1., 1., 0., 0., 1., 0., 1., 0., 1., 1., 0., 1., 1., 1., 1., 0., 1., 0.,
        0., 1., 1., 0., 0., 0., 1., 1., 1., 1., 0., 1., 0., 0.])
tensor([0.9976, 0.6913, 0.9865, 0.2034, 0.4928, 0.9914, 0.9996, 0.9204, 0.2354,
        0.8544, 0.9996, 0.9259, 0.9811, 0.7991, 0.9873, 0.7957, 0.9819, 0.6693,
        0.9999, 0.9967, 0.9340, 0.9771, 0.0362, 0.7641, 0.9999, 0.9855, 0.9996,
        0.9998, 0.9560, 0.9969, 0.7642, 0.1245])
tensor([1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0.,
        1., 1., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1

In [7]:
from torchsummary import summary

summary(model, input_size=(3, 150, 150))  # (Channels, Height, Width)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 150, 150]             448
         MaxPool2d-2           [-1, 16, 75, 75]               0
            Conv2d-3           [-1, 32, 75, 75]           4,640
         MaxPool2d-4           [-1, 32, 37, 37]               0
            Conv2d-5           [-1, 64, 37, 37]          18,496
         MaxPool2d-6           [-1, 64, 18, 18]               0
            Linear-7                  [-1, 512]      10,617,344
           Dropout-8                  [-1, 512]               0
            Linear-9                    [-1, 1]             513
Total params: 10,641,441
Trainable params: 10,641,441
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.26
Forward/backward pass size (MB): 5.98
Params size (MB): 40.59
Estimated Total Size (MB): 46.83
-----------------------------------

In [8]:
from PIL import Image
from torchvision import transforms

# Define transformations
transform = transforms.Compose(
    [
        transforms.Resize((150, 150)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)


def load_image(image_path, transform):
    # Load image
    image = Image.open(image_path).convert(
        "RGB"
    )  # Convert to RGB just in case it's not
    # Apply transformations
    image = transform(image)
    # Add batch dimension, as the model expects batches
    image = image.unsqueeze(0)
    return image

    # Prediction function


def predict(image_path, model, device, transform):
    model.eval()
    image = load_image(image_path, transform)
    image = image.to(device)
    with torch.no_grad():
        output = model(image)
        prediction = output > 0.5
        class_name = "Human" if prediction.item() == 1 else "Horse"
        print(image_path)
        print(f"The image is predicted to be a {class_name}.")
        print(output)

In [None]:
# from google.colab import files

# uploaded = files.upload()

# for img in uploaded.keys():
#     predict(img, model, device, transform)
imgs = ["./test/horse-facts.jpg", "./test/M4ewc.jpg", "./test/girl-3277529_640.jpg"]
predict(imgs[2], model, device, train_transform)

./test/girl-3277529_640.jpg
The image is predicted to be a Human.
tensor([[0.7271]])


# Transfer Learning


In [None]:
import urllib.request
import zipfile

url = "https://storage.googleapis.com/learning-datasets/horse-or-human.zip"
file_name = "horse-or-human.zip"
training_dir = "horse-or-human/training/"
urllib.request.urlretrieve(url, file_name)

zip_ref = zipfile.ZipFile(file_name, "r")
zip_ref.extractall(training_dir)
zip_ref.close()

url = "https://storage.googleapis.com/learning-datasets/validation-horse-or-human.zip"
file_name = "validation-horse-or-human.zip"
validation_dir = "horse-or-human/validation/"
urllib.request.urlretrieve(url, file_name)

zip_ref = zipfile.ZipFile(file_name, "r")
zip_ref.extractall(validation_dir)
zip_ref.close()

In [None]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torch.optim import RMSprop

# Load the pre-trained Inception V3 model
pre_trained_model = models.inception_v3(pretrained=True, aux_logits=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pre_trained_model.to(device)


def print_model_summary(model):
    for name, module in model.named_modules():
        print(f"{name} : {module.__class__.__name__}")


# Example of how to use the function with your pre-trained model
print_model_summary(pre_trained_model)

from torchsummary import summary

summary(pre_trained_model, input_size=(3, 299, 299))  # (Channels, Height, Width)

In [19]:
# Freeze all layers up to and including the 'Mixed_7c'
for name, parameter in pre_trained_model.named_parameters():
    parameter.requires_grad = False
    if "Mixed_7c" in name:
        break

# Modify the existing fully connected layer
num_ftrs = pre_trained_model.fc.in_features
pre_trained_model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 1024),  # New fully connected layer with 1024 outputs
    nn.ReLU(),  # Activation layer
    nn.Linear(1024, 2),  # Final layer for binary classification
)

transform = transforms.Compose(
    [
        transforms.Resize((299, 299)),  # Resize to match Inception V3 input size
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)


# Load datasets using ImageFolder
train_dataset = ImageFolder(root=training_dir, transform=transform)
val_dataset = ImageFolder(root=validation_dir, transform=transform)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


def train_model(model, criterion, optimizer, train_loader, num_epochs=10):
    model.train()  # Set the model to training mode
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    for epoch in range(num_epochs):
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            # Handle multiple outputs for training with auxiliary logits
            if isinstance(outputs, tuple):
                output, aux_output = outputs
                loss1 = criterion(output, labels)
                loss2 = criterion(aux_output, labels)
                loss = (
                    loss1 + 0.4 * loss2
                )  # Scale the auxiliary loss as is standard for Inception
            else:
                loss = criterion(outputs, labels)

            _, preds = torch.max(
                output, 1
            )  # Ensure you use the main output for accuracy calculation

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Update statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects / len(train_loader.dataset)

        print(
            f"Epoch {epoch + 1}/{num_epochs} - Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}"
        )


In [20]:
# Only optimize parameters that are set to be trainable
optimizer = RMSprop(
    filter(lambda p: p.requires_grad, pre_trained_model.parameters()), lr=0.001
)
criterion = nn.CrossEntropyLoss()

# Train the model
train_model(pre_trained_model, criterion, optimizer, train_loader, num_epochs=3)

Epoch 1/3 - Loss: 4.2148, Acc: 0.9455
Epoch 2/3 - Loss: 3.5521, Acc: 0.9905
Epoch 3/3 - Loss: 3.5704, Acc: 0.9820


In [21]:
def evaluate_model(model, data_loader, device):
    model.eval()  # Set the model to evaluation mode
    total = 0
    corrects = 0

    with torch.no_grad():  # No need to track gradients for evaluation
        for inputs, labels in data_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            # Handle multiple outputs during evaluation
            if isinstance(outputs, tuple):
                outputs = outputs[0]  # Use only the main output for evaluation

            _, preds = torch.max(outputs, 1)
            corrects += torch.sum(preds == labels).item()
            total += labels.size(0)

    accuracy = corrects / total
    print(f"Accuracy on the validation set: {accuracy:.4f} ({corrects}/{total})")
    return accuracy


# Assuming the necessary imports and pre_trained_model are defined and set up
# Ensure the model and data loaders are on the appropriate device

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pre_trained_model = pre_trained_model.to(device)

# Assuming val_loader is defined and set up as previously shown

accuracy = evaluate_model(pre_trained_model, val_loader, device)


Accuracy on the validation set: 0.9870 (987/1000)


# Cats versus Dogs
(Note the following cells will only work if you have already run the above cells for training Horses v Humans)

In [17]:
import urllib.request
import zipfile

!wget --no-check-certificate \
    https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip \
    -O "cats_and_dogs_filtered.zip"


zip_ref = zipfile.ZipFile("cats_and_dogs_filtered.zip", "r")
zip_ref.extractall("/tmp")
zip_ref.close()

training_dir = "/tmp/cats_and_dogs_filtered/train/"
validation_dir = "/tmp/cats_and_dogs_filtered/validation/"

--2025-07-20 20:13:32--  https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 2404:6800:4004:801::201b, 2404:6800:4004:808::201b, 2404:6800:4004:827::201b, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|2404:6800:4004:801::201b|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 68606236 (65M) [application/zip]
Saving to: ‘cats_and_dogs_filtered.zip’


2025-07-20 20:13:35 (25.6 MB/s) - ‘cats_and_dogs_filtered.zip’ saved [68606236/68606236]



In [None]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torch.optim import RMSprop

# Load the pre-trained Inception V3 model
pre_trained_model = models.inception_v3(pretrained=True, aux_logits=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pre_trained_model.to(device)


def print_model_summary(model):
    for name, module in model.named_modules():
        print(f"{name} : {module.__class__.__name__}")


# Example of how to use the function with your pre-trained model
print_model_summary(pre_trained_model)

from torchsummary import summary

summary(pre_trained_model, input_size=(3, 299, 299))  # (Channels, Height, Width)

In [None]:
# Freeze all layers up to and including the 'Mixed_7c'
for name, parameter in pre_trained_model.named_parameters():
    parameter.requires_grad = False
    if "Mixed_7c" in name:
        break

# Modify the existing fully connected layer
num_ftrs = pre_trained_model.fc.in_features
pre_trained_model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 1024),  # New fully connected layer with 1024 outputs
    nn.ReLU(),  # Activation layer
    nn.Linear(1024, 2),  # Final layer for binary classification
)

transform = transforms.Compose(
    [
        transforms.Resize((299, 299)),  # Resize to match Inception V3 input size
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)


# Load datasets using ImageFolder
train_dataset = ImageFolder(root=training_dir, transform=transform)
val_dataset = ImageFolder(root=validation_dir, transform=transform)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


def train_model(model, criterion, optimizer, train_loader, num_epochs=10):
    model.train()  # Set the model to training mode
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    for epoch in range(num_epochs):
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            # Handle multiple outputs for training with auxiliary logits
            if isinstance(outputs, tuple):
                output, aux_output = outputs
                loss1 = criterion(output, labels)
                loss2 = criterion(aux_output, labels)
                loss = (
                    loss1 + 0.4 * loss2
                )  # Scale the auxiliary loss as is standard for Inception
            else:
                loss = criterion(outputs, labels)

            _, preds = torch.max(
                output, 1
            )  # Ensure you use the main output for accuracy calculation

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Update statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects / len(train_loader.dataset)

        print(
            f"Epoch {epoch + 1}/{num_epochs} - Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}"
        )


In [None]:
# Only optimize parameters that are set to be trainable
optimizer = RMSprop(
    filter(lambda p: p.requires_grad, pre_trained_model.parameters()), lr=0.001
)
criterion = nn.CrossEntropyLoss()

# Train the model
train_model(pre_trained_model, criterion, optimizer, train_loader, num_epochs=3)

In [None]:
def evaluate_model(model, data_loader, device):
    model.eval()  # Set the model to evaluation mode
    total = 0
    corrects = 0

    with torch.no_grad():  # No need to track gradients for evaluation
        for inputs, labels in data_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            # Handle multiple outputs during evaluation
            if isinstance(outputs, tuple):
                outputs = outputs[0]  # Use only the main output for evaluation

            _, preds = torch.max(outputs, 1)
            corrects += torch.sum(preds == labels).item()
            total += labels.size(0)

    accuracy = corrects / total
    print(f"Accuracy on the validation set: {accuracy:.4f} ({corrects}/{total})")
    return accuracy


# Assuming the necessary imports and pre_trained_model are defined and set up
# Ensure the model and data loaders are on the appropriate device

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pre_trained_model = pre_trained_model.to(device)

# Assuming val_loader is defined and set up as previously shown

accuracy = evaluate_model(pre_trained_model, val_loader, device)

In [None]:
from PIL import Image
from torchvision import transforms


def load_image(image_path, transform):
    # Load image
    image = Image.open(image_path).convert(
        "RGB"
    )  # Convert to RGB just in case it's not
    # Apply transformations
    image = transform(image)
    # Add batch dimension, as the model expects batches
    image = image.unsqueeze(0)
    return image

    # Prediction function


def predict(image_path, model, device, transform):
    model.eval()
    image = load_image(image_path, transform)
    image = image.to(device)
    with torch.no_grad():
        output = model(image)
        print(output)
        prediction = torch.max(output, 1)
        print(prediction)

In [None]:
from google.colab import files

uploaded = files.upload()

for img in uploaded.keys():
    predict(img, pre_trained_model, device, transform)

# Rock Paper Scissors

In [None]:
import urllib.request
import zipfile

!wget --no-check-certificate \
    https://storage.googleapis.com/learning-datasets/rps.zip -O "rps.zip"


zip_ref = zipfile.ZipFile("rps.zip", "r")
zip_ref.extractall("/tmp")
zip_ref.close()

training_dir = "/tmp/rps/"

In [None]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torch.optim import RMSprop

# Load the pre-trained Inception V3 model
pre_trained_model = models.inception_v3(pretrained=True, aux_logits=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pre_trained_model.to(device)


def print_model_summary(model):
    for name, module in model.named_modules():
        print(f"{name} : {module.__class__.__name__}")


# Example of how to use the function with your pre-trained model
print_model_summary(pre_trained_model)

from torchsummary import summary

summary(pre_trained_model, input_size=(3, 299, 299))  # (Channels, Height, Width)

In [None]:
# Freeze all layers up to and including the 'Mixed_7c'
for name, parameter in pre_trained_model.named_parameters():
    parameter.requires_grad = False
    if "Mixed_7c" in name:
        break

# Modify the existing fully connected layer
num_ftrs = pre_trained_model.fc.in_features
pre_trained_model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 1024),  # New fully connected layer with 1024 outputs
    nn.ReLU(),  # Activation layer
    nn.Linear(1024, 3),  # Final layer for binary classification
)

transform = transforms.Compose(
    [
        transforms.Resize((299, 299)),  # Resize to match Inception V3 input size
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)


# Load datasets using ImageFolder
train_dataset = ImageFolder(root=training_dir, transform=transform)
val_dataset = ImageFolder(root=validation_dir, transform=transform)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


def train_model(model, criterion, optimizer, train_loader, num_epochs=10):
    model.train()  # Set the model to training mode
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    for epoch in range(num_epochs):
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            # Handle multiple outputs for training with auxiliary logits
            if isinstance(outputs, tuple):
                output, aux_output = outputs
                loss1 = criterion(output, labels)
                loss2 = criterion(aux_output, labels)
                loss = (
                    loss1 + 0.4 * loss2
                )  # Scale the auxiliary loss as is standard for Inception
            else:
                loss = criterion(outputs, labels)

            _, preds = torch.max(
                output, 1
            )  # Ensure you use the main output for accuracy calculation

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Update statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects / len(train_loader.dataset)

        print(
            f"Epoch {epoch + 1}/{num_epochs} - Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}"
        )


In [None]:
# Only optimize parameters that are set to be trainable
optimizer = RMSprop(
    filter(lambda p: p.requires_grad, pre_trained_model.parameters()), lr=0.001
)
criterion = nn.CrossEntropyLoss()

# Train the model
train_model(pre_trained_model, criterion, optimizer, train_loader, num_epochs=3)

In [None]:
from PIL import Image
from torchvision import transforms


def load_image(image_path, transform):
    # Load image
    image = Image.open(image_path).convert(
        "RGB"
    )  # Convert to RGB just in case it's not
    # Apply transformations
    image = transform(image)
    # Add batch dimension, as the model expects batches
    image = image.unsqueeze(0)
    return image

    # Prediction function


def predict(image_path, model, device, transform):
    model.eval()
    image = load_image(image_path, transform)
    image = image.to(device)
    with torch.no_grad():
        output = model(image)
        print(output)
        prediction = torch.max(output, 1)
        print(prediction)

In [None]:
from google.colab import files

uploaded = files.upload()

for img in uploaded.keys():
    predict(img, pre_trained_model, device, transform)