In [None]:
import torch
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt
import numpy as np
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
# Note: The exact path might vary slightly depending on the Kaggle environment.
# This path is a common structure for this dataset.
train_dir = "/kaggle/input/stanford-dogs-dataset/images/Images"
# This copy of the Stanford Dogs dataset has no separate test directory in the
# same format, so the train / validation / test splits are all carved out of
# the single directory above.

# Seed for the split so the same images land in the same subset on every run.
SPLIT_SEED = 42

# Training-time preprocessing: resize plus random augmentation.
# NOTE(review): RandomRotation((10, 40)) always rotates by +10..+40 degrees
# (never 0 or a negative angle), and vertical flips are unusual for dog
# photos - confirm both are intended.
tf = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation((10, 40)),
    transforms.ToTensor(),
])

# Evaluation-time preprocessing: deterministic, no augmentation, so that
# validation and test metrics are measured on unmodified images.
eval_tf = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

# Two views over the same files: one augmented (training), one not (evaluation).
# ImageFolder indexes the directory in sorted order, so a sample index refers
# to the same image in both views.
full_dataset = ImageFolder(train_dir, transform=tf)
eval_dataset = ImageFolder(train_dir, transform=eval_tf)

# 60 % train / 20 % validation / remainder test.
train_size = int(0.6 * len(full_dataset))
val_size = int(0.2 * len(full_dataset))
test_size = len(full_dataset) - train_size - val_size

split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_split, test_split = random_split(
    full_dataset, [train_size, val_size, test_size], generator=split_generator
)
# Re-point the held-out indices at the non-augmented view of the data.
val_dataset = torch.utils.data.Subset(eval_dataset, val_split.indices)
test_dataset = torch.utils.data.Subset(eval_dataset, test_split.indices)

# If you have a separate test set, you would load it like this instead:
# test_dataset = ImageFolder(test_dir, transform=eval_tf)

# Create DataLoaders (only the training data is shuffled).
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


print(f"Length of train: {len(train_dataset)}, Validation: {len(val_dataset)}, Test: {len(test_dataset)}")

In [None]:
#image, label = train_dataset[2001]

#print(f"Label: {label}")

#plt.imshow(image.permute(1, 2, 0))  # ToTensor yields (C, H, W); imshow expects (H, W, C)
#plt.show()

In [None]:
class CNN(nn.Module):
    """Four-block convolutional image classifier.

    Each block applies Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2) -> Dropout(p=0.1), doubling the channel count
    (32, 64, 128, 256) while halving the spatial size. A global average pool
    then reduces the feature map to one 256-vector per image, which a single
    linear layer maps to ``num_classes`` logits.

    The ReLU is applied functionally, so the module's parameters and
    ``state_dict`` keys are exactly those of the conv / batch-norm / linear
    layers declared below.

    Args:
        num_classes: Number of output logits (default 120).

    Shape comments below assume a 3 x 256 x 256 input; because of the adaptive
    pooling head, other spatial sizes also work as long as they survive the
    four 2x down-samplings.
    """

    def __init__(self, num_classes=120):
        super().__init__()
        # Conv Block 1
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)  # 32, 256, 256
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2)  # 32, 128, 128
        self.drop1 = nn.Dropout(p=0.1)
        # Conv Block 2
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)  # 64, 128, 128
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2)  # 64, 64, 64
        self.drop2 = nn.Dropout(p=0.1)
        # Conv Block 3
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)  # 128, 64, 64
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(kernel_size=2)  # 128, 32, 32
        self.drop3 = nn.Dropout(p=0.1)
        # Conv Block 4
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)  # 256, 32, 32
        self.bn4 = nn.BatchNorm2d(256)
        self.pool4 = nn.MaxPool2d(kernel_size=2)  # 256, 16, 16
        self.drop4 = nn.Dropout(p=0.1)

        # Classification head
        self.gap = nn.AdaptiveAvgPool2d((1, 1))  # 256, 1, 1
        self.flatten = nn.Flatten()  # 256
        self.fc = nn.Linear(256, num_classes)  # num_classes logits

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes) for image batch ``x``."""
        relu = nn.functional.relu
        # Without a non-linearity after each conv/BN pair, the stacked
        # convolutions would only be separated by max-pooling.
        # Block 1 Output
        x = self.drop1(self.pool1(relu(self.bn1(self.conv1(x)))))
        # Block 2 Output
        x = self.drop2(self.pool2(relu(self.bn2(self.conv2(x)))))
        # Block 3 Output
        x = self.drop3(self.pool3(relu(self.bn3(self.conv3(x)))))
        # Block 4 Output
        x = self.drop4(self.pool4(relu(self.bn4(self.conv4(x)))))
        # Classification Head
        x = self.fc(self.flatten(self.gap(x)))

        return x

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Classifier with its default output size, moved onto the selected device.
model1 = CNN()
model1 = model1.to(device)

# Cross-entropy loss on the raw logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model1.parameters(), lr=0.001)

In [None]:
def accuracy(outputs, labels):
    """Return the fraction of samples whose top-scoring class equals its label.

    Args:
        outputs: Tensor of per-class scores, one row per sample; the predicted
            class is the index of the largest score along dim 1.
        labels: Tensor of target class indices, one per sample.

    Returns:
        A Python float in [0, 1], e.g. 0.75 when 24 of 32 predictions match.
    """
    predicted = outputs.argmax(dim=1)
    hits = predicted.eq(labels)
    # Mean of a 0/1 tensor is the share of correct predictions.
    return hits.float().mean().item()

def _run_epoch(model, criterion, loader, device, optimizer=None):
    """Make one pass over ``loader`` and return ``(mean_loss, mean_accuracy)``.

    With an ``optimizer`` the model is put in training mode and updated after
    every batch; without one it is put in eval mode and gradients are disabled.
    Every batch contributes to the metrics, weighted by its number of samples
    (this assumes ``criterion`` returns a per-batch mean, as
    ``nn.CrossEntropyLoss`` does by default).

    Returns ``(nan, nan)`` when the loader yields no samples.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    acc_sum = 0.0
    n_samples = 0
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()   # dL/dW
                optimizer.step()  # W <- W - lr * dL/dW (for plain SGD)

            # Accumulate inside the batch loop so every batch is counted.
            batch_size = images.size(0)
            loss_sum += loss.item() * batch_size
            acc_sum += accuracy(outputs, labels) * batch_size
            n_samples += batch_size

    if n_samples == 0:
        return float("nan"), float("nan")
    return loss_sum / n_samples, acc_sum / n_samples


def train_model(model, criterion, optimizer, train_loader, val_loader, device, num_epochs=100):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Args:
        model: Module to train; updated in place.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over ``model``'s parameters.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        device: Device each batch is moved to before the forward pass.
        num_epochs: Number of full passes over ``train_loader``.

    Prints one line per epoch with the sample-weighted mean loss and accuracy
    over the whole training set and the whole validation set. Returns None.
    """
    for i in range(num_epochs):
        epoch_train_loss, epoch_train_acc = _run_epoch(
            model, criterion, train_loader, device, optimizer
        )
        epoch_val_loss, epoch_val_acc = _run_epoch(model, criterion, val_loader, device)

        print(f"Epoch: {i}, train_loss: {epoch_train_loss}, train_acc: {epoch_train_acc}, val_loss: {epoch_val_loss}, val_acc: {epoch_val_acc}")

In [None]:
# Train model1 for 100 epochs; one metrics line is printed per epoch.
train_model(
    model=model1,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    device=device,
    num_epochs=100,
)


In [None]:
def test_model(model, criterion, test_loader, device):
        model.eval()
        running_test_loss = 0.0
        test_acc_sum = 0.0
        test_samples = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)

            running_test_loss += loss.item() * images.size(0) # 32, 3, 256, 256
            test_acc_sum += accuracy(outputs, labels) * images.size(0)
            test_samples += images.size(0)

        epoch_test_loss = running_test_loss/test_samples
        epoch_test_acc = test_acc_sum/test_samples
    
        print(f"test_loss: {epoch_test_loss}, test_acc: {epoch_test_acc}")


In [None]:
# Report loss and accuracy of the trained model on the held-out test split.
test_model(
    model=model1,
    criterion=criterion,
    test_loader=test_loader,
    device=device,
)
