In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets


In [4]:
import torch
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

# Define the transforms for the training and test datasets
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
            transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}


# Load the training and test datasets
image_datasets = {x: datasets.Flowers102(root="./data/",
                                         split=x,
                                         transform=data_transforms[x],
                                         download=True)
                  for x in ['train', 'test', "val"]}
# Create dataloaders for training and test datasets
dataloaders = {x: DataLoader(image_datasets[x], batch_size=64,
                             shuffle=True, num_workers=4)
              for x in ['train', 'test', 'val']}


In [6]:
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self, num_channels=3, num_out_ch=[32, 64, 128], img_w=100, img_h=100, num_classes=102):
        super(CNN, self).__init__()
        # our first conv layer will have 32 output channels, a kernel size of 3, a stride of 1, and a padding of 1
        self.conv1 = nn.Conv2d(num_channels, num_out_ch[0], kernel_size=(3,3), stride=(1,1), padding=(1,1))
        # batch normalization layer with 32 channels (same as output of conv layer)
        self.bn1 = nn.BatchNorm2d(num_out_ch[0])
        # our second conv layer will have 64 output channels, a kernel size of 3, a stride of 1, and a padding of 1
        self.conv2 = nn.Conv2d(num_out_ch[0], num_out_ch[1], kernel_size=(3,3), stride=(1,1), padding=(1,1))
        # batch normalization layer with 64 channels (same as output of conv layer)
        self.bn2 = nn.BatchNorm2d(num_out_ch[1])
        # our third conv layer will have 128 output channels, a kernel size of 3, a stride of 1, and a padding of 1
        self.conv3 = nn.Conv2d(num_out_ch[1], num_out_ch[2], kernel_size=(3,3), stride=(1,1), padding=(1,1))
        # batch normalization layer with 128 channels (same as output of conv layer)
        self.bn3 = nn.BatchNorm2d(num_out_ch[2])
        # max pooling layer with kernel size 2 and stride 2
        self.pool = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))
        # fully connected layer that transforms the output of the conv layers into num_classes
        self.fc = nn.Linear(int(img_w/8)*int(img_h/8)*num_out_ch[2], num_classes)
    
    def forward(self, x):
        # apply first conv layer, then relu, then batch norm, then max pool
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        # apply second conv layer, then relu, then batch norm, then max pool
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        # apply third conv layer, then relu, then batch norm, then max pool
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        # flatten the output of the conv layers
        x = self.fc(x.reshape(x.shape[0], -1))
        
        return x
model = CNN()
x = torch.randn(1, 3, 100, 100)
print(model(x).shape)
y = model(x)
print(y.shape)

torch.Size([1, 102])
torch.Size([1, 102])


In [9]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimiser = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 12

In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    running_loss = 0.0
    running_corrects = 0

    for inputs, labels in dataloaders['train']:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimiser.zero_grad()

        # Forward pass
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimiser.step()

        # Statistics
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(image_datasets['train'])
    epoch_acc = running_corrects.double() / len(image_datasets['train'])

    print('Epoch {}/{} - Loss: {:.4f} - Acc: {:.4f}'.format(epoch + 1, num_epochs, epoch_loss, epoch_acc))


RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x100352 and 18432x102)