In [1]:
import torch 
from torch import nn
from torchvision import models, datasets, transforms

In [3]:
# Relative paths to the three dataset splits.
train_dir = 'train'
valid_dir = 'valid'
test_dir = 'test'

# Per-channel mean / std used by the Normalize step of every pipeline.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]


def _normalize_step():
    """Return a fresh Normalize transform with its own copies of the statistics."""
    return transforms.Normalize(list(channel_mean), list(channel_std))


def _eval_pipeline():
    """Deterministic preprocessing: resize to 256, centre-crop to 224, tensor, normalise."""
    steps = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        _normalize_step(),
    ]
    return transforms.Compose(steps)


# Training pipeline adds random augmentation before the tensor conversion.
augmentation_steps = [
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
]
train_transforms = transforms.Compose(
    augmentation_steps + [transforms.ToTensor(), _normalize_step()]
)

# Validation and test share the same (non-random) preprocessing recipe,
# but each gets its own Compose instance.
valid_transforms = _eval_pipeline()
test_transforms = _eval_pipeline()

# One ImageFolder per split, each paired with its pipeline.
train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transforms)
valid_dataset = datasets.ImageFolder(root=valid_dir, transform=valid_transforms)
test_dataset = datasets.ImageFolder(root=test_dir, transform=test_transforms)


In [4]:
# All three loaders use the same batch size; only the training loader shuffles.
BATCH_SIZE = 50

trainloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
validloader = torch.utils.data.DataLoader(dataset=valid_dataset, batch_size=BATCH_SIZE)
testloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

In [9]:
# Show the dataset summary (size, root folder and the transform pipeline).
print(train_dataset)

Dataset ImageFolder
    Number of datapoints: 6552
    Root Location: train
    Transforms (if any): Compose(
                             RandomRotation(degrees=(-30, 30), resample=False, expand=False)
                             RandomResizedCrop(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR)
                             RandomHorizontalFlip(p=0.5)
                             ToTensor()
                             Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                         )
    Target Transforms (if any): None


In [10]:
import json

# Load the JSON mapping stored in cat_to_name.json into `cat_to_name`.
# The encoding is given explicitly because JSON text is UTF-8, whereas open()
# would otherwise fall back to the platform's default text encoding.
with open('cat_to_name.json', 'r', encoding='utf-8') as f:
    cat_to_name = json.load(f)

In [11]:
class CNNModel(nn.Module):
    """Convolutional image classifier: 13 conv layers, 5 max-pools, 2 linear layers.

    The feature extractor is built from 3x3 convolutions (stride 1, padding 1,
    so height and width are preserved), each followed by a ReLU, grouped into
    five blocks. Every block ends with a 2x2 max-pool of stride 2 that halves
    the height and width:

        block 1:   3 ->  64 ->  64
        block 2:  64 -> 128 -> 128
        block 3: 128 -> 256 -> 256 -> 256
        block 4: 256 -> 512 -> 512 -> 512
        block 5: 512 -> 512 -> 512 -> 512

    ``fc1`` expects 25088 = 512 * 7 * 7 flattened features, i.e. a 224x224
    input image (224 halved five times is 7). The classifier head is
    ``fc1 -> ReLU -> Dropout(0.5) -> fc2`` followed by a log-softmax over the
    102 outputs, so ``forward`` returns log-probabilities.
    """

    def __init__(self):
        super(CNNModel, self).__init__()

        # Conv2d arguments used throughout:
        #   in_channels  -> number of feature maps coming in (3 only for the first layer)
        #   out_channels -> number of feature maps produced
        #   kernel_size  -> 3x3 kernel
        #   stride       -> kernel moves one pixel at a time
        #   padding      -> 1 pixel of padding keeps the output the same size as the input

        # Block 1: 3 -> 64 -> 64
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.cnn2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

        # Block 2: 64 -> 128 -> 128
        self.cnn3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        self.cnn4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.relu4 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

        # Block 3: 128 -> 256 -> 256 -> 256
        self.cnn5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.relu5 = nn.ReLU()
        self.cnn6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.relu6 = nn.ReLU()
        self.cnn7 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.relu7 = nn.ReLU()
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

        # Block 4: 256 -> 512 -> 512 -> 512
        self.cnn8 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.relu8 = nn.ReLU()
        self.cnn9 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.relu9 = nn.ReLU()
        self.cnn10 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.relu10 = nn.ReLU()
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

        # Block 5: 512 -> 512 -> 512 -> 512
        self.cnn11 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.relu11 = nn.ReLU()
        self.cnn12 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.relu12 = nn.ReLU()
        self.cnn13 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.relu13 = nn.ReLU()
        self.maxpool5 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

        # Classifier head: 25088 flattened features -> 4096 hidden units -> 102 outputs.
        self.fc1 = nn.Linear(25088, 4096)
        self.fc2 = nn.Linear(4096, 102)

        # Head activations are registered as sub-modules (rather than created
        # inside forward) so that Dropout follows model.train() / model.eval().
        # They hold no parameters, so the state_dict keys are unaffected.
        self.fc_relu = nn.ReLU()
        self.fc_dropout = nn.Dropout(p=0.5)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: float tensor of shape (batch, 3, H, W). H and W must reduce to
                7x7 after the five max-pools (e.g. 224x224) so that the
                flattened features match ``fc1``.

        Returns:
            Tensor of shape (batch, 102) holding log-probabilities
            (log-softmax over dim 1).
        """
        # Block 1
        out = self.relu1(self.cnn1(x))
        out = self.relu2(self.cnn2(out))
        out = self.maxpool1(out)

        # Block 2
        out = self.relu3(self.cnn3(out))
        out = self.relu4(self.cnn4(out))
        out = self.maxpool2(out)

        # Block 3
        out = self.relu5(self.cnn5(out))
        out = self.relu6(self.cnn6(out))
        out = self.relu7(self.cnn7(out))
        out = self.maxpool3(out)

        # Block 4
        out = self.relu8(self.cnn8(out))
        out = self.relu9(self.cnn9(out))
        out = self.relu10(self.cnn10(out))
        out = self.maxpool4(out)

        # Block 5
        out = self.relu11(self.cnn11(out))
        out = self.relu12(self.cnn12(out))
        out = self.relu13(self.cnn13(out))
        out = self.maxpool5(out)

        # Flatten (batch, C, H, W) -> (batch, C*H*W) for the linear layers.
        out = out.view(out.size(0), -1)

        # Hidden layer with non-linearity and dropout regularisation.
        out = self.fc_dropout(self.fc_relu(self.fc1(out)))

        # Output layer. No ReLU / Dropout here: fc2 produces the class scores,
        # and clamping or zeroing them would corrupt the prediction.
        out = self.fc2(out)

        # Convert class scores to log-probabilities.
        return self.log_softmax(out)

In [12]:
# Report whether PyTorch can use a CUDA device (shown as the cell output).
torch.cuda.is_available()

False

In [13]:
# Build the network; no saved weights are loaded, so it starts from PyTorch's default initialisation.
model = CNNModel()

In [14]:
# Classification loss. nn.CrossEntropyLoss applies log-softmax to its input itself.
# NOTE(review): CNNModel.forward is written to end in a log-softmax as well, so
# nn.NLLLoss may be the intended pairing — confirm.
criterion = nn.CrossEntropyLoss()

In [15]:
# Step size for the optimiser.
learning_rate = 0.1

# Stochastic gradient descent over every parameter of the model; only the
# learning rate is specified, all other SGD options keep their defaults.
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [16]:
# model.parameters() is a generator, so this prints the generator object, not the weights.
print(model.parameters())

<generator object Module.parameters at 0x000001BF556E6410>


In [17]:
# Materialise the generator so the parameter tensors themselves are printed.
# NOTE(review): this dumps every weight tensor in full; printing only names and
# shapes (e.g. via model.named_parameters()) would be far easier to read.
print(list(model.parameters()))

[Parameter containing:
tensor([[[[-0.1057,  0.1255, -0.1458],
          [ 0.1607, -0.1762,  0.0535],
          [ 0.0658, -0.0710,  0.1529]],

         [[ 0.0351, -0.0452, -0.1786],
          [ 0.1083, -0.0872,  0.0921],
          [-0.1230,  0.1671, -0.1902]],

         [[ 0.0989,  0.0517,  0.0739],
          [ 0.0154, -0.1893,  0.1650],
          [-0.1492,  0.1232,  0.0256]]],


        [[[-0.1855,  0.0538,  0.1147],
          [-0.1596, -0.1718, -0.0170],
          [-0.1723, -0.1901, -0.0073]],

         [[-0.0596, -0.0622,  0.1524],
          [ 0.0970,  0.1290, -0.1217],
          [-0.1098, -0.1664,  0.0238]],

         [[ 0.1311, -0.1620,  0.1330],
          [-0.1097,  0.1197,  0.1841],
          [-0.0684, -0.1011, -0.0885]]],


        [[[ 0.0593, -0.1346,  0.0844],
          [ 0.0191,  0.1453, -0.1537],
          [ 0.1903,  0.0238, -0.0988]],

         [[ 0.0088,  0.1165, -0.1745],
          [ 0.1164, -0.0893, -0.0214],
          [-0.0875, -0.1705,  0.0595]],

         [[-0.0312, -

       requires_grad=True)]


In [18]:
def validation(model, testloader, criterion):
    """Accumulate loss and accuracy of ``model`` over every batch of a loader.

    The function does not switch the model to evaluation mode; call
    ``model.eval()`` beforehand if layers such as dropout should be disabled.
    Gradient tracking is turned off for the duration of the call.

    Args:
        model: network to evaluate; called as ``model(inputs)``.
        testloader: iterable yielding ``(inputs, labels)`` batches.
        criterion: loss callable invoked as ``criterion(output, labels)``.

    Returns:
        Tuple ``(test_loss, accuracy)`` where ``test_loss`` is the SUM of the
        per-batch losses and ``accuracy`` is the SUM of the per-batch mean
        accuracies, both as Python numbers. Divide each by the number of
        batches to obtain averages. Both are 0 for an empty loader.
    """
    # Run batches on whatever device the model lives on instead of relying on
    # a global `device` variable. A model without parameters leaves the batch
    # where it is.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    test_loss = 0
    accuracy = 0

    with torch.no_grad():
        for inputs, labels in testloader:
            if device is not None:
                inputs, labels = inputs.to(device), labels.to(device)

            # Call the module (not .forward) so registered hooks still run.
            output = model(inputs)
            test_loss += criterion(output, labels).item()

            # The predicted class is the index of the largest score per row.
            predictions = output.argmax(dim=1)
            accuracy += (predictions == labels).float().mean().item()

    return test_loss, accuracy

In [None]:
print("Training process initializing .....\n")

# Training schedule. None of these names is defined in any other visible cell,
# so they are set here to keep the cell runnable on a fresh kernel.
# The values are starting points; tune as needed.
epochs = 3          # full passes over the training set
print_every = 40    # run validation and report every N training steps
steps = 0           # global step counter across all epochs

# Keep every batch on the same device as the model's parameters.
device = next(model.parameters()).device

# Accumulates the training loss since the last report. It is reset only after
# a report (not at each epoch start) so that dividing by print_every is always
# an average over exactly print_every steps.
running_loss = 0

for e in range(epochs):
    model.train()  # dropout must be active while training

    for inputs, labels in trainloader:
        steps += 1
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        # Forward and backward passes
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if steps % print_every == 0:
            # Evaluate with dropout disabled and without tracking gradients.
            model.eval()

            with torch.no_grad():
                valid_loss, accuracy = validation(model, validloader, criterion)

            # validation() returns sums over batches; average over the
            # validation loader's batch count (the loader actually evaluated).
            print("Epoch: {}/{} | ".format(e+1, epochs),
                  "Training Loss: {:.4f} | ".format(running_loss/print_every),
                  "Validation Loss: {:.4f} | ".format(valid_loss/len(validloader)),
                  "Validation Accuracy: {:.4f}".format(accuracy/len(validloader)))

            running_loss = 0
            model.train()  # back to training mode for the next step

print("\nTraining process is now complete!!")