In [12]:
import glob
import torch
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam, SGD
from torch.autograd import Variable
import torchvision
import pathlib


"""
1. Select a device 
2. Prepare data
3. Build neural network
4. Training
5. Evaluate  
"""

'\n1. Select a device \n2. Prepare data\n3. Build neural network\n4. Training\n5. Evaluate  \n'

In [2]:
# Select a device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

from fastai.vision.all import *
set_seed(42,reproducible=True)
source = untar_data(URLs.IMAGENETTE)

In [3]:
# Transformer
transformer = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) 
])

In [4]:
# Prepare data
train_path = source/'train'
val_path = source/'val'
train_loader = DataLoader(
    torchvision.datasets.ImageFolder(root=train_path, transform=transformer),
    batch_size = 128,
    shuffle = True  
)
val_loader = DataLoader(
    torchvision.datasets.ImageFolder(root=val_path, transform=transformer),
    batch_size = 128
)

"""This gets the names of the classes (names of folders containing classes)""" 
root = pathlib.Path(train_path)
classes = sorted([i.name.split('/')[-1] for i in root.iterdir()])
print(classes)

['n01440764', 'n02102040', 'n02979186', 'n03000684', 'n03028079', 'n03394916', 'n03417042', 'n03425413', 'n03445777', 'n03888257']


In [16]:
# Build the network
class ConvNet(nn.Module):
    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        """Convolutional filter output size = ((w - f + 2p)/s) + 1
        where w is width/height
        f is filter/kernel size
        p is padding 
        s is stride """
        """Input shape = (256, 3, 150, 150)"""
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=10, kernel_size=3, stride=1, padding=1)
        """Current shape = (256, 10, 150, 150)"""
        self.relu1 = nn.ReLU()
        """Current shape = (256, 10, 150, 150)"""
        self.bn1 = nn.BatchNorm2d(num_features=10)
        """Current shape = (256, 10, 150, 150)"""
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=3, stride=1, padding=1)
        """Current shape = (256, 20, 150, 150)"""
        self.relu2 = nn.ReLU()
        """Current shape = (256, 20, 150, 150)"""
        self.bn2 = nn.BatchNorm2d(num_features=20)
        """Current shape = (256, 20, 150, 150)"""
        self.max_pool1 = nn.MaxPool2d(kernel_size=2)
        """Current shape = (256, 20, 75, 75)"""
        self.fc1 = nn.Linear(in_features=20*75*75, out_features=20*75)
        self.dropout1 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features=20*75, out_features=num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input):
        output = self.conv1(input.cuda())
        output = self.relu1(output)
        output = self.bn1(output)
        output = self.conv2(output)
        output = self.relu2(output)
        output = self.bn2(output)
        output = self.max_pool1(output)
        output = output.view(-1, 20*75*75)
        output = self.fc1(output)
        output = self.dropout1(output)
        output = self.fc2(output)
        output = self.softmax(output)
        return output
    
model = ConvNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
loss_function = nn.CrossEntropyLoss()
num_epochs = 10

In [8]:
# Count the number of images in train and val
train_count = 9469
val_count = 3925
print('train_count: ' + str(train_count))
print('val_count: ' + str(val_count))

train_count: 9469
val_count: 3925


In [None]:
# Training stage
best_accuracy = 0.0
for epoch in range(num_epochs):
    train_loss = 0.0
    train_accuracy = 0.0
    model.train()
    """Train batch by batch"""
    for i, (images, labels) in enumerate(train_loader):
        """zero gradients at the start of each batch"""
        if torch.cuda.is_available():
          images = Variable(images.cuda())
          labels = Variable(labels.cuda())
        optimizer.zero_grad()
        """Feed images into model and get output"""
        outputs = model(images)
        """Calculate loss function"""
        loss = loss_function(outputs, labels)
        """Back propagation"""
        loss.backward()
        """optimize"""
        optimizer.step()
        train_loss = loss.cpu().data*images.size(0)
        _,prediction = torch.max(outputs.data, 1)
        train_accuracy += int(torch.sum(prediction==labels.data))
    train_loss = train_loss/train_count
    train_accuracy = train_accuracy/train_count

    model.eval()
    val_accuracy = 0.0
    for i, (images, labels) in enumerate(val_loader):
        if torch.cuda.is_available():
          images = Variable(images.cuda())
          labels = Variable(labels.cuda())
        outputs = model(images)
        _,prediction = torch.max(outputs.data, 1)
        val_accuracy += int(torch.sum(prediction==labels.data))
    val_accuracy = val_accuracy/val_count
    
    print('epoch: ' + str(epoch))
    print('train loss: ' + str(train_loss))
    print('train accuracy: ' + str(train_accuracy))
    print('val accuracy: ' + str(val_accuracy))
    print()

    if val_accuracy > best_accuracy:
        torch.save(model.state_dict(), 'checkpoint.model')
        best_accuracy = val_accuracy

epoch: 0
train loss: tensor(0.0293)
train accuracy: 0.15556024923434364
val accuracy: 0.16152866242038216

epoch: 1
train loss: tensor(0.0297)
train accuracy: 0.1596789523708945
val accuracy: 0.15566878980891719

epoch: 2
train loss: tensor(0.0302)
train accuracy: 0.16960608300770938
val accuracy: 0.17248407643312103

