In [1]:
import os
import numpy as np
import torch
import glob
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision


In [2]:
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [3]:
# transformation
transformer=transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),  #0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # 0-1 to [-1,1] , formula (x-mean)/std
                        [0.5,0.5,0.5])
])


In [4]:
#Dataloader
train_path = './train_imgs/'
test_path = './test_imgs/'

train_loader=DataLoader(
    torchvision.datasets.ImageFolder(train_path,transform=transformer),
    batch_size=9, shuffle=True
)
test_loader=DataLoader(
    torchvision.datasets.ImageFolder(test_path,transform=transformer),
    batch_size=5, shuffle=True
)

In [7]:
classes = sorted(os.listdir(train_path))
classes

['0.0', '1.0', '2.0', '3.0', '5.0', '9.0']

In [8]:
class ConvNet(nn.Module):
    def __init__(self,num_classes=6):
        super(ConvNet,self).__init__()
        #((w-f+2P)/s) +1
        
        #(256,3,150,150)
        self.conv1=nn.Conv2d(in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1)
        #(256,12,150,150)
        self.bn1=nn.BatchNorm2d(num_features=12)
        self.relu1=nn.ReLU()
        
        self.pool=nn.MaxPool2d(kernel_size=2)
        #(256,12,75,75)
        
        
        self.conv2=nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
        #Shape= (256,20,75,75)
        self.relu2=nn.ReLU()        
        
        self.conv3=nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
        #Shape= (256,32,75,75)
        self.bn3=nn.BatchNorm2d(num_features=32)
        self.relu3=nn.ReLU()
        
        
        self.fc=nn.Linear(in_features=75 * 75 * 32, out_features=num_classes)
        
        
        
    #forwad pass
        
    def forward(self,input):
        output = self.conv1(input)
        output = self.bn1(output)
        output = self.relu1(output)
            
        output = self.pool(output)
            
        output = self.conv2(output)
        output = self.relu2(output)
            
        output = self.conv3(output)
        output = self.bn3(output)
        output = self.relu3(output)
            
        output=output.view(-1,32*75*75)
            
        output=self.fc(output)
            
        return output


In [9]:
model=ConvNet(num_classes=6).to(device)

In [10]:
optimizer=Adam(model.parameters(),lr=0.001,weight_decay=0.0001)
loss_function=nn.CrossEntropyLoss()


In [11]:
num_epochs=10

In [15]:
# calculating size of train and test datasets
train_num = len(glob.glob(train_path+'/**/*.png'))
test_num = len(glob.glob(test_path+'/**/*.png'))

47

In [16]:
print(train_count,test_count)

454 47


In [21]:
#Model training and saving best model

best_accuracy = 0.0

for epoch in range(num_epochs):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy = 0.0
    train_loss = 0.0
    
    for i, (images,labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
            
        # zero gradients    
        optimizer.zero_grad()
        
        outputs = model(images)
        loss = loss_function(outputs,labels)
        
        # backward pass
        loss.backward()
        optimizer.step()
        
        
        train_loss += loss.cpu().data*images.size(0)
        _,prediction =torch.max(outputs.data,1)
        
        train_accuracy += int(torch.sum(prediction == labels.data))
        
    train_accuracy = train_accuracy/train_count
    train_loss = train_loss/train_count
    
    
    # Evaluation on testing dataset
    model.eval()
    
    test_accuracy=0.0
    for i, (images,labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
            
        outputs = model(images)
        _,prediction = torch.max(outputs.data,1)
        test_accuracy += int(torch.sum(prediction == labels.data))
    
    test_accuracy = test_accuracy/test_count
    
    
    print('Epoch: ' + str(epoch) + ' Train Loss: ' + str(train_loss) + ' Train Accuracy: ' + \
          str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
    
    #Save the best model
    if test_accuracy > best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint.model')
        best_accuracy = test_accuracy

Epoch: 0 Train Loss: tensor(0.1337) Train Accuracy: 0.9713656387665198 Test Accuracy: 0.46808510638297873
Epoch: 1 Train Loss: tensor(0.0356) Train Accuracy: 0.986784140969163 Test Accuracy: 0.48936170212765956
Epoch: 2 Train Loss: tensor(0.0184) Train Accuracy: 0.9933920704845814 Test Accuracy: 0.46808510638297873
Epoch: 3 Train Loss: tensor(0.0100) Train Accuracy: 0.9977973568281938 Test Accuracy: 0.44680851063829785
Epoch: 4 Train Loss: tensor(0.0645) Train Accuracy: 0.9713656387665198 Test Accuracy: 0.46808510638297873
Epoch: 5 Train Loss: tensor(0.0432) Train Accuracy: 0.9779735682819384 Test Accuracy: 0.44680851063829785
Epoch: 6 Train Loss: tensor(0.0549) Train Accuracy: 0.986784140969163 Test Accuracy: 0.44680851063829785
Epoch: 7 Train Loss: tensor(0.0608) Train Accuracy: 0.973568281938326 Test Accuracy: 0.48936170212765956
Epoch: 8 Train Loss: tensor(0.0590) Train Accuracy: 0.9757709251101322 Test Accuracy: 0.46808510638297873
Epoch: 9 Train Loss: tensor(0.0199) Train Accurac

 Here accuracy is not the best metric because of imbalanced dataset.
 
 With oversampling we can handle imbalanced classes though there's risk of overfitting.
 
 Adjusting a loss function is also a good idea (paper 'Focal Loss for Dense Object Detection', https://arxiv.org/pdf/1708.02002.pdf).
 
 Mainly, we need more data for getting good predictions. 
 
 Augmentation also will increase model's performance.