In [28]:
import os
import torch
import torch.nn as nn
import numpy as np
import torch.optim as optim

from torchvision import transforms, datasets, models
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import Dataset, DataLoader

import numpy as np
import torch
from torchvision import datasets, transforms
import torch.nn.functional as F

import time
import random

In [29]:
# Probe once for a usable CUDA device; later cells branch on this flag when
# moving the model and the batches.
train_on_gpu = torch.cuda.is_available()

print(
    'CUDA is available!  Training on GPU ...'
    if train_on_gpu
    else 'CUDA is not available.  Training on CPU ...'
)

CUDA is available!  Training on GPU ...


In [30]:
# Directory handed to datasets.ImageFolder by CustomDataset.
image_path = "/kaggle/input/asl-alphabet/asl_alphabet_train/asl_alphabet_train"


def _common_steps():
    """Return fresh resize -> tensor -> normalise steps used by both pipelines."""
    return [
        transforms.Resize(size=(128, 128)),
        transforms.ToTensor(),
        # Per-channel mean/std; these match the values torchvision documents
        # for ImageNet-pretrained models.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]


# Training pipeline: a random horizontal flip (50% chance) ahead of the common steps.
train_transform = transforms.Compose(
    [transforms.RandomHorizontalFlip(p=0.5)] + _common_steps()
)

# Evaluation pipeline: the common steps only, no augmentation.
test_transform = transforms.Compose(_common_steps())
# `transform` is bound to the very same object as `test_transform`.
transform = test_transform


class CustomDataset(Dataset):
    """Train or held-out slice of an ``ImageFolder`` directory.

    The folder is indexed once with ``datasets.ImageFolder``; the sample
    positions are shuffled with a *seeded, private* random generator and cut
    at ``split_ratio``.  Because the permutation depends only on ``seed`` and
    the number of samples, a ``train=True`` and a ``train=False`` instance
    built with the same ``data_dir``, ``split_ratio`` and ``seed`` are
    guaranteed to be disjoint and to cover the whole folder together.
    (Shuffling with the global, unseeded ``random`` module gave every
    instance its own permutation, so the two slices overlapped.)

    Args:
        data_dir: root directory passed to ``datasets.ImageFolder``.
        split_ratio: fraction of the samples assigned to the training slice.
        transform: optional callable applied to each image in ``__getitem__``.
        train: ``True`` selects the first ``split_ratio`` share of the
            shuffled positions, ``False`` selects the remainder.
        seed: seed of the private shuffle; use the same value for the train
            and the held-out instance.
    """

    def __init__(self, data_dir, split_ratio=0.8, transform=None, train=True, seed=0):
        self.data_dir = data_dir
        self.split_ratio = split_ratio
        self.transform = transform
        self.seed = seed

        # The transform is applied in __getitem__, not by ImageFolder itself.
        self.dataset = datasets.ImageFolder(root=data_dir, transform=None)
        self.classes = self.dataset.classes

        self.indices = self._split_indices(
            len(self.dataset), self.split_ratio, train, seed
        )

    @staticmethod
    def _split_indices(num_items, split_ratio, train, seed=0):
        """Return the shuffled positions of the requested slice.

        The shuffle uses ``random.Random(seed)`` so the result is reproducible
        and independent of the global ``random`` state.
        """
        indices = list(range(num_items))
        random.Random(seed).shuffle(indices)

        split_index = int(split_ratio * len(indices))
        return indices[:split_index] if train else indices[split_index:]

    def __len__(self):
        """Number of samples in this slice."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx`` within this slice."""
        image, label = self.dataset[self.indices[idx]]

        if self.transform is not None:
            image = self.transform(image)

        return image, label



In [31]:
# Two views over the same image folder: the train=True slice (augmented
# pipeline) and the train=False slice (plain evaluation pipeline).
train_dataset = CustomDataset(data_dir=image_path, train=True, transform=train_transform)
test_dataset = CustomDataset(data_dir=image_path, train=False, transform=test_transform)

batch_size = 64   # samples per mini-batch
valid_size = 0.2  # share of the training slice set aside for validation

# Shuffle the positions of the training slice, then give the first
# `valid_size` share to validation and the rest to fitting.
num_train = len(train_dataset)
indices = list(range(num_train))
np.random.shuffle(indices)
split_val = int(np.floor(valid_size * num_train))

valid_idx = indices[:split_val]
train_idx = indices[split_val:]

In [32]:
# Each sampler restricts its loader to one share of the shuffled positions.
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# NOTE(review): valid_loader reads from train_dataset, so validation images
# also go through train_transform (including the random horizontal flip).
train_loader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)
valid_loader = DataLoader(train_dataset, sampler=valid_sampler, batch_size=batch_size)

# The held-out slice is iterated in its stored order (no sampler).
test_loader = DataLoader(test_dataset, batch_size=64)

# CLASSIFICATION USING CUSTOM CNN

In [33]:
class CNN(nn.Module):
    """Four-stage convolutional classifier with 29 output classes.

    Expects batches of shape ``(N, 3, 128, 128)``: with 7x7 unpadded
    convolutions and three 2x2 max-pools the feature map shrinks
    128 -> 122 -> 61 -> 55 -> 27 -> 21 -> 10 -> 4, which yields the
    ``120 * 4 * 4`` features consumed by ``fc1``.

    ``forward`` returns log-probabilities (``log_softmax`` over dim 1), so the
    matching loss is a negative-log-likelihood loss.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: 7x7 convolutions with no padding.
        self.conv1 = nn.Conv2d(3, 6, kernel_size=7)
        self.conv2 = nn.Conv2d(6, 30, kernel_size=7)
        self.conv3 = nn.Conv2d(30, 120, kernel_size=7)
        self.conv4 = nn.Conv2d(120, 120, kernel_size=7)

        # 2x2 max-pool shared by the first three convolution stages.
        self.sub = nn.MaxPool2d(2, 2)

        # Classifier head.
        self.fc1 = nn.Linear(120*4*4, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 128)
        self.fc4 = nn.Linear(128, 29)
        self.dropout = nn.Dropout(0.4)
        # Not used in forward(); kept so the module's state_dict keys stay
        # compatible with checkpoints saved from this architecture.
        self.batch_norm = nn.BatchNorm1d(512)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(N, 29)``."""
        x = self.sub(F.relu(self.conv1(x)))
        x = self.sub(F.relu(self.conv2(x)))
        x = self.sub(F.relu(self.conv3(x)))
        x = F.relu(self.conv4(x))

        # Flatten per sample.  Keeping the batch dimension explicit (instead
        # of view(-1, 120*4*4)) means an input of the wrong spatial size
        # fails loudly in fc1 rather than being silently re-batched.
        x = x.view(x.size(0), -1)

        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))

        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        x = self.dropout(x)
        x = self.fc4(x)

        return F.log_softmax(x, dim=1)

In [43]:
# NOTE(review): this display cell sits before the cell that assigns
# `model = CNN()`, so a fresh Restart & Run All raises NameError here;
# it should run after that cell.
model

CNN(
  (conv1): Conv2d(3, 6, kernel_size=(7, 7), stride=(1, 1))
  (conv2): Conv2d(6, 30, kernel_size=(7, 7), stride=(1, 1))
  (conv3): Conv2d(30, 120, kernel_size=(7, 7), stride=(1, 1))
  (conv4): Conv2d(120, 120, kernel_size=(7, 7), stride=(1, 1))
  (sub): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1920, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=128, bias=True)
  (fc4): Linear(in_features=128, out_features=29, bias=True)
  (dropout): Dropout(p=0.4, inplace=False)
  (batch_norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [34]:
model = CNN()

# Use the notebook-wide flag so the model and the batches (moved with the
# same flag in the training loop) always end up on the same device.
if train_on_gpu:
    model.cuda()

# CNN.forward already ends in log_softmax, i.e. it emits log-probabilities.
# NLLLoss is the criterion that consumes log-probabilities; CrossEntropyLoss
# expects raw logits and would apply log_softmax a second time.
criterion = nn.NLLLoss()

# SGD with momentum and L2 weight decay.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)

In [35]:
n_epochs = 5
start_time = time.time()
valid_loss_min = np.inf  # best validation loss seen so far (np.Inf was removed in NumPy 2.0)

# Per-epoch loss history: t_loss holds training losses, v_loss validation
# losses.  Initialised here so the cell runs on a fresh kernel and can be
# re-run without accumulating stale entries.
t_loss, v_loss = [], []

for epoch in range(1, n_epochs+1):

    train_loss = 0.0
    valid_loss = 0.0

    # ---- training pass -----------------------------------------------------
    model.train()
    for data, target in train_loader:

        if train_on_gpu:
            data, target = data.cuda(), target.cuda()

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        # loss is a batch mean; weight by batch size to get a per-sample sum.
        train_loss += loss.item()*data.size(0)

    # ---- validation pass ---------------------------------------------------
    model.eval()
    # No gradients are needed here; skipping graph construction saves memory.
    with torch.no_grad():
        for data, target in valid_loader:

            if train_on_gpu:
                data, target = data.cuda(), target.cuda()

            output = model(data)
            loss = criterion(output, target)

            valid_loss += loss.item()*data.size(0)

    # Convert the per-sample sums into averages over each subset.
    train_loss = train_loss/len(train_loader.sampler)
    valid_loss = valid_loss/len(valid_loader.sampler)

    t_loss.append(train_loss)
    v_loss.append(valid_loss)
    print(f'Epoch: {epoch} \tTraining Loss: {train_loss} \tValidation Loss: {valid_loss}')

    # Checkpoint whenever the validation loss does not get worse.
    if valid_loss <= valid_loss_min:
        print(f'Validation loss decreased ({valid_loss_min} --> {valid_loss}).  Saving model ...')
        torch.save(model.state_dict(), 'model.pt')
        valid_loss_min = valid_loss


print("Training time: %s seconds ---" % (time.time() - start_time))

Epoch: 1 	Training Loss: 3.2824165631984843 	Validation Loss: 2.8745576765345433
Validation loss decreased (inf --> 2.8745576765345433).  Saving model ...
Epoch: 2 	Training Loss: 2.104143702161723 	Validation Loss: 1.1162338819997064
Validation loss decreased (2.8745576765345433 --> 1.1162338819997064).  Saving model ...
Epoch: 3 	Training Loss: 0.9509252579732873 	Validation Loss: 0.47591181020627077
Validation loss decreased (1.1162338819997064 --> 0.47591181020627077).  Saving model ...
Epoch: 4 	Training Loss: 0.5229015865887718 	Validation Loss: 0.2624741597422238
Validation loss decreased (0.47591181020627077 --> 0.2624741597422238).  Saving model ...
Epoch: 5 	Training Loss: 0.327110498501309 	Validation Loss: 0.18846143037937155
Validation loss decreased (0.2624741597422238 --> 0.18846143037937155).  Saving model ...
Training time: 975.191657781601 seconds ---


In [38]:
# Restore the best checkpoint written by the training loop.  map_location
# lets a checkpoint saved during a GPU session be loaded on a CPU-only one.
model.load_state_dict(
    torch.load('model.pt', map_location='cuda' if train_on_gpu else 'cpu')
)

<All keys matched successfully>

In [50]:

test_loss = 0.0
correct = 0  # integer count of correctly classified samples

# Per-sample predictions / labels, collected for the metrics computed later.
# Each entry is a length-1 array, as before.
pred = []
true = []
model.eval()
with torch.no_grad():
    for data, target in test_loader:

        if train_on_gpu:
            data, target = data.cuda(), target.cuda()

        output = model(data)

        # Sum (not mean) so the total can be divided by the dataset size below.
        test_loss += F.nll_loss(output, target, reduction='sum').item()
        preds = output.argmax(dim=1, keepdim=True)
        correct += preds.eq(target.view_as(preds)).sum().item()

        # Only predictions and labels are moved to host memory; the input
        # batch itself is not needed outside the model.
        pred.extend(preds.cpu().numpy().reshape(-1, 1))
        true.extend(target.cpu().numpy().reshape(-1, 1))


test_loss /= len(test_loader.dataset)

print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    


Test set: Average loss: 0.1631, Accuracy: 16500.0/17400 (95%)



In [51]:
from sklearn import metrics

# Macro averaging: each score is computed per class and then averaged with
# equal weight for every class.
precision, recall, f1_score = (
    scorer(true, pred, average='macro')
    for scorer in (metrics.precision_score, metrics.recall_score, metrics.f1_score)
)

print(f'Precision: {precision*100} Recall: {recall*100} f1_score: {f1_score*100}')



Precision: 94.96083962002571 Recall: 94.77833782682039 f1_score: 94.79495534871947
