In [8]:
#imports
import os
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor, Normalize
import torchvision.datasets as datasets
import torchvision.transforms as transforms

from tqdm import tqdm as tqdm
import torch.optim.lr_scheduler as lr_scheduler

In [9]:
# Dataset locations.
# The dataset root defaults to the author's local drive but can be overridden
# by setting the BANGLA_SIGN_DATA_ROOT environment variable before starting
# the kernel, so the notebook runs on other machines without editing paths.
DATA_ROOT = os.environ.get('BANGLA_SIGN_DATA_ROOT',
                           'E:/Bangla Sign Alphabet/Bangla Sign Alphabet Data')

# Train/val come from the augmented split; test uses the original images.
TRAIN_DIRECTORY = f'{DATA_ROOT}/Isharalipi_augmentated/Isharalipi_augmentated_split/train'
VAL_DIRECTORY = f'{DATA_ROOT}/Isharalipi_augmentated/Isharalipi_augmentated_split/val'
TEST_DIRECTORY = f'{DATA_ROOT}/Isharalipi_original/Isharalipi_original_main'

In [10]:
# Show the class sub-folders of the training split. os.listdir() returns
# entries in arbitrary order, so sort them to get a stable listing.
print(sorted(os.listdir(TRAIN_DIRECTORY)))

['10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44']


In [11]:
# Pick the compute device: use the GPU when CUDA is usable, else the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [12]:
# Hyper-parameters
num_epochs = 15        # full passes over the training set
batch_size = 100       # training batch size (evaluation loaders use 2x this)
learning_rate = 0.01   # step size passed to the SGD optimizer

# Seed PyTorch's global RNG so weight initialisation and DataLoader shuffling
# start from the same state on every Restart & Run All.
# NOTE: this does not by itself make GPU kernels bit-for-bit deterministic.
SEED = 42
torch.manual_seed(SEED);

In [13]:
# Every image is resized to 64x64 and converted to a tensor; the same
# transform is shared by all three splits.
img_transform = transforms.Compose([transforms.Resize((64, 64)), transforms.ToTensor()])

train_data = ImageFolder(TRAIN_DIRECTORY, transform=img_transform)
val_data = ImageFolder(VAL_DIRECTORY, transform=img_transform)
test_data = ImageFolder(TEST_DIRECTORY, transform=img_transform)

# ImageFolder derives label indices from the sub-folder names of each root
# independently. If a split has a different set of class folders, its labels
# no longer line up with the training labels and any accuracy computed on it
# is meaningless, so fail loudly instead of evaluating on misaligned labels.
for _split_name, _split in (('val', val_data), ('test', test_data)):
    if _split.classes != train_data.classes:
        raise ValueError(
            f"Class folders of the {_split_name} split do not match the training split: "
            f"{_split.classes} vs {train_data.classes}"
        )

In [14]:
# Number of images in the train / val / test splits, one per line.
print(len(train_data), len(val_data), len(test_data), sep='\n')

18968
2353
978


In [15]:
# Only the training loader is shuffled. The evaluation loaders keep a fixed
# order (accuracy does not depend on it) and use a batch twice as large,
# since no gradients have to be stored during evaluation.
train_dl = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                      num_workers=4, pin_memory=True)
test_dl = DataLoader(test_data, batch_size=batch_size * 2, shuffle=False,
                     num_workers=4, pin_memory=True)
val_dl = DataLoader(val_data, batch_size=batch_size * 2, shuffle=False,
                    num_workers=4, pin_memory=True)

In [16]:
# Build the backbone network.
import torchvision.models as models

# MobileNetV2 is created with pretrained=False, i.e. it is NOT loaded with
# pretrained weights and is trained from scratch in this notebook.
#
# Other torchvision constructors the author experimented with (each was
# called with pretrained=True):
#   resnet18, alexnet, squeezenet1_0, vgg16, densenet161, inception_v3,
#   googlenet, shufflenet_v2_x1_0, resnext50_32x4d, wide_resnet50_2, mnasnet1_0
model = models.mobilenet_v2(pretrained=False)

# Feature sizes the author noted per architecture ("in_features" notes):
#   resnet18 = 1000, alexnet = 256*6*6, squeezenet1_0 = 1000,
#   googlenet = 1000, shufflenet_v2_x1_0 = 1000, resnext50_32x4d = 1000,
#   wide_resnet50_2 = 1000, mnasnet1_0 = 1000
#   (densenet161 and inception_v3 were left blank)
# NOTE(review): several of these look like the size of the stock 1000-way
# output rather than the classifier input size — confirm before reusing.
vgg16 = 512 * 7 * 7
# Input width of the first Linear layer of the replacement classifier head.
mobilenet_v2 = 1280

In [17]:
# Train the whole network: every backbone parameter stays trainable.
for param in model.parameters():
    param.requires_grad = True

# Size the output layer from the dataset instead of hard-coding the class
# count, so the head always matches the number of training class folders.
num_classes = len(train_data.classes)

# Replace the stock classifier with a three-layer MLP head:
# mobilenet_v2 (1280) -> 256 -> 128 -> num_classes, with ReLU followed by
# BatchNorm after each hidden layer.
model.classifier = nn.Sequential(
    nn.Linear(in_features=mobilenet_v2, out_features=256, bias=True),
    nn.ReLU(inplace=True),
    nn.BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True),
    nn.Linear(in_features=256, out_features=128, bias=True),
    nn.ReLU(inplace=True),
    nn.BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True),
    nn.Linear(in_features=128, out_features=num_classes, bias=True),
)

# Move the parameters to the selected device; as the last expression of the
# cell this also displays the resulting architecture.
model.to(device)

MobileNetV2(
  (features): Sequential(
    (0): ConvBNReLU(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=Tr

In [18]:
# Objective and update rule: cross-entropy on the class logits, optimised
# with plain SGD over every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)

In [19]:
# Train the model for num_epochs passes over train_dl, logging the loss of
# every 100th mini-batch.
n_total_steps = len(train_dl)
for epoch in range(num_epochs):
    # Explicitly enter training mode at the start of every epoch. If this cell
    # is re-run after the model was put into eval mode (e.g. by an evaluation
    # pass), BatchNorm would otherwise keep using its running statistics
    # instead of batch statistics while training.
    model.train()

    for i, (images, labels) in enumerate(train_dl):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

print('Finished Training')

Epoch [1/15], Step [100/190], Loss: 3.3782
Epoch [2/15], Step [100/190], Loss: 2.0790
Epoch [3/15], Step [100/190], Loss: 1.1044
Epoch [4/15], Step [100/190], Loss: 0.6029
Epoch [5/15], Step [100/190], Loss: 0.3002
Epoch [6/15], Step [100/190], Loss: 0.2222
Epoch [7/15], Step [100/190], Loss: 0.1087
Epoch [8/15], Step [100/190], Loss: 0.2223
Epoch [9/15], Step [100/190], Loss: 0.0604
Epoch [10/15], Step [100/190], Loss: 0.0426
Epoch [11/15], Step [100/190], Loss: 0.0440
Epoch [12/15], Step [100/190], Loss: 0.0198
Epoch [13/15], Step [100/190], Loss: 0.0248
Epoch [14/15], Step [100/190], Loss: 0.0159
Epoch [15/15], Step [100/190], Loss: 0.0247
Finished Training


In [20]:
def check_acc(dl, net=None, dev=None):
    """Print the top-1 classification accuracy of a model over a data loader.

    Args:
        dl: Iterable yielding ``(images, labels)`` batches of tensors.
        net: Model to evaluate. Defaults to the notebook-level ``model``.
        dev: Device the batches are moved to. Defaults to the notebook-level
            ``device``.

    Returns:
        None. The result is printed as
        ``acc : Got <correct> / <total> with accuracy <percent>``.

    The model is switched to eval mode for the duration of the pass and then
    restored to whatever mode it was in before the call, so an evaluation in
    the middle of training does not silently leave it in inference mode.
    """
    net = model if net is None else net
    dev = device if dev is None else dev

    n_correct = 0
    n_samples = 0

    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            for images, labels in dl:
                images = images.to(dev)
                labels = labels.to(dev)
                outputs = net(images)
                # max returns (value ,index); the index is the predicted class
                _, predicted = torch.max(outputs, 1)

                # .item() keeps the counter a plain Python int instead of
                # accumulating a tensor on the device.
                n_correct += (predicted == labels).sum().item()
                n_samples += predicted.size(0)
    finally:
        net.train(was_training)

    if n_samples == 0:
        # An empty loader would otherwise raise ZeroDivisionError below.
        print("acc : Got 0 / 0 (no samples to evaluate)")
        return

    print(f"acc : Got {n_correct} / {n_samples} with accuracy {float(n_correct)/float(n_samples)*100:.2f}")

In [21]:
# Accuracy on the training split (train_dl wraps train_data).
check_acc(train_dl)

acc : Got 18968 / 18968 with accuracy 100.00


In [22]:
# Accuracy on the held-out test split (test_dl wraps test_data).
# NOTE(review): the output recorded for this cell reports 2353 samples, which
# equals the printed len(val_data), not len(test_data) (978). The saved output
# appears stale or produced with swapped loaders — re-run to confirm.
check_acc(test_dl)

acc : Got 2323 / 2353 with accuracy 98.73


In [23]:
# Accuracy on the validation split (val_dl wraps val_data).
# NOTE(review): the output recorded for this cell reports 978 samples, which
# equals the printed len(test_data), not len(val_data) (2353). The saved output
# appears stale or produced with swapped loaders — re-run to confirm.
check_acc(val_dl)

acc : Got 976 / 978 with accuracy 99.80
