In [1]:
#imports
import os
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor, Normalize
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim.lr_scheduler as lr_scheduler
from torch.nn import Parameter
import math

In [2]:
#directories
BDSL_TRAIN = 'E:/Bangla Sign Alphabet/Bangla Sign Alphabet Data/BDSL/BDSL_split/train'
BDSL_VAL = 'E:/Bangla Sign Alphabet/Bangla Sign Alphabet Data/BDSL/BDSL_split/val'
BDSL_TEST = 'E:/Bangla Sign Alphabet/Bangla Sign Alphabet Data/BDSL/BDSL_split/test'

ISHARA_AUGMENTED_TRAIN = 'E:/Bangla Sign Alphabet/Bangla Sign Alphabet Data/Isharalipi_augmentated/Isharalipi_augmentated_split/train'
ISHARA_AUGMENTED_VAL = 'E:/Bangla Sign Alphabet/Bangla Sign Alphabet Data/Isharalipi_augmentated/Isharalipi_augmentated_split/val'
ISHARA_AUGMENTED_TEST = 'E:/Bangla Sign Alphabet/Bangla Sign Alphabet Data/Isharalipi_augmentated/Isharalipi_augmentated_split/test'

ISHARA_MAIN = 'E:/Bangla Sign Alphabet/Bangla Sign Alphabet Data/Isharalipi_original/Isharalipi_original_main'

In [3]:
TRAIN_DIRECTORY = ISHARA_AUGMENTED_TRAIN
VAL_DIRECTORY = ISHARA_AUGMENTED_VAL
TEST_DIRECTORY = BDSL_TEST

In [4]:
print(os.listdir(TRAIN_DIRECTORY))

['10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44']


In [5]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
# Hyper-parameters 
num_epochs = 35
batch_size = 100
learning_rate = 0.01

In [7]:
img_transform = transforms.Compose([transforms.Resize((64,64)), transforms.ToTensor()])

train_data = ImageFolder(TRAIN_DIRECTORY, transform=img_transform)
val_data = ImageFolder(VAL_DIRECTORY, transform=img_transform)
test_data = ImageFolder(TEST_DIRECTORY, transform=img_transform)

In [8]:
print(len(train_data))
print(len(val_data))
print(len(test_data))

18968
2353
2416


In [9]:
train_dl = DataLoader(train_data, batch_size, shuffle=True, num_workers=4, pin_memory=True)
test_dl = DataLoader(test_data, batch_size*2, num_workers=4, pin_memory=True)
val_dl = DataLoader(val_data, batch_size*2,shuffle=True, num_workers=4, pin_memory=True)

In [10]:
img, label = train_data[5]
print(img.shape, label)

torch.Size([3, 64, 64]) 0


In [11]:
img, label = test_data[5]
print(img.shape, label)

torch.Size([3, 64, 64]) 0


In [12]:
#load pretrained model
import torchvision.models as models

# model = models.resnet18(pretrained=True)
# model = models.alexnet(pretrained=True)
# model = models.squeezenet1_0(pretrained=True)
model = models.vgg16(pretrained=False)
# model = models.densenet161(pretrained=True)
# model = models.inception_v3(pretrained=True)
# model = models.googlenet(pretrained=True)
# model = models.shufflenet_v2_x1_0(pretrained=False)
# model = models.mobilenet_v2(pretrained=False)
# model = models.resnext50_32x4d(pretrained=True)
# model = models.wide_resnet50_2(pretrained=True)
# model = models.mnasnet1_0(pretrained=True)

#in_features
resnet18 = 1000
# alexnet = 256*6*6
# squeezenet1_0 = 1000
vgg16 = 512*7*7
# densenet161
# inception_v3
googlenet = 1000
shufflenet_v2_x1_0 = 1000
mobilenet_v2 = 1280
# resnext50_32x4d = 1000
# wide_resnet50_2 = 1000
# mnasnet1_0 = 1000

In [13]:
for param in model.parameters():
    param.requires_grad = True

model.classifier = nn.Sequential(nn.Linear(in_features=vgg16, out_features=256, bias=True),
                                 nn.ReLU(inplace=True),
                                 nn.BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True),
                                 nn.Linear(in_features=256, out_features=128, bias=True),
                                 )

model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [14]:
def l2_norm(input, axis = 1):
    norm = torch.norm(input, 2, axis, True)
    output = torch.div(input, norm)

    return output

In [15]:
class Arcface(nn.Module):
    def __init__(self, embedding_size=128, classnum=35,  s=1, m=0.3):
        super(Arcface, self).__init__()
        self.classnum = classnum
        self.kernel = Parameter(torch.FloatTensor(embedding_size,classnum), requires_grad=True)
        # initial kernel
        self.kernel.data.uniform_(-1, 1).renorm_(2,1,1e-5).mul_(1e5)
        self.m = m # the margin value, default is 0.5
        self.s = s # scalar value default is 64, see normface https://a...content-available-to-author-only...v.org/abs/1704.06369
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
    def forward(self, embbedings, label):
        # weights norm
        nB = len(embbedings)
        kernel_norm = l2_norm(self.kernel,axis=0)
        # cos(theta) using dot product
        cos_theta = torch.mm(embbedings,kernel_norm)
        cos_theta = cos_theta.clamp(-1,1) # for numerical stability
        #Sqrt(1 - cos^2(theta))
        sin_theta = torch.sqrt(torch.clamp((1.0 - torch.pow(cos_theta, 2)),1e-9,1))
        #cos(theta+m) = cos(theta)*cos(m) - sin(theta)*sin(m)
        cos_theta_m = (cos_theta * self.cos_m - sin_theta * self.sin_m)
        output = cos_theta * 1.0 # a little bit hacky way to prevent in_place operation on cos_theta
        idx_ = torch.arange(0, nB, dtype=torch.long)
        output[idx_, label] = cos_theta_m[idx_, label] 
        output *= self.s #scale up in order to make softmax work, first introduced in normface
        return output

In [16]:
metric_fc = Arcface().to(device)
criterion = torch.nn.CrossEntropyLoss()

# optimzer nn
optimizer_nn = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.005)

# optimzer arcface
optimzer_arcface = torch.optim.SGD(metric_fc.parameters(), lr=0.01)

In [17]:
n_total_steps = len(train_dl)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_dl):
        # origin shape: [4, 3, 32, 32] = 4, 3, 1024
        # input_layer: 3 input channels, 6 output channels, 5 kernel size
        images = images.to(device)
        labels = labels.to(device)
        
        # set opt
        optimizer_nn.zero_grad()
        optimzer_arcface.zero_grad()

        # Forward pass
        outputs = model(images.float())
        outputs = metric_fc(outputs, labels)
        loss = criterion(outputs, labels)

        # Backward and optimize
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer_nn.step()
        optimzer_arcface.step()
        
        #optimizer.zero_grad()
        #loss.backward()
        #optimizer.step()

        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

print('Finished Training')

Epoch [1/35], Step [100/190], Loss: 3.0543
Epoch [2/35], Step [100/190], Loss: 2.8097
Epoch [3/35], Step [100/190], Loss: 2.7473
Epoch [4/35], Step [100/190], Loss: 2.6598
Epoch [5/35], Step [100/190], Loss: 2.6261
Epoch [6/35], Step [100/190], Loss: 2.5946
Epoch [7/35], Step [100/190], Loss: 2.5907
Epoch [8/35], Step [100/190], Loss: 2.5644
Epoch [9/35], Step [100/190], Loss: 2.5096
Epoch [10/35], Step [100/190], Loss: 2.4883
Epoch [11/35], Step [100/190], Loss: 2.4671
Epoch [12/35], Step [100/190], Loss: 2.4804
Epoch [13/35], Step [100/190], Loss: 2.4348
Epoch [14/35], Step [100/190], Loss: 2.4409
Epoch [15/35], Step [100/190], Loss: 2.4501
Epoch [16/35], Step [100/190], Loss: 2.4000
Epoch [17/35], Step [100/190], Loss: 2.4354
Epoch [18/35], Step [100/190], Loss: 2.3916
Epoch [19/35], Step [100/190], Loss: 2.3673
Epoch [20/35], Step [100/190], Loss: 2.4028
Epoch [21/35], Step [100/190], Loss: 2.3728
Epoch [22/35], Step [100/190], Loss: 2.3706
Epoch [23/35], Step [100/190], Loss: 2.35

In [18]:
def test(model, metric_fc, criterion, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, labels in test_loader:
            data, labels = data.to(device), labels.to(device)
            features3d = model(data.float())
            
            output = metric_fc(features3d, labels)
            test_loss += criterion(output, labels)
            # test_loss += centerLoss(output, target, device, features3d)
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(labels.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)

    print('\nResult: Average loss: {}, Accuracy: {}/{} ({}%)\n'.format(
        str(test_loss), str(correct), str(len(test_loader.dataset)),
        str(100. * correct / len(test_loader.dataset))))

In [19]:
test(model, metric_fc, criterion, device, train_dl)


Result: Average loss: tensor(0.0227, device='cuda:0'), Accuracy: 10793/18968 (56.90109658371995%)



In [20]:
test(model, metric_fc, criterion, device, val_dl)


Result: Average loss: tensor(0.0116, device='cuda:0'), Accuracy: 1337/2353 (56.82107947301318%)



In [21]:
test(model, metric_fc, criterion, device, test_dl)


Result: Average loss: tensor(0.0175, device='cuda:0'), Accuracy: 229/2416 (9.478476821192054%)

