In [1]:
#Import Libraries
from torchvision import models, transforms, datasets
import torch
from torch import nn
import matplotlib.pyplot as plt

In [2]:
#!pip install torchvision
#!pip install torch

In [3]:
train_data_path  = r'.\archive\asl_alphabet_train\asl_alphabet_train'

In [4]:
#Defining Transformations on the dataset. 
#Resizing the images 224(Input shape) and converting the pixel array to tensors and normalizing them
#Values refered from ImageNET
#We are normalizing the 3 channels seperately
train_transforms = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

test_transforms = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [5]:
#Loading the dataset using ImageFolder

train_dataset = datasets.ImageFolder(train_data_path, transform=train_transforms)
val_dataset = datasets.ImageFolder(train_data_path, transform=test_transforms)

In [6]:
#Splitting the data for training and validation
torch.manual_seed(1)
# num_train_samples = len(train_dataset)
num_train_samples = 20000

val_split = 0.2
split = int(num_train_samples * val_split)
indices = torch.randperm(num_train_samples)


train_subset = torch.utils.data.Subset(train_dataset, indices[split:])
val_subset = torch.utils.data.Subset(val_dataset, indices[:split])

len(train_subset), len(val_subset)

(16000, 4000)

In [7]:
batch_size = 32

train_dataloader = torch.utils.data.DataLoader(
    dataset=train_subset, 
    batch_size=batch_size,
    shuffle=True
)

val_dataloader = torch.utils.data.DataLoader(
    dataset=val_subset,
    batch_size=batch_size,
    shuffle=False
)

In [8]:
classes = train_dataloader.dataset.dataset.classes
classes

['A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 'Z',
 'del',
 'nothing',
 'space']

In [9]:
resnet = models.resnet50(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\lakram9u/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|█████████████████████████████████████████████████████████████████████████████| 97.8M/97.8M [00:38<00:00, 2.67MB/s]


In [10]:
#Freezing the model weights. only the dense layer that is built on top of resnet will be put for training
for param in resnet.parameters():
    param.requires_grad = False

In [12]:
#Top layer
in_features = resnet.fc.in_features
fc = nn.Linear(in_features=in_features, out_features=len(classes)) #len(classes) = 29
resnet.fc = fc

In [14]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [15]:
#Training loop
from time import time
from tqdm import tqdm


def train(model,criterion,optimizer,train_dataloader,test_dataloader,print_every,num_epoch):
    steps = 0
    train_losses, val_losses = [], []

    model.to(device)
    for epoch in tqdm(range(num_epoch)):
        running_loss = 0
        correct_train = 0
        total_train = 0
        start_time = time()
        iter_time = time()
        
        model.train()
        for i, (images, labels) in enumerate(train_dataloader):
            steps += 1
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            output = model(images)
            loss = criterion(output, labels)

            correct_train += (torch.max(output, dim=1)[1] == labels).sum()
            total_train += labels.size(0)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Logging
            if steps % print_every == 0:
                print(f'Epoch [{epoch + 1}]/[{num_epoch}]. Batch [{i + 1}]/[{len(train_dataloader)}].', end=' ')
                print(f'Train loss {running_loss / steps:.3f}.', end=' ')
                print(f'Train acc {correct_train / total_train * 100:.3f}.', end=' ')
                with torch.no_grad():
                    model.eval()
                    correct_val, total_val = 0, 0
                    val_loss = 0
                    for images, labels in test_dataloader:
                        images = images.to(device)
                        labels = labels.to(device)
                        output = model(images)
                        loss = criterion(output, labels)
                        val_loss += loss.item()

                        correct_val += (torch.max(output, dim=1)[1] == labels).sum()
                        total_val += labels.size(0)

                print(f'Val loss {val_loss / len(test_dataloader):.3f}. Val acc {correct_val / total_val * 100:.3f}.', end=' ')
                print(f'Took {time() - iter_time:.3f} seconds')
                iter_time = time()

                train_losses.append(running_loss / total_train)
                val_losses.append(val_loss / total_val)


        print(f'Epoch took {time() - start_time}') 
        torch.save(model, f'checkpoint_{correct_val / total_val * 100:.2f}')
        
    return model, train_losses, val_losses

In [None]:
print_every = 50
num_epoch = 1

params_to_update = []
for name, param in resnet.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)

        
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params_to_update, lr=0.001)

resnet, train_losses, val_losses = train(
    model=resnet,
    criterion=criterion,
    optimizer=optimizer,
    train_dataloader=train_dataloader,
    test_dataloader=val_dataloader,
    print_every=print_every,
    num_epoch=num_epoch
)

  0%|                                                                                            | 0/1 [00:00<?, ?it/s]

Epoch [1]/[1]. Batch [50]/[500]. Train loss 1.450. Train acc 51.312. Val loss 0.956. Val acc 76.850. Took 988.596 seconds
Epoch [1]/[1]. Batch [100]/[500]. Train loss 1.069. Train acc 69.594. Val loss 0.540. Val acc 91.425. Took 889.153 seconds
Epoch [1]/[1]. Batch [150]/[500]. Train loss 0.857. Train acc 77.812. Val loss 0.393. Val acc 93.600. Took 843.598 seconds
Epoch [1]/[1]. Batch [200]/[500]. Train loss 0.726. Train acc 82.031. Val loss 0.326. Val acc 94.025. Took 832.060 seconds
Epoch [1]/[1]. Batch [250]/[500]. Train loss 0.638. Train acc 84.750. Val loss 0.274. Val acc 95.725. Took 824.553 seconds
Epoch [1]/[1]. Batch [300]/[500]. Train loss 0.573. Train acc 86.552. Val loss 0.225. Val acc 96.375. Took 816.246 seconds
Epoch [1]/[1]. Batch [350]/[500]. Train loss 0.522. Train acc 87.946. Val loss 0.205. Val acc 96.250. Took 830.403 seconds
Epoch [1]/[1]. Batch [400]/[500]. Train loss 0.479. Train acc 89.039. Val loss 0.193. Val acc 96.375. Took 808.543 seconds
Epoch [1]/[1]. Ba

In [None]:
#Visualization
plt.plot(train_losses, label='Training loss')
plt.plot(val_losses, label='Validation loss')
plt.legend(frameon=False)
plt.show()

In [None]:
from pathlib import Path
from PIL import Image

#Model Evaluation

test_data_path = Path(r'C:\Users\lakram9u\Downloads\archive\asl_alphabet_test\asl_alphabet_test')


class ASLTestDataset(torch.utils.data.Dataset):
    def __init__(self, root_path, transforms=None):
        super().__init__()
        
        self.transforms = transforms
        self.imgs = sorted(list(Path(root_path).glob('*.jpg')))
        
    def __len__(self):
        return len(self.imgs)
    
    def __getitem__(self, idx):
        img_path = self.imgs[idx]
        img = Image.open(img_path).convert('RGB')
        
        label = img_path.parts[-1].split('_')[0]
        if self.transforms:
            img = self.transforms(img)
        
        return img, label

In [None]:
test_dataset = ASLTestDataset(test_data_path, transforms=test_transforms)

columns = 7
row = round(len(test_dataset) / columns)

fig, ax = plt.subplots(row, columns, figsize=(columns * row, row * columns))
plt.subplots_adjust(wspace=0.1, hspace=0.2)

i, j = 0, 0
for img, label in test_dataset:
    img = torch.Tensor(img)
    img = img.to(device)
    resnet.eval()
    prediction = resnet(img[None]) #predicting 

    #plotting
    ax[i][j].imshow(img.cpu().permute(1, 2, 0))
    ax[i][j].set_title(f'GT {label}. Pred {classes[torch.max(prediction, dim=1)[1]]}')
    ax[i][j].axis('off')
    j += 1
    if j == columns:
        j = 0
        i += 1
        
plt.show()