In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import random_split
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import random_split
from torchvision import datasets, transforms 
from pytorch_lightning import LightningModule
import pytorch_lightning as pl
%matplotlib inline
from sklearn.metrics import classification_report
from PIL import Image


  warn(f"Failed to load image Python extension: {e}")


# Load data 
Load data from the folders. The output will be a list of tuples, containing the path to image and the label. 

In [2]:

# display current directory
print(os.getcwd())

# get Image path
TRAIN_CLASS_PATH = r'../../asl_alphabet_train/asl_alphabet_train'
# TEST_CLASS_PATH = r'../asl_alphabet_test/asl_alphabet_test'

# #TRAIN_CLASS_PATH = r'..\data\asl_alphabet_train\asl_alphabet_train'
# #TEST_CLASS_PATH = r'..\data\asl_alphabet_test\asl_alphabet_test'

# hold class labels 
asl_classes = os.listdir(TRAIN_CLASS_PATH)
trainImages = []
image_label_paths = [] # used for customized dataset 

for i, class_name in enumerate(asl_classes):
    class_path = os.path.join(TRAIN_CLASS_PATH, class_name)
    fnames = os.listdir(class_path)
    for f in fnames:
        f_path = os.path.join(TRAIN_CLASS_PATH,class_name, f)
        trainImages.append(f_path)
        image_label_paths.append((f_path, i))

# # display class names 
print(os.getcwd())
print(asl_classes)
print(len(image_label_paths))
print(image_label_paths[0])

/home/jupyter-suh222/Hand-Gesuture-CV-24/src/model
/home/jupyter-suh222/Hand-Gesuture-CV-24/src/model
['N', 'D', 'P', 'space', 'Z', 'nothing', 'W', 'I', 'C', 'del', 'Y', 'S', 'G', 'M', 'J', 'T', 'V', 'B', 'H', 'E', 'O', 'Q', 'K', 'A', 'U', 'R', 'X', 'L', 'F']
87000
('../../asl_alphabet_train/asl_alphabet_train/N/N478.jpg', 0)


# Data Preprocessing 


In [3]:
# Create the Dataset ibject using torch 

BATCH_SIZE = 32
transform = transforms.Compose([
            transforms.Resize(224),
            transforms.RandomHorizontalFlip(),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                std=[0.229, 0.224, 0.225])
        ])

class CustomDataset(Dataset):
    def __init__(self, image_label_paths,transform=None):
        # store image path
        self.image_label_paths = image_label_paths
        self.transform = transform
    
    def __len__(self):
        # return the number of total sampels in the dataset
        return len(self.image_label_paths)
    
    def __getitem__(self, index):
        imagePath, label = self.image_label_paths[index]
        
        #image = cv2.imread(imagePath)
        #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = Image.open(imagePath).convert('RGB')
        if self.transform is not None:
            # apply transoformation 
            image = self.transform(image)
        return image, label


class ImageDataset(LightningModule):
    def __init__(self, image_label_paths, batch_size = BATCH_SIZE):
        super().__init__()
        
        self.image_label_paths = image_label_paths
        self.batch_size = batch_size
        self.transform = transforms.Compose([
            transforms.Resize(224),
            transforms.RandomHorizontalFlip(),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                std=[0.229, 0.224, 0.225])
        ])
    def __len__(self):
        if self.trainDS is not None:
            return len(self.trainDS)
        elif self.valDS is not None:
            return len(self.valDS)
        else:
            return 0 
    
    def __getitem__(self, index):
        if self.trainDS is not None:
            return self.trainDS[index]
        elif self.test_dataset is not None:
            return self.test_dataset[index]
    
    def setup(self, stage=None):
        DS = CustomDataset(self.image_label_paths, self.transform)
        # setup training and validation set 
        DATA_SIZE = len(DS)
        TRAIN_SIZE = int(0.8*DATA_SIZE)
        VAL_SIZE = DATA_SIZE-TRAIN_SIZE
        self.trainDS, self.valDS = random_split(DS, [TRAIN_SIZE, VAL_SIZE])
    
    def train_dataloader(self):
        return DataLoader(self.trainDS, batch_size=BATCH_SIZE,shuffle=True)
    
    def val_dataloader(self):
        return DataLoader(self.valDS, batch_size=BATCH_SIZE, shuffle=False) 


dataset = ImageDataset(image_label_paths)
dataset.setup() 
train_dataloader = dataset.train_dataloader
val_dataloader = dataset.val_dataloader


In [4]:
class DataModule(pl.LightningDataModule):
    
    def __init__(self, transform=transform, batch_size=16):
        super().__init__()
        self.root_dir = TRAIN_CLASS_PATH
        self.transform = transform
        self.batch_size = batch_size

    def setup(self, stage=None):
        data_set = datasets.ImageFolder(root=self.root_dir, transform=self.transform)
        
        n_data = len(dataset)
        n_train = int(0.8 * n_data)
        n_val = n_data - n_train
        train_dataset, val_dataset =  random_split(dataset, [n_train, n_val])

        self.train_dataset = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
        self.val_dataset = DataLoader(val_dataset, batch_size=self.batch_size)
    
    def train_dataloader(self):
        return self.train_dataset

    def val_dataloader(self):
        return self.val_dataset
    
    def test_dataloader(self):
        return self.test_dataset

# Setup Model


In [5]:
class CNN(LightningModule):
    def __init__(self):
        super(CNN, self).__init__()
        # RGB image with 3 channels, output 6 channels with 3x3 kernel 
        self.conv1 = nn.Conv2d(in_channels=3,out_channels=6,kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=6,out_channels=12,kernel_size=3)
        
        # set up first fully connected layers, outchanel * image size 
        self.fc1 = nn.Linear(in_features=12*54*54, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=20)
        self.fc4 = nn.Linear(in_features=20, out_features= len(asl_classes))
        
    def forward(self, x):
        # apply first convolutin layer by ReLu
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        
        # second convltn. layer
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        
        # flatten
        x = x.view(-1, 12*54*54)
        
        # activation
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        
        # applies the log softmax function
        output = F.log_softmax(x, dim=1)
        return output
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        pred = y_hat.argmax(dim=1, keepdim=True)
        acc = pred.eq(y.view_as(pred)).sum().item() / y.shape[0]
        self.log("train_loss", loss)
        self.log("train_acc", acc)
        return loss

    def validation_step(self, val_batch, batch_idx):
        x, y = val_batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        pred = y_hat.argmax(dim=1, keepdim=True)
        acc = pred.eq(y.view_as(pred)).sum().item() / y.shape[0]
        self.log("val_loss", loss)
        self.log("val_acc", acc)
        
    def test_step(self, test_batch, batch_idx):
        x, y = test_batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        pred = y_hat.argmax(dim=1, keepdim=True)
        acc = pred.eq(y.view_as(pred)).sum().item() / y.shape[0]
        self.log("test_loss", loss)
        self.log("test_acc", acc)

# Training the Model

In [6]:

#test_dataloader = dataset.test_dataloader
datamodule = DataModule()
model = CNN()


In [7]:
trainer = pl.Trainer(max_epochs=10)
trainer.fit(model, datamodule)



Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A5000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name  | Type   | Params
---------------------------------
0 | conv1 | Conv2d | 168   
1 | conv2 | Conv2d | 660   
2 | fc1   | Linear | 4.2 M 
3 | fc2   | Line

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/jupyter-suh222/.local/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=63` in the `DataLoader` to improve performance.
/home/jupyter-suh222/.local/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=63` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


In [8]:
torch.save(model, 'new_model.pth')

# Evaluation

In [9]:
# cnn = torch.load("model.pth")
val_loader = datamodule.val_dataloader()
trainer.test(dataloaders=val_loader)

Restoring states from the checkpoint path at /home/jupyter-suh222/Hand-Gesuture-CV-24/src/model/lightning_logs/version_9/checkpoints/epoch=9-step=34800.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loaded model weights from the checkpoint at /home/jupyter-suh222/Hand-Gesuture-CV-24/src/model/lightning_logs/version_9/checkpoints/epoch=9-step=34800.ckpt
/home/jupyter-suh222/.local/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=63` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.1253911256790161, 'test_acc': 0.9617816209793091}]

In [14]:
#
model = model.cuda()
device = torch.device("cuda")   #"cuda:0"

model.eval()
y_true=[]
y_pred=[]
with torch.no_grad():
    print("evaluating")
    for test_data in datamodule.val_dataloader():
        test_images, test_labels = test_data[0].to(device), test_data[1].to(device)
        #test_images, test_labels = test_data[0], test_data[1]
        pred = model(test_images).argmax(dim=1)
        for i in range(len(pred)):
            y_true.append(test_labels[i].item())
            y_pred.append(pred[i].item())



evaluating


NameError: name 'class_names' is not defined

In [15]:
print(classification_report(y_true,y_pred,target_names=asl_classes,digits=4))

              precision    recall  f1-score   support

           N     1.0000    0.9959    0.9979       483
           D     0.9798    0.9878    0.9838       491
           P     0.9644    0.9935    0.9788       464
       space     0.9894    0.9852    0.9873       474
           Z     0.9939    0.9878    0.9908       491
     nothing     0.9939    1.0000    0.9970       491
           W     0.9615    0.9855    0.9734       482
           I     0.9721    0.9939    0.9828       490
           C     0.9980    0.9749    0.9863       517
         del     0.9823    0.9960    0.9891       501
           Y     0.9835    0.8948    0.9371       466
           S     0.9571    0.9859    0.9713       498
           G     0.9471    0.9598    0.9534       448
           M     0.9778    0.9898    0.9837       489
           J     0.9855    0.9896    0.9876       481
           T     0.9958    0.9715    0.9835       492
           V     0.9734    0.9615    0.9674       494
           B     0.9765    