In [1]:
# Here we import everything we need for the project

%matplotlib inline
import os, time
import csv

# pytorch
import torch
from torch.nn import Module, Conv2d, MaxPool2d, Linear
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pandas as pd

# Sklearn
from sklearn.model_selection import train_test_split # Helps with organizing data for training
from sklearn.metrics import confusion_matrix, classification_report # Helps present results as a confusion-matrix

## Loading Data

This project uses the [Hand Gesture Recognition Database](https://www.kaggle.com/gti-upm/leapgestrecog/version/1) (citation below) available on Kaggle. It contains 20000 images with different hands and hand gestures. There is a total of 10 hand gestures of 10 different people presented in the dataset. There are 5 female subjects and 5 male subjects.
The images were captured using the Leap Motion hand tracking device.

>Hand Gesture | Label used
>--- | ---
> Thumb down | 0
> Palm (Horizontal) | 1
> L | 2
> Fist (Horizontal) | 3
> Fist (Vertical) | 4
> Thumbs up | 5
> Index | 6
> OK | 7
> Palm (Vertical) | 8
> C | 9

Table 1 - Classification used for every hand gesture.


T. Mantecón, C.R. del Blanco, F. Jaureguizar, N. García, “Hand Gesture Recognition using Infrared Imagery Provided by Leap Motion Controller”, Int. Conf. on Advanced Concepts for Intelligent Vision Systems, ACIVS 2016, Lecce, Italy, pp. 47-57, 24-27 Oct. 2016. (doi: 10.1007/978-3-319-48680-2_5)  

Overview:
- Load images
- Some validation
- Preparing the images for training
- Use of train_test_split

In [2]:
# Unzip images, ignore this cell if files are already in the workspace
#!unzip leapGestRecog.zip

## Writing CSV - Kaggle Dataset
Since this dataset doesn't come with a nice csv, write one ourselves to make loading the data easier later

In [3]:
header = ["path_to_file", "class/GT"]
csv_path = "kaggle_images.csv"
# We need to get all the paths for the images to later load them
imagepaths = []
root = "./leapgestrecog"

for dirname, dirs, files in os.walk(root):
    for fname in files:
        path = os.path.join(dirname, fname)
        if path.endswith(".png"):
            imagepaths.append(path)

print(len(imagepaths)) # If > 0, then a PNG image was loaded
categories = [fpath.split("/")[4] for _, fpath in enumerate(imagepaths)]
gt = [category.split("_")[0] for _, category in enumerate(categories)]
with open(csv_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header)
    for i, fpath in enumerate(imagepaths):
        writer.writerow([fpath, gt[i]])

0


## Writing CSV - Kaggle Dataset
Since this dataset doesn't come with a nice csv, write one ourselves to make loading the data easier later
Note: directly setting filepaths to the pre-binarized images so we don't need to perform this operation ourselves

In [4]:
header = ["path_to_file", "GT"]
csv_path = "./data/asl/asl_images.csv"
imagepaths = []
fnames = []
# these are 400x400 BW images
root = "./data/asl/asl_data/binary_frames_rotated"

for dirname, dirs, files in os.walk(root):
    for fname in files:
        if fname.endswith(".png"):
            fnames.append(fname)
        path = os.path.join(dirname, fname)
        if path.endswith(".png"):
            imagepaths.append(path)

gt = [fname.split('_')[0] for fname in fnames]
with open(csv_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header)
    for i, fpath in enumerate(imagepaths):
        writer.writerow([fpath, gt[i]])

## ASL Helpers
Mostly used to create a somewhat balanced train/test split, and to visualize counts of every class

In [5]:
def make_asl_count_df(y):
    counts = np.zeros(26)
    y = np.array(y)
    for i in range(26):
        counts[i] = np.sum(np.where(y == i, 1, 0))
    idx = [chr(i) for i in range(97, 123)]
    columns=["Count"]
    df = pd.DataFrame(counts, index=idx, columns=columns)
    return df

def y_as_np_arr(dataset):
    return np.array([sample['y'] for sample in dataset])


def make_asl_train_test_split(dataset, counts_df, split_ratio=0.75, train_csv="./data/asl/train_asl.csv", test_csv="./data/asl/test_asl.csv"):
    header = ["path_to_file", "GT"]
    print(0)
    train = []
    test = []
    np.random.seed(0)
    print(1)
    train_counts, test_counts = [int(np.ceil(split_ratio*counts_df.iloc[idx, 0])) for idx in range(counts_df.shape[0])], [int(np.floor((1-split_ratio)*counts_df.iloc[idx, 0])) for idx in range(counts_df.shape[0])]
#     y = y_as_np_arr(dataset)
    print(2)
    for idx in range(counts_df.shape[0]):
        #curr_train_selections = []
        #curr_test_selections = []
        only_class_locs = np.where(y==idx)[0]
        train_idxes = np.random.choice(only_class_locs, size=train_counts[idx], replace=False)
        for data_idx in only_class_locs:
            sample = dataset[data_idx]
            if data_idx in train_idxes:
                #curr_train_selections.append(sample['fname'])
                train.append(sample['fname'])
            else:
                #curr_test_selections.append(sample['fname'])
                test.append(sample['fname'])
        #train.append(curr_train_selections)
        #test.append(curr_test_selections)
    
    print(3)
#     train_lasts = [fname.split('/')[-1] for fname in train]
    train_gt = [ os.path.split(fname)[-1].split('_')[0] for fname in train]
#     test_lasts = [fname.split('/')[-1] for fname in test]
    test_gt = [os.path.split(fname)[-1].split('_')[0] for fname in test]
    print(train_gt[:5])
    print(test_gt[:5])
    with open(train_csv, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        for i, fpath in enumerate(train):
            writer.writerow([fpath, train_gt[i]])
    with open(test_csv, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        for i, fpath in enumerate(test):
            writer.writerow([fpath, test_gt[i]])
    return train, test



#plt.imshow(np.squeeze(test[0][0]['image'].numpy()))

In [16]:
# for sample in asl_dataset:
#     print(sample['y'])

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


KeyboardInterrupt: 

0
1
2
3
['a', 'a', 'a', 'a', 'a']
['a', 'a', 'a', 'a', 'a']


In [20]:
# print(test)

4320
12960


## Dataloaders
To facilitate using pytorch to build our cnn, we write a custom DataLoader class. This allows for on-demand loading of images, which are used to train our cnn.

In [22]:
class KaggleHandDetectionDataset(Dataset):
    """Custom loader for the Kaggle Hand Detection Dataset"""
    
    def __init__(self, csv_file, transforms=None):
        """
        Args:
            csv_file (string): Path to the csv file with image filepaths and gt classes
            transforms (callable, optional): Optional transforms to be applied on a sample
        """
        self.images_frame = pd.read_csv(csv_file)
        #print(self.images_frame)
        self.transforms = transforms
        
    def __len__(self):
        return len(self.images_frame)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        img_name = self.images_frame.iloc[idx, 0]
        img = cv2.imread(img_name)
        y = int(self.images_frame.iloc[idx, 1])-1
        sample = {'image' : img, 'y' : y, 'fname' : img_name}
        
        if len(self.transforms) > 0:
            for _, transform in enumerate(self.transforms):
                sample = transform(sample)
        return sample


In [23]:
class AslGestureDataset(Dataset):
    """Custom loader for the Kaggle Hand Detection Dataset"""
    
    def __init__(self, csv_file, transforms=None):
        """
        Args:
            csv_file (string): Path to the csv file with image filepaths and gt classes
            transforms (callable, optional): Optional transforms to be applied on a sample
        """
        self.images_frame = pd.read_csv(csv_file)
        self.transforms = transforms
        
    def __len__(self):
        return len(self.images_frame)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        img_name = self.images_frame.iloc[idx, 0]
        #print(img_name)
        img = cv2.imread(img_name)
        y = (self.images_frame.iloc[idx, 1])
        # normalize to 0-26 for classes (missing j and z b/c dynamic)
        y = ord(y)- ord('a')
        sample = {'image' : img, 'y' : y, 'fname' : img_name}
        
        if len(self.transforms) > 0:
            for _, transform in enumerate(self.transforms):
                sample = transform(sample)
        return sample

In [24]:
class Rescale(object):
    """Used to rescale an image to a given size. Useful for the CNN
    
    Args:
        output_size (tuple or int): Desired output size after rescaling. If tuple, output is matched to output_size.
        If int smaller of width/height is matched to output_size, keeping aspect ratio the same.
    """
    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size
        
    def __call__(self, sample):
        y = sample['y']
        image = sample['image']
        
        h, w = image.shape[:2]
        if isinstance(self.output_size, int):
            if h > w:
                new_h, new_w = self.output_size * h // w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w // h
        else:
            new_h, new_w = self.output_size
        
        img = cv2.resize(image, (new_h, new_w))
        #print(f"after resize: {img.shape}")
        return {'image' : img, 'y' : y, 'fname' : sample['fname']}
    
class Recolor(object):
    """Used to recolor an image using cv2
    
    Args:
        flag (cv2.COLOR_): color to swap to
    """
    def __init__(self, color):
        self.color = color
    
    def __call__(self, sample):
        y = sample['y']
        image = sample['image']
        #print(f"before recolor: {image.shape}")
        
        # cvtColor to gray drops the damned channel dimension but we need it
        img_cvt = cv2.cvtColor(image, self.color)
        # fucking hack an extra dim to appease pytorch's bitchass
        img_cvt = np.expand_dims(img_cvt, axis=-1)
        #print(f"after exansion: {img_cvt.shape}")
        return {'image' : img_cvt, 'y' : y, 'fname' : sample['fname']}

class ToTensor(object):
    """Convert ndarrays to pytorch Tensors"""
    
    def __call__(self, sample):
        y = sample['y']
        image = sample['image']
        
        # swap color axis b/c 
        # numpy img: H x W x C
        # torch img: C x H x W
        image = image.transpose((2, 0, 1))
        return {'image' : torch.from_numpy(image), 'y' : y, 'fname' : sample['fname']}

In [25]:
def prediction_to_class_str(pred):
    classes = {0 : "palm", 1 : "L", 2 : "fist", 3 : "fist_moved", 4 : "thumb", 5 : "index", 6 : "ok", 7 : "palm_moved", 8 : "c", 9 : "down"}
    return classes[pred]

def classify_arbitrary_image(model, img):
    img_type = type(img)
    print(img_type)
    if img_type == torch.Tensor:
        print("tensor")
        img = img.float()
        img = img.unsqueeze(1)
    elif img_type == np.ndarray:
        print("np array")
        img = np.expand_dims(img, 1)
    else:
        print("error: something other than a torch.Tensor or an np.ndarray was passed as img")
    prediction = model(img)
    prediction = prediction.data.numpy()
    y_hat = np.argmax(prediction, axis=1)
    return prediction_to_class_str(y_hat[0])

def classify_many_images(model, imgs):
    # for now, assuming imgs is a list of images that are either np.ndarrays or torch.Tensors
    # labels will be given back in order images were given
    predictions = []
    for img in imgs:
        predictions.append(classify_arbitrary_image(model, img))
    return predictions

In [26]:
class HandNNModel(Module):
    def __init__(self):
        super().__init__()
        
        # input shape = (32, 256, 256) - (batch_size, w, h) from dataloader
        self.conv1 = Conv2d(1, 32, kernel_size=5) # output shape: (252, 252, 32)
        self.pool1 = MaxPool2d(2) # output shape: (121, 121, 32)
        self.conv2 = Conv2d(32, 64, kernel_size=3) # output shape: (119, 119, 64)
        self.pool2 = MaxPool2d(2) # output shape: (59, 59, 64) - torch uses floor by default
        self.conv3 = Conv2d(64, 64, kernel_size=3) # output shape: (57, 57, 64)
        self.pool3 = MaxPool2d(2) # output shape: (28, 28, 64)
        self.fc1 = Linear(28*28*64, 128) # output shape: (28*28*64, 128)
        self.fc2 = Linear(128, 10)
        self.activation = torch.nn.ReLU()
        
    def forward(self, X):
        X = self.activation(self.conv1(X))
        X = self.pool1(X)
        X = self.activation(self.conv2(X))
        X = self.pool2(X)
        X = self.activation(self.conv3(X))
        X = self.pool3(X)
        X = torch.flatten(X, 1) # flatten with start_dim = 1
        X = self.fc1(X)
        X = self.fc2(X)
        output = F.softmax(X)
        return output

In [27]:
class AslNNModel(Module):
    # same structure as HandNNModel, need to change dimensions
    def __init__(self):
        super().__init__()
        
        # input shape = (64, 400, 400) - (batch_size, w, h) from dataloader
        self.conv1 = Conv2d(1, 32, kernel_size=5) # output shape: (496, 496, 32)
        self.pool1 = MaxPool2d(2) # output shape: (198, 198, 32)
        self.conv2 = Conv2d(32, 64, kernel_size=3) # output shape: (196, 196, 64)
        self.pool2 = MaxPool2d(2) # output shape: (98, 98, 64) - torch uses floor by default
        self.conv3 = Conv2d(64, 64, kernel_size=3) # output shape: (96, 96, 64)
        self.pool3 = MaxPool2d(2) # output shape: (48, 48, 64)
        self.fc1 = Linear(48*48*64, 128) # output shape: (48*48*64, 128)
        self.fc2 = Linear(128, 26) # 24 possible output classes, but it goes up to idx 26: CUDA screams otherwise, so here we are
        self.activation = torch.nn.ReLU()
        
    def forward(self, X):
        X = self.activation(self.conv1(X))
        X = self.pool1(X)
        X = self.activation(self.conv2(X))
        X = self.pool2(X)
        X = self.activation(self.conv3(X))
        X = self.pool3(X)
        X = torch.flatten(X, 1) # flatten with start_dim = 1
        X = self.fc1(X)
        X = self.fc2(X)
        output = F.softmax(X)
        return output

In [28]:
resize = Rescale((256,256))
recolor = Recolor(cv2.COLOR_BGR2GRAY)
to_tensor = ToTensor()
transforms = [resize, recolor, to_tensor]

#hand_dataset = KaggleHandDetectionDataset(csv_file="kaggle_images.csv", transforms=transforms)

# for some reason, even with the binarization, there's 3 channels, but we only want one - so, use recolor
asl_dataset = AslGestureDataset(csv_file="./data/asl/asl_images.csv", transforms=[recolor, to_tensor])
train_asl_dataset = AslGestureDataset(csv_file="./data/asl/train_asl.csv", transforms=[recolor, to_tensor])
print(len(train_asl_dataset))
# df = pd.read_csv("kaggle_images.csv")

sample = asl_dataset[0]
print()
print(sample['image'].shape)
print(sample['y'])
print(sample)
#sample = hand_dataset[0]
#print()
#print(sample['image'].shape)
#print(sample['y'])
#print(sample)

12960

torch.Size([1, 400, 400])
0
{'image': tensor([[[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]]], dtype=torch.uint8), 'y': 0, 'fname': './data/asl/asl_data/binary_frames_rotated\\a_100-19.png'}


## Splitting Data

In [None]:
asl_dataset = AslGestureDataset(csv_file="./data/asl/asl_images.csv", transforms=[recolor, to_tensor])

In [None]:
y = y_as_np_arr(asl_dataset)

In [None]:
df = make_asl_count_df(y)

In [None]:
train, test = make_asl_train_test_split(asl_dataset, df)

In [None]:
lasts = [fname.split('/')[-1] for fname in test]
gt = [fname.split('_')[0] for fname in lasts]
print(len(gt))
train_asl_dataset = AslGestureDataset(csv_file="./data/asl/train_asl.csv", transforms=[recolor, to_tensor])
print(len(train_asl_dataset))

In [29]:
"""
sample = asl_dataset[0]
print()
print(sample['image'].shape)
print(sample['y'])
print(sample)
sample = hand_dataset[0]
img = sample['image']
#recolor = Recolor(cv2.COLOR_BGR2GRAY)
#as_gray = recolor(sample)
#scale = Rescale((256, 256))
#tns = ToTensor()
#as_tns = tns(sample)
#print(type(as_tns['image']))
#new_img = scale(sample)
print(sample['image'])
# complains b/c the gpu owns it, not the cpu since it's a tensor
cv2.imshow('original', sample['image'])
cv2.waitKey(2000)
#cv2.imshow('recolored', as_gray['image'])
#cv2.waitKey(2000)
#cv2.imshow('rescaled', new_img['image'])
#cv2.waitKey(3000)
cv2.destroyAllWindows()
"""


"\nsample = asl_dataset[0]\nprint()\nprint(sample['image'].shape)\nprint(sample['y'])\nprint(sample)\nsample = hand_dataset[0]\nimg = sample['image']\n#recolor = Recolor(cv2.COLOR_BGR2GRAY)\n#as_gray = recolor(sample)\n#scale = Rescale((256, 256))\n#tns = ToTensor()\n#as_tns = tns(sample)\n#print(type(as_tns['image']))\n#new_img = scale(sample)\nprint(sample['image'])\n# complains b/c the gpu owns it, not the cpu since it's a tensor\ncv2.imshow('original', sample['image'])\ncv2.waitKey(2000)\n#cv2.imshow('recolored', as_gray['image'])\n#cv2.waitKey(2000)\n#cv2.imshow('rescaled', new_img['image'])\n#cv2.waitKey(3000)\ncv2.destroyAllWindows()\n"

In [30]:
dataloader = DataLoader(asl_dataset, batch_size=4)
for i_batch, sample_batched in enumerate(dataloader):
    print(i_batch, sample_batched['image'].size())
    if i_batch == 3:
        break

0 torch.Size([4, 1, 400, 400])
1 torch.Size([4, 1, 400, 400])
2 torch.Size([4, 1, 400, 400])
3 torch.Size([4, 1, 400, 400])


## Train the model using the Custom Dataloader
Below, we will actually train our CNN model

In [31]:
use_cuda = torch.cuda.is_available()
print(use_cuda)

True


In [32]:
# torch.cuda.empty_cache()

In [None]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
batch = 48
if use_cuda:
    params = {'batch_size' : batch, 'shuffle': True, 'num_workers' : 0, 'pin_memory' : True}
else:
    params = {'batch_size' : batch, 'shuffle': True, 'num_workers' : 0}
    

max_epochs = 250 
data_loader = DataLoader(train_asl_dataset, **params)
model = AslNNModel()
# move to GPU
if use_cuda:
    model.cuda()

# batch_size, num_channels, w, h
#random_data = torch.rand((1, 1, 256, 256))

#result = model(random_data)

criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

#imgs, labels = (next(iter(data_loader)))
start = time.time()
for epoch in range(max_epochs):
    print(f"start epoch {epoch}")
    running_loss = 0.0
    epoch_start = time.time()
    count = 0
    for i, data in enumerate(data_loader):
        imgs, labels = data['image'], data['y']
        # move to GPU
        imgs, labels = imgs.cuda(), labels.cuda()
        imgs = imgs.float()
        optimizer.zero_grad()
        
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        count = i + 1
        if i %  50 == 0:
            print(f"epoch {epoch} ({count * batch}/17,280): avg running loss = {running_loss / count}")
            print(f"epoch {epoch} ({count * batch}/17,280): time passed = {time.time() - epoch_start} seconds")
    
    print(f"epoch {epoch}: final loss = {running_loss/count}")
    epoch_end = time.time()
    print(f"epoch {epoch} runtime = {epoch_end - epoch_start}")
end = time.time()
print(f"Total training time = {end - start}")

# make sure to save the model so we don't need to train again
save = True
if save:
    save_path = "./data/asl/asl_train_no_tl_same_model.pkl"
    torch.save(model.state_dict(), save_path)

start epoch 0




epoch 0 (48/345,600): avg running loss = 3.2598073482513428
epoch 0 (48/345,600): time passed = 0.35500001907348633 seconds
epoch 0 (2448/345,600): avg running loss = 3.277959337421492
epoch 0 (2448/345,600): time passed = 18.323999166488647 seconds
epoch 0 (4848/345,600): avg running loss = 3.2782880221263015
epoch 0 (4848/345,600): time passed = 36.43057703971863 seconds
epoch 0 (7248/345,600): avg running loss = 3.2807812311791427
epoch 0 (7248/345,600): time passed = 55.44711089134216 seconds
epoch 0 (9648/345,600): avg running loss = 3.280064081078145
epoch 0 (9648/345,600): time passed = 73.99658465385437 seconds
epoch 0 (12048/345,600): avg running loss = 3.2808776671193036
epoch 0 (12048/345,600): time passed = 92.56910991668701 seconds
epoch 0: final loss = 3.2810253240444043
epoch 0 runtime = 99.59725308418274
start epoch 1
epoch 1 (48/345,600): avg running loss = 3.2387588024139404
epoch 1 (48/345,600): time passed = 0.34085917472839355 seconds
epoch 1 (2448/345,600): avg ru

In [None]:
#print(model.named_parameters)

params = model.parameters()

In [None]:
loaded_model = AslNNModel()
loaded_model.load_state_dict(torch.load(save_path))

## Saving Trained Model

In [None]:
save_path = "./trained_test_cnn.pkl"
torch.save(model.state_dict(), save_path)

## Loading the model
Load the model from disk later for evaluation

In [None]:
loaded_model = HandNNModel()
loaded_model.load_state_dict(torch.load(save_path))

## Confusion Matrix and Classification Report (from sklearn)


In [None]:
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report
def make_stats(y, y_hat, num_classes=10):
    cm = confusion_matrix(y, y_hat)
    cm_df = pd.DataFrame(cm, columns=[str(i) for i in range(num_classes)])
    report = classification_report(y, y_hat)
    return cm_df, report

In [None]:
total_samples = 0
total_misclass = 0
test_data_loader = DataLoader(test_asl_dataset, **params)
all_y = np.array([], dtype=np.uint8)
all_y_hat = np.array([], dtype=np.uint8)
for i, sample in enumerate(data_loader):
    y = sample['y']
    y = y.data.numpy()
    images = (sample['image'])
    images = images.float()
    images = (sample['image'])
    images = images.float()
    model = model.cpu()
    predictions = model(images)
    predictions = predictions.cpu()
    predictions = predictions.data.numpy()
    y_hat = np.argmax(predictions, axis=1)
    misclass = np.sum(np.where(y != y_hat, 1, 0))
    total_samples += y.shape[0]
    total_misclass += misclass
    all_y = np.append(all_y, y)
    all_y_hat = np.append(all_y_hat, y_hat)
    #print(f"all_y = {all_y}")
    #print(f"all_y_hat = {all_y_hat}")
    print(f"Number of Misclassifications = {misclass}")
    print(f"Sample acc = {(y.shape[0]-misclass)/y.shape[0]*100}")
overall_acc = (total_samples - total_misclass)/total_samples
print(f"Overall Accuracy = {overall_acc}")

In [None]:
cm_df, report = make_stats(all_y, all_y_hat, num_classes=24)
#print(cm_df.to_markdown())
print(cm_df)#.to_markdown())
print(report)

In [None]:
test_asl_dataset = AslGestureDataset(csv_file="./data/asl/test_asl.csv", transforms=[recolor, to_tensor])
test_data_loader = DataLoader(test_asl_dataset, **params)
all_y = np.array([], dtype=np.uint8)
all_y_hat = np.array([], dtype=np.uint8)
for i, sample in enumerate(test_data_loader):
    y = sample['y']
    y = y.data.numpy()
    images = (sample['image'])
    images = images.float()
    images = (sample['image'])
    images = images.float()
    model = model.cpu()
    predictions = model(images)
    predictions = predictions.cpu()
    predictions = predictions.data.numpy()
    y_hat = np.argmax(predictions, axis=1)
    misclass = np.sum(np.where(y != y_hat, 1, 0))
    total_samples += y.shape[0]
    total_misclass += misclass55555555
    all_y = np.append(all_y, y)
    all_y_hat = np.append(all_y_hat, y_hat)
    #print(f"all_y = {all_y}")
    #print(f"all_y_hat = {all_y_hat}")
    print(f"Number of Misclassifications = {misclass}")
    print(f"Sample acc = {(y.shape[0]-misclass)/y.shape[0]*100}")
overall_acc = (total_samples - total_misclass)/total_samples
print(f"Overall Accuracy = {overall_acc}")

In [None]:
cm_df, report = make_stats(all_y, all_y_hat, num_classes=24)
#print(cm_df.to_markdown())
print(cm_df)#.to_markdown())
print(report)