Mount Google Drive so that this notebook can read from and write to the project folders stored there.

In [1]:
from google.colab import drive

# Path at which the Drive contents are exposed to this notebook.
DRIVE_MOUNT_POINT = "/content/drive"

# force_remount=True requests a fresh mount even if Drive is already mounted.
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

Mounted at /content/drive


Pass the images of the dataset produced by the data_loader script through the feature-extraction part of the pretrained VGG16 CNN. The resulting features, which are tensors, will then be fed to our ANN classifier for training, validation, and testing.

In [4]:
import os
import torch
from torchvision import datasets, models, transforms

# Locations on the mounted Drive: the raw image dataset (one sub-folder per
# label) and the folder that will receive the extracted features.
PROJECT_FOLDER = '/content/drive/My Drive/ECE496/machine_learning_pipeline/'
DATASET_FOLDER = PROJECT_FOLDER + 'gesture_training_dataset'
FEATURE_FOLDER = PROJECT_FOLDER + 'vgg16_gesture_training_dataset_features/'

# VGG16 with pretrained weights; the extraction cell uses its `features` part.
vgg = models.vgg16(pretrained=True)

# Per-image preprocessing: centre-crop to 224x224, convert to a tensor, then
# normalise each channel with the mean/std below.
# NOTE(review): torchvision documents its pretrained ImageNet weights as
# expecting mean=(0.485, 0.456, 0.406) and std=(0.229, 0.224, 0.225). This
# pipeline uses 0.5/0.5 instead -- confirm that is intentional and that the
# same transform is applied wherever these features are consumed.
crop_size = 224
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)
preprocessing_steps = [
    transforms.CenterCrop(crop_size),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
data_transform = transforms.Compose(preprocessing_steps)

# ImageFolder derives each image's class from the sub-folder it lives in and
# applies data_transform whenever an image is loaded.
dataset = datasets.ImageFolder(DATASET_FOLDER, transform=data_transform)

# Label name -> class index, as assigned by ImageFolder.
print(dataset.class_to_idx)

# Inverse lookup: class index -> label name.
labelMap = {index: name for name, index in dataset.class_to_idx.items()}
print(labelMap)

{'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8}
{0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I'}


Iterate through each image in the dataset and create its feature tensor by passing the image through the feature extractor of VGG16. Each feature tensor is saved inside the specified FEATURE_FOLDER, in a sub-folder named after the image's label.

In [5]:
# Extract one feature tensor per dataset image with the VGG16 feature
# extractor and save it as FEATURE_FOLDER/<label>/<fileNumber>.tensor.
if not os.path.exists(FEATURE_FOLDER):
    print('Creating folder called %s to store extracted features...' %FEATURE_FOLDER)
    # makedirs creates missing parent folders too and, with exist_ok=True,
    # does not fail if the folder appears between the check and the call.
    os.makedirs(FEATURE_FOLDER, exist_ok=True)

# Inference only: put any dropout / batch-norm layers into evaluation mode.
vgg.eval()

fileNumber = 1 #All features will be assigned an arbitrary number for naming purposes
# batch_size=1 with shuffle=False: images are visited one at a time in dataset
# order, so fileNumber keeps counting across labels rather than restarting.
data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)

# No gradients are needed for feature extraction; disabling autograd avoids
# building (and then discarding) a computation graph for every image.
with torch.no_grad():
    for image, labels in data_loader:
        feature = vgg.features(image) #Create a feature by passing the image through VGG16 feature extractor

        label = labelMap[int(labels[0])] #Label of the image
        folder = os.path.join(FEATURE_FOLDER, label)
        if not os.path.exists(folder):
            print('Creating folder to store extracted features for label %s...' %label)
            os.makedirs(folder, exist_ok=True)

        fileName = os.path.join(folder, str(fileNumber) + '.tensor')
        # Drop the batch dimension of size 1 before saving. The tensor is
        # already gradient-free, so no NumPy round trip is required.
        torch.save(feature.squeeze(0), fileName)
        #Sanity check
        if fileNumber % 5 == 0:
            print("Saved file %s" %fileName)
        fileNumber += 1

Creating folder called /content/drive/My Drive/ECE496/machine_learning_pipeline/vgg16_gesture_training_dataset_features/ to store extracted features...
Creating folder to store extracted features for label A...
Saved file /content/drive/My Drive/ECE496/machine_learning_pipeline/vgg16_gesture_training_dataset_features/A/5.tensor
Saved file /content/drive/My Drive/ECE496/machine_learning_pipeline/vgg16_gesture_training_dataset_features/A/10.tensor
Saved file /content/drive/My Drive/ECE496/machine_learning_pipeline/vgg16_gesture_training_dataset_features/A/15.tensor
Saved file /content/drive/My Drive/ECE496/machine_learning_pipeline/vgg16_gesture_training_dataset_features/A/20.tensor
Saved file /content/drive/My Drive/ECE496/machine_learning_pipeline/vgg16_gesture_training_dataset_features/A/25.tensor
Saved file /content/drive/My Drive/ECE496/machine_learning_pipeline/vgg16_gesture_training_dataset_features/A/30.tensor
Saved file /content/drive/My Drive/ECE496/machine_learning_pipeline/vg