### Table of Contents

### 00. Load Data
##### Data Selection

In [1]:
import random
import numpy as np
# for reproducibility
random.seed(123)
np.random.seed(123)

In [2]:
def get_random_list(num_items, start, end, set_seed=123):
    random.seed(set_seed)
    selected_idx = [i for i in range(start,end)]
    random.shuffle(selected_idx)
    return sorted(selected_idx[0:num_items])

def train_test_split(input_list, train_proportion=0.7, set_seed=123):
    train_idx = get_random_list(num_items=round(len(input_list)*train_proportion), 
                                start=0, end=len(input_list), set_seed=123)
    train_list = []
    test_list = []
    for i in range(0,len(input_list)):
        if i in train_idx:
            train_list.append(input_list[i])
        else:
            test_list.append(input_list[i])
    return train_list, test_list

In [3]:
# choose 25 PIE subjects
selected_idx = get_random_list(num_items=25, start=1, end=68, set_seed=123)
print('Selected subjects:', selected_idx)

Selected subjects: [2, 8, 13, 14, 15, 17, 19, 20, 23, 24, 27, 31, 32, 33, 34, 40, 42, 44, 46, 48, 50, 52, 59, 63, 65]


In [4]:
NUM_IMG_PER_SUBJ = 170
NUM_SELFIES = 10

# list of paths to PIE images
pie_list = []
for subj_idx in selected_idx:
    temp_list = ['PIE//'+str(subj_idx)+'//'+str(i+1)+'.jpg' for i in range(0,NUM_IMG_PER_SUBJ)]
    pie_list.extend(temp_list)

# list of paths to selfies
selfies_list = ['selfies//formatted//'+str(i+1)+'.jpg' for i in range(0,NUM_SELFIES)]

# list of paths to all images of interest
list_of_img_end_paths = pie_list + selfies_list

print('Number of selected PIE images:', len(pie_list))
print('Number of selected selfies:', len(selfies_list))
print('Number of selected images:', len(list_of_img_end_paths))

Number of selected PIE images: 4250
Number of selected selfies: 10
Number of selected images: 4260


##### Train Test Split

In [5]:
TRAIN_PROPORTION = 0.7

# split PIE train and test
pie_train_list, pie_test_list = train_test_split(pie_list, train_proportion=TRAIN_PROPORTION, set_seed=123)

# split selfies train and test
selfies_train_list, selfies_test_list = train_test_split(selfies_list, train_proportion=TRAIN_PROPORTION, set_seed=123)

print('Number||Proportion of train PIE images:', len(pie_train_list), '||', len(pie_train_list)/len(pie_list))
print('Number||Proportion of test PIE images:', len(pie_test_list), '||', len(pie_test_list)/len(pie_list))
print('Number||Proportion of train selfies:', len(selfies_train_list), '||', len(selfies_train_list)/len(selfies_list))
print('Number||Proportion of test selfies:', len(selfies_test_list), '||', len(selfies_test_list)/len(selfies_list))

Number||Proportion of train PIE images: 2975 || 0.7
Number||Proportion of test PIE images: 1275 || 0.3
Number||Proportion of train selfies: 7 || 0.7
Number||Proportion of test selfies: 3 || 0.3


In [6]:
import numpy as np
import cv2
import os

In [7]:
def get_labels_from_path_list(list_of_img_paths, label_type='general'):
    label_list = []
    for path in list_of_img_paths:
        splitted = path.split('//')
        if splitted[0]=='PIE':
            if label_type=='general':
                label_list.append(splitted[0])
            elif label_type=='specific':
                label_list.append(splitted[1])
            else:
                print("Please specify label_type as 'general' or 'specific'")
        else:
            if label_type=='general':
                label_list.append(splitted[0])
            elif label_type=='specific':
                label_list.append('selfies')
            else:
                print("Please specify label_type as 'general' or 'specific'")
    return label_list

In [8]:
def load_and_vectorise_images(list_of_img_paths):
    # read image
    path = os.path.abspath('')

    for counter, end_paths in enumerate(list_of_img_paths):
        # load image
        img = cv2.imread(os.path.join(path, end_paths))

        # vectorise image
        array_size = img.shape[0]*img.shape[1]
        vectorised_img = img.copy()
        vectorised_img = vectorised_img.reshape(array_size,3)
        vectorised_img = np.array([i[0] for i in vectorised_img], dtype=int)

        # add to dataset
        if counter == 0:
            data = vectorised_img.reshape(1,array_size).copy()
        else:
            data = np.concatenate((data, vectorised_img.reshape(1,array_size)), axis=0)
    return data

In [9]:
# load test data
train_data = load_and_vectorise_images(pie_train_list+selfies_train_list)
test_data = load_and_vectorise_images(pie_test_list+selfies_test_list)
print('Vectorised and loaded train data:', train_data.shape)
print('Vectorised and loaded test data:', test_data.shape)

Vectorised and loaded train data: (2982, 1024)
Vectorised and loaded test data: (1278, 1024)


In [10]:
# prepare specific labels (target var for classification task)
train_labels = get_labels_from_path_list(pie_train_list+selfies_train_list, label_type='specific')
test_labels = get_labels_from_path_list(pie_test_list+selfies_test_list, label_type='specific')

In [11]:
# format labels into numeric values (& storing conversion dictionary)
def convert_str_to_num(str_list, str_to_num=None):
    unique = list(set(str_list))
    if str_to_num is None:
        str_to_num = {string:idx for idx, string in enumerate(unique)}
    else:
        missing = [i for i in unique if i not in str_to_num.keys()]
        max_idx = max(str_to_num.values())
        for string in missing:
            str_to_num[string]=max_idx
            max_idx=+1
        
    num_list = [str_to_num[string] for string in str_list]
    return num_list, str_to_num

train_labels_num, train_str_to_num = convert_str_to_num(train_labels)
test_labels_num, str_to_num = convert_str_to_num(test_labels, train_str_to_num)

In [12]:
# if false means we have unseen labels in test set, which might be problematic
assert train_str_to_num == str_to_num

### 05. Neural Network (NN)
Train a CNN with two convolutional
layers and one fully connected layer, with the architecture specified as follows: number of
nodes: 20-50-500-21. The number of the nodes in the last layer is fixed as 21 as we are performing
21-category (20 CMU PIE faces plus 1 for yourself) classification. Convolutional
kernel sizes are set as 5. Each convolutional layer is followed by a max pooling layer with
a kernel size of 2 and stride of 2. The fully connected layer is followed by ReLU. Train the
network and report the final classification performance.

##### NN for Classification

In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.optim as optim

In [30]:
# constants
NUM_EPOCHS = 15
BATCH_SIZE = 10
LEARNING_RATE = 1e-2
GENERATOR_PARAMS = {
    'shuffle': True,
    'num_workers': 0,
    'drop_last': True, #ignore last incomplete batch
    'pin_memory': True
}

# preferences
PRINT_ROUND = 50

# gpu
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    # print("Running on the GPU")
else:
    device = torch.device("cpu")
    # print("Running on the CPU")

In [31]:
# prepare data into pytorch tensor format
def prepare_sequence(seq):
    return torch.tensor(seq, dtype=torch.long)

class FormatDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        # Run multiple rows at once, i.e. reduce enumerate
        return len(self.features)

    def __getitem__(self, index):

        feats_in = prepare_sequence(self.features[index])
        target_out = prepare_sequence(self.labels[index])

        return feats_in, target_out

In [32]:
class CNN(nn.Module):
    def __init__(self, embed_dim, hidden_dim1, hidden_dim2, hidden_dim3, output_dim, kernel_size, pool_k_size, pool_stride):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(embed_dim, hidden_dim1, kernel_size=kernel_size)
        self.conv2 = nn.Conv2d(hidden_dim1, hidden_dim2, kernel_size=kernel_size)
        self.pool = nn.MaxPool2d(kernel_size=pool_k_size, stride=pool_stride)
        self.fc1 = nn.Linear(hidden_dim2*5*5, hidden_dim3)
        self.fc2 = nn.Linear(hidden_dim3, output_dim)
        
    def forward(self, feats_in):
        batch_size, e_, e_, s_ = feats_in.shape
        x = self.pool(F.relu(self.conv1(feats_in.float())))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.fc2(x),dim=1)
        return x


# load data
training_set = FormatDataset(train_data, np.asarray(train_labels_num))
training_generator = DataLoader(training_set, batch_size=BATCH_SIZE, **GENERATOR_PARAMS)

test_batch_size = test_data.shape[0]
test_set = FormatDataset(test_data, np.asarray(test_labels_num))
testing_generator = DataLoader(test_set, batch_size=test_batch_size, **GENERATOR_PARAMS)

# load model
model = CNN(embed_dim=1,
            hidden_dim1=20, 
            hidden_dim2=50,
            hidden_dim3=500,
            output_dim=26, 
            kernel_size=(5,5),
            pool_k_size=(2,2),
            pool_stride=2
           ).to(device)
loss_function = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

results_dict = {}
early_stopping = False

# train
for epoch in range(NUM_EPOCHS):
    print('Epoch {}'.format(epoch), '-'*80)
    results = {'train_loss': [], 'train_acc': []}
    for idx, (feats_in, target_out) in enumerate(training_generator):
        feats_in = feats_in.to(device).view(BATCH_SIZE, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        model.zero_grad()
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/len(target_out)
        loss = loss_function(tag_scores, target_out)
        loss.backward()
        optimizer.step()

        results['train_loss'].append(loss.data.item())
        results['train_acc'].append(accuracy)

        if idx%PRINT_ROUND==0:
            print('Step {} | Training Loss: {}, Accuracy: {}%'.format(idx, round(loss.data.item(),3), round(accuracy*100,3)))

    results_dict[epoch] = results
    avg_acc = sum(results['train_acc'])/(idx+1)
    print('Average Accuracy: {}%'.format(round(avg_acc*100,3)))
    
# predict
with torch.no_grad():
    print('-'*80)
    for idx, (feats_in, target_out) in enumerate(testing_generator):
        feats_in = feats_in.to(device).view(test_batch_size, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/test_batch_size
        print('Testing Accuracy: {}%'.format(round(accuracy*100,3)))

Epoch 0 --------------------------------------------------------------------------------
Step 0 | Training Loss: 5.182, Accuracy: 0.0%
Step 50 | Training Loss: 3.228, Accuracy: 0.0%
Step 100 | Training Loss: 3.281, Accuracy: 0.0%
Step 150 | Training Loss: 2.619, Accuracy: 10.0%
Step 200 | Training Loss: 2.963, Accuracy: 20.0%
Step 250 | Training Loss: 1.387, Accuracy: 60.0%
Average Accuracy: 20.436%
Epoch 1 --------------------------------------------------------------------------------
Step 0 | Training Loss: 2.043, Accuracy: 50.0%
Step 50 | Training Loss: 0.859, Accuracy: 80.0%
Step 100 | Training Loss: 1.023, Accuracy: 80.0%
Step 150 | Training Loss: 0.194, Accuracy: 100.0%
Step 200 | Training Loss: 0.589, Accuracy: 90.0%
Step 250 | Training Loss: 0.4, Accuracy: 80.0%
Average Accuracy: 79.732%
Epoch 2 --------------------------------------------------------------------------------
Step 0 | Training Loss: 0.076, Accuracy: 100.0%
Step 50 | Training Loss: 0.498, Accuracy: 80.0%
Step 10

In [33]:
# selfies is numbered as 25
correct_selfies = sum([1 for pred,act in zip(predicted,target_out) if (pred==act) & (act==25)])
correct_PIE = correct - correct_selfies
total_selfies = 3
total_PIE = test_batch_size-total_selfies
print('Accuracy of NN for ALL test set: {}%'.format(round(accuracy*100,3)))
print('Accuracy of NN for PIE test set: {}%'.format(round((correct_PIE/total_PIE)*100,3)))
print('Accuracy of NN for selfies test set: {}%'.format(round((correct_selfies/total_selfies)*100,3)))

Accuracy of NN for ALL test set: 97.34%
Accuracy of NN for PIE test set: 97.333%
Accuracy of NN for selfies test set: 100.0%


##### Experimenting with different network architectures

In [36]:
# 1. Without ReLU between CNN Layers (failed model)
class CNN(nn.Module):
    def __init__(self, embed_dim, hidden_dim1, hidden_dim2, hidden_dim3, output_dim, kernel_size, pool_k_size, pool_stride):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(embed_dim, hidden_dim1, kernel_size=kernel_size)
        self.conv2 = nn.Conv2d(hidden_dim1, hidden_dim2, kernel_size=kernel_size)
        self.pool = nn.MaxPool2d(kernel_size=pool_k_size, stride=pool_stride)
        self.fc1 = nn.Linear(hidden_dim2*5*5, hidden_dim3)
        self.fc2 = nn.Linear(hidden_dim3, output_dim)
        
    def forward(self, feats_in):
        batch_size, e_, e_, s_ = feats_in.shape
        x = self.pool(self.conv1(feats_in.float()))
        x = self.pool(self.conv2(x))
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.fc2(x),dim=1)
        return x

# load data
training_set = FormatDataset(train_data, np.asarray(train_labels_num))
training_generator = DataLoader(training_set, batch_size=BATCH_SIZE, **GENERATOR_PARAMS)

test_batch_size = test_data.shape[0]
test_set = FormatDataset(test_data, np.asarray(test_labels_num))
testing_generator = DataLoader(test_set, batch_size=test_batch_size, **GENERATOR_PARAMS)

# load model
model = CNN(embed_dim=1,
            hidden_dim1=20, 
            hidden_dim2=50,
            hidden_dim3=500,
            output_dim=26, 
            kernel_size=(5,5),
            pool_k_size=(2,2),
            pool_stride=2
           ).to(device)
loss_function = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

results_dict = {}
early_stopping = False

# train
for epoch in range(NUM_EPOCHS):
    print('Epoch {}'.format(epoch), '-'*80)
    results = {'train_loss': [], 'train_acc': []}
    for idx, (feats_in, target_out) in enumerate(training_generator):
        feats_in = feats_in.to(device).view(BATCH_SIZE, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        model.zero_grad()
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/len(target_out)
        loss = loss_function(tag_scores, target_out)
        loss.backward()
        optimizer.step()

        results['train_loss'].append(loss.data.item())
        results['train_acc'].append(accuracy)

        if idx%PRINT_ROUND==0:
            print('Step {} | Training Loss: {}, Accuracy: {}%'.format(idx, round(loss.data.item(),3), round(accuracy*100,3)))

    results_dict[epoch] = results
    avg_acc = sum(results['train_acc'])/(idx+1)
    print('Average Accuracy: {}%'.format(round(avg_acc*100,3)))

# load data
training_generator = DataLoader(training_set, batch_size=BATCH_SIZE, **GENERATOR_PARAMS)

test_batch_size = test_data.shape[0]
test_set = FormatDataset(test_data, np.asarray(test_labels_num))
testing_generator = DataLoader(test_set, batch_size=test_batch_size, **GENERATOR_PARAMS)

# predict
with torch.no_grad():
    print('-'*80)
    for idx, (feats_in, target_out) in enumerate(testing_generator):
        feats_in = feats_in.to(device).view(test_batch_size, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/test_batch_size
        print('Testing Accuracy: {}%'.format(round(accuracy*100,3)))
        
# selfies is numbered as 25
correct_selfies = sum([1 for pred,act in zip(predicted,target_out) if (pred==act) & (act==25)])
correct_PIE = correct - correct_selfies
total_selfies = 3
total_PIE = test_batch_size-total_selfies
print('Accuracy of NN for ALL test set: {}%'.format(round(accuracy*100,3)))
print('Accuracy of NN for PIE test set: {}%'.format(round((correct_PIE/total_PIE)*100,3)))
print('Accuracy of NN for selfies test set: {}%'.format(round((correct_selfies/total_selfies)*100,3)))

Epoch 0 --------------------------------------------------------------------------------
Step 0 | Training Loss: 11.669, Accuracy: 0.0%
Step 50 | Training Loss: nan, Accuracy: 10.0%
Step 100 | Training Loss: nan, Accuracy: 10.0%
Step 150 | Training Loss: nan, Accuracy: 0.0%
Step 200 | Training Loss: nan, Accuracy: 0.0%
Step 250 | Training Loss: nan, Accuracy: 0.0%
Average Accuracy: 3.792%
Epoch 1 --------------------------------------------------------------------------------
Step 0 | Training Loss: nan, Accuracy: 0.0%
Step 50 | Training Loss: nan, Accuracy: 10.0%
Step 100 | Training Loss: nan, Accuracy: 10.0%
Step 150 | Training Loss: nan, Accuracy: 0.0%
Step 200 | Training Loss: nan, Accuracy: 0.0%
Step 250 | Training Loss: nan, Accuracy: 10.0%
Average Accuracy: 3.792%
Epoch 2 --------------------------------------------------------------------------------
Step 0 | Training Loss: nan, Accuracy: 0.0%
Step 50 | Training Loss: nan, Accuracy: 0.0%
Step 100 | Training Loss: nan, Accuracy:

In [50]:
# 2. Reducing batch sizes
BATCH_SIZE=5

class CNN(nn.Module):
    def __init__(self, embed_dim, hidden_dim1, hidden_dim2, hidden_dim3, output_dim, kernel_size, pool_k_size, pool_stride):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(embed_dim, hidden_dim1, kernel_size=kernel_size)
        self.conv2 = nn.Conv2d(hidden_dim1, hidden_dim2, kernel_size=kernel_size)
        self.pool = nn.MaxPool2d(kernel_size=pool_k_size, stride=pool_stride)
        self.fc1 = nn.Linear(hidden_dim2*5*5, hidden_dim3)
        self.fc2 = nn.Linear(hidden_dim3, output_dim)
        
    def forward(self, feats_in):
        batch_size, e_, e_, s_ = feats_in.shape
        x = self.pool(F.relu(self.conv1(feats_in.float())))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.fc2(x),dim=1)
        return x

# load data
training_set = FormatDataset(train_data, np.asarray(train_labels_num))
training_generator = DataLoader(training_set, batch_size=BATCH_SIZE, **GENERATOR_PARAMS)

test_batch_size = test_data.shape[0]
test_set = FormatDataset(test_data, np.asarray(test_labels_num))
testing_generator = DataLoader(test_set, batch_size=test_batch_size, **GENERATOR_PARAMS)

# load model
model = CNN(embed_dim=1,
            hidden_dim1=20, 
            hidden_dim2=50,
            hidden_dim3=500,
            output_dim=26, 
            kernel_size=(5,5),
            pool_k_size=(2,2),
            pool_stride=2
           ).to(device)
loss_function = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

results_dict = {}
early_stopping = False

# train
for epoch in range(NUM_EPOCHS):
    print('Epoch {}'.format(epoch), '-'*80)
    results = {'train_loss': [], 'train_acc': []}
    for idx, (feats_in, target_out) in enumerate(training_generator):
        feats_in = feats_in.to(device).view(BATCH_SIZE, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        model.zero_grad()
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/len(target_out)
        loss = loss_function(tag_scores, target_out)
        loss.backward()
        optimizer.step()

        results['train_loss'].append(loss.data.item())
        results['train_acc'].append(accuracy)

        if idx%PRINT_ROUND==0:
            print('Step {} | Training Loss: {}, Accuracy: {}%'.format(idx, round(loss.data.item(),3), round(accuracy*100,3)))

    results_dict[epoch] = results
    avg_acc = sum(results['train_acc'])/(idx+1)
    print('Average Accuracy: {}%'.format(round(avg_acc*100,3)))
    
# predict
with torch.no_grad():
    print('-'*80)
    for idx, (feats_in, target_out) in enumerate(testing_generator):
        feats_in = feats_in.to(device).view(test_batch_size, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/test_batch_size
        print('Testing Accuracy: {}%'.format(round(accuracy*100,3)))

Epoch 0 --------------------------------------------------------------------------------
Step 0 | Training Loss: 15.822, Accuracy: 0.0%
Step 50 | Training Loss: 3.286, Accuracy: 0.0%
Step 100 | Training Loss: 3.351, Accuracy: 0.0%
Step 150 | Training Loss: 3.267, Accuracy: 0.0%
Step 200 | Training Loss: 3.248, Accuracy: 20.0%
Step 250 | Training Loss: 3.303, Accuracy: 0.0%
Step 300 | Training Loss: 3.248, Accuracy: 0.0%
Step 350 | Training Loss: 3.27, Accuracy: 0.0%
Step 400 | Training Loss: 3.237, Accuracy: 0.0%
Step 450 | Training Loss: 3.266, Accuracy: 0.0%
Step 500 | Training Loss: 3.267, Accuracy: 20.0%
Step 550 | Training Loss: 3.251, Accuracy: 0.0%
Average Accuracy: 3.893%
Epoch 1 --------------------------------------------------------------------------------
Step 0 | Training Loss: 3.242, Accuracy: 0.0%
Step 50 | Training Loss: 3.239, Accuracy: 20.0%
Step 100 | Training Loss: 3.245, Accuracy: 0.0%
Step 150 | Training Loss: 3.253, Accuracy: 0.0%
Step 200 | Training Loss: 3.242,

Step 500 | Training Loss: 1.341, Accuracy: 60.0%
Step 550 | Training Loss: 0.595, Accuracy: 80.0%
Average Accuracy: 57.248%
Epoch 12 --------------------------------------------------------------------------------
Step 0 | Training Loss: 2.059, Accuracy: 20.0%
Step 50 | Training Loss: 0.346, Accuracy: 100.0%
Step 100 | Training Loss: 0.404, Accuracy: 80.0%
Step 150 | Training Loss: 0.01, Accuracy: 100.0%
Step 200 | Training Loss: 2.483, Accuracy: 20.0%
Step 250 | Training Loss: 1.584, Accuracy: 60.0%
Step 300 | Training Loss: 1.63, Accuracy: 60.0%
Step 350 | Training Loss: 3.126, Accuracy: 0.0%
Step 400 | Training Loss: 3.174, Accuracy: 0.0%
Step 450 | Training Loss: 2.658, Accuracy: 20.0%
Step 500 | Training Loss: 3.099, Accuracy: 0.0%
Step 550 | Training Loss: 2.699, Accuracy: 20.0%
Average Accuracy: 37.517%
Epoch 13 --------------------------------------------------------------------------------
Step 0 | Training Loss: 1.928, Accuracy: 60.0%
Step 50 | Training Loss: 2.543, Accuracy:

In [51]:
# 2. Increasing batch sizes
BATCH_SIZE=50

class CNN(nn.Module):
    def __init__(self, embed_dim, hidden_dim1, hidden_dim2, hidden_dim3, output_dim, kernel_size, pool_k_size, pool_stride):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(embed_dim, hidden_dim1, kernel_size=kernel_size)
        self.conv2 = nn.Conv2d(hidden_dim1, hidden_dim2, kernel_size=kernel_size)
        self.pool = nn.MaxPool2d(kernel_size=pool_k_size, stride=pool_stride)
        self.fc1 = nn.Linear(hidden_dim2*5*5, hidden_dim3)
        self.fc2 = nn.Linear(hidden_dim3, output_dim)
        
    def forward(self, feats_in):
        batch_size, e_, e_, s_ = feats_in.shape
        x = self.pool(F.relu(self.conv1(feats_in.float())))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.fc2(x),dim=1)
        return x

# load data
training_set = FormatDataset(train_data, np.asarray(train_labels_num))
training_generator = DataLoader(training_set, batch_size=BATCH_SIZE, **GENERATOR_PARAMS)

test_batch_size = test_data.shape[0]
test_set = FormatDataset(test_data, np.asarray(test_labels_num))
testing_generator = DataLoader(test_set, batch_size=test_batch_size, **GENERATOR_PARAMS)

# load model
model = CNN(embed_dim=1,
            hidden_dim1=20, 
            hidden_dim2=50,
            hidden_dim3=500,
            output_dim=26, 
            kernel_size=(5,5),
            pool_k_size=(2,2),
            pool_stride=2
           ).to(device)
loss_function = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

results_dict = {}
early_stopping = False

# train
for epoch in range(NUM_EPOCHS):
    print('Epoch {}'.format(epoch), '-'*80)
    results = {'train_loss': [], 'train_acc': []}
    for idx, (feats_in, target_out) in enumerate(training_generator):
        feats_in = feats_in.to(device).view(BATCH_SIZE, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        model.zero_grad()
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/len(target_out)
        loss = loss_function(tag_scores, target_out)
        loss.backward()
        optimizer.step()

        results['train_loss'].append(loss.data.item())
        results['train_acc'].append(accuracy)

        if idx%PRINT_ROUND==0:
            print('Step {} | Training Loss: {}, Accuracy: {}%'.format(idx, round(loss.data.item(),3), round(accuracy*100,3)))

    results_dict[epoch] = results
    avg_acc = sum(results['train_acc'])/(idx+1)
    print('Average Accuracy: {}%'.format(round(avg_acc*100,3)))
    
# predict
with torch.no_grad():
    print('-'*80)
    for idx, (feats_in, target_out) in enumerate(testing_generator):
        feats_in = feats_in.to(device).view(test_batch_size, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/test_batch_size
        print('Testing Accuracy: {}%'.format(round(accuracy*100,3)))

Epoch 0 --------------------------------------------------------------------------------
Step 0 | Training Loss: 7.705, Accuracy: 6.0%
Step 50 | Training Loss: 2.776, Accuracy: 32.0%
Average Accuracy: 15.39%
Epoch 1 --------------------------------------------------------------------------------
Step 0 | Training Loss: 2.81, Accuracy: 18.0%
Step 50 | Training Loss: 0.812, Accuracy: 80.0%
Average Accuracy: 59.322%
Epoch 2 --------------------------------------------------------------------------------
Step 0 | Training Loss: 0.486, Accuracy: 88.0%
Step 50 | Training Loss: 0.479, Accuracy: 90.0%
Average Accuracy: 87.153%
Epoch 3 --------------------------------------------------------------------------------
Step 0 | Training Loss: 0.221, Accuracy: 94.0%
Step 50 | Training Loss: 0.198, Accuracy: 94.0%
Average Accuracy: 95.661%
Epoch 4 --------------------------------------------------------------------------------
Step 0 | Training Loss: 0.085, Accuracy: 98.0%
Step 50 | Training Loss: 0.

In [57]:
# 2. Increasing batch sizes
BATCH_SIZE=128
NUM_EPOCHS=300

class CNN(nn.Module):
    def __init__(self, embed_dim, hidden_dim1, hidden_dim2, hidden_dim3, output_dim, kernel_size, pool_k_size, pool_stride):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(embed_dim, hidden_dim1, kernel_size=kernel_size)
        self.conv2 = nn.Conv2d(hidden_dim1, hidden_dim2, kernel_size=kernel_size)
        self.pool = nn.MaxPool2d(kernel_size=pool_k_size, stride=pool_stride)
        self.fc1 = nn.Linear(hidden_dim2*5*5, hidden_dim3)
        self.fc2 = nn.Linear(hidden_dim3, output_dim)
        
    def forward(self, feats_in):
        batch_size, e_, e_, s_ = feats_in.shape
        x = self.pool(F.relu(self.conv1(feats_in.float())))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.fc2(x),dim=1)
        return x

# load data
training_set = FormatDataset(train_data, np.asarray(train_labels_num))
training_generator = DataLoader(training_set, batch_size=BATCH_SIZE, **GENERATOR_PARAMS)

test_batch_size = test_data.shape[0]
test_set = FormatDataset(test_data, np.asarray(test_labels_num))
testing_generator = DataLoader(test_set, batch_size=test_batch_size, **GENERATOR_PARAMS)

# load model
model = CNN(embed_dim=1,
            hidden_dim1=20, 
            hidden_dim2=50,
            hidden_dim3=500,
            output_dim=26, 
            kernel_size=(5,5),
            pool_k_size=(2,2),
            pool_stride=2
           ).to(device)
loss_function = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

results_dict = {}
early_stopping = False

# train
for epoch in range(NUM_EPOCHS):
#     print('Epoch {}'.format(epoch), '-'*80)
    results = {'train_loss': [], 'train_acc': []}
    for idx, (feats_in, target_out) in enumerate(training_generator):
        feats_in = feats_in.to(device).view(BATCH_SIZE, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        model.zero_grad()
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/len(target_out)
        loss = loss_function(tag_scores, target_out)
        loss.backward()
        optimizer.step()

        results['train_loss'].append(loss.data.item())
        results['train_acc'].append(accuracy)

    if epoch%10==0:
        print('Epoch {} | Training Loss: {}, Accuracy: {}%'.format(epoch, round(loss.data.item(),3), round(accuracy*100,3)))
        if accuracy>0.99:
            break

    results_dict[epoch] = results
    avg_acc = sum(results['train_acc'])/(idx+1)
#     print('Average Accuracy: {}%'.format(round(avg_acc*100,3)))
    
# predict
with torch.no_grad():
    print('-'*80)
    for idx, (feats_in, target_out) in enumerate(testing_generator):
        feats_in = feats_in.to(device).view(test_batch_size, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/test_batch_size
        print('Testing Accuracy: {}%'.format(round(accuracy*100,3)))

Epoch 0 | Training Loss: 3.258, Accuracy: 7.031%
Epoch 10 | Training Loss: 2.282, Accuracy: 39.844%
Epoch 20 | Training Loss: 0.044, Accuracy: 99.219%
--------------------------------------------------------------------------------
Testing Accuracy: 96.557%


In [62]:
# 3. Comments on reproducibility of accuracies: Apply gradient clipping
from torch.nn.utils import clip_grad_value_

# revert values
BATCH_SIZE=10
NUM_EPOCHS=15

class CNN(nn.Module):
    def __init__(self, embed_dim, hidden_dim1, hidden_dim2, hidden_dim3, output_dim, kernel_size, pool_k_size, pool_stride):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(embed_dim, hidden_dim1, kernel_size=kernel_size)
        self.conv2 = nn.Conv2d(hidden_dim1, hidden_dim2, kernel_size=kernel_size)
        self.pool = nn.MaxPool2d(kernel_size=pool_k_size, stride=pool_stride)
        self.fc1 = nn.Linear(hidden_dim2*5*5, hidden_dim3)
        self.fc2 = nn.Linear(hidden_dim3, output_dim)
        
    def forward(self, feats_in):
        batch_size, e_, e_, s_ = feats_in.shape
        x = self.pool(F.relu(self.conv1(feats_in.float())))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.fc2(x),dim=1)
        return x


# load data
training_set = FormatDataset(train_data, np.asarray(train_labels_num))
training_generator = DataLoader(training_set, batch_size=BATCH_SIZE, **GENERATOR_PARAMS)

test_batch_size = test_data.shape[0]
test_set = FormatDataset(test_data, np.asarray(test_labels_num))
testing_generator = DataLoader(test_set, batch_size=test_batch_size, **GENERATOR_PARAMS)

########### RUN 1 #############
# load model
model = CNN(embed_dim=1,
            hidden_dim1=20, 
            hidden_dim2=50,
            hidden_dim3=500,
            output_dim=26, 
            kernel_size=(5,5),
            pool_k_size=(2,2),
            pool_stride=2
           ).to(device)
loss_function = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

results_dict = {}
early_stopping = False

# train
for epoch in range(NUM_EPOCHS):
    print('Epoch {}'.format(epoch), '-'*80)
    results = {'train_loss': [], 'train_acc': []}
    for idx, (feats_in, target_out) in enumerate(training_generator):
        feats_in = feats_in.to(device).view(BATCH_SIZE, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        model.zero_grad()
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/len(target_out)
        loss = loss_function(tag_scores, target_out)
        loss.backward()
        clip_grad_value_(model.parameters(), 1)
        optimizer.step()

        results['train_loss'].append(loss.data.item())
        results['train_acc'].append(accuracy)

        if idx%PRINT_ROUND==0:
            print('Step {} | Training Loss: {}, Accuracy: {}%'.format(idx, round(loss.data.item(),3), round(accuracy*100,3)))

    results_dict[epoch] = results
    avg_acc = sum(results['train_acc'])/(idx+1)
    print('Average Accuracy: {}%'.format(round(avg_acc*100,3)))
    
# predict
with torch.no_grad():
    print('-'*80)
    for idx, (feats_in, target_out) in enumerate(testing_generator):
        feats_in = feats_in.to(device).view(test_batch_size, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/test_batch_size
        print('Testing Accuracy: {}%'.format(round(accuracy*100,3)))
        
########### RUN 2 #############
# load model
model = CNN(embed_dim=1,
            hidden_dim1=20, 
            hidden_dim2=50,
            hidden_dim3=500,
            output_dim=26, 
            kernel_size=(5,5),
            pool_k_size=(2,2),
            pool_stride=2
           ).to(device)
loss_function = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

results_dict = {}
early_stopping = False

# train
for epoch in range(NUM_EPOCHS):
    print('Epoch {}'.format(epoch), '-'*80)
    results = {'train_loss': [], 'train_acc': []}
    for idx, (feats_in, target_out) in enumerate(training_generator):
        feats_in = feats_in.to(device).view(BATCH_SIZE, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        model.zero_grad()
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/len(target_out)
        loss = loss_function(tag_scores, target_out)
        loss.backward()
        clip_grad_value_(model.parameters(), 1)
        optimizer.step()

        results['train_loss'].append(loss.data.item())
        results['train_acc'].append(accuracy)

        if idx%PRINT_ROUND==0:
            print('Step {} | Training Loss: {}, Accuracy: {}%'.format(idx, round(loss.data.item(),3), round(accuracy*100,3)))

    results_dict[epoch] = results
    avg_acc = sum(results['train_acc'])/(idx+1)
    print('Average Accuracy: {}%'.format(round(avg_acc*100,3)))
    
# predict
with torch.no_grad():
    print('-'*80)
    for idx, (feats_in, target_out) in enumerate(testing_generator):
        feats_in = feats_in.to(device).view(test_batch_size, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/test_batch_size
        print('Testing Accuracy: {}%'.format(round(accuracy*100,3)))
        
########### RUN 3 #############
# load model
model = CNN(embed_dim=1,
            hidden_dim1=20, 
            hidden_dim2=50,
            hidden_dim3=500,
            output_dim=26, 
            kernel_size=(5,5),
            pool_k_size=(2,2),
            pool_stride=2
           ).to(device)
loss_function = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

results_dict = {}
early_stopping = False

# train
for epoch in range(NUM_EPOCHS):
    print('Epoch {}'.format(epoch), '-'*80)
    results = {'train_loss': [], 'train_acc': []}
    for idx, (feats_in, target_out) in enumerate(training_generator):
        feats_in = feats_in.to(device).view(BATCH_SIZE, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        model.zero_grad()
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/len(target_out)
        loss = loss_function(tag_scores, target_out)
        loss.backward()
        clip_grad_value_(model.parameters(), 1)
        optimizer.step()

        results['train_loss'].append(loss.data.item())
        results['train_acc'].append(accuracy)

        if idx%PRINT_ROUND==0:
            print('Step {} | Training Loss: {}, Accuracy: {}%'.format(idx, round(loss.data.item(),3), round(accuracy*100,3)))

    results_dict[epoch] = results
    avg_acc = sum(results['train_acc'])/(idx+1)
    print('Average Accuracy: {}%'.format(round(avg_acc*100,3)))
    
# predict
with torch.no_grad():
    print('-'*80)
    for idx, (feats_in, target_out) in enumerate(testing_generator):
        feats_in = feats_in.to(device).view(test_batch_size, 32, 32).unsqueeze(1)
        target_out = target_out.to(device)
        tag_scores = model(feats_in).to(device)
        predicted = torch.argmax(tag_scores, dim=1).detach()
        correct = sum([1 for pred,act in zip(predicted,target_out) if pred==act])
        accuracy = correct/test_batch_size
        print('Testing Accuracy: {}%'.format(round(accuracy*100,3)))

Epoch 0 --------------------------------------------------------------------------------
Step 0 | Training Loss: 8.499, Accuracy: 10.0%
Step 50 | Training Loss: 3.262, Accuracy: 0.0%
Step 100 | Training Loss: 3.227, Accuracy: 20.0%
Step 150 | Training Loss: 2.929, Accuracy: 10.0%
Step 200 | Training Loss: 2.837, Accuracy: 20.0%
Step 250 | Training Loss: 3.35, Accuracy: 20.0%
Average Accuracy: 22.584%
Epoch 1 --------------------------------------------------------------------------------
Step 0 | Training Loss: 0.483, Accuracy: 90.0%
Step 50 | Training Loss: 0.793, Accuracy: 70.0%
Step 100 | Training Loss: 1.341, Accuracy: 70.0%
Step 150 | Training Loss: 0.912, Accuracy: 60.0%
Step 200 | Training Loss: 0.613, Accuracy: 90.0%
Step 250 | Training Loss: 1.007, Accuracy: 50.0%
Average Accuracy: 76.242%
Epoch 2 --------------------------------------------------------------------------------
Step 0 | Training Loss: 0.059, Accuracy: 100.0%
Step 50 | Training Loss: 0.219, Accuracy: 90.0%
Step 

Average Accuracy: 95.705%
Epoch 5 --------------------------------------------------------------------------------
Step 0 | Training Loss: 0.09, Accuracy: 100.0%
Step 50 | Training Loss: 0.002, Accuracy: 100.0%
Step 100 | Training Loss: 0.082, Accuracy: 100.0%
Step 150 | Training Loss: 0.068, Accuracy: 100.0%
Step 200 | Training Loss: 0.019, Accuracy: 100.0%
Step 250 | Training Loss: 0.015, Accuracy: 100.0%
Average Accuracy: 98.054%
Epoch 6 --------------------------------------------------------------------------------
Step 0 | Training Loss: 0.039, Accuracy: 100.0%
Step 50 | Training Loss: 0.012, Accuracy: 100.0%
Step 100 | Training Loss: 0.001, Accuracy: 100.0%
Step 150 | Training Loss: 0.17, Accuracy: 90.0%
Step 200 | Training Loss: 0.0, Accuracy: 100.0%
Step 250 | Training Loss: 0.002, Accuracy: 100.0%
Average Accuracy: 99.329%
Epoch 7 --------------------------------------------------------------------------------
Step 0 | Training Loss: 0.0, Accuracy: 100.0%
Step 50 | Training L

Average Accuracy: 99.698%
Epoch 10 --------------------------------------------------------------------------------
Step 0 | Training Loss: 0.001, Accuracy: 100.0%
Step 50 | Training Loss: 0.0, Accuracy: 100.0%
Step 100 | Training Loss: 0.0, Accuracy: 100.0%
Step 150 | Training Loss: 0.0, Accuracy: 100.0%
Step 200 | Training Loss: 0.0, Accuracy: 100.0%
Step 250 | Training Loss: 0.0, Accuracy: 100.0%
Average Accuracy: 100.0%
Epoch 11 --------------------------------------------------------------------------------
Step 0 | Training Loss: 0.0, Accuracy: 100.0%
Step 50 | Training Loss: 0.001, Accuracy: 100.0%
Step 100 | Training Loss: 0.0, Accuracy: 100.0%
Step 150 | Training Loss: 0.0, Accuracy: 100.0%
Step 200 | Training Loss: 0.0, Accuracy: 100.0%
Step 250 | Training Loss: 0.0, Accuracy: 100.0%
Average Accuracy: 100.0%
Epoch 12 --------------------------------------------------------------------------------
Step 0 | Training Loss: 0.0, Accuracy: 100.0%
Step 50 | Training Loss: 0.001, Ac