In [1]:
import torch
import torch.optim as optim
import torch.utils.data
import torch.backends.cudnn as cudnn
from torch.utils.data import random_split
from torch.utils.data.dataset import Dataset
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import os

import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
import torchvision
from torchvision import transforms, datasets

In [3]:
# Parse file nums to dicts
annotation_file_path = os.getcwd() + '/../data/annotations'
annotation_ids = []
for file in os.listdir(annotation_file_path):
    filename = os.fsdecode(file)
    file_path = os.path.join(annotation_file_path, filename)

    with open(file_path, 'r') as f:
        for line in f.readlines():
            idx = int(line.replace('\n', ''))
            annotation_ids.append(idx)
            
annotation_ids = list(set(annotation_ids))
train_ids, dev_ids = train_test_split(annotation_ids, test_size=0.2)

In [86]:
#--- hyperparameters ---
N_EPOCHS = 30
BATCH_SIZE_TRAIN = 128
BATCH_SIZE_DEV = 16
LR = 0.001

In [5]:
NUM_CLASSES = 14
DATA_DIR = os.getcwd() + '/../data/'

In [6]:
from collections import defaultdict

class ProjectDataset(Dataset):
    def __init__(self, image_folder, annotation_folder, selected_keys=None, transform=None, target_transform=None):
        data = defaultdict(list)
        self.data = []
        self.classes = []
        
        for file in os.listdir(annotation_folder):
            filename = os.fsdecode(file)
            cls = filename.split('.')[0]
            self.classes.append(cls)
            file_path = os.path.join(annotation_folder, filename)
            
            with open(file_path, 'r') as f:
                for line in f.readlines():
                    idx = int(line.replace('\n', ''))
                    data[idx].append(cls)
        
        for key, val in data.items():
            if not selected_keys or key in selected_keys:
                filename = f'im{key}.jpg'
                labels = torch.Tensor([x in val for x in self.classes])
                self.data.append({'file': filename, 'labels': labels})
        
        self.image_folder = image_folder
        self.transform = transform
        self.target_transform = target_transform
    
    def __getitem__(self, item):
        data = self.data[item]
        labels = data['labels']
        image_path = os.path.join(self.image_folder, data['file'])
        image = torchvision.io.read_image(image_path, torchvision.io.ImageReadMode.RGB)
        image = image.type(torch.FloatTensor)
        image = torch.div(image, 255)
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            labels = self.target_transform(labels)
            
        return image, labels
    
    def __len__(self):
        return len(self.data)

In [7]:
# We transform image files' contents to tensors
# Plus, we can add random transformations to the training data if we like
# Think on what kind of transformations may be meaningful for this data.
# Eg., horizontal-flip is definitely a bad idea for sign language data.
# You can use another transformation here if you find a better one.
train_transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                      #transforms.RandomPerspective(distortion_scale=0.2),
                                      #transforms.ColorJitter(),
                                      #transforms.ToTensor()
                                     ])
dev_transform = transforms.Compose([])

annotation_path = DATA_DIR + 'annotations'
image_path = DATA_DIR + 'images'
train_set = ProjectDataset(image_path, annotation_path, selected_keys=train_ids, transform=train_transform)
dev_set = ProjectDataset(image_path, annotation_path, selected_keys=dev_ids, transform=dev_transform)
# test_set  = datasets.ImageFolder(DATA_DIR % 'test',  transform=test_transform)


# Create Pytorch data loaders
train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=BATCH_SIZE_TRAIN, shuffle=True)
dev_loader = torch.utils.data.DataLoader(dataset=dev_set, batch_size=BATCH_SIZE_DEV, shuffle=True)

In [8]:
#--- model ---
class CNN(nn.Module):
    def __init__(self, num_classes=NUM_CLASSES):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=5, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Linear(32768, NUM_CLASSES)
        self.sigm = nn.Sigmoid()

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        out = self.sigm(out)
        return out

In [69]:
def calc_batch_metrics(targets, preds):
    false_neg = 0
    false_pos = 0
    true_neg = 0
    true_pos = 0
    preds_bin = (preds > 0.5).float()
    
    for t_labels, p_labels in zip(targets, preds_bin):
            for t, p in zip(t_labels, p_labels):
                if(t == 1 and p == 0):
                    false_neg = false_neg + 1
                if(t == 0 and p == 1):
                    false_pos = false_pos + 1
                if(t == 0 and p == 0):
                    true_neg = true_neg + 1
                if(t == 1 and p == 1):
                    true_pos = true_pos + 1
    recall = true_pos / (true_pos + false_pos)
    precision = true_pos / (true_pos + false_neg)
    f1_score = 2 * (precision*recall / (precision+recall))
    return precision, recall, f1_score

In [None]:
#--- set up ---
if torch.cuda.is_available():
    device = torch.device('cuda')
    print('using gpu')
else:
    print('using cpu')
    device = torch.device('cpu')

model = CNN().to(device)

# OPTIMIZERS & REGURALIZATION
optimizer = optim.Adam(model.parameters(), lr=LR, weight_decay=0.05)
loss_function = nn.BCELoss()

#--- TRAINING ---

previous_train_loss = 1000000

batch_losses = []

for epoch in range(N_EPOCHS):
    train_loss = 0
    train_correct = 0
    total = 0
    for batch_num, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        
        # Compute prediction error
        optimizer.zero_grad()
        pred = model(data)
        loss = loss_function(pred, target)

        train_loss += loss.item()
                
        batch_losses.append(loss.item())
        
        total += len(target)
        train_correct += torch.sum((pred > 0.5).float() == target) / torch.sum(target)
        
        precision, recall, f1_score = calc_batch_metrics(target, pred)
        
        # Backpropagation
        loss.backward()
        optimizer.step()

        print('\r', 'Training: Epoch %d - Batch %d/%d: Loss: %.4f | Acc: %.3f%% (%d/%d)' % 
              (epoch, batch_num, len(train_loader), train_loss / (batch_num + 1), 
               100. * train_correct / total, train_correct, total), end='')
        print('---')
        print('Preicision: ' , precision)
        print('Recall: ' , recall) 
        print('F1 Score: ' , f1_score)
        print('---')
    
    #print()
    #dev_loss = 0
    #dev_total = 0
    #dev_correct = 0
    
    #for batch_num, (data, target) in enumerate(dev_loader):
    #    data, target = data.to(device), target.to(device)
    #            
        # Compute prediction error
    #    pred = model(data)
    #    loss = loss_function(pred, target)
    #    dev_loss += loss.item()
    #    
    #    dev_total += len(target)
    #    dev_correct += torch.sum((pred > 0.5).float() == target) / torch.sum(target)
    #    
    #    print('\r', 'Dev: Epoch %d - Batch %d/%d: Loss: %.4f | Acc: %.3f%% (%d/%d)' % 
    #          (epoch, batch_num, len(dev_loader), dev_loss / (batch_num + 1), 
    #           100. * dev_correct / dev_total, dev_correct, dev_total), end='')
    
    print()
    print("Epoch train loss: " + str(train_loss))
    print("Previous epoch dev loss: " + str(previous_train_loss))
    print()
    
    # EARLY STOPPING
    if train_loss > previous_train_loss:
        print('early stopping')
        break
        
    previous_train_loss = train_loss

plt.figure(figsize=(7, 7))
plt.plot(batch_losses)
plt.show()

using cpu


In [84]:
from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd
import numpy as np

#--- train ---
test_loss = 0
test_correct = 0
test_total = 0

test_precision = []
test_recall = []
test_f1_score = []

test_acc_all = 0

preds = []
targets = []

with torch.no_grad():
    for batch_num, (data, target) in enumerate(dev_loader):
        data, target = data.to(device), target.to(device)
        
        pred = model(data)
        loss = loss_function(pred, target)
        
        test_loss += loss.item()
        test_total += len(target)
        test_correct += torch.sum((pred > 0.5).float() == target) / torch.sum(target)
        preds.extend(pred.argmax(1).cpu().numpy())
        targets.extend(target.cpu().numpy())
        
        test_acc_all += (test_correct / total)
        
        precision, recall, f1_score = calc_batch_metrics(target, pred)
        
        test_precision.append(precision)
        test_recall.append(recall)
        test_f1_score.append(f1_score)


        print('Evaluating: Batch %d/%d: Loss: %.4f | Test Acc: %.3f%% (%d/%d)' % 
              (batch_num, len(dev_loader), test_loss / (batch_num + 1), 
               100. * test_correct / test_total, test_correct, test_total))
    
    #cf_matrix = confusion_matrix(targets, preds)
    #df_cm = pd.DataFrame(cf_matrix / np.sum(cf_matrix) * 1000, 
    #                     index = [i for i in annotation_dict_train.keys()],
    #                     columns=[i for i in annotation_dict_train.keys()])
    #plt.figure(figsize=(12, 7))
    #sn.heatmap(df_cm, annot=True)

    
# print('AVG test acc:' + str(test_acc_all*100/len(dev_loader)) + '%')
print('AVG test precision:' + str(np.mean(test_precision)))
print('AVG test recall:' + str(np.mean(test_recall)))
print('AVG test f1 score:' + str(np.mean(test_f1_score)))

Evaluating: Batch 0/128: Loss: 0.3735 | Test Acc: 45.370% (7/16)
Evaluating: Batch 1/128: Loss: 0.3507 | Test Acc: 42.040% (13/32)
Evaluating: Batch 2/128: Loss: 0.4516 | Test Acc: 44.965% (21/48)
Evaluating: Batch 3/128: Loss: 0.4188 | Test Acc: 42.101% (26/64)
Evaluating: Batch 4/128: Loss: 0.4010 | Test Acc: 41.664% (33/80)
Evaluating: Batch 5/128: Loss: 0.4159 | Test Acc: 40.876% (39/96)
Evaluating: Batch 6/128: Loss: 0.4568 | Test Acc: 40.537% (45/112)
Evaluating: Batch 7/128: Loss: 0.5004 | Test Acc: 39.823% (50/128)
Evaluating: Batch 8/128: Loss: 0.4926 | Test Acc: 39.401% (56/144)
Evaluating: Batch 9/128: Loss: 0.4829 | Test Acc: 39.482% (63/160)
Evaluating: Batch 10/128: Loss: 0.4777 | Test Acc: 39.233% (69/176)
Evaluating: Batch 11/128: Loss: 0.4782 | Test Acc: 38.958% (74/192)
Evaluating: Batch 12/128: Loss: 0.4841 | Test Acc: 38.891% (80/208)
Evaluating: Batch 13/128: Loss: 0.4933 | Test Acc: 39.131% (87/224)
Evaluating: Batch 14/128: Loss: 0.4823 | Test Acc: 38.997% (93/24

Evaluating: Batch 120/128: Loss: 0.5214 | Test Acc: 39.097% (756/1936)
Evaluating: Batch 121/128: Loss: 0.5206 | Test Acc: 39.097% (763/1952)
Evaluating: Batch 122/128: Loss: 0.5203 | Test Acc: 39.111% (769/1968)
Evaluating: Batch 123/128: Loss: 0.5196 | Test Acc: 39.065% (775/1984)
Evaluating: Batch 124/128: Loss: 0.5186 | Test Acc: 39.031% (780/2000)
Evaluating: Batch 125/128: Loss: 0.5180 | Test Acc: 39.102% (788/2016)
Evaluating: Batch 126/128: Loss: 0.5176 | Test Acc: 39.114% (794/2032)
Evaluating: Batch 127/128: Loss: 0.5178 | Test Acc: 39.294% (800/2036)
AVG test precision:0.3067263491239578
AVG test recall:0.5466925864480765
AVG test f1 score:0.38982092041021255
