In [1]:
import torchvision
from glob import glob
from torch.utils.data import Dataset

import torch
import torch.nn as nn
import numpy as np
import pandas as pd

### Dataset class for getting positive and negative classes for training

### Creating a dictionary with (Family) \\\\ (ID) as the key and the path to the images under that one person

In [2]:
import pandas as pd
from Utils.DatasetClass import SmileDataset
import os
import random

train_file_path = "./train_relationships.csv"
train_images_path = "./train/"

all_images = glob(train_images_path + "*/*/*.jpg")
train_person_to_images = {}
val_person_to_images = {}

# Getting 0.1 of the total training as validation
percentage_val = 0.1
train_names = [folder for folder in os.listdir(train_images_path) if  os.path.isdir(os.path.join(train_images_path, folder))]
val_families = random.sample(train_names, int(percentage_val * len(train_names)))

train_images = []
val_images = []

for x in all_images:

    if x.split("\\")[-3] not in val_families:

        if x.split("\\")[-3] + "/" + x.split("\\")[-2] not in train_person_to_images:
            train_person_to_images[x.split("\\")[-3] + "/" + x.split("\\")[-2]] = [x]

        else:
            train_person_to_images[x.split("\\")[-3] + "/" + x.split("\\")[-2]].append(x)

        train_images.append(x)
    
    else:
        if x.split("\\")[-3] + "/" + x.split("\\")[-2] not in val_person_to_images:
            val_person_to_images[x.split("\\")[-3] + "/" + x.split("\\")[-2]] = [x]

        else:
            val_person_to_images[x.split("\\")[-3] + "/" + x.split("\\")[-2]].append(x)

        val_images.append(x)

train_people = [x.split("\\")[-3] + "/" + x.split("\\")[-2] for x in train_images]
train_people = list(dict.fromkeys(train_people)) # removing the duplicates

val_people = [x.split("\\")[-3] + "/" + x.split("\\")[-2] for x in val_images]
val_people = list(dict.fromkeys(val_people)) # removing the duplicates

relationships = pd.read_csv(train_file_path)
relationships = list(zip(relationships.p1.values, relationships.p2.values))

train_relationships = [x for x in relationships if x[0] in train_people and x[1] in train_people] #Check if people are in the training dataset
val_relationships = [x for x in relationships if x[0] in val_people and x[1] in val_people]

### Instantiate the SmileDataset class

In [3]:
from torch.utils.data import DataLoader

train_dataset = SmileDataset(relations = train_relationships, person_to_image= train_person_to_images)
trainloader = DataLoader(train_dataset, batch_size= 100, shuffle = True)

val_dataset = SmileDataset(relations = val_relationships, person_to_image= val_person_to_images)
valloader = DataLoader(val_dataset, batch_size= 100, shuffle = True)

### Training time!! :) (Work in progress)

In [4]:
def validate(model, valloader, val_dataset, device, criterion):
    model.eval()
    val_loss = 0.0
    running_corrects = 0
    
    for batch in valloader:
        tensor1, tensor2, label = batch
        tensor1, tensor2, label = tensor1.to(device), tensor2.to(device), label.float().view(-1,1).to(device)
        with torch.no_grad():
            output = model(tensor1, tensor2)
            preds = output>0.5
            loss = criterion(output, label)
            
        val_loss += loss.item()
        running_corrects += torch.sum(preds == (label>0.5))
    
    val_loss /= len(val_dataset)
    val_acc = running_corrects.item()/len(val_dataset)

    return val_loss, val_acc

def train(model, trainloader, train_dataset, optimizer, device, criterion):
    train_loss = 0.0
    running_loss = 0.0
    running_corrects = 0

    for batch in trainloader:
        optimizer.zero_grad()
        
        tensor1, tensor2, label = batch
        tensor1, tensor2, label = tensor1.to(device), tensor2.to(device), label.float().view(-1,1).to(device)
        output = model(tensor1, tensor2)

        preds = output>0.5
        
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        running_loss += loss.item()
        running_corrects += torch.sum(preds == label)

    train_loss /= len(train_dataset)
    train_acc = running_corrects.item()/len(train_dataset)

    return train_loss, train_acc

In [10]:
from Utils.SiameseNet import SiameseNet, SiameseNet_large, MultiEncoding_SiameseNet, MultiEncoding_SiameseNet_Large
import os

name  = 'MultiEncoding_SiameseNet_large'

lr = 0.001

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = MultiEncoding_SiameseNet_Large().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params= model.parameters(), lr = lr)

num_epoch = 100
best_epoch = 0
best_val_acc = 0

history = []
accuracy = []

for epoch in range(num_epoch):
    
    train_loss, train_acc = train(model, trainloader, train_dataset, optimizer, device, criterion)
    val_loss, val_acc  = validate(model, valloader, val_dataset, device, criterion)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_epoch = epoch

        save_path = os.getcwd() +'\\models\\{}_best.pt'.format(name)
        torch.save(model.state_dict(), save_path)


    print('[{}], \tval loss: {:.5}\tacc: {:.5}'.format(epoch+1, val_loss, val_acc))
    print('[{}], \ttrain loss: {:.5}\tacc: {:.5}'.format(epoch+1, train_loss, train_acc))

[1], 	val loss: 0.0066486	acc: 0.6653
[1], 	train loss: 0.0058163	acc: 0.66939
[2], 	val loss: 0.00643	acc: 0.65164
[2], 	train loss: 0.0054019	acc: 0.70244
[3], 	val loss: 0.0061044	acc: 0.68989
[3], 	train loss: 0.005218	acc: 0.71061
[4], 	val loss: 0.0060328	acc: 0.70902
[4], 	train loss: 0.0051097	acc: 0.72613
[5], 	val loss: 0.0065308	acc: 0.66393
[5], 	train loss: 0.0050355	acc: 0.73081
[6], 	val loss: 0.0066831	acc: 0.63661
[6], 	train loss: 0.0048584	acc: 0.73982
[7], 	val loss: 0.0061191	acc: 0.64617
[7], 	train loss: 0.0048953	acc: 0.74316
[8], 	val loss: 0.0069172	acc: 0.6776
[8], 	train loss: 0.0047343	acc: 0.75334
[9], 	val loss: 0.0068181	acc: 0.62295
[9], 	train loss: 0.0047194	acc: 0.75517
[10], 	val loss: 0.0069646	acc: 0.6571
[10], 	train loss: 0.0045373	acc: 0.76552
[11], 	val loss: 0.0070143	acc: 0.66257
[11], 	train loss: 0.0045079	acc: 0.77303
[12], 	val loss: 0.0066945	acc: 0.65164
[12], 	train loss: 0.0042551	acc: 0.78555
[13], 	val loss: 0.0071505	acc: 0.65984


In [11]:
print('best epoch : {}'.format(best_epoch+1))

best epoch : 4


In [12]:
save_path = os.getcwd() +'\\models\\{}_epoch{}.pt'.format(name, epoch+1)
torch.save(model.state_dict(), save_path)