In [1]:
import torchvision
from glob import glob
from torch.utils.data import Dataset

import torch
import torch.nn as nn
import numpy as np
import pandas as pd

### Dataset class for getting positive and negative classes for training

### Creating a dictionary with (Family) \\\\ (ID) as the key and the path to the images under that one person

In [2]:
import pandas as pd
from Utils.DatasetClass import SmileDataset
import os
import random

train_file_path = "./train_relationships.csv"
train_images_path = "./data/baseline/train/"

all_images = glob(train_images_path + "*/*/*.jpg")


train_person_to_images = {}
val_person_to_images = {}

# Getting 0.1 of the total training as validation
percentage_val = 0.1
train_names = [folder for folder in os.listdir(train_images_path) if  os.path.isdir(os.path.join(train_images_path, folder))]
val_families = random.sample(train_names, int(percentage_val * len(train_names)))

train_images = []
val_images = []

for x in all_images:

    if x.split("\\")[-3] not in val_families:

        if x.split("\\")[-3] + "/" + x.split("\\")[-2] not in train_person_to_images:
            train_person_to_images[x.split("\\")[-3] + "/" + x.split("\\")[-2]] = [x]

        else:
            train_person_to_images[x.split("\\")[-3] + "/" + x.split("\\")[-2]].append(x)

        train_images.append(x)
    
    else:
        if x.split("\\")[-3] + "/" + x.split("\\")[-2] not in val_person_to_images:
            val_person_to_images[x.split("\\")[-3] + "/" + x.split("\\")[-2]] = [x]

        else:
            val_person_to_images[x.split("\\")[-3] + "/" + x.split("\\")[-2]].append(x)

        val_images.append(x)

train_people = [x.split("\\")[-3] + "/" + x.split("\\")[-2] for x in train_images]
train_people = list(dict.fromkeys(train_people)) # removing the duplicates

val_people = [x.split("\\")[-3] + "/" + x.split("\\")[-2] for x in val_images]
val_people = list(dict.fromkeys(val_people)) # removing the duplicates

relationships = pd.read_csv(train_file_path)
relationships = list(zip(relationships.p1.values, relationships.p2.values))

#relationships = [x for x in relationships for _ in range(2)] #Adding more data

train_relationships = [x for x in relationships if x[0] in train_people and x[1] in train_people] #Check if people are in the training dataset
val_relationships = [x for x in relationships if x[0] in val_people and x[1] in val_people]

In [3]:
len(relationships)

3598

In [4]:
def validate(model, valloader, val_dataset, device, criterion):
    model.eval()
    val_loss = 0.0
    running_corrects = 0
    
    for batch in valloader:
        tensor1, tensor2, label = batch
        tensor1, tensor2, label = tensor1.to(device), tensor2.to(device), label.float().view(-1,1).to(device)
        with torch.no_grad():
            output = model(tensor1, tensor2)
            preds = output>0.5
            loss = criterion(output, label)
            
        val_loss += loss.item()
        running_corrects += torch.sum(preds == (label>0.5))
    
    val_loss /= len(val_dataset)
    val_acc = running_corrects.item()/len(val_dataset)

    return val_loss, val_acc

def train(model, trainloader, train_dataset, optimizer, device, criterion, scheduler = None):
    train_loss = 0.0
    running_loss = 0.0
    running_corrects = 0

    for batch in trainloader:
        optimizer.zero_grad()
        
        tensor1, tensor2, label = batch
        tensor1, tensor2, label = tensor1.to(device), tensor2.to(device), label.float().view(-1,1).to(device)
        output = model(tensor1, tensor2)

        preds = output>0.5
        
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

        if scheduler:
            scheduler.step()
        
        train_loss += loss.item()
        running_loss += loss.item()
        running_corrects += torch.sum(preds == label)

    train_loss /= len(train_dataset)
    train_acc = running_corrects.item()/len(train_dataset)

    return train_loss, train_acc

In [8]:
from Utils.SiameseNet import SiameseNet, SiameseNet_large, MultiEncoding_SiameseNet, MultiEncoding_SiameseNet_Large
from Utils.SiameseNetLargeLarge import MultiEncoding_SiameseNet_LargeLarge
from torch.optim.lr_scheduler import StepLR
import os
from Utils.EarlyStopper import EarlyStopper

## Random sampling and creation of models

In [9]:
from torch.utils.data import DataLoader

n = 10
sample_percentage = 0.7
length_of_train = len(train_relationships)

batch_size = 128

val_dataset = SmileDataset(relations = val_relationships, person_to_image= val_person_to_images, rgb = True)
valloader = DataLoader(val_dataset, batch_size= batch_size, shuffle = True)

num_epoch = 50
patience = 5

accuracy = []

name  = 'MultiEncoding_SiameseNet_Large_Random_Sample'

for m in range(n):

    print('[Random Sample {}] :'.format(m+1))

    # Get random sample of rtrain data
    random_sample = random.sample(train_relationships, round(sample_percentage * length_of_train))
    train_dataset = SmileDataset(relations = train_relationships, person_to_image= train_person_to_images, rgb= True)
    trainloader = DataLoader(train_dataset, batch_size= batch_size, shuffle = True)

    best_val_acc = 0
    best_epoch = 0

    # Instantiate the model and related stuff
    lr = 0.001
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = MultiEncoding_SiameseNet_Large().to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(params= model.parameters(), lr = lr)
    early_stopper = EarlyStopper(patience=patience, min_delta=0)

    for epoch in range(num_epoch):
        
        train_loss, train_acc = train(model, trainloader, train_dataset, optimizer, device, criterion)
        val_loss, val_acc  = validate(model, valloader, val_dataset, device, criterion)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_epoch = epoch

            save_path = os.getcwd() +'\\models\\random_sample\\{}_best_bagging{}.pt'.format(name, m+1)
            torch.save(model.state_dict(), save_path)


        print('[{}], \tval loss: {:.5}\tacc: {:.5}'.format(epoch+1, val_loss, val_acc))
        print('[{}], \ttrain loss: {:.5}\tacc: {:.5}'.format(epoch+1, train_loss, train_acc))

        if early_stopper.early_stop(val_loss):
            print("Done! Early stopped at {}".format(epoch+1))
            break

[Random Sample 1] :
[1], 	val loss: 0.0055463	acc: 0.69723
[1], 	train loss: 0.0047436	acc: 0.64806
[2], 	val loss: 0.00521	acc: 0.6955
[2], 	train loss: 0.0044345	acc: 0.68663
[3], 	val loss: 0.0049496	acc: 0.72318
[3], 	train loss: 0.0042823	acc: 0.69801
[4], 	val loss: 0.0044767	acc: 0.73875
[4], 	train loss: 0.0042392	acc: 0.71152
[5], 	val loss: 0.0053653	acc: 0.67993
[5], 	train loss: 0.0041701	acc: 0.72486
[6], 	val loss: 0.0052733	acc: 0.69031
[6], 	train loss: 0.0043325	acc: 0.69964
[7], 	val loss: 0.0047519	acc: 0.72837
[7], 	train loss: 0.0041883	acc: 0.71412
[8], 	val loss: 0.0051396	acc: 0.69723
[8], 	train loss: 0.0040615	acc: 0.72502
[9], 	val loss: 0.0049819	acc: 0.69377
[9], 	train loss: 0.0040216	acc: 0.74048
Done! Early stopped at 9
[Random Sample 2] :
[1], 	val loss: 0.005678	acc: 0.70934
[1], 	train loss: 0.0046681	acc: 0.67133
[2], 	val loss: 0.0052992	acc: 0.63841
[2], 	train loss: 0.0045007	acc: 0.68174
[3], 	val loss: 0.0051261	acc: 0.66263
[3], 	train loss: 0.

### Training time!! :) (Work in progress)

In [7]:
save_path = os.getcwd() +'\\models\\{}_epoch{}.pt'.format(name, epoch+1)
torch.save(model.state_dict(), save_path)