In [6]:
import torchvision
from glob import glob
from torch.utils.data import Dataset

import torch
import torch.nn as nn
import numpy as np
import pandas as pd

### Dataset class for getting positive and negative classes for training

### Creating a dictionary with (Family) \\\\ (ID) as the key and the path to the images under that one person

In [10]:
import pandas as pd
from Utils.DatasetClass import SmileDataset
import os
import random

train_file_path = "./train_relationships.csv"
train_images_path = "./train/"

all_images = glob(train_images_path + "*/*/*.jpg")
train_person_to_images = {}
val_person_to_images = {}

# Getting 0.1 of the total training as validation
percentage_val = 0.1
train_names = [folder for folder in os.listdir(train_images_path) if  os.path.isdir(os.path.join(train_images_path, folder))]
val_families = random.sample(train_names, int(percentage_val * len(train_names)))

train_images = []
val_images = []

for x in all_images:

    if x.split("\\")[-3] not in val_families:

        if x.split("\\")[-3] + "/" + x.split("\\")[-2] not in train_person_to_images:
            train_person_to_images[x.split("\\")[-3] + "/" + x.split("\\")[-2]] = [x]

        else:
            train_person_to_images[x.split("\\")[-3] + "/" + x.split("\\")[-2]].append(x)

        train_images.append(x)
    
    else:
        if x.split("\\")[-3] + "/" + x.split("\\")[-2] not in val_person_to_images:
            val_person_to_images[x.split("\\")[-3] + "/" + x.split("\\")[-2]] = [x]

        else:
            val_person_to_images[x.split("\\")[-3] + "/" + x.split("\\")[-2]].append(x)

        val_images.append(x)

train_people = [x.split("\\")[-3] + "/" + x.split("\\")[-2] for x in train_images]
train_people = list(dict.fromkeys(train_people)) # removing the duplicates

val_people = [x.split("\\")[-3] + "/" + x.split("\\")[-2] for x in val_images]
val_people = list(dict.fromkeys(val_people)) # removing the duplicates

relationships = pd.read_csv(train_file_path)
relationships = list(zip(relationships.p1.values, relationships.p2.values))

train_relationships = [x for x in relationships if x[0] in train_people and x[1] in train_people] #Check if people are in the training dataset
val_relationships = [x for x in relationships if x[0] in val_people and x[1] in val_people]

### Instantiate the SmileDataset class

In [11]:
from torch.utils.data import DataLoader

train_dataset = SmileDataset(relations = train_relationships, person_to_image= train_person_to_images)
trainloader = DataLoader(train_dataset, batch_size= 100, shuffle = True)

val_dataset = SmileDataset(relations = val_relationships, person_to_image= val_person_to_images)
valloader = DataLoader(val_dataset, batch_size= 100, shuffle = True)

### Training time!! :) (Work in progress)

In [12]:
def validate(model, valloader, val_dataset, device, criterion):
    model.eval()
    val_loss = 0.0
    running_corrects = 0
    
    for batch in valloader:
        tensor1, tensor2, label = batch
        tensor1, tensor2, label = tensor1.to(device), tensor2.to(device), label.float().view(-1,1).to(device)
        with torch.no_grad():
            output = model(tensor1, tensor2)
            preds = output>0.5
            loss = criterion(output, label)
            
        val_loss += loss.item()
        running_corrects += torch.sum(preds == (label>0.5))
    
    val_loss /= len(val_dataset)
    val_acc = running_corrects.item()/len(val_dataset)

    return val_loss, val_acc

def train(model, trainloader, train_dataset, optimizer, device, criterion):
    train_loss = 0.0
    running_loss = 0.0
    running_corrects = 0

    for batch in trainloader:
        optimizer.zero_grad()
        
        tensor1, tensor2, label = batch
        tensor1, tensor2, label = tensor1.to(device), tensor2.to(device), label.float().view(-1,1).to(device)
        output = model(tensor1, tensor2)

        preds = output>0.5
        
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        running_loss += loss.item()
        running_corrects += torch.sum(preds == label)

    train_loss /= len(train_dataset)
    train_acc = running_corrects.item()/len(train_dataset)

    return train_loss, train_acc

In [13]:
from Utils.SiameseNet import SiameseNet, SiameseNet_large
import os

lr = 0.001

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = SiameseNet_large().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params= model.parameters(), lr = lr)

num_epoch = 250
best_epoch = 0
best_val_acc = 0

history = []
accuracy = []

for epoch in range(num_epoch):
    
    train_loss, train_acc = train(model, trainloader, train_dataset, optimizer, device, criterion)
    val_loss, val_acc  = validate(model, valloader, val_dataset, device, criterion)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_epoch = epoch

        save_path = os.getcwd() +'\\models\\SiameseNetLarge_best.pt'
        torch.save(model.state_dict(), save_path)


    print('[{}], \tval loss: {:.5}\tacc: {:.5}'.format(epoch+1, val_loss, val_acc))
    print('[{}], \ttrain loss: {:.5}\tacc: {:.5}'.format(epoch+1, train_loss, train_acc))

  from .autonotebook import tqdm as notebook_tqdm


[1], 	val loss: 0.0067308	acc: 0.55224
[1], 	train loss: 0.0067433	acc: 0.56591
[2], 	val loss: 0.0064789	acc: 0.62985
[2], 	train loss: 0.0062383	acc: 0.61909
[3], 	val loss: 0.0066386	acc: 0.60149
[3], 	train loss: 0.0058326	acc: 0.66386
[4], 	val loss: 0.0058546	acc: 0.66418
[4], 	train loss: 0.0058292	acc: 0.66716
[5], 	val loss: 0.0067074	acc: 0.64179
[5], 	train loss: 0.005601	acc: 0.68451
[6], 	val loss: 0.0063645	acc: 0.63134
[6], 	train loss: 0.0055554	acc: 0.68731
[7], 	val loss: 0.0064641	acc: 0.61343
[7], 	train loss: 0.0054356	acc: 0.70152
[8], 	val loss: 0.0061657	acc: 0.66269
[8], 	train loss: 0.0053273	acc: 0.71506
[9], 	val loss: 0.0065514	acc: 0.6403
[9], 	train loss: 0.0053014	acc: 0.70499
[10], 	val loss: 0.0061593	acc: 0.64328
[10], 	train loss: 0.0051506	acc: 0.72762
[11], 	val loss: 0.0065439	acc: 0.6194
[11], 	train loss: 0.0050983	acc: 0.72646
[12], 	val loss: 0.0065423	acc: 0.63284
[12], 	train loss: 0.0051292	acc: 0.72729
[13], 	val loss: 0.006772	acc: 0.6179

In [14]:
print('best epoch : {}'.format(best_epoch))

best epoch : 27


In [15]:
save_path = os.getcwd() +'\\models\\SiameseNetLarge_epoch{}.pt'.format(epoch)
torch.save(model.state_dict(), save_path)