In [None]:
import torch;
import torch.nn as nn
import torch.nn.functional as F
import torch.utils
import torch.distributions
import torchvision
import numpy as np
import pandas as pd
import torchvision.models as models 
from torchvision import transforms, utils
from torch.utils.data.sampler import SubsetRandomSampler
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.autograd import Variable
from torch.optim import lr_scheduler
import seaborn as sns
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
import copy
from tqdm import tqdm
#from model import VAE

import matplotlib.pyplot as plt; plt.rcParams['figure.dpi'] = 200
from dataloader_pw import *
from cnn_model_1 import *

In [1]:
def _summed_batch_loss(model, loader, criterion, device):
    """Return the sum of per-batch ``criterion`` values of ``model`` over ``loader``.

    Gradients are disabled for the pass. The caller is responsible for putting
    the model in eval mode beforehand. Each batch is expected to unpack into
    three items, of which the first two are the inputs and the targets.
    """
    total = 0
    with torch.no_grad():
        for data, targets, _ in loader:
            data = data.to(device)
            # Flatten targets to (batch, -1) floats so they line up with the model output.
            targets = targets.to(torch.float).view(targets.shape[0], -1).to(device)
            total += criterion(model(data), targets).item()
    return total


def main(iteration):
    """Run one full train / validate / test experiment and write its CSV logs.

    The dataset is split 60/20/20 into train/validation/test subsets (shuffled
    on every call), a ``CNNLSTMNet`` is trained with SGD and MSE loss, and the
    weights with the lowest summed validation loss are checkpointed to
    ``pw_statedict.pt``. Afterwards the checkpoint is reloaded and used to
    predict the test subset.

    Files written (``<i>`` is ``int(iteration)``):
        predscores<i>.csv -- true and predicted test scores
        losses<i>.csv     -- per-epoch train / valid / total summed losses
        testl<i>.csv      -- test loss at each epoch where validation improved

    Args:
        iteration: index of this run; only used to name the output files.

    Returns:
        Tuple ``(besttrloss, bestvalloss, besttsloss, best_e)``: the summed
        train, validation and test losses at the best validation epoch, and
        that epoch number. All four are 0 if validation never improved.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    #### dataset
    csv_path = 'pairwise_lstm.csv'
    img_path = '/home/vip/sayan-mandal/datasets/obj_criteria/good_reduced/'

    preprocess = transforms.Compose([
        transforms.Resize(64),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    CDL = PWDataLoader(csv_path, img_path, transform=preprocess)

    # random sampler
    batch_size = 64
    validation_split = .2
    test_split = .2
    train_split = 1 - validation_split - test_split
    shuffle_dataset = True

    # Creating data indices for the training / validation / test splits:
    dataset_size = len(CDL)
    indices = list(range(dataset_size))
    split_train = int(np.floor(train_split * dataset_size))
    split_valid = int(np.floor((train_split + validation_split) * dataset_size))
    if shuffle_dataset:
        np.random.shuffle(indices)
    train_indices = indices[:split_train]
    val_indices = indices[split_train:split_valid]
    test_indices = indices[split_valid:]

    # Creating PT data samplers and loaders:
    train_loader = torch.utils.data.DataLoader(
        CDL, batch_size=batch_size, sampler=SubsetRandomSampler(train_indices))
    valid_loader = torch.utils.data.DataLoader(
        CDL, batch_size=batch_size, sampler=SubsetRandomSampler(val_indices))
    test_loader = torch.utils.data.DataLoader(
        CDL, batch_size=batch_size, sampler=SubsetRandomSampler(test_indices))

    ##### Model
    if device == 'cuda':
        torch.cuda.empty_cache()
    # Peek at one batch to size the network; batches are 5-D and only the
    # second (time steps) and third (channels) dimensions are needed.
    dat, _, _ = next(iter(train_loader))
    _, ts, C, _, _ = dat.shape
    n_out = 1

    model = CNNLSTMNet(channels=C, ts=ts, n_out=n_out, device=device).to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[100], gamma=0.1)
    criterion = nn.MSELoss()  # this is for regression mean squared loss

    ##### training loop
    epochs = 150
    best_loss = float('inf')
    ep, trainloss, validloss, testloss = [], [], [], []
    testep, totloss = [], []
    besttrloss, bestvalloss, besttsloss = 0, 0, 0
    # Initialised so the return statement cannot hit an unbound name when the
    # validation loss never improves (e.g. every loss is NaN).
    best_e = 0
    checkpoint_saved = False
    for epoch in range(1, epochs + 1):
        model.train()
        trloss = 0
        # Iterate the loader directly: the tqdm wrapper that used to define
        # `loop` is disabled, so referencing it raised NameError.
        for data, targets, _ in train_loader:
            optimizer.zero_grad()
            data = data.to(device)
            targets = targets.to(torch.float).view(targets.shape[0], -1).to(device)

            # forward pass
            scores = model(data)
            loss = criterion(scores, targets)
            trloss += loss.item()

            # backward pass + gradient descent
            loss.backward()
            optimizer.step()
        scheduler.step()

        model.eval()
        valloss = _summed_batch_loss(model, valid_loader, criterion, device)

        if valloss < best_loss:
            # The test loss is only measured at epochs that improve validation.
            tsloss = _summed_batch_loss(model, test_loader, criterion, device)
            testloss.append(tsloss)
            testep.append(epoch)
            best_loss = valloss
            bestvalloss = valloss
            besttrloss = trloss
            besttsloss = tsloss
            best_e = epoch
            torch.save(model.state_dict(), 'pw_statedict.pt')
            checkpoint_saved = True

        ep.append(epoch)
        trainloss.append(trloss)
        validloss.append(valloss)
        totloss.append(trloss + valloss)

    #### testing
    # Only reload weights saved by THIS run; a checkpoint left on disk by an
    # earlier run must not be mistaken for this run's best model.
    if checkpoint_saved:
        model.load_state_dict(torch.load('pw_statedict.pt', map_location=device))
    model.eval()
    true_chunks, pred_chunks = [], []
    with torch.no_grad():
        # No loss is computed here: the reported test loss is the one recorded
        # at the best epoch above, so this pass only collects the scores.
        for data, targets, _ in test_loader:
            scores = model(data.to(device))
            true_chunks.append(targets.view(targets.shape[0], -1).cpu().numpy())
            pred_chunks.append(scores.cpu().numpy())
    # Fall back to empty arrays when the test split yielded no batches.
    true_scores = np.concatenate(true_chunks, axis=0) if true_chunks else np.empty((0, n_out))
    pred_scores = np.concatenate(pred_chunks, axis=0) if pred_chunks else np.empty((0, n_out))

    # Raw string: same column name as before, without the invalid '\m' escape.
    slope_col = r'Slope ($\mu$m/y)'
    run_id = int(iteration)
    dfx = pd.DataFrame.from_dict({slope_col: true_scores.ravel().tolist(), 'Sample': 'true scores'})
    dfy = pd.DataFrame.from_dict({slope_col: pred_scores.ravel().tolist(), 'Sample': 'predicted scores'})
    df = pd.concat(axis=0, ignore_index=True, objs=[dfx, dfy])
    # f-strings replace `str + int`, which raised TypeError.
    df.to_csv(f'predscores{run_id}.csv', index=False)

    ndf = pd.DataFrame({'ep': ep,
                        'train': trainloss,
                        'valid': validloss,
                        'total': totloss})
    ndf.to_csv(f'losses{run_id}.csv', index=False)

    ndf = pd.DataFrame({'ep': testep, 'test': testloss})
    ndf.to_csv(f'testl{run_id}.csv', index=False)
    return besttrloss, bestvalloss, besttsloss, best_e


In [None]:
# Repeat the whole experiment 30 times, keeping each run's best-epoch
# train / validation / test losses and the epoch at which they occurred.
btrl = []
bvl = []
btsl = []
bep = []
for iteration in range(30):
    besttrloss, bestvalloss, besttsloss, best_e = main(iteration)
    btrl.append(besttrloss)
    bvl.append(bestvalloss)
    btsl.append(besttsloss)
    bep.append(best_e)

# One row per run, written out as the cross-run summary table.
best_columns = {
    'ep': bep,
    'train': btrl,
    'valid': bvl,
    'test': btsl,
}
ndf = pd.DataFrame(best_columns)
ndf.to_csv('bestloss.csv', index=False)
