In [4]:
import sys
sys.path.append("../")

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F

import scipy.io
from dataset import Dataset, loadTrain
from losses import  cc_loss, weighted_cc_loss, min_loss, naive_loss, iexplr_loss, regularized_cc_loss, sample_loss_function, sample_reward_function, select_loss_function, select_reward_function, svm_loss, cour_loss
from networks import LSTM
from networks import Prediction_Net,LeNet5, Prediction_Net_Linear, Selection_Net, Phi_Net, G_Net_Tie, G_Net_Full, G_Net_Hyperparameter, G_Net_Y, G_Net_XY
import sys
from IPython.core.debugger import Pdb
import random
import csv
import os
import json
import argparse
import numpy as np
import pandas as pd
from torch.optim.lr_scheduler import LambdaLR

# Default DataLoader batch sizes. NOTE: the evaluation script further down in
# this file rebinds both names to 1.
batch_size_train = 64
batch_size_test = 1000
# Optimizer hyperparameters; not referenced by any code visible here.
learning_rate = 0.001
momentum = 0.5
# weighted_train prints a progress line every `log_interval` batches.
log_interval = 10

# Small constant used by weighted_train both as a probability threshold and
# inside log() to avoid log(0).
# NOTE(review): 10e-12 equals 1e-11, not 1e-12 -- confirm which was intended.
epsilon = 10e-12

# Reproducibility
def set_random_seeds(random_seed):
    """Seed the torch, NumPy and stdlib random generators with ``random_seed``.

    cuDNN is switched off globally before seeding.
    """
    torch.backends.cudnn.enabled = False
    # Same seeding order as before: torch, then NumPy, then the stdlib.
    for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
        seed_fn(random_seed)

# Seed every RNG once, as soon as this code runs, so results are repeatable.
set_random_seeds(1)


# NOTE: the CUDA-aware choice on the next line is immediately overridden by
# the line after it, so all code using `device` runs on the CPU regardless of
# GPU availability.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cpu')

def weighted_train(epoch, train_loader, p_net, p_optimizer, g_net, g_optimizer, method, class_dim):
    """Run one training epoch that updates ``p_net`` and ``g_net`` together.

    For every batch a score ``g[i, c]`` is computed with ``g_net`` for each
    (example ``i``, class ``c``) pair, added to ``log_softmax(p_net(data))``
    and turned into a loss over the classes flagged with 1 in ``target``.

    How ``g_net`` is queried depends on substrings of ``method`` (checked in
    this order):

    * ``"loss_xy_lstm"``: for each (example, candidate label) pair the
      candidate labels of that example are fed to ``g_net`` as a
      teacher-forced sequence (ascending label index, then EOS, then NULL
      padding); the score is the sum of the per-step log-probabilities.
    * ``"loss_xy"``: ``g_net((x, y), device)`` is called once per
      (example, candidate label) pair and its outputs are scored as
      independent sigmoid logits against the example's ``target`` row.
    * otherwise: ``g_net`` is called on the one-hot encoding of every class
      and scored the same way.

    In the two ``loss_xy*`` branches the scores of non-candidate classes are
    left at 0; they are masked out by ``target`` in the loss.

    If ``"iexplr"`` is in ``method`` the loss is the negative sum, over
    candidate entries, of ``log_prob`` weighted by the detached probability
    renormalised over each example's candidates, divided by the batch size.
    Otherwise the scores are first normalised by their log-sum-exp over the
    candidates and the loss is the mean negative log of the total probability
    assigned to the candidates.

    Args:
        epoch: Epoch number; only used in the progress message.
        train_loader: Iterable of ``(data, target)`` batches that also has a
            ``dataset`` attribute (its length is printed). ``target`` is used
            as a ``(batch, class_dim)`` matrix whose entries equal to 1 mark
            the candidate labels.
        p_net: Prediction network; called as ``p_net(data)``.
        p_optimizer: Optimizer stepped for ``p_net``.
        g_net: Candidate-set network; call signature depends on ``method``.
            In the LSTM branch its ``hidden_cell`` attribute is reset and its
            ``hidden_layer_size`` attribute is read.
        g_optimizer: Optimizer stepped for ``g_net``.
        method: Technique name; see above.
        class_dim: Number of classes. In the LSTM branch index ``class_dim``
            is EOS and ``class_dim + 1`` is NULL.

    Returns:
        None. Progress is printed every ``log_interval`` batches.
    """
    p_net.train()
    # NOTE(review): g_net is not switched to train mode here -- confirm that
    # is intended (lstm_accuracy puts it into eval mode).

    # Row c is the one-hot encoding of class c (consumed by the default branch).
    one_hot_gpu = torch.zeros((class_dim, class_dim)).to(device)
    diagonal = torch.arange(class_dim)
    one_hot_gpu[diagonal, diagonal] = 1

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        # Clear the gradients of BOTH networks once per batch. Previously
        # g_optimizer.zero_grad() was only reached inside the LSTM branch, so
        # in the other branches g_net gradients accumulated across batches.
        p_optimizer.zero_grad()
        g_optimizer.zero_grad()

        output = p_net(data)
        batch = data.shape[0]

        # class_dim is EOS
        # class_dim + 1 is NULL

        if "loss_xy_lstm" in method:
            # Largest candidate-set size in the batch; every sequence is
            # unrolled for max_req + 1 steps (labels followed by EOS/NULL).
            max_req = int(torch.max(target.sum(dim=1)).item())
            # One row per (example index, candidate label) pair.
            relevant_indices = target.nonzero()

            input_x = data[relevant_indices[:, 0]]
            input_y = relevant_indices[:, 1]
            input_y_onehot = torch.zeros((input_y.shape[0], class_dim + 2)).to(device)
            input_y_onehot.scatter_(1, torch.unsqueeze(input_y, dim=-1), 1)
            input_y = input_y_onehot

            # The same x / y are presented at every sequence step.
            input_x = torch.repeat_interleave(torch.unsqueeze(input_x, dim=0), max_req + 1, dim=0)
            input_y = torch.repeat_interleave(torch.unsqueeze(input_y, dim=0), max_req + 1, dim=0)

            # Append max_req always-on padding columns, weight every column by
            # a decreasing position value and take the top max_req + 1: this
            # yields the candidate labels in ascending index order followed by
            # the padding columns class_dim (EOS), class_dim + 1, ...
            target_set = target[relevant_indices[:, 0]]
            padding = torch.ones(target_set.shape[0], max_req).to(device)
            target_set = torch.cat([target_set, padding], dim=1)

            position_weight = torch.arange(target_set.shape[1], 0, -1).to(device).float()
            indices = torch.topk(target_set * position_weight, k=max_req + 1, dim=1)[1]
            # Everything past EOS collapses onto the single NULL index.
            indices[indices > class_dim] = class_dim + 1
            indices = torch.transpose(indices, dim0=0, dim1=1)
            indices = torch.unsqueeze(indices, dim=-1)

            # One-hot encode the step targets: (steps, pairs, class_dim + 2).
            target_set = torch.zeros(indices.shape[0], indices.shape[1], class_dim + 2).to(device)
            target_set = target_set.scatter(-1, indices, 1)

            g_output = torch.zeros(input_x.shape[1], 1).to(device)

            # Fresh LSTM state for this batch of pairs.
            g_net.hidden_cell = (torch.zeros(1, input_x.shape[1], g_net.hidden_layer_size).to(device),
                                 torch.zeros(1, input_x.shape[1], g_net.hidden_layer_size).to(device))

            # Per-element losses (reduction='none' replaces the deprecated
            # reduce=False); NULL targets contribute nothing.
            criterion = nn.CrossEntropyLoss(reduction='none', ignore_index=class_dim + 1)

            for seq_step in range(max_req + 1):
                x = input_x[seq_step]
                y = input_y[seq_step]
                s = target_set[seq_step]

                # Teacher forcing: the previous gold step is the third input;
                # at the first step the label encoding itself is used.
                previous = y if seq_step == 0 else target_set[seq_step - 1]
                y_pred = g_net(x.clone(), y.clone(), previous.clone())
                y_pred = y_pred.log_softmax(dim=1)

                # Accumulate the log-probability of the gold step.
                g_output -= criterion(y_pred, s.argmax(dim=1)).unsqueeze(-1)

            g_output = g_output.flatten()

            # Scatter the pair scores back into a dense (batch * class_dim)
            # vector; non-candidate entries stay 0.
            temp = torch.zeros((batch * class_dim)).to(device)
            project_index = relevant_indices[:, 0] * class_dim + relevant_indices[:, 1]
            temp[project_index] = g_output
            g_output = temp

        elif "loss_xy" in method:
            relevant_indices = target.nonzero()
            input_x = data[relevant_indices[:, 0]]
            input_y = relevant_indices[:, 1]

            g_output = g_net((input_x, input_y), device)
            log_sigmoid = nn.LogSigmoid()

            # Log-likelihood of the example's target row under independent
            # sigmoid outputs, summed over labels.
            target_concat = target[relevant_indices[:, 0]]
            g_output = log_sigmoid(g_output) * target_concat + (log_sigmoid(-g_output)) * (1 - target_concat)
            g_output = g_output.sum(dim=1)

            temp = torch.zeros((batch * class_dim)).to(device)
            project_index = relevant_indices[:, 0] * class_dim + relevant_indices[:, 1]
            temp[project_index] = g_output
            g_output = temp
        else:
            # Row i * class_dim + c pairs the one-hot of class c with the
            # target row of example i.
            one_hot = one_hot_gpu.expand(batch, class_dim, class_dim).reshape(batch * class_dim, class_dim)
            g_output = g_net(one_hot)

            log_sigmoid = nn.LogSigmoid()
            target_concat = target.repeat_interleave(class_dim, dim=0)
            g_output = log_sigmoid(g_output) * target_concat + (log_sigmoid(-g_output)) * (1 - target_concat)

            g_output = g_output.sum(dim=1)

        split_g_output = g_output.view(batch, class_dim)

        if 'iexplr' in method:
            log_prob = split_g_output + torch.log_softmax(output, dim=1)
            # The weights are treated as constants (no gradient through them).
            prob = torch.exp(log_prob).detach()
            target_probs = (prob * target.float()).sum(dim=1)
            # Only candidate entries with non-negligible probability count.
            mask = ((target == 1) & (prob.abs() > epsilon))
            loss = -(prob[mask] * log_prob[mask] / target_probs.unsqueeze(1).expand_as(mask)[mask]).sum() / mask.size(0)
        else:
            mask = (target != 1)

            # norm = log-sum-exp of the g scores over the candidates, computed
            # with the usual max-shift for numerical stability.
            log_max_prob2 = split_g_output.masked_fill(mask, -float('inf')).max(dim=1)[0]
            exp_argument2 = split_g_output - log_max_prob2.unsqueeze(dim=1)
            norm = log_max_prob2 + torch.log((target * torch.exp(exp_argument2 * target)).sum(dim=1))

            log_target_prob = split_g_output - norm.unsqueeze(dim=1) + F.log_softmax(output, dim=1)

            # Second max-shifted log-sum-exp, now over the combined scores.
            log_max_prob = log_target_prob.masked_fill(mask, -float('inf')).max(dim=1)[0]
            exp_argument = log_target_prob - log_max_prob.unsqueeze(dim=1)

            summ = (target * torch.exp(exp_argument * target)).sum(dim=1)
            log_total_prob = log_max_prob + torch.log(summ + epsilon)
            loss = (-1.0 * log_total_prob).mean(dim=-1)

        loss.backward()

        p_optimizer.step()
        g_optimizer.step()

        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

         
def p_accuracy(test_data, p_net, loss_function):
    """Evaluate ``p_net`` on every batch of ``test_data`` without gradients.

    Args:
        test_data: Iterable of ``(data, target)`` batches exposing a
            ``dataset`` attribute whose length is the normalising count.
        p_net: Network to evaluate; put into eval mode here.
        loss_function: Callable ``(output, target)`` whose result is weighted
            by the batch length before being accumulated.

    Returns:
        dict with
        ``'acc'``: 100 * (sum of ``target`` values at the arg-max class) / N,
        ``'loss'``: accumulated weighted loss / N,
        ``'confidence'``: summed soft-max probability of the arg-max class / N,
        where N is ``len(test_data.dataset)``.
    """
    p_net.eval()

    hits = 0
    total_loss = 0
    total_confidence = 0

    with torch.no_grad():
        for inputs, labels in test_data:
            inputs, labels = inputs.to(device), labels.to(device)
            logits = p_net.forward(inputs)

            # Index of the highest-scoring class, kept as a column for gather.
            _, top_class = logits.data.max(1, keepdim=True)

            class_probs = torch.softmax(logits, dim=1)
            total_confidence += torch.gather(class_probs, 1, top_class).sum()

            hits += torch.gather(labels, 1, top_class).sum()
            batch_loss = loss_function(logits, labels)
            total_loss += batch_loss * len(inputs)

    accuracy = 100. * float(hits.item()) / len(test_data.dataset)
    mean_loss = total_loss.item() / (len(test_data.dataset))
    mean_confidence = total_confidence.item() / len(test_data.dataset)
    return {'acc': accuracy, 'loss': mean_loss, 'confidence': mean_confidence}

def intersection(s1, s2):
    """Count the pairs ``(a, b)`` with ``a`` from ``s1``, ``b`` from ``s2`` and ``a == b``.

    Every matching pair is counted, so repeated elements are counted once per
    combination.
    """
    return sum(1 for left in s1 for right in s2 if left == right)

def union(s1, s2):
    """Return ``len(s1) + len(s2)`` minus the pair count from ``intersection``."""
    combined_length = len(s1) + len(s2)
    return combined_length - intersection(s1, s2)

In [5]:
def _greedy_decode_label_set(g_net, data, gold_y, first_input, class_dim):
    """Greedily decode one label set from ``g_net`` for a single example.

    ``first_input`` is passed as the third argument of ``g_net`` at the first
    step; afterwards the one-hot encoding of the previous arg-max is fed back.
    After a label is emitted only indices greater than it stay unmasked.
    Decoding stops when index ``class_dim`` (EOS) is selected.

    Returns:
        The list of emitted label indices, or ``None`` if the masked soft-max
        output contains NaN.
    """
    row = torch.arange(class_dim + 1).to(device)
    mask = (row > -1).unsqueeze(0).float()  # all indices allowed initially
    y = first_input
    pred_set = []
    while True:
        y_pred = g_net(data.clone(), gold_y.clone(), y.clone())
        y_pred = y_pred.softmax(dim=1) * mask

        # Must run BEFORE the in-place overwrite below: NaN != 0 is True, so
        # the overwrite would erase every NaN and the check could never fire.
        if torch.isnan(y_pred.sum()):
            return None

        y = y_pred.argmax(dim=1)
        # Turn the distribution into the one-hot of the selected index.
        y_pred[y_pred != 0] = 0
        y_pred[0, y] = 1

        if y.item() == class_dim:
            return pred_set
        mask = (row > y).unsqueeze(0).float()
        pred_set.append(y.item())
        y = y_pred


def lstm_accuracy(epoch, real_train_loader, train_loader, g_net, technique, class_dim):
    """Score the label sets decoded from ``g_net`` by intersection-over-union.

    The reference set of example ``i`` is the list of non-zero column indices
    of the ``i``-th ``target`` yielded by ``train_loader``. Two averages are
    computed over ``real_train_loader``:

    * decoding conditioned on the example's own ``target`` (first element of
      the result), and
    * decoding conditioned on a one-hot of each reference label that differs
      from ``target.argmax()``, averaged per example (second element).
      Examples without such a label are excluded from the second average.

    Both loaders must yield one example per batch (``.item()`` and row-0
    indexing are used) and must iterate in the same order, because reference
    sets are looked up by batch index.

    Args:
        epoch: Unused.
        real_train_loader: Loader of ``(data, target)`` pairs used as model
            input.
        train_loader: Loader whose ``target`` rows define the reference sets.
        g_net: Model called as ``g_net(x, gold_y, previous_y)``; put into eval
            mode. Its ``hidden_cell`` is reset once per example and its
            ``hidden_layer_size`` attribute is read.
        technique: Unused.
        class_dim: Number of classes; index ``class_dim`` is treated as EOS.

    Returns:
        ``(mean_iou, mean_iou_neg)``, or ``(0, 0)`` as soon as the model
        produces NaN.

    Raises:
        ZeroDivisionError: if there are no examples, if a reference set and
            its prediction are both empty, or if no example contributes to
            the second average.
    """
    g_net.eval()

    correct = [target.nonzero()[:, 1].tolist() for _, target in train_loader]
    predicted = []

    sum_iou_neg = 0
    remove = 0

    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(real_train_loader):
            data, target = data.to(device), target.to(device)

            # NOTE(review): the state is reset once per example, not once per
            # decoded label below -- confirm whether g_net carries hidden_cell
            # across calls and whether that is intended.
            g_net.hidden_cell = (torch.zeros(1, data.shape[0], g_net.hidden_layer_size).to(device),
                                 torch.zeros(1, data.shape[0], g_net.hidden_layer_size).to(device))

            count = 0
            run_sum = 0

            for gold_y_prime in correct[batch_idx]:
                if gold_y_prime == target.argmax():
                    continue
                gold_y = torch.zeros_like(target).to(device)
                gold_y[0, gold_y_prime] = 1

                pred_set = _greedy_decode_label_set(g_net, data, gold_y, gold_y, class_dim)
                if pred_set is None:
                    return (0, 0)

                inter = intersection(correct[batch_idx], pred_set)
                unio = union(correct[batch_idx], pred_set)
                count += 1
                run_sum += float(inter) / unio

            if count == 0:
                remove += 1
            else:
                sum_iou_neg += run_sum / count

        for data, target in real_train_loader:
            data, target = data.to(device), target.to(device)

            g_net.hidden_cell = (torch.zeros(1, data.shape[0], g_net.hidden_layer_size).to(device),
                                 torch.zeros(1, data.shape[0], g_net.hidden_layer_size).to(device))

            pred_set = _greedy_decode_label_set(g_net, data, target, target, class_dim)
            if pred_set is None:
                return (0, 0)
            predicted.append(pred_set)

    count = 0
    sum_iou = 0
    for reference, prediction in zip(correct, predicted):
        inter = intersection(reference, prediction)
        unio = union(reference, prediction)
        sum_iou += float(inter) / unio
        count += 1

    return (float(sum_iou) / count, float(sum_iou_neg) / (count - remove))




In [14]:
# Evaluation driver: for every dataset, load the saved checkpoint of one fixed
# technique/fold, compute the label-set IoU metrics with lstm_accuracy on the
# train/val/test splits and write them next to the existing training log.
dump_dir = "../results/lstm"
dataset_folder = "../datasets"
datasets = "MSRCv2"
datasets = [str(item) for item in datasets.split(',')]
model = '3layer'
k = 10


for filename in datasets:
    # Technique directory, checkpoint file and fold are fixed for this run.
    directory = "weighted_loss_xy_lstm_iexplr_Adam_0.01_0.0001_100_freeze"
    model = "train_best.pth"
    fold_no = int(float(4))

    train_dataset, real_train_dataset, val_dataset, real_val_dataset, test_dataset, real_test_dataset, input_dim, output_dim = loadTrain(os.path.join(dataset_folder, filename + ".mat"), fold_no, k)

    # lstm_accuracy decodes one example at a time and matches the two loaders
    # by position, hence batch size 1 and no shuffling.
    batch_size_test = 1
    batch_size_train = 1
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size_train, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size_test, shuffle=False)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size_test, shuffle=False)

    real_train_loader = torch.utils.data.DataLoader(real_train_dataset,
                                                    batch_size=batch_size_train, shuffle=False)
    real_test_loader = torch.utils.data.DataLoader(real_test_dataset,
                                                   batch_size=batch_size_test, shuffle=False)
    real_val_loader = torch.utils.data.DataLoader(real_val_dataset,
                                                  batch_size=batch_size_test, shuffle=False)

    if 'lstm' not in directory:
        continue

    dataset_technique_path = os.path.join(filename, '3layer', directory, str(fold_no))

    p_net = Prediction_Net(input_dim, output_dim)
    p_net.to(device)

    g_net = LSTM(input_dim, output_dim, directory)
    g_net.to(device)

    # Skip runs that have no saved models or no training log.
    if not os.path.exists(os.path.join(dump_dir, filename, '3layer', directory, str(fold_no), "models")):
        continue
    result_log_filename_json = os.path.join(dump_dir, dataset_technique_path, "logs", "log.json")
    if not os.path.exists(result_log_filename_json):
        continue
    df = pd.read_json(result_log_filename_json, orient='records', lines=True)

    # One entry per log row; -1 marks rows for which no IoU was computed.
    tr_accuracies = [-1] * df.shape[0]
    val_accuracies = [-1] * df.shape[0]
    test_accuracies = [-1] * df.shape[0]

    tr_accuracies_n = [-1] * df.shape[0]
    val_accuracies_n = [-1] * df.shape[0]
    test_accuracies_n = [-1] * df.shape[0]

    print(filename + " " + str(fold_no) + " " + model + " " + directory)
    if 'pth' not in model:
        continue
    train_checkpoint = os.path.join(dump_dir, filename, '3layer', directory, str(fold_no), "models", model)
    # map_location keeps the load working when the checkpoint was written on
    # a GPU but `device` is the CPU.
    checkpoint = torch.load(train_checkpoint, map_location=device)

    p_net.load_state_dict(checkpoint['p_net_state_dict'])
    g_net.load_state_dict(checkpoint['g_net'].state_dict())

    # Results of a "*real*" checkpoint go into the last log row, all others
    # into the second to last.
    row_slot = -1 if "real" in model else -2
    splits = (
        (real_train_loader, train_loader, tr_accuracies, tr_accuracies_n),
        (real_val_loader, val_loader, val_accuracies, val_accuracies_n),
        (real_test_loader, test_loader, test_accuracies, test_accuracies_n),
    )
    for split_no, (real_loader, candidate_loader, iou_column, iou_neg_column) in enumerate(splits):
        acc = lstm_accuracy(-1, real_loader, candidate_loader, g_net, directory, output_dim)
        print(acc)
        # lstm_accuracy returns (0, 0) when the model output is NaN; this is
        # only reported for the training split.
        if split_no == 0 and acc[0] == 0:
            print("NAN")
        iou_column[row_slot] = acc[0]
        iou_neg_column[row_slot] = acc[1]

    df['train_IOU'] = tr_accuracies
    df['val_IOU'] = val_accuracies
    df['test_IOU'] = test_accuracies

    df['train_IOU_neg'] = tr_accuracies_n
    df['val_IOU_neg'] = val_accuracies_n
    df['test_IOU_neg'] = test_accuracies_n

    # Written beside the original log as "log.json_lstm".
    df.to_json(result_log_filename_json + "_lstm", orient='records', lines=True)


  torch.nn.init.xavier_uniform(self.fc1.weight)
  torch.nn.init.xavier_uniform(self.fc2.weight)
  torch.nn.init.xavier_uniform(self.fc3.weight)


MSRCv2 4 train_best.pth weighted_loss_xy_lstm_iexplr_Adam_0.01_0.0001_100_freeze
(0.8004801587301584, 0.36206921986516954)
(0.5595714285714289, 0.2764754357775831)
(0.5536099773242631, 0.2906220963995356)
