In [337]:
# Standard Imports
import os
import sys
import math
import evals
import random
import datetime
import argparse
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from copy import copy, deepcopy
from PIL import Image

# Progress bar
from tqdm import tqdm_notebook as tqdm

# Function for shuffling the dataset
from sklearn.utils import shuffle

# Module Imports
#from model import VAE, compute_loss
from utils import build_path, get_label, get_feat, THRESHOLDS

# Torch Imports
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

### Model class

In [338]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from utils import log_normal, log_normal_mixture
# Import the pretrained default model resnet18/resnet50/resnet101
from torchvision.models import resnet50,resnet152,resnet101,efficientnet_v2_l

device = torch.device( "cpu") #"cuda:0" if torch.cuda.is_available() else

class base_class(nn.Module):
    def __init__(self,args):
        super(base_class, self).__init__()
        self.args=args
        self.model=resnet50() #weights='ResNet101_Weights.IMAGENET1K_V1'
        self.model.fc=nn.Flatten() # Flatten the last layer
        print(self.model)
    def forward(self,data):
        return self.model(data)

class VAE(nn.Module):
    def __init__(self, args):
        super(VAE, self).__init__()
        self.args = args
        self.dropout = nn.Dropout(p=args.drop)

        self.base_model=base_class(args)
        
        """Feature encoder"""
        self.fx = nn.Sequential(
            nn.Linear(args.feature_dim, 256), # Set args.feature dim according to flatten shape. By default it is 2048
            nn.ReLU(),
            self.dropout,
            nn.Linear(256, 512,bias=True),
            nn.ReLU(),
            self.dropout,
            nn.Linear(512, 512,bias=True),
            nn.ReLU(),
            self.dropout,
            nn.Linear(512, 256,bias=True),
            nn.ReLU(),
            self.dropout
        )
        self.fx_mu = nn.Linear(256, args.latent_dim,bias=True)
        self.fx_logvar = nn.Linear(256, args.latent_dim,bias=True)

        """Label encoder"""
        self.label_lookup = nn.Linear(args.label_dim, args.emb_size)
        self.fe = nn.Sequential(
            nn.Linear(args.emb_size, 512,bias=True),
            nn.ReLU(),
            self.dropout,
            nn.Linear(512, 256,bias=True),
            nn.ReLU(),
            self.dropout
        )
        self.fe_mu = nn.Linear(256, args.latent_dim,bias=True)
        self.fe_logvar = nn.Linear(256, args.latent_dim,bias=True)

        """Decoder"""
        self.fd = nn.Sequential(
            nn.Linear(args.feature_dim + args.latent_dim, 512,bias=True),
            nn.ReLU(),
            nn.Linear(512, args.emb_size,bias=True),
            nn.LeakyReLU()
        )

    def label_encode(self, x):
        h0 = self.dropout(F.relu(self.label_lookup(x)))
        h = self.fe(h0)
        mu = self.fe_mu(h)
        logvar = self.fe_logvar(h)
        fe_output = {
            'fe_mu': mu,
            'fe_logvar': logvar
        }
        return fe_output

    def feat_encode(self, x):
        #print(x.shape)
        h = self.fx(x)
        mu = self.fx_mu(h)
        logvar = self.fx_logvar(h)
        fx_output = {
            'fx_mu': mu,
            'fx_logvar': logvar
        }
        return fx_output

    def decode(self, z):
        d = self.fd(z)
        d = F.normalize(d, dim=1)
        return d

    def label_forward(self, x, feat):
        n_label = x.shape[1]
        all_labels = torch.eye(n_label).to(device)
        fe_output = self.label_encode(all_labels)
        mu = fe_output['fe_mu']
        
        z = torch.matmul(x, mu) / x.sum(1, keepdim=True)
        print(feat.shape,z.shape)
        label_emb = self.decode(torch.cat((feat, z), 1))

        fe_output['label_emb'] = label_emb
        return fe_output

    def feat_forward(self, x):
        fx_output = self.feat_encode(x)
        mu = fx_output['fx_mu']
        logvar = fx_output['fx_logvar']

        if not self.training:
            z = mu
            z2 = mu
        else:
            z = reparameterize(mu, logvar)
            z2 = reparameterize(mu, logvar)
        feat_emb = self.decode(torch.cat((x, z), 1))
        feat_emb2 = self.decode(torch.cat((x, z2), 1))
        fx_output['feat_emb'] = feat_emb
        fx_output['feat_emb2'] = feat_emb2
        return fx_output

    def forward(self, label, feature):
        # Apply resnet model to get feature embeddings
        feature=self.base_model(feature)
        fe_output = self.label_forward(label, feature)
        label_emb = fe_output['label_emb']
        fx_output = self.feat_forward(feature)
        feat_emb, feat_emb2 = fx_output['feat_emb'], fx_output['feat_emb2']

        embs = self.label_lookup.weight
        label_out = torch.matmul(label_emb, embs)
        feat_out = torch.matmul(feat_emb, embs)
        feat_out2 = torch.matmul(feat_emb2, embs)
        
        fe_output.update(fx_output)
        output = fe_output
        output['embs'] = embs
        output['label_out'] = label_out
        output['feat_out'] = feat_out
        output['feat_out2'] = feat_out2
        output['feat'] = feature
        return output


def reparameterize(mu, logvar):
    std = torch.exp(0.5*logvar)
    eps = torch.randn_like(std)
    return mu + eps*std


def compute_loss(input_label, output, args=None):
    fe_out, fe_mu, fe_logvar, label_emb = \
        output['label_out'], output['fe_mu'], output['fe_logvar'], output['label_emb']
    fx_out, fx_mu, fx_logvar, feat_emb = \
        output['feat_out'], output['fx_mu'], output['fx_logvar'], output['feat_emb']
    fx_out2 = output['feat_out2']
    embs = output['embs']

    fx_sample = reparameterize(fx_mu, fx_logvar)
    fx_var = torch.exp(fx_logvar)
    fe_var = torch.exp(fe_logvar)
    kl_loss = (log_normal(fx_sample, fx_mu, fx_var) - \
        log_normal_mixture(fx_sample, fe_mu, fe_var, input_label)).mean()

    pred_e = torch.sigmoid(fe_out)
    pred_x = torch.sigmoid(fx_out)
    pred_x2 = torch.sigmoid(fx_out2)

    def compute_BCE_and_RL_loss(E):
        #compute negative log likelihood (BCE loss) for each sample point
        sample_nll = -(
            torch.log(E) * input_label + torch.log(1 - E) * (1 - input_label)
        )
        logprob = -torch.sum(sample_nll, dim=2)

        #the following computation is designed to avoid the float overflow (log_sum_exp trick)
        maxlogprob = torch.max(logprob, dim=0)[0]
        Eprob = torch.mean(torch.exp(logprob - maxlogprob), axis=0)
        nll_loss = torch.mean(-torch.log(Eprob) - maxlogprob)
        return nll_loss

    def supconloss(label_emb, feat_emb, embs, temp=1.0):
        features = torch.cat((label_emb, feat_emb))
        labels = torch.cat((input_label, input_label)).float()
        n_label = labels.shape[1]
        emb_labels = torch.eye(n_label).to(device)
        mask = torch.matmul(labels, emb_labels)

        anchor_dot_contrast = torch.div(
            torch.matmul(features, embs),
            temp)
        logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True)
        logits = anchor_dot_contrast - logits_max.detach()

        exp_logits = torch.exp(logits)
        log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True))

        mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1)
        loss = -mean_log_prob_pos
        loss = loss.mean()
        return loss

    nll_loss = compute_BCE_and_RL_loss(pred_e.unsqueeze(0))
    nll_loss_x = compute_BCE_and_RL_loss(pred_x.unsqueeze(0))
    nll_loss_x2 = compute_BCE_and_RL_loss(pred_x2.unsqueeze(0))
    sum_nll_loss = nll_loss + nll_loss_x + nll_loss_x2
    cpc_loss = supconloss(label_emb, feat_emb, embs)
    total_loss = sum_nll_loss * args.nll_coeff + kl_loss * 6. + cpc_loss
    return total_loss, nll_loss, nll_loss_x, 0., 0., kl_loss, cpc_loss, pred_e, pred_x


In [339]:
sys.path.append('./')

METRICS = ['ACC', 'HA', 'ebF1', 'miF1', 'maF1', 'meanAUC', 'medianAUC', 'meanAUPR', 'medianAUPR', 'meanFDR', 'medianFDR', 'p_at_1', 'p_at_3', 'p_at_5']


### Parser values definition

In [340]:
# Create an ArgumentParser object
paser = argparse.ArgumentParser()
args = paser.parse_args("")

# Device 
args.device = torch.device("cpu") #"cuda:0" if torch.cuda.is_available() else 
print(args.device)

# Dataset name
args.dataset='MS_COCO'

# General Model variables
args.seed=100
args.lr=1e-3
args.lr_decay_ratio=0.9
args.lr_decay_times=4
args.nll_coeff=0.5
args.l2_coeff=1
args.c_coeff=0
args.class_weights=1  # Find the class weights depending on count
args.current_step=0 # For Plotting the loss and other variables w.r.t iterations

# Set Dataloader parameters
args.BATCH_SIZE = 64
args.NUM_WORKERS = 2
args.max_epoch=200

# scheduler variables
args.eta_min=2e-4
args.T_mult=2
args.T0=50
args.retrain=input("Enter True or False if you want to retrain")=='True'

# VAE model variables
args.latent_dim=100
args.drop=0.1 # dropout value
args.feature_dim=2048 # Output dimension from flattened ResNet
args.emb_size=100 # What is emb_size ?
args.label_dim=78 # Label Dimension
args.param_setting = "lr-{}_lr-decay_{:.2f}_lr-times_{:.1f}_nll-{:.2f}_l2-{:.2f}_c-{:.2f}".format(args.lr, args.lr_decay_ratio, args.lr_decay_times, args.nll_coeff, args.l2_coeff, args.c_coeff)

# Directory variables
args.summary_dir = 'summary/{}/{}'.format(args.dataset, args.param_setting)
args.model_dir = 'model/model_{}/{}'.format(args.dataset, args.param_setting)
args.checkpoint_path=''


cpu


### Define Seed values

In [341]:
np.random.seed(args.seed) # set the random seed of numpy
torch.manual_seed(args.seed) # set same seed for torch

<torch._C.Generator at 0x2800678feb0>

### DataLoader

In [342]:
DIR = Path("../Dataset/MS_COCO_2017").absolute() # --- Enter Dataset Path Here ---
DATA_DIR = DIR / 'labels'
IMG_DIR_TRAIN = DIR / 'imgs/train/'
IMG_DIR_TEST = DIR / 'imgs/test/'
label_path = DATA_DIR / "labels/categories.csv"
data_csv= DATA_DIR / 'labels/labels_train.csv'

In [343]:
cats = pd.read_csv(label_path, header=None)
cats = list(cats[0])
data = pd.read_csv(data_csv, names=["Image Name"] + cats)
column_sums_sorted = data.drop("Image Name", axis = 1).sum().sort_values(ascending=True)
print(column_sums_sorted)
#data.head(2)

hair drier         159.0
toaster            181.0
parking meter      583.0
bear               791.0
scissors           792.0
                  ...   
cup               7662.0
dining table      9842.0
car              10283.0
chair            10612.0
person           53529.0
Length: 80, dtype: float64


In [344]:
df = data.drop(["hair drier", "toaster"], axis = 1)

# Check if all entries except column 0 are 0
mask = (df.iloc[:, 1:] != 0).any(axis=1)

# Keep rows where at least one entry is not 0
filtered_df = df[mask]

len(data), len(filtered_df)

(97774, 97762)

In [345]:
class CreateDataset(Dataset):
    def __init__(self, csv_path, label_path, transformations, type):
        """
        Args:
            csv_path (string): path to csv file
            transformations: pytorch transforms for transforms and tensor conversion
            train: flag to determine if train or val set
        """
        cats = pd.read_csv(label_path, header=None)
        cats = list(cats[0])
        data = pd.read_csv(csv_path, names=["Image Name"] + cats)
        df = data.drop(["hair drier", "toaster"], axis = 1)

        # Check if all entries except column 0 are 0
        mask = (df.iloc[:, 1:] != 0).any(axis=1)

        # Keep rows where at least one entry is not 0
        filtered_df = df[mask]
        
        # IMPORTANT: dataset needs to be shuffled because by default,
        # the loaded dataset has an ordering which may cause examples of 
        # certain classes from being missed in the training set if following 
        # the below method to create a train/val/test split
        self.data_info = shuffle(filtered_df)
        self.data_info=self.data_info.reset_index()

        # Transforms
        self.transforms = transformations

        # Read the csv file
        self.data_feature = list(filtered_df.iloc[:, 0])
        self.data_label = np.asarray(filtered_df.iloc[:, 1:])
        
        # 70 : 15 : 15 split
        train_split = int(0.7*len(self.data_info))
        val_split = int(0.15*len(self.data_info))

        # validation set
        if type == "train":
            self.image_arr = (self.data_feature[:train_split])
            self.label_arr = (self.data_label[:train_split])
        if type == "val":
            self.image_arr = (self.data_feature[train_split:train_split+val_split])
            self.label_arr = (self.data_label[train_split:train_split+val_split])
        if type == "test":
            self.image_arr = (self.data_feature[train_split+val_split:])
            print(len(self.image_arr))
            self.label_arr = (self.data_label[train_split+val_split:])

        #else:
           # raise AttributeError("'type' attribute can only be train, val, test")
  
        self.label_arr = torch.from_numpy(np.asarray(self.label_arr))
        self.data_len = len(self.label_arr)

    def __getitem__(self, index):
        # Get image name from the pandas df
        single_image_name = self.image_arr[index]
        
        # Open image
        img_as_img = Image.open(str(IMG_DIR_TRAIN) + "/" + single_image_name).convert('RGB')

        img_as_tensor = self.transforms(img_as_img)
        
        single_image_label = self.label_arr[index]

        return  {'images':img_as_tensor.to(torch.float), 'label':single_image_label.to(torch.float)}
    
    def __len__(self):
        return self.data_len


In [346]:
transform = transforms.Compose([
    transforms.Resize((256, 256)), # Resize image to 256 x 256 to ensure consistency in input
    transforms.RandomHorizontalFlip(), # Data Augmentation
    transforms.ToTensor()
    # Best values found for normalization in ImageNet (from pytorch documentation)
])

In [347]:
train_data = CreateDataset(data_csv, label_path,transform, type="train")
val_data = CreateDataset(data_csv, label_path, transform, type="val")
test_data = CreateDataset(data_csv, label_path, transform, type="test")

14665


In [348]:
# Testing dataloader
test_data[0]

{'images': tensor([[[0.0078, 0.0078, 0.0078,  ..., 0.1882, 0.1569, 0.0667],
          [0.0667, 0.0471, 0.0353,  ..., 0.1882, 0.1686, 0.0784],
          [0.1137, 0.0902, 0.0902,  ..., 0.2275, 0.1922, 0.0980],
          ...,
          [0.3490, 0.7373, 0.7804,  ..., 0.6980, 0.7020, 0.7098],
          [0.3373, 0.6980, 0.7059,  ..., 0.4078, 0.4275, 0.4000],
          [0.3608, 0.7451, 0.7137,  ..., 0.0353, 0.0314, 0.0314]],
 
         [[0.0039, 0.0039, 0.0039,  ..., 0.1765, 0.1490, 0.0627],
          [0.0588, 0.0392, 0.0275,  ..., 0.1725, 0.1529, 0.0667],
          [0.1059, 0.0824, 0.0824,  ..., 0.2000, 0.1647, 0.0784],
          ...,
          [0.2824, 0.6275, 0.6510,  ..., 0.6078, 0.6078, 0.6157],
          [0.2745, 0.5882, 0.5765,  ..., 0.3412, 0.3608, 0.3333],
          [0.2980, 0.6353, 0.5843,  ..., 0.0039, 0.0039, 0.0039]],
 
         [[0.0196, 0.0196, 0.0118,  ..., 0.1569, 0.1294, 0.0510],
          [0.0784, 0.0588, 0.0392,  ..., 0.1490, 0.1333, 0.0549],
          [0.1255, 0.0980, 0.0

In [349]:
torch.save(train_data, './data/MS_COCO_2017/train_data.pt')
torch.save(val_data, './data/MS_COCO_2017/val_data.pt')
torch.save(test_data, './data/MS_COCO_2017/test_data.pt')

In [350]:

# Create dataloaders for train, val and test data
train_loader = DataLoader(train_data, batch_size=args.BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_data, batch_size=len(val_data))
test_loader = DataLoader(test_data, batch_size=len(test_data))

In [351]:
train_loader, val_loader, test_loader

(<torch.utils.data.dataloader.DataLoader at 0x2835ab9ac40>,
 <torch.utils.data.dataloader.DataLoader at 0x282dadd0d30>,
 <torch.utils.data.dataloader.DataLoader at 0x2835ab9ac10>)

In [352]:
next(iter(train_loader))

{'images': tensor([[[[0.1059, 0.1020, 0.0980,  ..., 0.5608, 0.5529, 0.5451],
           [0.1059, 0.1020, 0.0980,  ..., 0.5686, 0.5647, 0.5569],
           [0.1059, 0.1020, 0.0980,  ..., 0.5608, 0.5608, 0.5569],
           ...,
           [0.1529, 0.1490, 0.1569,  ..., 0.2000, 0.1882, 0.1176],
           [0.1647, 0.1569, 0.1686,  ..., 0.2275, 0.2235, 0.1569],
           [0.1686, 0.1569, 0.1686,  ..., 0.2392, 0.2275, 0.1765]],
 
          [[0.0510, 0.0471, 0.0431,  ..., 0.4863, 0.4784, 0.4706],
           [0.0510, 0.0471, 0.0431,  ..., 0.4941, 0.4902, 0.4824],
           [0.0510, 0.0471, 0.0431,  ..., 0.4863, 0.4863, 0.4824],
           ...,
           [0.0471, 0.0392, 0.0392,  ..., 0.1529, 0.1451, 0.0784],
           [0.0588, 0.0471, 0.0471,  ..., 0.1725, 0.1686, 0.1059],
           [0.0627, 0.0471, 0.0510,  ..., 0.1725, 0.1647, 0.1098]],
 
          [[0.1176, 0.1137, 0.1098,  ..., 0.3647, 0.3529, 0.3412],
           [0.1176, 0.1137, 0.1098,  ..., 0.3725, 0.3647, 0.3529],
           [0.

### Function call definition

In [353]:
#building the model 
model = VAE(args).to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [354]:
model.train()

VAE(
  (dropout): Dropout(p=0.1, inplace=False)
  (base_model): base_class(
    (model): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_

In [355]:
model=model.to(args.device)

In [356]:
#log the learning rate 
writer = SummaryWriter(log_dir=args.summary_dir)
writer.add_scalar('learning_rate', args.lr)

### Training Functions

In [357]:
# Define Optimizer
args.optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=5e-4)
args.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(args.optimizer, eta_min=args.eta_min, T_0=args.T0, T_mult=args.T_mult)
print(args.retrain)
if args.retrain==True:
    model.load_state_dict(torch.load(args.checkpoint_path))
    print("Model Loaded")

False


In [358]:
# Define the losses

# smooth means average. Every batch has a mean loss value w.r.t. different losses
smooth_nll_loss=0.0 # label encoder decoder cross entropy loss
smooth_nll_loss_x=0.0 # feature encoder decoder cross entropy loss
smooth_c_loss = 0.0 # label encoder decoder ranking loss
smooth_c_loss_x=0.0 # feature encoder decoder ranking loss
smooth_kl_loss = 0.0 # kl divergence
smooth_total_loss=0.0 # total loss
smooth_macro_f1 = 0.0 # macro_f1 score
smooth_micro_f1 = 0.0 # micro_f1 score

best_loss = 1e10
best_iter = 0
best_macro_f1 = 0.0 # best macro f1 for ckpt selection in validation
best_micro_f1 = 0.0 # best micro f1 for ckpt selection in validation
best_acc = 0.0 # best subset acc for ckpt selction in validation

temp_label=[]
temp_pred_x=[]


best_test_metrics = None

In [359]:
# Train Function
def train(model,train_loader, args):
    print("Training Started")
    counter=0
    model=model.to(args.device)
    # smooth means average. Every batch has a mean loss value w.r.t. different losses
    smooth_nll_loss=0.0 # label encoder decoder cross entropy loss
    smooth_nll_loss_x=0.0 # feature encoder decoder cross entropy loss
    smooth_c_loss = 0.0 # label encoder decoder ranking loss
    smooth_c_loss_x=0.0 # feature encoder decoder ranking loss
    smooth_kl_loss = 0.0 # kl divergence
    smooth_total_loss=0.0 # total loss
    smooth_macro_f1 = 0.0 # macro_f1 score
    smooth_micro_f1 = 0.0 # micro_f1 score
    
    for i, data in enumerate(train_loader):
        print("Entered Loop")
        x = data['images'].to(args.device)
        print("Got x")
        targets = data['label'].to(args.device)
        args.optimizer.zero_grad()
        output = model(targets,x)
        print("Output Done!")
        total_loss, nll_loss, nll_loss_x, c_loss, c_loss_x, kl_loss, cpc_loss, _, pred_x = \
                    compute_loss(targets, output, args)
        
        total_loss.backward()
        args.optimizer.step()

        train_metrics = evals.compute_metrics(pred_x.cpu().data.numpy(), targets.cpu().data.numpy(), 0.5, all_metrics=False)
        macro_f1, micro_f1 = train_metrics['maF1'], train_metrics['miF1']
       
        smooth_nll_loss += nll_loss
        smooth_nll_loss_x += nll_loss_x
        smooth_c_loss += c_loss
        smooth_c_loss_x += c_loss_x
        smooth_kl_loss += kl_loss
        smooth_total_loss += total_loss
        smooth_macro_f1 += macro_f1
        smooth_micro_f1 += micro_f1

        counter+=1
        print("Train Func",counter)
        del x,targets,outputs,
        
    nll_loss = smooth_nll_loss / counter
    nll_loss_x = smooth_nll_loss_x / counter
    c_loss = smooth_c_loss / counter
    c_loss_x = smooth_c_loss_x / counter
    kl_loss = smooth_kl_loss / counter
    total_loss = smooth_total_loss / counter
    macro_f1 = smooth_macro_f1 / counter
    micro_f1 = smooth_micro_f1 / counter
       
   
    return model, train_metrics, nll_loss, nll_loss_x, c_loss, c_loss_x, kl_loss, total_loss, macro_f1, micro_f1

In [360]:
# Test function
test_temp_label = []
test_temp_pred_x = []
def test(model, device, test_loader):
    counter = 0
    model.eval()
    with torch.no_grad():
     for i, data in enumerate(test_loader):
        x_test = data['images'].to(device)
        targets = data['label'].to(device)
        output = model(targets,x_test)

        total_loss, nll_loss, nll_loss_x, c_loss, c_loss_x, kl_loss, cpc_loss, _, pred_x = \
                    compute_loss(targets, output, args)

        test_metrics = evals.compute_metrics(pred_x.cpu().data.numpy(), targets.cpu().data.numpy(), 0.5, all_metrics=False)
        macro_f1, micro_f1 = test_metrics['maF1'], test_metrics['miF1']
       
        smooth_nll_loss += nll_loss
        smooth_nll_loss_x += nll_loss_x
        smooth_c_loss += c_loss
        smooth_c_loss_x += c_loss_x
        smooth_kl_loss += kl_loss
        smooth_total_loss += total_loss
        smooth_macro_f1 += macro_f1
        smooth_micro_f1 += micro_f1

        counter+=1
        
    nll_loss = smooth_nll_loss / counter
    nll_loss_x = smooth_nll_loss_x / counter
    c_loss = smooth_c_loss / counter
    c_loss_x = smooth_c_loss_x / counter
    kl_loss = smooth_kl_loss / counter
    total_loss = smooth_total_loss / counter
    macro_f1 = smooth_macro_f1 / counter
    micro_f1 = smooth_micro_f1 / counter
          
    return test_metrics, nll_loss, nll_loss_x, c_loss, c_loss_x, kl_loss, total_loss, macro_f1, micro_f1

In [361]:
def experiment(model, train_loader, device, args):

    for epoch in range(args.max_epoch):
        model.train()
        model, train_metrics, nll_loss, nll_loss_x, c_loss, c_loss_x, kl_loss, total_loss, macro_f1, micro_f1= train(model, train_loader, args)
        train_acc=train_metrics['ACC']
        train_ha_acc=train_metrics['HA']
        print('- Epoch :', epoch+1)
        print('*** Training Metrics ***')
        print('- NLL Loss : %.5f' % nll_loss,'- Total Loss : %.5f' % total_loss, '- Total Accuracy : %.3f',train_acc,'- Hamming Accuracy : %.3f',train_ha_acc)
       
        # Validation
        test_metrics, nll_loss, nll_loss_x, c_loss, c_loss_x, kl_loss, total_loss, macro_f1, micro_f1=test(model,val_loader,args)
        test_acc=test_metrics['ACC']
        test_ha_acc=test_metrics['HA']
        print('- Epoch :', epoch+1)
        print('*** Training Metrics ***')
        print('- NLL Loss : %.5f' % nll_loss,'- Total Loss : %.5f' % total_loss, '- Total Accuracy : %.3f',test_acc,'- Hamming Accuracy : %.3f',test_ha_acc)
       
        '''
        # Learning rate Updatation scheme when loss not decreasing
        if(epoch!=0):
            #print(train_loss[int(epoch)],train_loss[int(epoch-1)])
            if(round(list_train_loss[epoch],5)>=round(list_train_loss[epoch-1],5)):
                counter+=1
            else:
                counter=0
        if(train_loss<0.0002 and counter>=10):
           counter=0
           args.lr = optimizer.param_groups[0]['lr']*0.5
           print("\nLearning rate changed :",args.lr)
           optimizer = optim.Adam(model.parameters(), lr=args.lr,weight_decay=5e-4)
           scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma)


        val_score,_,_,_=test(model, device,test_loader, args)
   
        # Saving the best model which has the highest R2 value
        if(val_score>val_r2_score):  # and score>train_r2_score  # Changed
            val_r2_score=val_score
            train_r2_score=score   
            print("Saving Model of score:",val_score,score)       
            save_checkpoint(epoch, model, optimizer, 'ECC_model.pt')  # Saving the best model
        '''   
        args.scheduler.step()
    
    return args

In [362]:
# Call the train model here

args = experiment(model, train_loader, device, args)

Training Started


Entered Loop
Got x
torch.Size([64, 2048]) torch.Size([64, 100])
Output Done!


UnboundLocalError: local variable 'smooth_nll_loss' referenced before assignment