In [10]:
import argparse
import logging
import os

import matplotlib.pyplot as plt
import numpy as np
import sklearn
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from sklearn.metrics import accuracy_score, hamming_loss, precision_score, recall_score, f1_score
from torch.autograd import Variable
from tqdm import tqdm

import utils
from evaluate import EvaluateMetrics

# THIS needs modification
import models.Inception_V3_finetune.net as net
from models.Inception_V3_finetune import data_loader
from models.Inception_V3_finetune import net as net


%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Instantiating the model from the torch site

In [3]:
def train(model, optimizer, loss_fn, dataloader, metrics, params, img_count, cuda_present):
    """Train the model for one pass over `dataloader` and log the resulting metrics.

    Args:
        model: (torch.nn.Module) the neural network; called as `model(batch)` and expected
            to return a `(primary_output, auxiliary_output)` pair
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: callable invoked as `loss_fn(output, labels, class_weights)` that returns the
            loss for the batch
        dataloader: iterable yielding `(train_batch, labels_batch)` tensor pairs
        metrics: callable invoked as `metrics(output, labels, threshold)` that returns a dict
            mapping metric names to numeric values
        params: (Params) hyperparameters; this function reads `wts`, `threshold`,
            `class_count` and `save_summary_steps`
        img_count: (int) number of rows to pre-allocate for the epoch-level prediction and
            label buffers; must be at least the number of samples the dataloader yields
        cuda_present: (bool) when True, the class weights and every batch are moved to the GPU

    Returns:
        None. All results are reported through `logging`.
    """
    # set model to training mode
    model.train()

    # summary for current training loop and a running average object for loss
    epoch_metric_summ = []
    loss_avg = utils.RunningAverage()
    loss_class_wts = torch.tensor(params.wts, dtype=torch.float32)
    threshold = params.threshold

    # CPU-side buffers collecting predictions / labels for the whole epoch
    y_pred = torch.zeros(img_count, params.class_count)
    y_true = torch.zeros(img_count, params.class_count)

    if cuda_present:
        loss_class_wts = loss_class_wts.cuda()

    seen = 0               # number of samples written into the epoch buffers so far
    last_batch_idx = None  # stays None when the dataloader yields nothing

    for batch_idx, (train_batch, labels_batch) in enumerate(dataloader):
        last_batch_idx = batch_idx

        batch_size = labels_batch.size()[0]
        y_true[seen:seen + batch_size, :] = labels_batch  # build entire array of true labels

        # If CUDA available, move data to GPU.
        # `non_blocking` replaces the old `async` keyword argument, which is a
        # reserved word (and therefore a SyntaxError) from Python 3.7 onwards.
        if cuda_present:
            train_batch = train_batch.cuda(non_blocking=True)
            labels_batch = labels_batch.cuda(non_blocking=True)

        # compute model output
        prim_out, aux_out = model(train_batch)

        # Compute primary, aux and total weighted loss (aux head contributes with weight 0.2)
        loss_prim = loss_fn(prim_out, labels_batch, loss_class_wts)
        loss_aux = loss_fn(aux_out, labels_batch, loss_class_wts)
        loss = loss_prim + 0.2 * loss_aux

        # threshold the primary output and store it for the epoch-level metrics
        y_pred[seen:seen + batch_size, :] = (prim_out.detach() > threshold).int().cpu()
        seen += batch_size

        # clear previous gradients, compute gradients of all variables wrt loss
        optimizer.zero_grad()
        loss.backward()

        # performs updates using calculated gradients
        optimizer.step()

        batch_loss = loss.item()

        # Evaluate metrics only once in a while
        if batch_idx % params.save_summary_steps == 0:
            # detach from the graph and move to cpu before computing metrics on this batch
            summary_batch = metrics(prim_out.detach().cpu(), labels_batch.detach().cpu(), threshold)
            summary_batch['loss'] = batch_loss
            epoch_metric_summ.append(summary_batch)

            metrics_string = " ; ".join("{}: {:05.3f}".format(name, value)
                                        for name, value in summary_batch.items())
            logging.info("Batch: {} : - Train metrics: ".format(batch_idx) + metrics_string)

        # update the average loss
        loss_avg.update(batch_loss)

    if last_batch_idx is None:
        # Nothing was trained on, so there is nothing meaningful to summarise.
        logging.warning("Dataloader yielded no batches; skipping training metrics")
        return

    # Calculate the metrics of the entire training dataset. Only the rows that were
    # actually filled are used, so samples the dataloader never produced (e.g. a dropped
    # partial batch) do not show up as all-zero predictions and labels.
    epoch_metrics = metrics(y_pred[:seen], y_true[:seen], threshold)
    epoch_metrics['loss'] = loss_avg()

    # compute epoch mean of all metrics in summary
    metrics_mean = {name: np.mean([summary[name] for summary in epoch_metric_summ])
                    for name in epoch_metric_summ[0]}
    metrics_string = " ; ".join("{}: {:05.3f}".format(name, value)
                                for name, value in metrics_mean.items())
    logging.info("Batch: {} : - Train set average metrics: ".format(last_batch_idx) + metrics_string)

    train_metrics_string = " ; ".join("{}: {:05.3f}".format(name, value)
                                      for name, value in epoch_metrics.items())
    logging.info("Batch: {} : - metrics for Entire train dataset: ".format(last_batch_idx) + train_metrics_string)
        

In [4]:
def train_and_evaluate(params, train_dataloader, optimizer, loss_fn, metrics, model_dir, cuda_present,
                       model=None):
    """Train the model for `params.num_epochs` epochs, then save a single checkpoint.

    Args:
        params: (Params) hyperparameters; `num_epochs` is read here and the object is
            forwarded to the data loader and to `train`
        train_dataloader: project data loader exposing `load_data`, `load_labels` and
            `data_iterator`
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: loss callable, forwarded unchanged to `train`
        metrics: metrics callable, forwarded unchanged to `train`
        model_dir: (string) stored under the 'checkpoint' key of the saved state
        cuda_present: (bool) forwarded to `train`
        model: (torch.nn.Module, optional) the network to train. When omitted, the
            module-level variable named `model` is used, which is what earlier versions
            of this function relied on implicitly.

    Raises:
        NameError: if `model` is omitted and no module-level `model` is defined.
    """
    if model is None:
        # Backward-compatible fallback: older callers defined `model` in a notebook cell.
        try:
            model = globals()['model']
        except KeyError:
            raise NameError("name 'model' is not defined; pass model=... to train_and_evaluate")

    epochs_run = 0  # keeps the checkpoint epoch well-defined even when num_epochs is 0

    for epoch in range(params.num_epochs):
        # Run one epoch
        logging.info("Epoch {}/{}".format(epoch + 1, params.num_epochs))

        # NOTE(review): images and labels are re-loaded on every epoch; whether that is
        # required depends on the loader implementation, so it is left as is.
        image_dict = train_dataloader.load_data("train", params)
        labels_dict = train_dataloader.load_labels("train", params)

        img_count = len(image_dict)
        data_generator = train_dataloader.data_iterator(params, image_dict, labels_dict)

        # one full pass over the training set
        train(model, optimizer, loss_fn, data_generator, metrics, params, img_count, cuda_present)
        epochs_run = epoch + 1

        # TODO: evaluate on a validation set every epoch and derive `is_best` from it, e.g.
        #   val_metrics = evaluate(model, loss_fn, val_dataloader, metrics, params)
        #   val_acc = val_metrics['accuracy']
        #   is_best = val_acc >= best_val_acc

    # No validation is run yet, so the final weights are always flagged as best.
    is_best = True

    # Save weights once, after the last epoch
    utils.save_checkpoint({'epoch': epochs_run,
                           'state_dict': model.state_dict(),
                           'optim_dict': optimizer.state_dict(),
                           'is_best': is_best,
                           'checkpoint': model_dir})

# Run all the images (resized to 299 x 299 and normalized to zero mean and unit standard deviation) through the pretrained Inception-V3 network, except the final FC layer

In [7]:
# Notebook setup: load hyperparameters, configure logging, build the data loader,
# the Inception-V3 model, the optimizer, and the loss / metric callables.
cur_dir = os.getcwd()

model_dir = "models/Inception_V3_finetune"
json_path = os.path.join(cur_dir,'models/Inception_V3_finetune//params.json')

print(json_path)
assert os.path.isfile(json_path), "No json configuration found at {}".format(json_path)
params = utils.Params(json_path)

# Set the logger
utils.set_logger(os.path.join(model_dir, 'train2.log'))

logging.info("Generating the dataloader")
dataloader = data_loader.Dataloader(params)

cuda_present = torch.cuda.is_available()  # Boolean
print("cuda available" if cuda_present else "cuda not available")

# The output size comes from the hyperparameters instead of a hard-coded 28, because
# `train` sizes its prediction buffers from `params.class_count` and the two must agree.
# NOTE(review): assumes the second argument of myInceptionV3 is the number of classes.
inceptionV3 = net.myInceptionV3(model_dir, params.class_count)

# Fine-tune every layer
for param in inceptionV3.parameters():
    param.requires_grad = True

if cuda_present:
    model = inceptionV3.cuda()
    # only report the transfer when it actually happened
    print("done transferring model to CUDA")
else:
    model = inceptionV3

optimizer = optim.Adam(model.parameters(), lr = params.learning_rate)

#loss_fn = nn.BCEWithLogitsLoss()  # moving to net.py
loss_fn = net.loss_fn

metrics = net.metrics


Generating the dataloader


/home/bony/Deep_Learning_Stanford_CS230/Project/Code/models/Inception_V3_finetune//params.json
/home/bony/Deep_Learning_Stanford_CS230/Project/Data/train299_test
cuda available
done loading weights


RuntimeError: cuda runtime error (2) : out of memory at /pytorch/aten/src/THC/generic/THCStorage.cu:58

In [6]:
# Manual override kept disabled; uncomment to change the flag before training.
# cuda_present = False
train_and_evaluate(
    params=params,
    train_dataloader=dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    metrics=metrics,
    model_dir=model_dir,
    cuda_present=cuda_present,
)

Epoch 1/4


train
datatype is train
image id count =  1884
output image size =   299 299
1884 58 32
mini batch index =  0
torch.Size([32, 3, 299, 299]) torch.Size([32, 28])


RuntimeError: cuda runtime error (2) : out of memory at /pytorch/aten/src/THC/generic/THCStorage.cu:58

In [None]:
import seaborn as sns
import matplotlib as mpl

In [None]:
# Target labels: class index -> human-readable name
LABEL_MAP = {
    0: "Nucleoplasm",
    1: "Nuclear membrane",
    2: "Nucleoli",
    3: "Nucleoli fibrillar center",
    4: "Nuclear speckles",
    5: "Nuclear bodies",
    6: "Endoplasmic reticulum",
    7: "Golgi apparatus",
    8: "Peroxisomes",
    9: "Endosomes",
    10: "Lysosomes",
    11: "Intermediate filaments",
    12: "Actin filaments",
    13: "Focal adhesion sites",
    14: "Microtubules",
    15: "Microtubule ends",
    16: "Cytokinetic bridge",
    17: "Mitotic spindle",
    18: "Microtubule organizing center",
    19: "Centrosome",
    20: "Lipid droplets",
    21: "Plasma membrane",
    22: "Cell junctions",
    23: "Mitochondria",
    24: "Aggresome",
    25: "Cytosol",
    26: "Cytoplasmic bodies",
    27: "Rods & rings"
}

# Defines
DIM_HEIGHT = DIM_WIDTH = 512
N_CHANNELS = 4

# Load the training labels explicitly. Previously this cell read `labels_dict` and
# `key_list`, neither of which is defined at notebook level, so it failed on a fresh kernel.
# NOTE(review): assumes load_labels returns a mapping of image id -> multi-hot label
# vector with one entry per class - confirm against the data loader.
labels_dict = dataloader.load_labels("train", params)

# Sum the label vectors to get the number of occurrences of every class
label = np.zeros(len(LABEL_MAP))
for tgt_label in labels_dict.values():
    label += tgt_label

print(label)
xx = list(LABEL_MAP.values())

plt.figure(figsize=(15, 7))
mpl.style.use('seaborn-darkgrid')
sns.barplot(x = label, y = xx)
plt.title('Target Occurences', fontsize=16)
plt.show()


# Instantiating the model and initializing the weights from a file on my laptop, downloaded from the torch site to the local machine. Don't need this now.

In [None]:
# Build a VGG19-BN network, load locally stored pretrained weights and freeze every layer.
from models.VGG19_1.vgg import VGG
#from models import vgg

# The model used to be assigned to `vgg19_bn` but then referenced as `model_vgg19_bn`,
# which raised NameError. Both names are bound now so existing references keep working.
model_vgg19_bn = VGG.vgg19_bn(pretrained=False)
vgg19_bn = model_vgg19_bn

curdir = os.getcwd()
vgg_dict_path = os.path.join(curdir, "models/VGG19_1/vgg19_bn-c79401a0.pth")
# NOTE: torch.load unpickles the file; only load weight files from a trusted source.
model_vgg19_bn.load_state_dict(torch.load(vgg_dict_path))

# Freeze all parameters so the network is not updated during training
for param in model_vgg19_bn.parameters():
    param.requires_grad = False

In [8]:
# Scratch check: the built-in min() picks the smaller of two integers.
a, b = 10, 100
c = min(a, b)

In [55]:
# Toy multi-label example: random scores for 4 samples x 6 classes and matching 0/1 targets.
# torch.randn already returns a tensor; wrapping it in torch.tensor() only made a
# redundant copy and triggers a UserWarning on current PyTorch versions.
xx = torch.randn((4, 6))
yt = torch.tensor([[1, 0, 0, 1, 0, 0],
                   [0, 1, 1, 0, 0, 0],
                   [0, 0, 0, 0, 1, 1],
                   [0, 0, 0, 0, 1, 0]]).numpy()
print(yt)

[[1 0 0 1 0 0]
 [0 1 1 0 0 0]
 [0 0 0 0 1 1]
 [0 0 0 0 1 0]]


In [56]:
# Binarise the random scores: entries strictly above the threshold become 1, the rest 0.
th = torch.tensor(0.5)
yp = xx.gt(th).int().numpy()
print(xx)
print(yp)

tensor([[-1.0711, -0.3548,  0.5433,  0.9025, -1.7128,  0.5722],
        [ 0.8344,  0.4142, -1.4495, -1.4303,  1.3956,  0.8774],
        [-1.1169,  0.5034,  1.6012,  0.9544,  0.1362,  0.7587],
        [-0.6040,  0.7875,  1.4694,  0.6269,  0.4638, -0.6025]])
[[0 0 1 1 0 1]
 [1 0 0 0 1 1]
 [0 1 1 1 0 1]
 [0 1 1 1 0 0]]


In [57]:
# Per-class F1 and precision for the toy targets / predictions (average=None keeps one
# score per class). With default notebook settings only the last expression is rendered.
F1_score = f1_score(y_true=yt, y_pred=yp, average=None)
precision = precision_score(y_true=yt, y_pred=yp, average=None)
F1_score
precision

array([0.        , 0.        , 0.        , 0.33333333, 0.        ,
       0.33333333])