In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import torch.nn.init as init
from torch.autograd import Variable

import os
import time
import numpy as np
from PIL import Image
from utils.dataloader import *
#use AUC for AUC and CI, auc2 for precision, AUC and CI, auc3 precision auc and CI
from utils.auc import *
from utils import new_transforms
import argparse
import random

In [2]:
ngpu = 1
nc = 3
imgSize = 299

step_freq = 20000000


root_dir = '/gpfs/data/abl/deepomics/tsirigoslab/histopathology/Tiles/LngTilesSorted/'
num_classes = 3
tile_dict_path = '/gpfs/data/abl/deepomics/tsirigoslab/histopathology/Tiles/Lng_FileMappingDict.p'


In [3]:
manualSeed = random.randint(1, 10000) # fix seed

random.seed(manualSeed)
torch.manual_seed(manualSeed)

cudnn.benchmark = True

In [4]:
# Random data augmentation
augment = transforms.Compose([new_transforms.Resize((imgSize, imgSize)),
                              transforms.RandomHorizontalFlip(),
                              new_transforms.RandomRotate(),
                              new_transforms.ColorJitter(0.25, 0.25, 0.25, 0.05),
                              transforms.ToTensor(),
                              transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

transform = transforms.Compose([new_transforms.Resize((imgSize,imgSize)),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

data = {}
loaders = {}

dset_type = 'test'
test_data = TissueData(root_dir, dset_type, train_log='/gpfs/scratch/bilals01/test-repo/logs/exp6_train.log', transform = transform, metadata=False)

test_loader = torch.utils.data.DataLoader(test_data, batch_size=32, shuffle=False, num_workers=8)

classes = test_data.classes
class_to_idx = test_data.class_to_idx

print('Class encoding:')
print(class_to_idx)



Loading from: TCGA-LUSC
number of samples: 52735
Class encoding:
{'TCGA-LUSC': 2}


In [5]:
class_to_idx_invert = {v: k for k, v in class_to_idx.items()}
class_to_idx_invert

{2: 'TCGA-LUSC'}

In [6]:
def get_tile_probability(tile_path):

    """
    Returns an array of probabilities for each class given a tile
    @param tile_path: Filepath to the tile
    @return: A ndarray of class probabilities for that tile
    """

    # Some tiles are empty with no path, return nan
    if tile_path == '':
        return np.full(num_classes, np.nan)

    tile_path = root_dir + tile_path

    with open(tile_path, 'rb') as f:
        with Image.open(f) as img:
            img = img.convert('RGB')

    # Model expects a 4D tensor, unsqueeze first dimension
    img = transform(img).unsqueeze(0)
    img = img.cuda()

    # Turn output into probabilities with softmax
    var_img = Variable(img, volatile=True)
    output = F.softmax(model(var_img)).data.squeeze(0)

    return output.cpu().numpy()

with open(tile_dict_path, 'rb') as f:
    tile_dict = pickle.load(f)

    
def aggregate(file_list, method):

    """
    Given a list of files, return scores for each class according to the
    method and labels for those files.
    @param file_list: A list of file paths to do predictions on
    @param method: 'average' - returns the average probability score across
                               all tiles for that file
                   'max' - predicts each tile to be the class of the maximum
                           score, and returns the proportion of tiles for
                           each class
    @return: a ndarray of class probabilities for all files in the list
             a ndarray of the labels
    """

    model.eval()
    predictions = []
    true_labels = []

    for file in file_list:
        tile_paths, label = tile_dict[file]
        folder = class_to_idx_invert[label]

        def add_folder(tile_path):
            if tile_path == '':
                return ''
            else:
                return folder + '/' + tile_path

        # Add the folder for the class name in front
        add_folder_v = np.vectorize(add_folder)
        tile_paths = add_folder_v(tile_paths)

        # Get the probability array for the file
        prob_v = np.vectorize(get_tile_probability, otypes=[np.ndarray])
        probabilities = prob_v(tile_paths)

        """
        imgSize = probabilities.shape()
        newShape = (imgSize[0], imgSize[1], 3)
        probabilities = np.reshape(np.stack(probabilities.flat), newShape)
        """
        
        if method == 'average':
            probabilities = np.stack(probabilities.flat)
            prediction = np.nanmean(probabilities, axis = 0)

        elif method == 'max':
            probabilities = np.stack(probabilities.flat)
            probabilities = probabilities[~np.isnan(probabilities).all(axis=1)]
            votes = np.nanargmax(probabilities, axis=1)           
            out = np.array([sum(votes == i) for i in range(num_classes)])
            prediction = out / out.sum()

        else:
            raise ValueError('Method not valid')

        predictions.append(prediction)
        true_labels.append(label)

    return np.array(predictions), np.array(true_labels)



class BasicConv2d(nn.Module):

    def __init__(self, in_channels, out_channels, pool, **kwargs):
        super(BasicConv2d, self).__init__()

        self.pool = pool
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
        self.relu = nn.LeakyReLU()
        
        self.dropout = nn.Dropout(p=0.1)

    def forward(self, x):
        x = self.conv(x)

        if self.pool:
            x = F.max_pool2d(x, 2)
        
        x = self.relu(x)
        x = self.bn(x)
        x = self.dropout(x)
        return x

# Define model
class cancer_CNN(nn.Module):
    def __init__(self, nc, imgSize, ngpu):
        super(cancer_CNN, self).__init__()
        self.nc = nc
        self.imgSize = imgSize
        self.ngpu = ngpu
        #self.data = opt.data
        self.conv1 = BasicConv2d(nc, 16, False, kernel_size=5, padding=1, stride=2, bias=True)
        self.conv2 = BasicConv2d(16, 32, False, kernel_size=3, bias=True)
        self.conv3 = BasicConv2d(32, 64, True, kernel_size=3, padding=1, bias=True)
        self.conv4 = BasicConv2d(64, 64, True, kernel_size=3, padding=1, bias=True)
        self.conv5 = BasicConv2d(64, 128, True, kernel_size=3, padding=1, bias=True)
        self.conv6 = BasicConv2d(128, 64, True, kernel_size=3, padding=1, bias=True)
        self.linear = nn.Linear(5184, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = x.view(x.size(0), -1)
        x = self.linear(x)
        return x



In [7]:
model = cancer_CNN(3, imgSize, 1)
model.cuda()

model_path = '/gpfs/scratch/bilals01/test-repo/experiments/exp2/checkpoints/epoch_18.pth'
state_dict = torch.load(model_path)
model.load_state_dict(state_dict)


predictions, labels = aggregate(test_data.filenames, method='average')
data = np.column_stack((test_data.filenames,np.asarray(predictions),np.asarray(labels)))





In [8]:
data

array([['test_TCGA-66-2782-01A-01-TS1.87ca26b3-ff31-414d-afa8-4d992870128b',
        '0.06785143173991257', '0.0012589603789908174',
        '0.9308896082118011', '2'],
       ['test_TCGA-39-5036-01A-01-TS1.e9596e31-d551-4130-971a-feaaf8b188ad',
        '0.0987874454695041', '0.013180692141556484',
        '0.8880318617412036', '2'],
       ['test_TCGA-63-A5MN-01A-02-TS2.F4F8AAF4-85AC-438D-969D-0AAECAD81F8E',
        '0.039594568926042406', '0.016352557970564698',
        '0.9440528733837735', '2'],
       ['test_TCGA-60-2721-01A-01-BS1.e9a37468-bda9-4485-8545-fdb49a85fe6a',
        '0.050556040752930985', '0.00016863039277334834',
        '0.9492753281188873', '2'],
       ['test_TCGA-85-8666-01A-01-TS1.8bede180-da7c-46e5-b2ab-fd9c0011e66d',
        '0.017455052978947585', '0.06121043121718893',
        '0.9213345140362015', '2'],
       ['test_TCGA-22-1017-01A-01-TS1.9e5d298a-095d-4784-a73f-a92c0be4fe3a',
        '0.08789440218672584', '0.01538824120327584',
        '0.89671735684835

In [18]:
roc_auc  = get_auc('/gpfs/scratch/bilals01/test-repo/experiments/exp2/outputs',
                   predictions, labels, classes = range(num_classes))

  mean_tpr /= number_class
  if np.any(dx < 0):


In [19]:
roc_auc

{0: nan, 1: nan, 2: nan, 'micro': 0.9975021208407956, 'macro': nan}

In [None]:
classes  = range(num_classes)

In [None]:
labels = label_binarize(labels, classes = classes)
labels

In [None]:
predictions

In [None]:
from sklearn.metrics import roc_curve, auc

fpr = dict()
tpr = dict()
roc_auc = dict()

for i in classes:
    print(i)
    fpr[i], tpr[i], _ = roc_curve(labels[:, i], predictions[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

In [None]:
roc_auc

In [None]:
fpr

In [None]:
tpr

In [None]:
predictions[:,0]

In [None]:
labels[0]