In [5]:
import argparse
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.nn.parallel
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
from matplotlib.pyplot import GridSpec
from PIL import Image
from torch.autograd import Variable

# The working directory is moved up one level before the project imports,
# presumably so that the `utils` package and the relative 'experiments/...'
# paths resolve. NOTE(review): re-running this cell moves up one more level.
os.chdir('..')

from utils.dataloader import *
from utils.auc import *
from utils import new_transforms

%matplotlib inline

In [6]:
# Directory holding one sub-folder of tiles per class name.
root_dir = "/beegfs/jmw784/Capstone/LungTilesSorted/"
# Number of output classes (size of the model's final linear layer).
num_classes = 3
# Pickle file that is loaded into `tile_dict` further down.
tile_dict_path = '/beegfs/jmw784/Capstone/Lung_FileMappingDict.p'
# Side length (in pixels) every tile is resized to before inference.
imgSize = 299

In [7]:
# Preprocessing applied to every tile: resize to imgSize x imgSize, convert to
# a tensor, then normalise each channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    new_transforms.Resize((imgSize, imgSize)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Test split of the tile dataset and a sequential (unshuffled) loader over it.
test_data = TissueData(root_dir, 'test', transform=transform, metadata=False)
test_loader = torch.utils.data.DataLoader(dataset=test_data,
                                          batch_size=32,
                                          shuffle=False)

Loading from: Solid_Tissue_Normal
number of samples: 16085
Loading from: TCGA-LUSC
number of samples: 41969
Loading from: TCGA-LUAD
number of samples: 40063


In [8]:
# Class names exposed by the dataset. The integer labels stored in tile_dict
# are used as indices into this list (see aggregate).
classes = test_data.classes
classes[0]

'Solid_Tissue_Normal'

In [10]:
def get_tile_probability(tile_path):

    """
    Returns an array of probabilities for each class given a tile

    Relies on the notebook-level globals `root_dir`, `transform`, `model`
    and `num_classes`.

    @param tile_path: Filepath to the tile, relative to `root_dir`; the empty
                      string marks a grid position that has no tile
    @return: A ndarray of class probabilities for that tile (all nan when
             tile_path is empty)
    """

    # Some tiles are empty with no path, return nan
    if tile_path == '':
        return np.full(num_classes, np.nan)

    # os.path.join works whether or not root_dir ends with a separator
    full_path = os.path.join(root_dir, tile_path)

    with open(full_path, 'rb') as f:
        with Image.open(f) as img:
            img = img.convert('RGB')

    # Model expects a 4D tensor, unsqueeze first dimension
    batch = transform(img).unsqueeze(0)

    # Inference only, so no autograd graph should be built. `volatile=True`
    # is silently ignored by current PyTorch; torch.no_grad() is its
    # replacement. The old spelling is kept as a fallback for versions that
    # predate torch.no_grad().
    if hasattr(torch, 'no_grad'):
        with torch.no_grad():
            logits = model(batch)
    else:
        logits = model(Variable(batch, volatile=True))

    # Turn output into probabilities with softmax over the class dimension
    # (dim 1 of the (1, num_classes) output); the implicit dim is deprecated.
    probabilities = F.softmax(logits, dim=1).data.squeeze(0)

    # Debug hint: to list very confident tiles, print tile_path and
    # probabilities here whenever a class probability exceeds 0.98.

    return probabilities.numpy()

In [11]:
# Load the mapping used by aggregate(): file name -> (grid of tile paths, label).
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load mapping files from a trusted source.
with open(tile_dict_path, 'rb') as f:
    tile_dict = pickle.load(f)

In [12]:
def aggregate(file_list, method):

    """
    Given a list of files, return scores for each class according to the
    method and labels for those files.

    Relies on the notebook-level globals `model`, `tile_dict`, `classes`,
    `num_classes` and `get_tile_probability`.

    @param file_list: A list of file paths to do predictions on
    @param method: 'average' - returns the average probability score across
                               all tiles for that file
                   'max' - predicts each tile to be the class of the maximum
                           score, and returns the proportion of tiles for
                           each class

    @return: a ndarray of class probabilities for all files in the list
             (a row of nan for a file without any non-empty tile)
             a ndarray of the labels

    @raise ValueError: if method is neither 'average' nor 'max'

    """

    # Fail fast: reject a bad method before running the model on any tile
    if method not in ('average', 'max'):
        raise ValueError('Method not valid')

    model.eval()
    predictions = []
    true_labels = []

    for slide_name in file_list:
        tile_paths, label = tile_dict[slide_name]

        # Tiles are stored under a folder named after the slide's class
        folder = classes[label]

        # One probability vector per grid position, in row-major order.
        # Empty positions are passed through as '' and come back as nan.
        tile_probs = [
            get_tile_probability('' if tile_path == '' else folder + '/' + tile_path)
            for tile_path in np.asarray(tile_paths).flat
        ]

        if tile_probs:
            probabilities = np.stack(tile_probs)
            # Drop the all-nan rows produced by empty grid positions
            probabilities = probabilities[~np.isnan(probabilities).all(axis=1)]
        else:
            probabilities = np.empty((0, num_classes))

        if len(probabilities) == 0:
            # No usable tile: there is no evidence for any class
            prediction = np.full(num_classes, np.nan)

        elif method == 'average':
            prediction = np.nanmean(probabilities, axis=0)

        else:  # method == 'max'
            votes = np.nanargmax(probabilities, axis=1)
            counts = np.bincount(votes, minlength=num_classes)
            prediction = counts / float(counts.sum())

        predictions.append(prediction)
        true_labels.append(label)

    return np.array(predictions), np.array(true_labels)

In [13]:
class BasicConv2d(nn.Module):
    """
    Convolution block: conv -> optional 2x2 max-pool -> LeakyReLU ->
    batch norm -> dropout (p=0.1).

    @param in_channels: number of input channels of the convolution
    @param out_channels: number of output channels of the convolution
    @param pool: if truthy, apply a 2x2 max-pool right after the convolution
    @param kwargs: forwarded unchanged to nn.Conv2d (kernel_size, padding, ...)
    """

    def __init__(self, in_channels, out_channels, pool, **kwargs):
        super(BasicConv2d, self).__init__()

        # Attribute names double as parameter-name prefixes in the state
        # dict, so they must stay as they are for saved weights to load.
        self.pool = pool
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
        self.relu = nn.LeakyReLU()

        self.dropout = nn.Dropout(p=0.1)

    def forward(self, x):
        features = self.conv(x)
        if self.pool:
            features = F.max_pool2d(features, 2)

        # The activation is applied before batch norm in this block
        activated = self.relu(features)
        return self.dropout(self.bn(activated))

# Define model
class cancer_CNN(nn.Module):
    """
    Tile classifier: six BasicConv2d blocks followed by one linear layer
    that outputs `num_classes` raw scores (no softmax).

    Relies on the notebook-level global `num_classes` and on BasicConv2d.

    @param nc: number of channels of the input image
    @param imgSize: side length of the (square) input image; it determines
                    the input size of the linear layer
    @param ngpu: stored on the instance; not used by the code in this class
    """

    def __init__(self, nc, imgSize, ngpu):
        super(cancer_CNN, self).__init__()
        self.nc = nc
        self.imgSize = imgSize
        self.ngpu = ngpu
        self.data = 'lung'
        self.conv1 = BasicConv2d(nc, 16, False, kernel_size=5, padding=1, stride=2, bias=True)
        self.conv2 = BasicConv2d(16, 32, False, kernel_size=3, bias=True)
        self.conv3 = BasicConv2d(32, 64, True, kernel_size=3, padding=1, bias=True)
        self.conv4 = BasicConv2d(64, 64, True, kernel_size=3, padding=1, bias=True)
        self.conv5 = BasicConv2d(64, 128, True, kernel_size=3, padding=1, bias=True)
        self.conv6 = BasicConv2d(128, 64, True, kernel_size=3, padding=1, bias=True)
        # Previously hard-coded to 5184, which is only correct for
        # imgSize == 299; it is now derived from imgSize (still 5184 for 299).
        self.linear = nn.Linear(self._flat_features(imgSize), num_classes)

    @staticmethod
    def _flat_features(img_size):
        """Number of features left after conv6 for a square input of side img_size."""
        side = (img_size + 2 * 1 - 5) // 2 + 1  # conv1: kernel 5, padding 1, stride 2
        side = side - 2                         # conv2: kernel 3, no padding
        for _ in range(4):                      # conv3..conv6 keep the size, the 2x2 pool halves it
            side = side // 2
        if side < 1:
            raise ValueError('imgSize %r is too small for this network' % (img_size,))
        return 64 * side * side                 # conv6 has 64 output channels

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        # Flatten everything except the batch dimension for the linear layer
        x = x.view(x.size(0), -1)
        x = self.linear(x)
        return x

In [14]:
# nc=3 input channels, input side imgSize, ngpu=1 (only stored on the model).
model = cancer_CNN(3, imgSize, 1)

In [15]:
# Checkpoint whose weights are evaluated in this notebook.
model_path = "/scratch/jmw784/capstone/Charrrrtreuse/experiments/joy15/epoch_10.pth"
# The map_location callable returns every storage unchanged; per the
# torch.load documentation this keeps the loaded tensors on the CPU.
state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)
model.load_state_dict(state_dict)

In [16]:
# Number of files in the test set (these names are the keys passed to aggregate).
len(test_data.filenames[:])

296

In [None]:
#test_TCGA-44-7667-11A-01-TS1.4d2b08b2-07b1-478a-9944-934679cf74bb_26_47.jpeg
#test_TCGA-73-4659-11A-01-BS1.1615c681-e9d6-4255-9c48-3fad043abe88_76_31.jpeg


In [18]:
# Score test files 0-14 with the 'average' method (overwrites predictions/labels).
predictions, labels = aggregate(test_data.filenames[:15], method='average')

Solid_Tissue_Normal
/beegfs/jmw784/Capstone/LungTilesSorted/Solid_Tissue_Normal/test_TCGA-60-2709-11A-01-BS1.177526d2-5863-4829-ad4b-7de930de16ad_27_9.jpeg

 0.9884
 0.0017
 0.0099
[torch.FloatTensor of size 3]

Solid_Tissue_Normal
/beegfs/jmw784/Capstone/LungTilesSorted/Solid_Tissue_Normal/test_TCGA-60-2709-11A-01-BS1.177526d2-5863-4829-ad4b-7de930de16ad_27_10.jpeg

 0.9884
 0.0029
 0.0087
[torch.FloatTensor of size 3]

Solid_Tissue_Normal
/beegfs/jmw784/Capstone/LungTilesSorted/Solid_Tissue_Normal/test_TCGA-60-2709-11A-01-BS1.177526d2-5863-4829-ad4b-7de930de16ad_25_11.jpeg

 0.9981
 0.0014
 0.0005
[torch.FloatTensor of size 3]

Solid_Tissue_Normal
/beegfs/jmw784/Capstone/LungTilesSorted/Solid_Tissue_Normal/test_TCGA-60-2709-11A-01-BS1.177526d2-5863-4829-ad4b-7de930de16ad_16_12.jpeg

 0.9997
 0.0001
 0.0001
[torch.FloatTensor of size 3]

Solid_Tissue_Normal
/beegfs/jmw784/Capstone/LungTilesSorted/Solid_Tissue_Normal/test_TCGA-60-2709-11A-01-BS1.177526d2-5863-4829-ad4b-7de930de16ad_20_

In [20]:
# Score test files 15-19 with the 'average' method (overwrites predictions/labels).
predictions, labels = aggregate(test_data.filenames[15:20], method='average')

Solid_Tissue_Normal
/beegfs/jmw784/Capstone/LungTilesSorted/Solid_Tissue_Normal/test_TCGA-44-5645-11A-03-TS3.1DF60D63-00A1-42A4-AB5E-5327AC83AB29_63_7.jpeg

 0.9978
 0.0015
 0.0007
[torch.FloatTensor of size 3]

Solid_Tissue_Normal
/beegfs/jmw784/Capstone/LungTilesSorted/Solid_Tissue_Normal/test_TCGA-44-5645-11A-03-TS3.1DF60D63-00A1-42A4-AB5E-5327AC83AB29_68_7.jpeg

 0.9883
 0.0070
 0.0048
[torch.FloatTensor of size 3]

Solid_Tissue_Normal
/beegfs/jmw784/Capstone/LungTilesSorted/Solid_Tissue_Normal/test_TCGA-44-5645-11A-03-TS3.1DF60D63-00A1-42A4-AB5E-5327AC83AB29_12_8.jpeg

 0.9851
 0.0058
 0.0092
[torch.FloatTensor of size 3]

Solid_Tissue_Normal
/beegfs/jmw784/Capstone/LungTilesSorted/Solid_Tissue_Normal/test_TCGA-44-5645-11A-03-TS3.1DF60D63-00A1-42A4-AB5E-5327AC83AB29_18_8.jpeg

 0.9867
 0.0063
 0.0070
[torch.FloatTensor of size 3]

Solid_Tissue_Normal
/beegfs/jmw784/Capstone/LungTilesSorted/Solid_Tissue_Normal/test_TCGA-44-5645-11A-03-TS3.1DF60D63-00A1-42A4-AB5E-5327AC83AB29_62_8.j

In [19]:
# Score test files 120-129 with the 'average' method (overwrites predictions/labels).
predictions, labels = aggregate(test_data.filenames[120:130], method='average')

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUSC/test_TCGA-98-A53A-01A-01-TS1.EADCFE65-613A-4C37-A034-C5B11D452D4F_17_8.jpeg

 0.0003
 0.0183
 0.9814
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUSC/test_TCGA-98-A53A-01A-01-TS1.EADCFE65-613A-4C37-A034-C5B11D452D4F_18_8.jpeg

 0.0000
 0.0166
 0.9834
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUSC/test_TCGA-98-A53A-01A-01-TS1.EADCFE65-613A-4C37-A034-C5B11D452D4F_17_9.jpeg

 0.0000
 0.0039
 0.9961
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUSC/test_TCGA-98-A53A-01A-01-TS1.EADCFE65-613A-4C37-A034-C5B11D452D4F_18_9.jpeg

 0.0000
 0.0007
 0.9993
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUSC/test_TCGA-98-A53A-01A-01-TS1.EADCFE65-613A-4C37-A034-C5B11D452D4F_67_9.jpeg

 0.0000
 0.0193
 0.9807
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTi

In [21]:
# Score test files 130-139 with the 'average' method (overwrites predictions/labels).
predictions, labels = aggregate(test_data.filenames[130:140], method='average')

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUSC/test_TCGA-66-2755-01A-01-BS1.e052d5dd-852d-4835-bace-71bbb1ca3325_9_2.jpeg

 0.0021
 0.0083
 0.9895
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUSC/test_TCGA-66-2755-01A-01-BS1.e052d5dd-852d-4835-bace-71bbb1ca3325_10_2.jpeg

 0.0000
 0.0002
 0.9997
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUSC/test_TCGA-66-2755-01A-01-BS1.e052d5dd-852d-4835-bace-71bbb1ca3325_11_2.jpeg

 0.0107
 0.0021
 0.9871
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUSC/test_TCGA-66-2755-01A-01-BS1.e052d5dd-852d-4835-bace-71bbb1ca3325_7_3.jpeg

 1.3389e-06
 1.6795e-03
 9.9832e-01
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUSC/test_TCGA-66-2755-01A-01-BS1.e052d5dd-852d-4835-bace-71bbb1ca3325_8_3.jpeg

 0.0002
 0.0005
 0.9993
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capsto

In [22]:
# Score test files 140-144 with the 'average' method (overwrites predictions/labels).
predictions, labels = aggregate(test_data.filenames[140:145], method='average')

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUSC/test_TCGA-66-2791-01A-01-TS1.a0fd779c-2778-4279-a740-af4035538b06_26_2.jpeg

 0.0068
 0.0113
 0.9819
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUSC/test_TCGA-66-2791-01A-01-TS1.a0fd779c-2778-4279-a740-af4035538b06_27_2.jpeg

 0.0004
 0.0034
 0.9962
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUSC/test_TCGA-66-2791-01A-01-TS1.a0fd779c-2778-4279-a740-af4035538b06_13_3.jpeg

 0.0002
 0.0182
 0.9816
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUSC/test_TCGA-66-2791-01A-01-TS1.a0fd779c-2778-4279-a740-af4035538b06_14_3.jpeg

 0.0013
 0.0037
 0.9950
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUSC/test_TCGA-66-2791-01A-01-TS1.a0fd779c-2778-4279-a740-af4035538b06_15_3.jpeg

 0.0034
 0.0017
 0.9949
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTi

In [23]:
# Score test files 240-249 with the 'average' method (overwrites predictions/labels).
predictions, labels = aggregate(test_data.filenames[240:250], method='average')

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUAD/test_TCGA-91-8499-01A-01-TS1.157b5269-563d-4a4d-b46e-f355362035c0_9_1.jpeg

 1.4801e-06
 6.7927e-03
 9.9321e-01
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUAD/test_TCGA-91-8499-01A-01-TS1.157b5269-563d-4a4d-b46e-f355362035c0_10_1.jpeg

 6.9763e-06
 9.2053e-03
 9.9079e-01
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUAD/test_TCGA-91-8499-01A-01-TS1.157b5269-563d-4a4d-b46e-f355362035c0_11_1.jpeg

 0.0000
 0.0123
 0.9876
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUAD/test_TCGA-91-8499-01A-01-TS1.157b5269-563d-4a4d-b46e-f355362035c0_8_2.jpeg

 3.4776e-06
 1.2225e-02
 9.8777e-01
[torch.FloatTensor of size 3]

TCGA-LUSC
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUAD/test_TCGA-91-8499-01A-01-TS1.157b5269-563d-4a4d-b46e-f355362035c0_9_2.jpeg

 1.4771e-07
 3.2488e-04
 9.9967e-01
[torch.FloatTensor of size

In [24]:
# Score test files 250-259 with the 'average' method (overwrites predictions/labels).
predictions, labels = aggregate(test_data.filenames[250:260], method='average')

TCGA-LUAD
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUAD/test_TCGA-78-7536-01A-01-TS1.29490ee7-b0ce-437f-9a5d-fc643c3d38d8_6_1.jpeg

 0.0000
 0.9946
 0.0054
[torch.FloatTensor of size 3]

TCGA-LUAD
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUAD/test_TCGA-44-5645-01B-04-BS4.76ED5128-FB3D-44C7-A679-848860CAA3C5_13_4.jpeg

 0.0012
 0.9900
 0.0088
[torch.FloatTensor of size 3]

TCGA-LUAD
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUAD/test_TCGA-44-5645-01B-04-BS4.76ED5128-FB3D-44C7-A679-848860CAA3C5_12_5.jpeg

 3.3843e-06
 9.9757e-01
 2.4292e-03
[torch.FloatTensor of size 3]

TCGA-LUAD
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUAD/test_TCGA-44-5645-01B-04-BS4.76ED5128-FB3D-44C7-A679-848860CAA3C5_12_6.jpeg

 0.0015
 0.9962
 0.0023
[torch.FloatTensor of size 3]

TCGA-LUAD
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUAD/test_TCGA-44-5645-01B-04-BS4.76ED5128-FB3D-44C7-A679-848860CAA3C5_11_7.jpeg

 7.5625e-06
 9.9944e-01
 5.5673e-04
[torch.FloatTensor of size 3]

TCGA-LUAD
/beegfs

In [25]:
# Score test files 260-269 with the 'average' method (overwrites predictions/labels).
predictions, labels = aggregate(test_data.filenames[260:270], method='average')

TCGA-LUAD
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUAD/test_TCGA-67-6217-01A-01-BS1.49c857c1-9817-48ed-8b34-5b168c2496c7_12_5.jpeg

 0.0002
 0.9815
 0.0182
[torch.FloatTensor of size 3]

TCGA-LUAD
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUAD/test_TCGA-67-6217-01A-01-BS1.49c857c1-9817-48ed-8b34-5b168c2496c7_13_5.jpeg

 0.0030
 0.9913
 0.0057
[torch.FloatTensor of size 3]

TCGA-LUAD
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUAD/test_TCGA-67-6217-01A-01-BS1.49c857c1-9817-48ed-8b34-5b168c2496c7_7_6.jpeg

 0.0001
 0.9991
 0.0009
[torch.FloatTensor of size 3]

TCGA-LUAD
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUAD/test_TCGA-67-6217-01A-01-BS1.49c857c1-9817-48ed-8b34-5b168c2496c7_9_6.jpeg

 0.0000
 0.9842
 0.0158
[torch.FloatTensor of size 3]

TCGA-LUAD
/beegfs/jmw784/Capstone/LungTilesSorted/TCGA-LUAD/test_TCGA-67-6217-01A-01-BS1.49c857c1-9817-48ed-8b34-5b168c2496c7_11_6.jpeg

 8.4664e-06
 9.9130e-01
 8.6870e-03
[torch.FloatTensor of size 3]

TCGA-LUAD
/beegfs/jmw784/Capst

In [15]:
# Labels returned by the most recently executed aggregate() call.
labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [23]:
#predictions.shape
#labels.shape
# Append the label as the last column: one row per file -> [class scores..., label]
data = np.column_stack((np.asarray(predictions),np.asarray(labels)))


In [24]:
# (number of files, num_classes + 1) for the array built in the previous cell.
data.shape

(2, 4)

In [25]:
# data will become vstack((a,b)), or combine columns?
#data = np.column_stack((np.asarray(predictions),np.asarray(labels)))
#data.dump(open('pred_label_max.npy', 'wb'))

In [None]:
# Score every test file with the 'max' (tile voting) method first. Without
# this line the AUC below was computed from whatever `predictions`/`labels`
# the last executed cell happened to leave behind, which did not match the
# "_max" file name. NOTE: this runs the model over all test tiles.
predictions, labels = aggregate(test_data.filenames, method='max')

# get_auc is not defined in this notebook (presumably from utils.auc); the
# first argument looks like the path the ROC figure is saved to.
roc_auc = get_auc('experiments/joy15/images/test_AUC_max.jpg',
                  predictions, labels, classes = range(num_classes))

In [None]:
# Display the value returned by get_auc in the previous cell.
roc_auc

In [None]:
# Score every test file with the 'average' method (overwrites predictions/labels).
predictions, labels = aggregate(test_data.filenames, method='average')

In [None]:
# AUC for the 'average' scores computed in the previous cell. get_auc is not
# defined in this notebook (presumably from utils.auc); the first argument
# looks like the path the ROC figure is saved to.
roc_auc = get_auc('experiments/joy15/images/test_AUC_avg.jpg',
                  predictions, labels, classes = range(num_classes))

In [None]:
# Display the value returned by get_auc in the previous cell.
roc_auc

In [31]:
#p_l = np.load('pred_label_max.npy')
#p_l[1]

array([ 0.98579783,  0.00501253,  0.00918964,  0.        ])

# Visualize predictions

In [None]:
def pil_loader(path):
    """
    Open the image file at `path` and return it converted to RGB mode.

    @param path: path of the image file to read
    @return: the RGB image produced by `convert('RGB')`
    """
    # Both the file handle and the PIL image are closed on exit; the
    # converted copy is created before that and is what gets returned.
    with open(path, 'rb') as handle, Image.open(handle) as picture:
        return picture.convert('RGB')
        
def shift_hue(arr,hout):
    """
    Return a copy of an RGB image with the hue of every pixel set to `hout`.

    @param arr: RGB image array with the channels on the last axis; integer
                arrays are taken to be 8-bit (0..255), float arrays to be
                already scaled to [0, 1]
    @param hout: hue to assign, in [0, 1]
    @return: uint8 RGB array (0..255) with the same shape as `arr`
    """
    rgb = np.asarray(arr)

    # matplotlib's HSV helpers assume channel values in [0, 1]. Feeding raw
    # 8-bit values and multiplying by 255 afterwards produced values up to
    # 255 * 255, which cannot be displayed correctly.
    if np.issubdtype(rgb.dtype, np.integer):
        rgb = rgb / 255.0

    hsv = rgb_to_hsv(rgb)
    hsv[..., 0] = hout  # overwrite hue, keep saturation and value

    shifted = hsv_to_rgb(hsv) * 255
    return np.clip(np.rint(shifted), 0, 255).astype(np.uint8)

In [None]:
# Hue-shift demo: render one tile once per class colour.
# `slide` is set here so the cell also works on a fresh kernel; it used to
# rely on a later cell having been executed first.
slide = test_data.filenames[1]
filename = tile_dict[slide][0][10, 10]
label =  classes[tile_dict[slide][1]]
img = pil_loader(os.path.join(root_dir, label, filename))
img = np.array(img)

gs = GridSpec(1, num_classes, wspace=0, hspace=0)

plt.figure(figsize=(10, 8))
for i in range(num_classes):
    ax = plt.subplot(gs[0, i])
    # Class index -> hue: class i is drawn with hue i / num_classes
    hue = float(i) / num_classes
    shifted = shift_hue(img, hue)
    plt.imshow(shifted)
    plt.axis('off')
    plt.title('%s' % (classes[i]))

In [None]:
# Show the raw tiles of one file laid out on their original grid.
slide = test_data.filenames[1]
tile_grid, label_index = tile_dict[slide]
rows, columns = tile_grid.shape
length = 0.5  # figure inches per tile

height = length * rows
width = length * columns

# Loop-invariant: all tiles of a file sit in the folder of its class
label = classes[label_index]

plt.figure(figsize=(width, height))
gs = GridSpec(rows, columns, wspace=0, hspace=0)

for i in range(rows):
    for j in range(columns):
        ax = plt.subplot(gs[i, j])

        filename = tile_grid[i, j]

        # Empty grid positions have no tile; leave their axes blank
        if filename != '':
            img = pil_loader(os.path.join(root_dir, label, filename))
            plt.imshow(img)

        plt.axis('off')
        ax.grid(False)

In [None]:
# Prediction overlay: every tile is tinted with the hue of its predicted
# class and drawn with an opacity equal to the predicted probability.
model.eval()

slide = test_data.filenames[1]
rows, columns = tile_dict[slide][0].shape

length = 0.5  # figure inches per tile

height = length * rows
width = length * columns

plt.figure(figsize=(width, height))
gs = GridSpec(rows, columns, wspace=0, hspace=0)

label =  classes[tile_dict[slide][1]]

for i in range(rows):
    for j in range(columns):
        ax = plt.subplot(gs[i, j])

        filename = tile_dict[slide][0][i, j]

        # Empty grid positions have no tile; leave their axes blank
        if filename != '':
            # Reuse the shared inference helper (it prepends root_dir itself)
            # instead of duplicating the model call here.
            probs = get_tile_probability(label + '/' + filename)

            prediction, prob = np.argmax(probs), np.max(probs)

            img = np.array(pil_loader(os.path.join(root_dir, label, filename)))
            hue = float(prediction) / num_classes

            img = shift_hue(img, hue)
            plt.imshow(img, alpha = float(prob))

        plt.axis('off')
        ax.grid(False)

# suptitle labels the whole figure; plt.title here would only title the
# axes of the last tile drawn.
plt.suptitle('True label: %s' % (label))