# Import and High-Level Setup

In [1]:
# General Python Packages
import os
import time

# Torch Packages
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from torch.optim import lr_scheduler, SGD
from torch.autograd import Variable
from torch import nn
from torch.nn import DataParallel
from torch.nn import Module

# General Analytics Packages
import pandas as pd
import numpy as np

# Visualization / Image Packages
import matplotlib.pyplot as plt
from PIL import Image

# Randomization Functions
from random import random as randuni

  'Matplotlib is building the font cache using fc-list. '


In [2]:
# Put MatPlotLib in interactive mode
plt.ion()

# Define Data Manipulation Classes

### Helper Utility Classes

In [7]:
def is_image_file(fname):
    """Checks if a file is an image.
    Args:
        fname (string): path to a file
    Returns:
        bool: True if the filename ends with a known image extension
    """
    return fname.lower().endswith('.png')

def create_label_maps(details_df):
    """ Take a descriptive dataframe and extract the unique labels and map to index values
    Args:
        details_df: Dataframe with the image details
    Returns:
        label_list: list of unique labels in the dataframe
        label_to_index: map from labels to indices
    """
    """ TODO: Research paper also excludes these labels but need to figure out how to handle
              cases that have these as positive findings (completely exclude?)
    excluded_labels = ['Edema','Hernia','Emphysema','Fibrosis','No Finding'
                      'Pleural_Thickening','Consolidation']
    """
    excluded_labels = ['No Finding']
    
    label_groups = details_df['Finding Labels'].unique()
    unique_labels = set([label for sublist in label_groups.tolist() for label in sublist.split('|')])
    
    # Drop some label that we do not want to include
    unique_labels = [l for l in unique_labels if l not in excluded_labels]

    index_to_label = {idx: val for idx, val in enumerate(unique_labels)}
    label_to_index = {val: idx for idx, val in index_to_label.items()}

    label_list = list(label_to_index.keys())

    return label_list, label_to_index

def create_image_list(dir):
    """ Create a full list of images available 
    Args:
        dir (string): root directory of images with subdirectories underneath
                      that have the .png images within them
    Returns:
        image_list: list of tuples with (image_name, full_image_path)
    """
    image_list = []
    dir = os.path.expanduser(dir)
    for subfolder in sorted(os.listdir(dir)):
        d = os.path.join(dir, subfolder)
        if not os.path.isdir(d):
            continue
        for subfolder_path, _, fnames in sorted(os.walk(d)):
            for fname in sorted(fnames):
                if is_image_file(fname):
                    path = os.path.join(subfolder_path, fname)
                    image_list.append((fname, path))
    return image_list

def pil_loader(path):
    """ Opens path as file with Pillow (https://github.com/python-pillow/Pillow/issues/835)
    Args:
        path (string): File path to the image
    Returns:
        img: Image in RGB format
    """
    with open(path, 'rb') as f:
        with Image.open(f) as img:
            return img.convert('RGB')
        
def imshow(inp, title=None):
    """ Convert tensor array to an image (only use post-dataset transform) """
    inp = inp[0]
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated

### Implementation of Torch's Dataset

In [8]:
class XrayImageSet(Dataset):
    """
    Args:
        image_root (string): root directory of the images in form image/subfolder/*.png
        csv_file (string): path to the CSV data file
        transform (callable, optional): A function/transform that  takes in an PIL image
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        loader (callable, optional): A function to load an image given its path.
     Attributes:
        labels (list): list of the possible label names.
        label_to_index (dict): look from label name to a label index
        imgs (list): List of (filename, image path) tuples
    """
    
    def __init__(self, image_root, csv_file, transform=None, target_transform=None, loader = pil_loader):
        """ Create an instance of the Xray Dataset """
        img_details = pd.read_csv(csv_file)
        
        labels, label_to_index = create_label_maps(img_details)
        imgs = create_image_list(image_root)

        self.imgs = imgs
        self.image_details = img_details
        self.image_root = image_root
        self.labels = labels
        self.label_to_index = label_to_index
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader
        self.max_label_index = max(label_to_index.values())

    def __getitem__(self, index):
        """ Get image,labels pair by index
        Args:
            index (int): Index
        Returns:
            tuple: (image, target) where target is class_index of the target class.
        """
        fname, path = self.imgs[index]
        target = self.get_one_hot_labels(fname)
        img = self.loader(path)
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def __len__(self):
        """ Calculate length of the dataset (number of images) """
        return len(self.imgs)
    
    def get_labels(self, fname):
        """ Return the label string for the file """
        return self.image_details[self.image_details['Image Index'] == fname]['Finding Labels'].values[0]
    
    def one_hot_labels(self, labels):
        """ Convert the labels string (with each label separated by |) into 1-hot encoding """
        if labels == None:
            return None
        
        split_label_indices = [self.label_to_index.get(label)
                               for label in labels.split('|')
                               if label != 'No Finding']
        
        out = [1 if idx in split_label_indices else 0 for idx in range(self.max_label_index+1)]
        # This code UNHOTs the labels:
        # out = '|'.join([index_to_label.get(idx) for idx, val in enumerate(one_hot_tuple) if val == 1])
        return out

    def get_one_hot_labels(self, fname):
        """ Get the 1-hot encoded label array for the provided file """
        labels = self.get_labels(fname)
        one_hot_labels = self.one_hot_labels(labels)
        return torch.FloatTensor(one_hot_labels)

### Create the dataset with necessary transformations

In [9]:
img_transforms = transforms.Compose(
    [transforms.Resize(224),
     transforms.ToTensor(),
     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

In [84]:
img_data_train = XrayImageSet(image_root = '/user/images/',
                              csv_file = '/user/img_details.csv',
                              transform = img_transforms,
                              target_transform = None)

img_data_train.imgs = [img for i, img in enumerate(img_data_train.imgs) if i % 10 > 0]# and randuni() < 0.1]

In [85]:
img_data_val   = XrayImageSet(image_root = '/user/images/',
                              csv_file = '/user/img_details.csv',
                              transform = img_transforms,
                              target_transform = None)

img_data_val.imgs = [img for i, img in enumerate(img_data_val.imgs) if i % 10 == 0]# and randuni() < 0.1]

In [86]:
print("Training Set Size: {}".format(len(img_data_train)))
print("Validation Set Size: {}".format(len(img_data_val)))

Training Set Size: 100908
Validation Set Size: 11212


### Put the dataset into a Dataloader to handle batching

In [87]:
batch_size = 1000
num_gpus = torch.cuda.device_count()
pin_mem_setting = True

print("Number of GPU: {}".format(num_gpus))

Number of GPU: 1


In [88]:
img_loader_train = DataLoader(img_data_train,
                              batch_size = batch_size * num_gpus,
                              shuffle = True,
                              num_workers = 10,
                              pin_memory = pin_mem_setting)

img_loader_val   = DataLoader(img_data_val,
                              batch_size = batch_size * num_gpus,
                              shuffle = True,
                              num_workers = 10,
                              pin_memory = pin_mem_setting)

In [89]:
dataloaders = {
    'train': img_loader_train,
    'val': img_loader_val
}

# Define model training procedure

In [90]:
class printer_writer:
    def __init__(self, output_folder_path):
        self.start_time = time.strftime('%Y%m%d-%Hh%Mm%Ss')
        
        self.outprefix = output_folder_path + '/' + self.start_time
        
        # Print Output File
        self.print_out_path = self.outprefix + '_print.txt'
        self.print_out_file = open(self.print_out_path, 'w', 1)
        
    def printw(self, string):
        print(string)
        try:
            self.print_out_file.write(string + "\n")
        except: # Ignore errors
            pass
        
    def save_checkpoint(self, epoch, model, optimizer, scheduler, val_error):
        model_out_path = self.outprefix + '_model_' + str(epoch+1) + '.tar'
        
        torch.save({
            'epoch': epoch+1,
            'state': model.state_dict(),
            'optimizer': optimizer,
            'scheduler': scheduler,
            'val_error': val_error
        }, model_out_path)
        
    def close(self):
        self.print_out_file.close()

In [91]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, outfolder = '/user/xrayproj/output/'):
    since = time.time()
    scribe = printer_writer(outfolder)

    for epoch in range(num_epochs):
        scribe.printw('Epoch {}/{}'.format(epoch, num_epochs - 1))
        scribe.printw('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train(True)
            else:
                model.train(False)

            running_loss = 0.0
            running_corrects = 0
            obs_counter = 0

            # Iterate over data.
            for data in dataloaders[phase]:
                # get the inputs
                inputs, labels = data

                # wrap them in Variable
                inputs = Variable(inputs.cuda())
                labels = Variable(labels.cuda())

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # Store statistics (convert from autograd.Variable to float/int)
                loss_val = loss.data[0]
                correct_val = torch.sum( ((outputs.sigmoid()>0.5) == (labels>0.5)).long() ).data[0]
                
                running_loss += loss_val
                running_corrects += correct_val
                
                obs_counter += len(inputs)
                
                batch_loss = 1.0 * loss_val / len(inputs)
                batch_acc = 1.0 * correct_val / len(inputs)
                status = ' |~~ {}@{}  Loss: {:.6f} Acc: {:.4f}'.format(
                    phase, obs_counter, batch_loss, batch_acc)
                scribe.printw(status)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects / len(dataloaders[phase].dataset)
            scribe.printw('{}  Loss: {:.6f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val':
                scribe.save_checkpoint(epoch, model, optimizer, scheduler, epoch_loss)

    time_elapsed = time.time() - since
    scribe.printw('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    scribe.close()

    return model

# Define Weighted Cost Metrics

In [92]:
def imbalance_weighted_bce_with_logit(input, target, size_average=True):
    if not (target.size() == input.size()):
        raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))

    max_val = (-input).clamp(min=0)
    loss = input - input * target + max_val + ((-max_val).exp() + (-input - max_val).exp()).log()

    # Determine |P| and |N|
    positive_labels = target.sum()
    negative_labels = (1-target).sum()

    # Upweight the less common class (very often the 1s)
    beta_p = (positive_labels + negative_labels) / positive_labels
    beta_n = (positive_labels + negative_labels) / negative_labels

    # Adjust the losses accordingly
    loss_weight = target * beta_p + (1-target) * beta_n
    
    loss = loss * loss_weight

    if size_average:
        return loss.mean()
    else:
        return loss.sum()

In [93]:
class BCEWithLogitsImbalanceWeightedLoss(Module):
    def __init__(self, class_weight=None, size_average=True):
        super(BCEWithLogitsImbalanceWeightedLoss, self).__init__()
        self.size_average = size_average

    def forward(self, input, target):
        return imbalance_weighted_bce_with_logit(input, target, size_average=self.size_average)

# Setup Neural Network

### Define the model specifications

In [138]:
def ResNet18PlusFlexibleFC():
    # Create a base ResNet18 model
    m = models.resnet18(pretrained=True)
    for param in m.parameters():
        param.requires_grad = False

    # Replace the final FC layer
    m.fc = nn.Linear(m.fc.in_features, len(img_data_train.labels))
    
    return m

    def model(self, ):

### Pull the ResNet-18 pre-trained model and replace the fully connected layer at the end

In [139]:
model_base = ResNet18PlusFlexibleFC()

### Push model to CUDA/GPU

In [95]:
model_ft = DataParallel(model_base).cuda()

### Define loss measure and learning rates/procedures

In [96]:
criterion = BCEWithLogitsImbalanceWeightedLoss()

# Observe that all parameters are being optimized
optimizer_ft = SGD(model_ft.module.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

### Future code for allowing optimization of the base layer with a lower learning rate

```
ignored_params = list(map(id, model.fc.parameters()))
base_params = filter(lambda p: id(p) not in ignored_params,
                     model.parameters())

optimizer = torch.optim.SGD([
            {'params': base_params},
            {'params': model.fc.parameters(), 'lr': opt.lr}
        ], lr=opt.lr*0.1, momentum=0.9)
```

# Begin Training Network (Normal Cost)

In [97]:
train_model(model_ft,
            criterion,
            optimizer_ft,
            exp_lr_scheduler,
            num_epochs=10)

Epoch 0/9
----------
 |~~ train@1000  Loss: 0.001541 Acc: 6.1330
 |~~ train@2000  Loss: 0.001542 Acc: 6.0640
 |~~ train@3000  Loss: 0.001528 Acc: 6.0640
 |~~ train@4000  Loss: 0.001492 Acc: 6.0320
 |~~ train@5000  Loss: 0.001473 Acc: 6.0100
 |~~ train@6000  Loss: 0.001430 Acc: 6.0380
 |~~ train@7000  Loss: 0.001393 Acc: 6.1250
 |~~ train@8000  Loss: 0.001379 Acc: 6.2670
 |~~ train@9000  Loss: 0.001336 Acc: 6.4420
 |~~ train@10000  Loss: 0.001304 Acc: 6.6770
 |~~ train@11000  Loss: 0.001281 Acc: 7.0010
 |~~ train@12000  Loss: 0.001281 Acc: 7.3510
 |~~ train@13000  Loss: 0.001254 Acc: 7.7840
 |~~ train@14000  Loss: 0.001272 Acc: 8.1270
 |~~ train@15000  Loss: 0.001246 Acc: 8.5020
 |~~ train@16000  Loss: 0.001251 Acc: 8.8060
 |~~ train@17000  Loss: 0.001250 Acc: 8.9860
 |~~ train@18000  Loss: 0.001251 Acc: 9.1680
 |~~ train@19000  Loss: 0.001269 Acc: 9.3130
 |~~ train@20000  Loss: 0.001231 Acc: 9.4640
 |~~ train@21000  Loss: 0.001246 Acc: 9.4760
 |~~ train@22000  Loss: 0.001221 Acc: 9.591

 |~~ train@85000  Loss: 0.001163 Acc: 9.6080
 |~~ train@86000  Loss: 0.001156 Acc: 9.6520
 |~~ train@87000  Loss: 0.001162 Acc: 9.6020
 |~~ train@88000  Loss: 0.001144 Acc: 9.6370
 |~~ train@89000  Loss: 0.001132 Acc: 9.6460
 |~~ train@90000  Loss: 0.001168 Acc: 9.6340
 |~~ train@91000  Loss: 0.001178 Acc: 9.6290
 |~~ train@92000  Loss: 0.001166 Acc: 9.6010
 |~~ train@93000  Loss: 0.001127 Acc: 9.6260
 |~~ train@94000  Loss: 0.001132 Acc: 9.6780
 |~~ train@95000  Loss: 0.001168 Acc: 9.6510
 |~~ train@96000  Loss: 0.001154 Acc: 9.6720
 |~~ train@97000  Loss: 0.001150 Acc: 9.6600
 |~~ train@98000  Loss: 0.001167 Acc: 9.6690
 |~~ train@99000  Loss: 0.001149 Acc: 9.7180
 |~~ train@100000  Loss: 0.001171 Acc: 9.6860
 |~~ train@100908  Loss: 0.001281 Acc: 9.7401
train  Loss: 0.001169 Acc: 9.5941
 |~~ val@1000  Loss: 0.001154 Acc: 9.6690
 |~~ val@2000  Loss: 0.001136 Acc: 9.7370
 |~~ val@3000  Loss: 0.001153 Acc: 9.6350
 |~~ val@4000  Loss: 0.001159 Acc: 9.6650
 |~~ val@5000  Loss: 0.001129 A

 |~~ train@59000  Loss: 0.001150 Acc: 9.8230
 |~~ train@60000  Loss: 0.001116 Acc: 9.8630
 |~~ train@61000  Loss: 0.001144 Acc: 9.8410
 |~~ train@62000  Loss: 0.001119 Acc: 9.8380
 |~~ train@63000  Loss: 0.001137 Acc: 9.7970
 |~~ train@64000  Loss: 0.001145 Acc: 9.8180
 |~~ train@65000  Loss: 0.001126 Acc: 9.8800
 |~~ train@66000  Loss: 0.001106 Acc: 9.8390
 |~~ train@67000  Loss: 0.001155 Acc: 9.7970
 |~~ train@68000  Loss: 0.001141 Acc: 9.7830
 |~~ train@69000  Loss: 0.001132 Acc: 9.7380
 |~~ train@70000  Loss: 0.001135 Acc: 9.8420
 |~~ train@71000  Loss: 0.001144 Acc: 9.8110
 |~~ train@72000  Loss: 0.001112 Acc: 9.7980
 |~~ train@73000  Loss: 0.001112 Acc: 9.8570
 |~~ train@74000  Loss: 0.001136 Acc: 9.8140
 |~~ train@75000  Loss: 0.001121 Acc: 9.8580
 |~~ train@76000  Loss: 0.001115 Acc: 9.8590
 |~~ train@77000  Loss: 0.001144 Acc: 9.8020
 |~~ train@78000  Loss: 0.001139 Acc: 9.8240
 |~~ train@79000  Loss: 0.001135 Acc: 9.8300
 |~~ train@80000  Loss: 0.001138 Acc: 9.8180
 |~~ train

 |~~ train@13000  Loss: 0.001117 Acc: 9.9260
 |~~ train@14000  Loss: 0.001135 Acc: 9.8910
 |~~ train@15000  Loss: 0.001110 Acc: 9.9390
 |~~ train@16000  Loss: 0.001091 Acc: 9.9970
 |~~ train@17000  Loss: 0.001116 Acc: 9.9360
 |~~ train@18000  Loss: 0.001117 Acc: 9.9040
 |~~ train@19000  Loss: 0.001129 Acc: 9.8990
 |~~ train@20000  Loss: 0.001131 Acc: 9.9490
 |~~ train@21000  Loss: 0.001098 Acc: 9.9950
 |~~ train@22000  Loss: 0.001082 Acc: 9.9940
 |~~ train@23000  Loss: 0.001109 Acc: 9.9100
 |~~ train@24000  Loss: 0.001115 Acc: 9.9310
 |~~ train@25000  Loss: 0.001101 Acc: 9.9000
 |~~ train@26000  Loss: 0.001084 Acc: 9.9200
 |~~ train@27000  Loss: 0.001124 Acc: 9.8770
 |~~ train@28000  Loss: 0.001121 Acc: 9.9050
 |~~ train@29000  Loss: 0.001122 Acc: 9.8670
 |~~ train@30000  Loss: 0.001103 Acc: 9.9100
 |~~ train@31000  Loss: 0.001110 Acc: 9.8750
 |~~ train@32000  Loss: 0.001129 Acc: 9.8870
 |~~ train@33000  Loss: 0.001113 Acc: 9.8980
 |~~ train@34000  Loss: 0.001128 Acc: 9.8560
 |~~ train

 |~~ train@81000  Loss: 0.001106 Acc: 9.9930
 |~~ train@82000  Loss: 0.001097 Acc: 10.0320
 |~~ train@83000  Loss: 0.001115 Acc: 9.9370
 |~~ train@84000  Loss: 0.001094 Acc: 10.0190
 |~~ train@85000  Loss: 0.001090 Acc: 9.9300
 |~~ train@86000  Loss: 0.001092 Acc: 9.9530
 |~~ train@87000  Loss: 0.001136 Acc: 9.9410
 |~~ train@88000  Loss: 0.001089 Acc: 9.8630
 |~~ train@89000  Loss: 0.001097 Acc: 10.0090
 |~~ train@90000  Loss: 0.001117 Acc: 9.9230
 |~~ train@91000  Loss: 0.001126 Acc: 9.8660
 |~~ train@92000  Loss: 0.001116 Acc: 9.8850
 |~~ train@93000  Loss: 0.001110 Acc: 9.8760
 |~~ train@94000  Loss: 0.001079 Acc: 9.9510
 |~~ train@95000  Loss: 0.001106 Acc: 9.9070
 |~~ train@96000  Loss: 0.001105 Acc: 9.8920
 |~~ train@97000  Loss: 0.001102 Acc: 9.9430
 |~~ train@98000  Loss: 0.001111 Acc: 9.9360
 |~~ train@99000  Loss: 0.001131 Acc: 9.9210
 |~~ train@100000  Loss: 0.001107 Acc: 9.9430
 |~~ train@100908  Loss: 0.001222 Acc: 9.9791
train  Loss: 0.001108 Acc: 9.9603
 |~~ val@1000  L

 |~~ train@47000  Loss: 0.001115 Acc: 10.0240
 |~~ train@48000  Loss: 0.001107 Acc: 9.9230
 |~~ train@49000  Loss: 0.001095 Acc: 9.9940
 |~~ train@50000  Loss: 0.001104 Acc: 9.9890
 |~~ train@51000  Loss: 0.001096 Acc: 10.0180
 |~~ train@52000  Loss: 0.001115 Acc: 10.0340
 |~~ train@53000  Loss: 0.001104 Acc: 10.0180
 |~~ train@54000  Loss: 0.001112 Acc: 9.9610
 |~~ train@55000  Loss: 0.001110 Acc: 9.9640
 |~~ train@56000  Loss: 0.001123 Acc: 9.9520
 |~~ train@57000  Loss: 0.001110 Acc: 9.9390
 |~~ train@58000  Loss: 0.001089 Acc: 9.9920
 |~~ train@59000  Loss: 0.001100 Acc: 9.9780
 |~~ train@60000  Loss: 0.001104 Acc: 10.0200
 |~~ train@61000  Loss: 0.001116 Acc: 9.9620
 |~~ train@62000  Loss: 0.001092 Acc: 9.9250
 |~~ train@63000  Loss: 0.001076 Acc: 10.0050
 |~~ train@64000  Loss: 0.001104 Acc: 9.9910
 |~~ train@65000  Loss: 0.001119 Acc: 9.9650
 |~~ train@66000  Loss: 0.001084 Acc: 10.0550
 |~~ train@67000  Loss: 0.001096 Acc: 9.9610
 |~~ train@68000  Loss: 0.001065 Acc: 10.0300
 |

Training complete in 141m 4s


DataParallel(
  (module): ResNet(
    (conv1): Conv2d (3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (relu): ReLU(inplace)
    (maxpool): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), dilation=(1, 1))
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d (64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (relu): ReLU(inplace)
        (conv2): Conv2d (64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d (64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (relu): ReLU(inplace)
        (conv2): Conv2d (64, 64, kernel_size=(3, 3), str

# Analysis of Model Results

In [54]:
out_model_30.train(mode=False)

obs_counter = 0
total_pred = Variable(torch.FloatTensor(torch.zeros(14)))
total_act = Variable(torch.FloatTensor(torch.zeros(14)))

conf_a = {}
conf_b = {}
conf_c = {}
conf_d = {}
for i in range(1,10):
    conf_a[i] = Variable(torch.FloatTensor(torch.zeros(14)))
    conf_b[i] = Variable(torch.FloatTensor(torch.zeros(14)))
    conf_c[i] = Variable(torch.FloatTensor(torch.zeros(14)))
    conf_d[i] = Variable(torch.FloatTensor(torch.zeros(14)))

for data in dataloaders['val']:
    print("STARTING ITERATION...")
    inputs, labels = data
    print("PROCESSING FIRST {} OBSERVATIONS".format(len(inputs)))

    inputs = Variable(inputs.cuda())
    labels = Variable(labels.cuda())

    outputs = out_model_30(inputs).sigmoid()
    
    total_act += labels.sum(0).cpu()
    total_pred += outputs.sum(0).cpu()

    # Store statistics (convert from autograd.Variable to float/int)
    for i in range(1,10):
        t = i/10
        conf_a[i] += ((outputs.sigmoid()>t) == (labels>0.5)).sum(0).cpu().float()
        conf_b[i] += ((outputs.sigmoid()<t) == (labels>0.5)).sum(0).cpu().float()
        conf_c[i] += ((outputs.sigmoid()>t) == (labels<0.5)).sum(0).cpu().float()
        conf_d[i] += ((outputs.sigmoid()<t) == (labels<0.5)).sum(0).cpu().float()

    obs_counter += len(inputs)

STARTING ITERATION...
PROCESSING FIRST 750 OBSERVATIONS
STARTING ITERATION...
PROCESSING FIRST 406 OBSERVATIONS


In [67]:
comparison = Variable(torch.FloatTensor(9, 14))
for i in range(9):
    comparison[0] = conf_a[1] / obs_counter
print(comparison.int())

Variable containing:

Columns 0 to 5 
 0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00
 0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00
 0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00
 0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00
 0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00 -2.1475e+09  0.0000e+00
 0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00
 0.0000e+00  0.0000e+00 -2.1475e+09  0.0000e+00 -2.1475e+09  0.0000e+00
 6.2634e+06 -1.0000e+00 -2.1475e+09  0.0000e+00 -2.1475e+09  0.0000e+00
 0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00

Columns 6 to 11 
 0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00
 0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00
 0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00 -6.5460e+04  0.0000e+00
 0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00  0.0000e+00
-2.1475e

In [83]:
conf_d[9] / obs_counter

Variable containing:
 0.3183
 0.1462
 0.3356
 0.2171
 0.3166
 0.3183
 0.2803
 0.2898
 0.2232
 0.3279
 0.2846
 0.3045
 0.3002
 0.3192
[torch.FloatTensor of size 14]

In [84]:
img_data_train.labels

['Fibrosis',
 'Infiltration',
 'Hernia',
 'Effusion',
 'Emphysema',
 'Edema',
 'Cardiomegaly',
 'Mass',
 'Nodule',
 'Atelectasis',
 'Pneumothorax',
 'Pleural_Thickening',
 'Consolidation',
 'Pneumonia']

In [101]:
'''
torch.save({
            'epoch': epoch+1,
            'state': model.state_dict(),
            'optimizer': optimizer,
            'scheduler': scheduler,
            'val_error': val_error
        }, model_out_path)
'''
test_load = torch.load('/user/xrayproj/output/20171120-01h41m56s_model_9.tar')

In [103]:
test_load.keys()

dict_keys(['epoch', 'state', 'optimizer', 'scheduler', 'val_error'])

In [106]:
load_opt = test_load['optimizer']
load_sched = test_load['scheduler']
load_state = test_load['state']

In [117]:
model2 = models.resnet18(pretrained=True)
for param in model2.parameters():
    param.requires_grad = False

# Replace FC layer
model2.fc = nn.Linear(model2.fc.in_features, len(img_data_train.labels))

model2_c = DataParallel(model2).cuda()

In [118]:
model2_c.load_state_dict(load_state)

In [123]:
model2_c.forward(Variable(img_data_train[0][0].unsqueeze(0).cuda())).sigmoid()

Variable containing:

Columns 0 to 9 
 0.7873  0.5093  0.2980  0.6386  0.4665  0.3371  0.1614  0.1846  0.1925  0.6630

Columns 10 to 13 
 0.0288  0.4468  0.3684  0.2549
[torch.cuda.FloatTensor of size 1x14 (GPU 0)]