In [1]:
import sklearn
import sklearn.metrics
import numpy as np
import pandas as pd
import math
import os
from PIL import Image
import torch
import torch.utils
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import gc

In [2]:
### DEVICE ###
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
root_dir = os.path.join("../","data","ntu_final_data")

train_file = pd.read_csv(os.path.join(root_dir,"medical_images","train.csv"))
label_data = []
unlabel_data = []
for i in train_file.index:
    if type(train_file.loc[i]["Labels"]) != str:
        if math.isnan(train_file.loc[i]["Labels"]):
            pass
            '''
            unlabel_data.append( [ train_file.loc[i]["Image Index"] ])
            '''
    else:
        p = [train_file.loc[i]["Image Index"], train_file.loc[i]["Labels"]]
        label_data.append(p)
        
        
img_dirs = os.path.join(root_dir,"medical_images","images")



  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
#normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
transformList = []
transformList.append(transforms.RandomResizedCrop(224))
transformList.append(transforms.RandomHorizontalFlip())
#transformList.append(transforms.ToTensor())
#transformList.append(normalize)      
transformSequence=transforms.Compose(transformList)
def get_dataloader(data_list, transform=None, normalize=None):
    part_data = []
    part_label = []
    print(len(data_list))
    for i, pair in enumerate(data_list):
        print(i,end='\r')
        img = Image.open(os.path.join(img_dirs, pair[0]))
        if transform != None:
            img = img.convert(mode="RGB")
            img = transform(img)
            img = np.array(img)/255
            img = np.transpose(img, axes=[2,0,1])
        else:
            img = img.convert(mode="RGB")
            img = img.resize((224,224))
            img = np.array(img) / 255
            img = np.transpose(img, axes=[2,0,1])
        label = pair[1].split()
        label = np.array([int(c) for c in label])
        part_label.append(label)
        part_data.append(img)

    batch = 16
    label_data_x = torch.Tensor(part_data)
    label_data_y = torch.Tensor(part_label)
    label_dataset = torch.utils.data.TensorDataset(label_data_x, label_data_y)
    label_dataloader = torch.utils.data.DataLoader(dataset = label_dataset,
                                                   batch_size =batch,
                                                   shuffle = False,
                                                   num_workers = 1 )
    del part_data, part_label
    return label_dataloader

In [5]:
### Define Model ###
base_model = torchvision.models.densenet121(pretrained = True)
base_model.classifier = torch.nn.Linear(in_features = base_model.classifier.in_features,
                                        out_features = 14,
                                        bias = True)
'''
base_model.add_module("pre_conv",
                      torch.nn.Sequential(
                          torch.nn.Conv2d(1,64, kernel_size=(8,8), stride = (2,2), padding = 1 ), #254x254
                          torch.nn.BatchNorm2d(64),
                          torch.nn.LeakyReLU(),
                          torch.nn.Conv2d(64, 128, kernel_size=(5,5), stride=(1,1)),   #250x250
                          torch.nn.BatchNorm2d(128),
                          torch.nn.LeakyReLU(),
                          torch.nn.Conv2d(128, 128, kernel_size=(6,6), stride=(1,1), dilation=2),   #240x240
                          torch.nn.BatchNorm2d(128),
                          torch.nn.LeakyReLU(),
                          torch.nn.Conv2d(128, 128, kernel_size=(6,6), stride=(1,1)),   #230x230
                          torch.nn.BatchNorm2d(128),
                          torch.nn.LeakyReLU(),
                          torch.nn.Conv2d(128, 128, kernel_size=(4,4), stride=(1,1), dilation=2),   #224x224
                          torch.nn.BatchNorm2d(128),
                          torch.nn.LeakyReLU(),
                          torch.nn.Conv2d(128, 3, kernel_size=(1,1), stride=(1,1)),   #224x224
                          torch.nn.BatchNorm2d(3),
                          torch.nn.LeakyReLU(),
                      )
                     )
'''
base_model.add_module("output_act",torch.nn.Sigmoid())
base_model = base_model.to(device)

  nn.init.kaiming_normal(m.weight.data)


In [None]:
optimizer = torch.optim.Adam(base_model.parameters(),lr=0.001)
#preconv_optimizer = torch.optim.Adam(base_model.pre_conv.parameters(),lr=0.001)
criterion = torch.nn.BCELoss()

epoch = 200
model_name = "224_2.pkl"

train_data = label_data[len(label_data)//10:]
val_data = label_data[:len(label_data)//10]


for e in range(epoch):
    print("Epoch ",e)
    epoch_loss = 0
    epoch_acc = 0
    for part in range(10):
        gc.collect()
        print("Part ",part)
        label_dataloader = get_dataloader(train_data[part*len(train_data)//10:(part+1)*len(train_data)//10],
                                         transform = transformSequence,
                                         normalize = None)
        for b_num, (data, label) in enumerate(label_dataloader):
            print("Batch: ", b_num, end='\r')
            data = data.to(device)
            label = label.to(device)
            optimizer.zero_grad()
            #preconv_optimizer.zero_grad()
            pred = base_model.output_act( base_model(data) )
            loss = criterion(pred,label)
            loss.backward()
            optimizer.step()
            #preconv_optimizer.step()
            epoch_loss += loss.item()
            epoch_acc += torch.sum(torch.eq((pred>0.5), label.byte())).item()/14
        del label_dataloader
        torch.save(base_model, model_name)
    print("")
    print("Start Validation")
    val_loss = 0
    val_acc = 0
    val_dataloader = get_dataloader(val_data)
    ans_list = []
    label_list = []
    for b_num, (data, label) in enumerate(val_dataloader):
        print("Batch: ", b_num, end='\r')
        data = data.to(device)
        label = label.to(device)
        pred = base_model.output_act( base_model(data) )
        for one_row in pred.cpu().data.numpy():
            ans_list.append(one_row)
        for one_row in label.cpu().data.numpy():
            label_list.append(one_row)
    del val_dataloader
    auroc = sklearn.metrics.roc_auc_score(np.array(label_list),np.array(ans_list))
    print("")
    print("Epoch loss: ",8*epoch_loss/(9*len(label_data)//10) )
    print("Epoch acc: ",epoch_acc/(9*len(label_data)//10) )
    print("AUROC: ",auroc )
    

    
    

Epoch  0
Part  0
900
Part  1 56
900
Part  2 56
900
Part  3 56
900
Part  4 56
901
Part  5 56
900
Part  6 56
900
Part  7 56
900
Part  8 56
900
Part  9 56
901
Batch:  56
Start Validation
1000
Batch:  62
Epoch loss:  0.0949936746498066
Epoch acc:  0.9509102163251691
AUROC:  0.5194278194130922
Epoch  1
Part  0
900
Part  1 56
900
Part  2 56
900
Part  3 56
900
Part  4 56
901
Part  5 56
900
Part  6 56
900
Part  7 56
900
Part  8 56
900
Part  9 56
901
Batch:  56
Start Validation
1000
Batch:  62
Epoch loss:  0.09335311822143744
Epoch acc:  0.9515768089259913
AUROC:  0.4760431134675942
Epoch  2
Part  0
900
Part  1 56
900
Part  2 56
900
Part  3 56
900
Part  4 56
901
Part  5 56
900
Part  6 56
900
Part  7 56
900
Part  9 56
901
Batch:  56
Start Validation
1000
Batch:  62
Epoch loss:  0.0923425733029955
Epoch acc:  0.9516720364403947
AUROC:  0.5249736456568297
Epoch  3
Part  0
900
Part  1 56
900
Part  2 56
900
Part  3 56
900
Part  4 56
901
Part  5 56
900
Part  6 56
900
Part  7 56
900
Part  8 56
900
Par

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Part  5 56
900
Part  6 56
900
Part  7 56
900
Part  8 56
900
Part  9 56
901
Batch:  56
Start Validation
1000
Batch:  62
Epoch loss:  0.09107315862805244
Epoch acc:  0.9517355214499967
AUROC:  0.5564194056660571
Epoch  7
Part  0
900
Part  1 56
900
Part  2 56
900
Part  3 56
900
Part  4 56
901
Part  5 56
900
Part  6 56
900
Part  7 56
900
Part  8 56
900
Part  9 56
901
Batch:  56
Start Validation
1000
Batch:  34

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Part  3 56
900
Part  4 56
901
Part  5 56
900
Part  6 56
900
Part  7 56
900
Part  8 56
900
Part  9 56
901
Batch:  56
Start Validation
1000
Batch:  621
Epoch loss:  0.09011063784271489
Epoch acc:  0.9517355214499967
AUROC:  0.544435068928834
Epoch  9
Part  0
900
Part  1 56
900
Part  2 56
900
Part  3 56
900
Part  4 56
901
Part  5 56
900
Part  6 56
900
Part  7 56
900
Part  8 56
900
Part  9 56
901
Batch:  56
Start Validation
1000
Batch:  62
Epoch loss:  0.09008761276908483
Epoch acc:  0.9517355214499967
AUROC:  0.5532156063832149
Epoch  10
Part  0
900
Part  1 56
900
Part  2 56
900
Part  3 56
900
Part  4 56
901
Part  5 56
900
Part  6 56
900
Part  7 56
900
Part  8 56
900
Part  9 56
901
Batch:  56
Start Validation
1000
Batch:  62
Epoch loss:  0.08981564253280593
Epoch acc:  0.9517355214499967
AUROC:  0.5638710256478148
Epoch  11
Part  0
900
Part  1 56
900
Part  2 56
900
Part  3 56
900
Part  4 56
901
Part  5 56
900
Part  6 56
900
Part  7 56
900
Part  8 56
900
Part  9 56
901
Batch:  56
Start Val

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Part  5 56
900
Part  6 56
900
Part  7 56
900
Batch:  54