In [1]:
import sklearn
import sklearn.metrics
from PIL import Image
import torch
import os
import numpy as np
import pandas as pd
import math

In [11]:
### DEVICE ###
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# NOTE(review): torch.load unpickles arbitrary Python objects -- only load
# checkpoints that come from a trusted source.  Recent PyTorch releases default
# to weights_only=True, which rejects fully pickled models like this one; pass
# weights_only=False explicitly there if loading fails.
# map_location remaps the saved storages onto `device`, so a checkpoint written
# on a GPU machine can still be opened on a CPU-only host.
base_model = torch.load(os.path.join("./","224_2-Copy1.pkl"), map_location=device)
# The evaluation cells move every batch to `device`; keep the model on the same
# device and switch it to inference mode.
base_model = base_model.to(device).eval()

In [3]:
root_dir = os.path.join("../","data","ntu_final_data")

train_file = pd.read_csv(os.path.join(root_dir,"medical_images","train.csv"))

# Keep [image file name, label string] for every row whose "Labels" cell holds
# a string.  Rows whose cell is not a string (a missing value is read as a
# float NaN) are treated as unlabelled and skipped.
# Iterating the two columns once avoids building a full row Series per lookup.
label_data = [
    [image_name, labels]
    for image_name, labels in zip(train_file["Image Index"], train_file["Labels"])
    if isinstance(labels, str)
]
# Unlabelled images are not collected in this notebook; the name is kept
# (empty) so that code referring to it keeps working.
unlabel_data = []

img_dirs = os.path.join(root_dir,"medical_images","images")

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
def get_dataloader_RGB(data_list, transform=None, normalize=None, batch_size=4):
    """Load labelled images as RGB arrays and wrap them in a sequential DataLoader.

    Each image is opened from ``img_dirs``, converted to RGB, passed through
    ``transform`` (or resized to 224x224 when no transform is given), divided
    by 255 and transposed from (H, W, C) to (C, H, W).

    Parameters
    ----------
    data_list : sequence of (file_name, label_string)
        ``label_string`` contains whitespace-separated integers.
    transform : callable, optional
        Applied to the RGB PIL image.  Its result must be convertible by
        ``np.array`` into an (H, W, C) array.
    normalize : optional
        Currently ignored; kept so existing calls that pass it still work.
    batch_size : int, default 4
        Number of samples per batch.

    Returns
    -------
    torch.utils.data.DataLoader
        Unshuffled loader yielding ``(image, label)`` float32 tensor batches.
    """
    images = []
    labels = []
    print(len(data_list))
    for i, pair in enumerate(data_list):
        print(i,end='\r')
        # convert() returns an independent, fully loaded image, so the file
        # handle can be released as soon as the conversion is done.
        with Image.open(os.path.join(img_dirs, pair[0])) as raw:
            img = raw.convert(mode="RGB")
        if transform is not None:
            img = transform(img)
        else:
            img = img.resize((224,224))
        # Divide by 255, then move the channel axis first: HWC -> CHW.
        images.append(np.transpose(np.array(img) / 255, axes=[2,0,1]))
        labels.append(np.array([int(c) for c in pair[1].split()]))

    # Build one contiguous float32 array first: handing a Python list of
    # ndarrays straight to torch.Tensor copies element by element and is very
    # slow.  np.asarray also copes with an empty list.
    label_data_x = torch.from_numpy(np.asarray(images, dtype=np.float32))
    label_data_y = torch.from_numpy(np.asarray(labels, dtype=np.float32))
    label_dataset = torch.utils.data.TensorDataset(label_data_x, label_data_y)
    return torch.utils.data.DataLoader(dataset=label_dataset,
                                       batch_size=batch_size,
                                       shuffle=False,
                                       num_workers=1)

In [5]:
def get_dataloader_L_512(data_list, batch_size=4):
    """Load labelled images as 512x512 grayscale arrays in a sequential DataLoader.

    Each image is opened from ``img_dirs``, converted to mode "L", resized to
    512x512, given a leading channel axis and divided by 255.

    Parameters
    ----------
    data_list : sequence of (file_name, label_string)
        ``label_string`` contains whitespace-separated integers.
    batch_size : int, default 4
        Number of samples per batch.

    Returns
    -------
    torch.utils.data.DataLoader
        Unshuffled loader yielding ``(image, label)`` float32 tensor batches;
        each image has shape (1, 512, 512).
    """
    images = []
    labels = []
    print(len(data_list))
    for i, name in enumerate(data_list):
        print(i,end='\r')
        # convert() returns an independent, fully loaded image, so the file
        # handle can be released as soon as the conversion is done.
        with Image.open(os.path.join(img_dirs, name[0])) as raw:
            img = raw.convert(mode="L")
        img = np.array(img.resize((512,512)))
        # Add the single channel axis in front: (H, W) -> (1, H, W).
        images.append(np.expand_dims(img, axis=0) / 255)
        labels.append(np.array([int(c) for c in name[1].split()]))

    # Build one contiguous float32 array first: handing a Python list of
    # ndarrays straight to torch.Tensor copies element by element and is very
    # slow.  np.asarray also copes with an empty list.
    label_data_x = torch.from_numpy(np.asarray(images, dtype=np.float32))
    label_data_y = torch.from_numpy(np.asarray(labels, dtype=np.float32))
    label_dataset = torch.utils.data.TensorDataset(label_data_x, label_data_y)
    return torch.utils.data.DataLoader(dataset=label_dataset,
                                       batch_size=batch_size,
                                       shuffle=False,
                                       num_workers=1)

In [12]:
# Evaluate on the first tenth of the labelled samples.
val_split = label_data[:len(label_data)//10]
val_dataloader = get_dataloader_RGB(val_split)
val_loss = 0   # running sum of per-sample losses
val_acc = 0    # running sum of per-sample label accuracies
criterion = torch.nn.BCELoss()
ans_list = []
label_list = []
# Inference only: no_grad() stops autograd from recording a graph per batch.
with torch.no_grad():
    for b_num, (data, label) in enumerate(val_dataloader):
        print("Batch: ", b_num, end='\r')
        data = data.to(device)
        label = label.to(device)
        pred = base_model.output_act( base_model(data) )
        loss = criterion(pred,label)
        # BCELoss returns the batch mean; weight it by the actual batch size
        # so a shorter final batch is not over-counted.
        val_loss += loss.item() * data.size(0)
        # Fraction of correctly thresholded labels, summed over the samples
        # of this batch (label.size(1) is the number of labels per sample).
        val_acc += ((pred > 0.5) == (label > 0.5)).sum().item() / label.size(1)
        # Extending with a 2-D array appends one row per sample.
        ans_list.extend(pred.cpu().numpy())
        label_list.extend(label.cpu().numpy())
n_val = len(val_dataloader.dataset)
print("val loss: ", val_loss / n_val)
print("val acc: ", val_acc / n_val)

1000
val loss:  0.18648454277217388
val acc:  0.9485000000000005


In [13]:

# ROC AUC of the scores collected by the most recent evaluation loop:
# one row per validation sample, one column per label.
sklearn.metrics.roc_auc_score(
    np.asarray(label_list),
    np.asarray(ans_list),
)


0.6092768826061293

In [6]:
# for 512x512 model
# Evaluate on the first tenth of the labelled samples.
val_split = label_data[:len(label_data)//10]
val_dataloader = get_dataloader_L_512(val_split)
val_loss = 0   # running sum of per-sample losses
val_acc = 0    # running sum of per-sample label accuracies
criterion = torch.nn.BCELoss()
ans_list = []
label_list = []
# Inference only: no_grad() stops autograd from recording a graph per batch.
with torch.no_grad():
    for b_num, (data, label) in enumerate(val_dataloader):
        print("Batch: ", b_num, end='\r')
        data = data.to(device)
        label = label.to(device)
        # The grayscale 512x512 input goes through pre_conv before the base network.
        pred = base_model.output_act( base_model( base_model.pre_conv(data) )  )
        loss = criterion(pred,label)
        # BCELoss returns the batch mean; weight it by the actual batch size
        # so a shorter final batch is not over-counted.
        val_loss += loss.item() * data.size(0)
        # Fraction of correctly thresholded labels, summed over the samples
        # of this batch (label.size(1) is the number of labels per sample).
        val_acc += ((pred > 0.5) == (label > 0.5)).sum().item() / label.size(1)
        # Extending with a 2-D array appends one row per sample.
        ans_list.extend(pred.cpu().numpy())
        label_list.extend(label.cpu().numpy())
n_val = len(val_dataloader.dataset)
print("val loss: ", val_loss / n_val)
print("val acc: ", val_acc / n_val)

1000
val loss:  0.2047331990227103
val acc:  0.9485000000000005
