In [1]:
# Before moving to the next step, pip install a few packages, and then restart for them to become active
! pip3 install ipywidgets
! pip3 install pandas
! pip3 install efficientnet_pytorch

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com




In [2]:
#### %matplotlib inline
# python libraties
import os, cv2,itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
from glob import glob
from PIL import Image

# pytorch libraries
import torch
from torch import optim,nn
from torch.autograd import Variable
from torch.utils.data import DataLoader,Dataset
from torchvision import models,transforms
from efficientnet_pytorch import EfficientNet

# sklearn libraries
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# to make the results are reproducible
np.random.seed(10)
torch.manual_seed(10)
torch.cuda.manual_seed(10)

# print(os.listdir("/data/mnist_skin/mnist_skin/skin-cancer-mnist-ham10000/"))

## Path for TEST MODEL

In [3]:
! ls /data/mnist_skin/

README.md			       model_densenet121_focal_loss.pth
logs_densent_transforms.log	       model_densenet201.pth
logs_densent_transforms_resampled.log  model_densenet201_resample_augment.pth
mnist_skin			       notebooks
model_densenet121.pth


In [4]:
model_path = '/data/mnist_skin/model_densenet201_resample_augment.pth'

In [5]:
def get_dict_model(model_path):
    state_dict = torch.load(model_path)
#     print(state_dict.keys())
    return state_dict
#     model.load_state_dict(state_dict)

In [6]:
data_dir = '/data/mnist_skin/mnist_skin/ISIC2018_Task3_Test_Images'
all_image_path = glob(os.path.join(data_dir, '*.jpg'))
imageid_path_dict = {os.path.splitext(os.path.basename(x))[0]: x for x in all_image_path}
lesion_type_dict = {
    'nv': 'Melanocytic nevi',
    'mel': 'dermatofibroma',
    'bkl': 'Benign keratosis-like lesions ',
    'bcc': 'Basal cell carcinoma',
    'akiec': 'Actinic keratoses',
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma'
}

df_test = pd.DataFrame(all_image_path).reset_index(drop=True).rename(columns={0:'path'})

In [10]:
# feature_extract is a boolean that defines if we are finetuning or feature extracting. 
# If feature_extract = False, the model is finetuned and all model parameters are updated. 
# If feature_extract = True, only the last layer parameters are updated, the others remain fixed.
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False
            
            
            
def initialize_model(model_name, model_path, num_classes, feature_extract, use_pretrained=True):
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        """ Resnet18, resnet34, resnet50, resnet101
        """
        model_ft = models.resnet50(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224


    elif model_name == "vgg":
        """ VGG11_bn
        """
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224


    elif model_name == "densenet":
        """ Densenet121
        """
#         model_ft = models.densenet121(pretrained=use_pretrained)
        model_ft = models.densenet201(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "inception":
        """ Inception v3
        Be careful, expects (299,299) sized images and has auxiliary output
        """
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxilary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs,num_classes)
        input_size = 299
        
    elif model_name == 'efficientnet':
        model_ft = EfficientNet.from_pretrained('efficientnet-b7',num_classes=num_classes)
        set_parameter_requires_grad(model_ft, feature_extract)

#         # Handle the primary net
#         num_ftrs = model_ft.fc.in_features
#         model_ft.fc = nn.Linear(num_ftrs,num_classes)
        input_size = 600

    else:
        print("Invalid model name, exiting...")
        exit()
        
    model_ft.load_state_dict(get_dict_model(model_path))
    return model_ft, input_size

### Initialize Model

In [11]:
# resnet,vgg,densenet,inception
model_name = 'efficientnet'
num_classes = 7
feature_extract = False
# Initialize the model for this run
model_ft, input_size = initialize_model(model_name, model_path, 
                                        num_classes, feature_extract, use_pretrained=True)
# Define the device:
device = torch.device('cuda:0')
# Put the model on the device:
model = model_ft.to(device)

# we use Adam optimizer, use cross entropy loss as our loss function
# optimizer = optim.Adam(model.parameters(), lr=1e-3)
optimizer = optim.SGD(model.parameters(), lr=0.002)
criterion = nn.CrossEntropyLoss().to(device)

### Transform for test images

In [12]:
input_size
norm_mean = [0.7630338, 0.5456488, 0.5700453]
norm_std = [0.14092796, 0.1526125, 0.16997054]

In [13]:
# Define a pytorch dataloader for this dataset
class HAM10000(Dataset):
    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        # Load data and get label
        X = Image.open(self.df['path'][index])
        
#         y = torch.tensor(int(self.df['cell_type_idx'][index]))

        if self.transform:
            X = self.transform(X)

        return X#, y

In [14]:
# Same for the validation set:
val_transform = transforms.Compose([transforms.Resize((input_size,input_size)), transforms.ToTensor(),
                                    transforms.Normalize(norm_mean, norm_std)])
validation_set = HAM10000(df_test, transform=val_transform)
val_loader = DataLoader(validation_set, batch_size=32, shuffle=False, num_workers=4)
df_test# = df_val[df_val.columns[[0,8,10]]]

Unnamed: 0,path
0,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...
1,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...
2,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...
3,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...
4,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...
...,...
1506,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...
1507,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...
1508,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...
1509,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...


In [15]:
# this function is used during training process, to calculation the loss and accuracy
class AverageMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
        
def validate(val_loader, model, criterion, optimizer, epoch):
    model.eval()
    val_loss = AverageMeter()
    val_acc = AverageMeter()
    prediction_list = []
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            images = data
            N = images.size(0)
            images = Variable(images).to(device)

            outputs = model(images)
            prediction = outputs.max(1, keepdim=True)[1]
            prediction_list.append(prediction)


    print('------------------------------------------------------------')
    print('[epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss.avg, val_acc.avg))
    print('------------------------------------------------------------')
    return prediction_list

In [16]:
epoch_num = 1
best_val_acc = 0
total_loss_val, total_acc_val = [],[]
# for epoch in range(1, epoch_num+1):
print('starting test')
prediction_list = validate(val_loader, model, criterion, optimizer, 1)

starting test
------------------------------------------------------------
[epoch 1], [val loss 0.00000], [val acc 0.00000]
------------------------------------------------------------


In [17]:
prediction_list[0].size()

torch.Size([32, 1])

In [18]:
size_total = 0
for x in prediction_list:
    size_total += x.size()[0]

In [19]:
size_total

1511

In [20]:
prediction_list[0]

tensor([[4],
        [4],
        [4],
        [2],
        [6],
        [4],
        [4],
        [6],
        [4],
        [2],
        [4],
        [1],
        [2],
        [4],
        [4],
        [4],
        [6],
        [2],
        [4],
        [4],
        [4],
        [4],
        [2],
        [6],
        [1],
        [4],
        [4],
        [5],
        [2],
        [4],
        [4],
        [6]], device='cuda:0')

In [21]:

y_predict = []
for x in prediction_list:
    y_predict.extend(np.squeeze(x.cpu().numpy().T))
y_predict

[4,
 4,
 4,
 2,
 6,
 4,
 4,
 6,
 4,
 2,
 4,
 1,
 2,
 4,
 4,
 4,
 6,
 2,
 4,
 4,
 4,
 4,
 2,
 6,
 1,
 4,
 4,
 5,
 2,
 4,
 4,
 6,
 4,
 2,
 4,
 2,
 3,
 1,
 4,
 4,
 1,
 4,
 4,
 4,
 2,
 4,
 4,
 4,
 2,
 4,
 4,
 4,
 4,
 2,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 6,
 2,
 4,
 2,
 4,
 2,
 2,
 4,
 4,
 2,
 6,
 4,
 5,
 4,
 4,
 6,
 4,
 4,
 6,
 4,
 4,
 6,
 4,
 3,
 1,
 4,
 4,
 2,
 1,
 4,
 6,
 4,
 2,
 4,
 4,
 0,
 2,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 6,
 4,
 5,
 1,
 2,
 4,
 2,
 4,
 6,
 4,
 4,
 0,
 4,
 4,
 6,
 4,
 4,
 1,
 2,
 4,
 4,
 6,
 4,
 2,
 4,
 4,
 4,
 6,
 4,
 6,
 4,
 2,
 6,
 6,
 2,
 2,
 2,
 4,
 2,
 4,
 6,
 4,
 6,
 4,
 4,
 6,
 4,
 4,
 4,
 4,
 1,
 2,
 6,
 2,
 1,
 2,
 4,
 4,
 4,
 2,
 4,
 4,
 1,
 4,
 4,
 4,
 4,
 4,
 6,
 1,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 2,
 4,
 4,
 2,
 0,
 4,
 0,
 2,
 4,
 4,
 4,
 4,
 6,
 4,
 4,
 4,
 4,
 4,
 3,
 4,
 4,
 1,
 4,
 6,
 2,
 4,
 4,
 4,
 4,
 4,
 2,
 6,
 4,
 4,
 4,
 0,
 4,
 6,
 4,
 5,
 6,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 5,
 4,
 4,
 6,
 0,
 4,
 4,
 4,
 4,
 4,
 2,
 4,
 4,
 2,


In [22]:
plot_labels = ['AKIEC', 'BCC', 'BKL', 'DF', 'NV', 'VASC','MEL']

In [23]:
predicted_labels = []
for x in y_predict:
    predicted_labels.append(plot_labels[x])

In [24]:
predicted_labels

['NV',
 'NV',
 'NV',
 'BKL',
 'MEL',
 'NV',
 'NV',
 'MEL',
 'NV',
 'BKL',
 'NV',
 'BCC',
 'BKL',
 'NV',
 'NV',
 'NV',
 'MEL',
 'BKL',
 'NV',
 'NV',
 'NV',
 'NV',
 'BKL',
 'MEL',
 'BCC',
 'NV',
 'NV',
 'VASC',
 'BKL',
 'NV',
 'NV',
 'MEL',
 'NV',
 'BKL',
 'NV',
 'BKL',
 'DF',
 'BCC',
 'NV',
 'NV',
 'BCC',
 'NV',
 'NV',
 'NV',
 'BKL',
 'NV',
 'NV',
 'NV',
 'BKL',
 'NV',
 'NV',
 'NV',
 'NV',
 'BKL',
 'NV',
 'NV',
 'NV',
 'NV',
 'NV',
 'NV',
 'NV',
 'NV',
 'NV',
 'NV',
 'NV',
 'NV',
 'MEL',
 'BKL',
 'NV',
 'BKL',
 'NV',
 'BKL',
 'BKL',
 'NV',
 'NV',
 'BKL',
 'MEL',
 'NV',
 'VASC',
 'NV',
 'NV',
 'MEL',
 'NV',
 'NV',
 'MEL',
 'NV',
 'NV',
 'MEL',
 'NV',
 'DF',
 'BCC',
 'NV',
 'NV',
 'BKL',
 'BCC',
 'NV',
 'MEL',
 'NV',
 'BKL',
 'NV',
 'NV',
 'AKIEC',
 'BKL',
 'NV',
 'NV',
 'NV',
 'NV',
 'NV',
 'NV',
 'NV',
 'NV',
 'MEL',
 'NV',
 'VASC',
 'BCC',
 'BKL',
 'NV',
 'BKL',
 'NV',
 'MEL',
 'NV',
 'NV',
 'AKIEC',
 'NV',
 'NV',
 'MEL',
 'NV',
 'NV',
 'BCC',
 'BKL',
 'NV',
 'NV',
 'MEL',
 'NV',
 'BKL

In [25]:
df_test['prediction']=predicted_labels

In [26]:
df_test['path'][0]

'/data/mnist_skin/mnist_skin/ISIC2018_Task3_Test_Images/ISIC_0034956.jpg'

In [27]:
df_test['image'] = df_test['path'].apply(lambda x: x.split('/')[-1].split('.')[0])

In [28]:
df_test['probability'] = 1.0

In [29]:
df_test.groupby(by=['image','prediction','probability']).size()

image         prediction  probability
ISIC_0034524  NV          1.0            1
ISIC_0034525  NV          1.0            1
ISIC_0034526  BKL         1.0            1
ISIC_0034527  NV          1.0            1
ISIC_0034528  NV          1.0            1
                                        ..
ISIC_0036060  BKL         1.0            1
ISIC_0036061  BKL         1.0            1
ISIC_0036062  BKL         1.0            1
ISIC_0036063  NV          1.0            1
ISIC_0036064  MEL         1.0            1
Length: 1511, dtype: int64

In [30]:
df_test

Unnamed: 0,path,prediction,image,probability
0,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...,NV,ISIC_0034956,1.0
1,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...,NV,ISIC_0035014,1.0
2,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...,NV,ISIC_0035508,1.0
3,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...,BKL,ISIC_0035327,1.0
4,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...,MEL,ISIC_0035651,1.0
...,...,...,...,...
1506,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...,NV,ISIC_0035375,1.0
1507,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...,BKL,ISIC_0035425,1.0
1508,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...,NV,ISIC_0034661,1.0
1509,/data/mnist_skin/mnist_skin/ISIC2018_Task3_Tes...,NV,ISIC_0034821,1.0


In [31]:
out_df = df_test.pivot(index="image", columns="prediction", values="probability").reset_index().fillna(0)

In [32]:
out_df = out_df.append(pd.DataFrame.from_dict({'image':['ISIC_0035068'],'AKIEC':[0.0],
              'BCC':[0.0],
              'BKL':[0.0],
              'DF':[0.0],
              'MEL':[0.0],
              'NV':[1.0],
              'VASC':[0.0]}),ignore_index=True)

In [33]:
out_df

Unnamed: 0,image,AKIEC,BCC,BKL,DF,MEL,NV,VASC
0,ISIC_0034524,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,ISIC_0034525,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,ISIC_0034526,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,ISIC_0034527,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,ISIC_0034528,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...
1507,ISIC_0036061,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1508,ISIC_0036062,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1509,ISIC_0036063,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1510,ISIC_0036064,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [34]:
out_df.to_csv('test.csv',index=False)