# Setup

In [1]:
import os
from pathlib import Path
import sys

# Name of the environment variable that stores the path to the project's root directory.
# Defined first so that the Colab branch and the local branch use the same key.
project_environment_variable = "SKIN_LESION_CLASSIFICATION"

# If we're using Google Colab, we mount Google Drive and point the environment variable
# to the relevant folder there:
if 'COLAB_GPU' in os.environ:
    from google.colab import drive
    drive.mount('/content/drive')
    os.environ[project_environment_variable] = '/content/drive/MyDrive/Colab Notebooks/skin-lesion-classification'

# Otherwise, we rely on the environment variable already set on the local system.
# Fail with an explicit message if it is missing: Path(None) would otherwise raise an
# uninformative TypeError.
if project_environment_variable not in os.environ:
    raise RuntimeError(
        f"Environment variable '{project_environment_variable}' is not set. "
        "Set it to the project's root directory before starting the notebook."
    )

# Path to the root directory of the project:
project_path = Path(os.environ[project_environment_variable])

# Path to <project root>/scripts (from where custom modules will be imported):
scripts_path = project_path.joinpath("scripts")

# Add this path to sys.path so that Python will look there for modules.
# Guarded so that re-running this cell does not append duplicate entries.
if str(scripts_path) not in sys.path:
    sys.path.append(str(scripts_path))

# Now import path_setup from our custom utils module to create a dictionary of paths to the subdirectories of our root directory:
from utils import path_setup
path = path_setup.subfolders(project_path)

path['project'] : D:\projects\skin-lesion-classification
path['images'] : D:\projects\skin-lesion-classification\images
path['models'] : D:\projects\skin-lesion-classification\models
path['expository'] : D:\projects\skin-lesion-classification\expository
path['literature'] : D:\projects\skin-lesion-classification\literature
path['notebooks'] : D:\projects\skin-lesion-classification\notebooks
path['presentation'] : D:\projects\skin-lesion-classification\presentation
path['scripts'] : D:\projects\skin-lesion-classification\scripts
path['streamlit'] : D:\projects\skin-lesion-classification\streamlit


<a id='model_setup'></a>
# Model setup

In [2]:
from typing import Type, Union      # For type hints
from processing import process      # Custom module for processing metadata

# --- Location of the metadata ---------------------------------------------------------
# Directory containing the metadata.csv file, and the name of that file.
data_dir: Path = path["images"]
csv_filename: str = "metadata.csv"

# --- Train/validation split -----------------------------------------------------------
# Ratio of training set to validation set. See discussion below for explanation.
tvr: int = 3
# Random seed for parts of the process where randomness is called for.
seed: int = 0
# If False, then, for each lesion, we choose a random image to assign to our training set.
keep_first: bool = False
# If True, we stratify classes so that the proportions remain as stable as possible after
# the train/val split. If False, the proportions will be roughly similar.
stratified: bool = True

# --- Classes --------------------------------------------------------------------------
# The lesion types we are interested in classifying. Any lesion type not listed is grouped
# into the 0-label class, so there is no need to write "other" here.
# Can also be a dictionary, like { 'malignant' : ['mel', 'bcc'], 'benign' : ['nv', 'bkl']}.
# If 'other' is not desired, the original note points to the restrict_to attribute
# (it is not set anywhere in this cell).
to_classify: Union[list, dict] = [
    "mel",
    "bcc",
    "akiec",
    "nv",
]

# NOTE(review): presumably controls whether the training set is limited to a single image
# per lesion -- confirm against the process class.
train_one_img_per_lesion: Union[None, bool] = False

# NOTE(review): the log printed when the process instance is created reports that this many
# predictions are combined per lesion in the validation set -- confirm against the process class.
val_expansion_factor: Union[None,int] = 3

# --- Class balancing ------------------------------------------------------------------
# Handling class imbalance by upsampling minority classes/downsampling majority classes:
# specify how many images of each lesion diagnosis we want in our training set.
# Could also leave out "other" here, and include e.g. "df: 2000" if we wanted to.
sample_size: Union[None, dict] = {
    "mel": 2000,
    "bcc": 2000,
    "akiec": 2000,
    "nv": 2000,
    "other" : 2000,
}

In [3]:
# Build the processed-metadata object (an instance of the process class) from the
# settings chosen in the previous cell. Every argument is passed by keyword.
balance_ta = process(
    data_dir=data_dir,
    csv_filename=csv_filename,
    tvr=tvr,
    seed=seed,
    keep_first=keep_first,
    stratified=stratified,
    to_classify=to_classify,
    train_one_img_per_lesion=train_one_img_per_lesion,
    val_expansion_factor=val_expansion_factor,
    sample_size=sample_size,
)

- Loaded file 'D:\projects\skin-lesion-classification\images\metadata.csv'.
- Inserted 'num_images' column in dataframe, to the right of 'lesion_id' column.
- Inserted 'label' column in dataframe, to the right of 'dx' column: 
  {'bkl': 0, 'df': 0, 'vasc': 0, 'akiec': 1, 'bcc': 2, 'mel': 3, 'nv': 4}
- Added 'set' column to dataframe, with values 't1', 'v1', 'ta', and 'va', to the right of 'localization' column.
- Basic, overall dataframe (pre-train/test split): self.df
- Balancing classes in training set.
- Balanced training set (uses as many different images per lesion as possible): self.df_train
- Expanding validation set: will combine 3 predictions into one, for each lesion in val set.
- Expanded validation set (one image per lesion, repeated 3 times): self.df_val1
- Expanded validation set (use up to 3 different images per lesion, if available): self.df_val_a
- Small sample dataframes for code testing: self._df_train_code_test, self._df_val1_code_test, self._df_val_a_code_test


In [4]:
import torchvision.transforms as transforms
# Image pipeline, applied in the order listed: random 300x300 crop, colour jitter,
# resize, conversion to tensor, normalisation.
transform = transforms.Compose(
    [
        transforms.RandomCrop((300, 300)),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
        # Resize images to fit ResNet input size
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        # Normalize with ImageNet stats
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [5]:
import pandas as pd
from typing import Union, List, Callable
import torchvision.models as models

# Processed data to be fed into the model for training. Must either be an instance of the
# process class, or a dataframe of the same format as source.df if source were an instance
# of the process class.
source: Union[process, pd.DataFrame] = balance_ta

# Path to directory where models/model info/model results are stored.
model_dir: Path = path["models"]

# Transform to be applied to images before feeding into the neural network.
# (Re-binds the transform built in the previous cell, adding a type annotation.)
transform: Union[None, transforms.Compose, List[Callable]] = transform

# For saving model and related files.
# Default "rn18" (if ResNet model) or "EffNet" (if EfficientNet), or "cnn".
filename_stem: Union[None, str] = "rn18"
# Something descriptive and unique for future reference. Default empty string "".
filename_suffix: Union[None, str] = "rndcropjit"

# Pre-trained model. Default: ResNet18.
# EfficientNet alternative, kept for reference:
# model: Union[None, models.ResNet, models.EfficientNet] = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)
model: Union[None, models.ResNet, models.EfficientNet] = models.resnet18(
    weights="ResNet18_Weights.DEFAULT"
)

# NOTE(review): presumably unfreezes the last part of the pre-trained network for
# training -- confirm against the cnn class.
unfreeze_last: Union[None, bool] = True
# overwrite: Union[None, bool] = True

In [6]:
# Create an instance of the cnn class with attribute values as above.
from multiclass_models import cnn

rn18_bal_ta_jit = cnn(
    source=source,
    model_dir=model_dir,
    transform=transform,
    filename_stem=filename_stem,
    filename_suffix=filename_suffix,
    model=model,
    unfreeze_last=unfreeze_last,
)

New files will be created. 
Base filename: rn18_ta_bal_uflast_10e_rndcropjit_00
Attributes saved to file: D:\projects\skin-lesion-classification\models\rn18_ta_bal_uflast_10e_rndcropjit_00_attributes.json


In [7]:
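# Training is disabled in this cell (uncomment the lines below to retrain).
# The batch losses shown underneath are the recorded output of an earlier run.
# import time

# tic = time.time()
# rn18_bal_ta_jit.train()
# toc = time.time()
# print(f"Elapsed time: {toc - tic}")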

Batch: 0, running loss: 1.6842782497406006
Batch: 10, running loss: 19.145963191986084
Batch: 20, running loss: 34.67513358592987
Batch: 30, running loss: 49.071648955345154
Batch: 40, running loss: 63.74217236042023
Batch: 50, running loss: 77.20099461078644
Batch: 60, running loss: 90.57211661338806
Batch: 70, running loss: 103.72088730335236
Batch: 80, running loss: 116.80320429801941
Batch: 90, running loss: 128.45613777637482
Batch: 100, running loss: 139.98243701457977
Batch: 110, running loss: 151.77183717489243
Batch: 120, running loss: 164.47522085905075
Batch: 130, running loss: 176.82181704044342
Batch: 140, running loss: 188.5344362258911
Batch: 150, running loss: 200.57091188430786
Batch: 160, running loss: 212.31532156467438
Batch: 170, running loss: 223.71758544445038
Batch: 180, running loss: 233.3976131081581
Batch: 190, running loss: 244.59328520298004
Batch: 200, running loss: 255.93721771240234
Batch: 210, running loss: 268.06265330314636
Batch: 220, running loss: 2

In [None]:
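# Disabled: rebuilds a ResNet18, loads the state dict saved under the instance's filename
# (.pth) and attaches both to the instance, as an alternative to retraining.
# import torch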
# import torch.nn as nn

# instance = rn18_bal_ta_jit

# model = models.resnet18()
# num_ftrs = model.fc.in_features
# model.fc = nn.Linear(num_ftrs, len(instance.label_codes))

# file_path_pth = instance.model_dir.joinpath(instance._filename + ".pth")
# state_dict = torch.load(file_path_pth)
# model.load_state_dict(state_dict)

# # model = models.efficientnet_b0()
# # num_ftrs = model.classifier[1].in_features
# # model.classifier[1] = nn.Linear(num_ftrs, len(instance.label_codes))

# instance.model = model
# instance.state_dict = state_dict

In [8]:
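# Disabled: recomputes the validation probabilities with get_probabilities.
# NOTE(review): the recorded output under this cell names "rndcrop_01" files on Colab,
# whereas the next cell loads "rndcropjit_01" files -- confirm the outputs are current.
# from utils import print_header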
# from multiclass_models import get_probabilities

# instance = rn18_bal_ta_jit

# tic = time.time()

# instance.df_probabilities_val1 = get_probabilities(df=instance.df_val1,
#                                                    data_dir=instance.data_dir,
#                                                    model_dir=instance.model_dir,
#                                                    model=instance.model,
#                                                    filename=instance._filename,
#                                                    label_codes=instance.label_codes,
#                                                    transform=instance.transform,
#                                                    batch_size=instance.batch_size,
#                                                    Print=False,
#                                                    save_as=instance._filename + "_val1",)

# toc = time.time()

# print(f"Elapsed time: {toc - tic}")

# instance.df_probabilities_val_a = get_probabilities(df=instance.df_val_a,
#                                                     data_dir=instance.data_dir,
#                                                     model_dir=instance.model_dir,
#                                                     model=instance.model,
#                                                     filename=instance._filename,
#                                                     label_codes=instance.label_codes,
#                                                     transform=instance.transform,
#                                                     batch_size=instance.batch_size,
#                                                     Print=False,
#                                                     save_as=instance._filename + "_val_a",)
# tic = time.time()

# print(f"Elapsed time: {tic - toc}")

Saving probabilities: /content/drive/MyDrive/Colab Notebooks/skin-lesion-classification/models/rn18_ta_bal_uflast_10e_rndcrop_01_val1_probabilities.csv
Elapsed time: 147.73473620414734
Saving probabilities: /content/drive/MyDrive/Colab Notebooks/skin-lesion-classification/models/rn18_ta_bal_uflast_10e_rndcrop_01_val_a_probabilities.csv
Elapsed time: 148.00056433677673


In [7]:
instance = rn18_bal_ta_jit

# Previously saved validation probabilities, stored in the model directory.
# NOTE(review): these file names end in "_01", while the instance created above reported
# the base filename "..._rndcropjit_00" -- confirm that these are the intended files.
file_path1 = instance.model_dir / "rn18_ta_bal_uflast_10e_rndcropjit_01_val1_probabilities.csv"
file_path_a = instance.model_dir / "rn18_ta_bal_uflast_10e_rndcropjit_01_val_a_probabilities.csv"

# The first CSV column is used as the dataframe index.
instance.df_probabilities_val1 = pd.read_csv(file_path1, index_col=0)
instance.df_probabilities_val_a = pd.read_csv(file_path_a, index_col=0)

In [8]:
from utils import print_header

instance = rn18_bal_ta_jit

print_header("Validation set: one image per lesion")
# Identifying columns first, followed by every column whose name starts with 'prob'.
display_columns = ['lesion_id', 'image_id', 'dx']
display_columns.extend(
    col for col in instance.df_probabilities_val1.columns if col.startswith('prob')
)
display(instance.df_probabilities_val1.loc[:, display_columns].head())

print_header("Validation set: all images per lesion")
display(instance.df_probabilities_val_a.loc[:, display_columns].head())


VALIDATION SET: ONE IMAGE PER LESION



Unnamed: 0,lesion_id,image_id,dx,prob_other,prob_akiec,prob_bcc,prob_mel,prob_nv
0,HAM_0002730,ISIC_0025661,bkl,0.047352,0.000851,0.000754,0.943291,0.007752
1,HAM_0002730,ISIC_0025661,bkl,0.33462,0.106245,0.034429,0.474315,0.050391
2,HAM_0002730,ISIC_0025661,bkl,0.646562,0.048694,0.043296,0.215025,0.046423
3,HAM_0001466,ISIC_0027850,bkl,0.638631,0.105183,0.008891,0.174365,0.072931
4,HAM_0001466,ISIC_0027850,bkl,0.467379,0.00014,0.000118,0.519765,0.012598



VALIDATION SET: ALL IMAGES PER LESION



Unnamed: 0,lesion_id,image_id,dx,prob_other,prob_akiec,prob_bcc,prob_mel,prob_nv
0,HAM_0002730,ISIC_0026769,bkl,0.319865,0.489471,0.005783,0.173567,0.011314
1,HAM_0002730,ISIC_0026769,bkl,0.274868,0.546251,0.006499,0.165906,0.006476
2,HAM_0002730,ISIC_0025661,bkl,0.133562,0.000708,0.002278,0.837677,0.025775
3,HAM_0001466,ISIC_0031633,bkl,0.186304,0.084551,0.000807,0.72102,0.007318
4,HAM_0001466,ISIC_0027850,bkl,0.067095,1.8e-05,3.2e-05,0.894343,0.038511


In [9]:
from collections import OrderedDict
from typing import Dict, List
from multiclass_models import final_prediction

instance = rn18_bal_ta_jit

# Per-image class probabilities for the two validation variants.
raw_probabilities_df1: pd.DataFrame = instance.df_probabilities_val1
raw_probabilities_df_a: pd.DataFrame = instance.df_probabilities_val_a

# Baseline configuration: every optional setting is left at None; the trailing comments
# show example values that could be used instead.
# NOTE(review): aggregate_method and prefix are assigned in this cell but are not passed to
# final_prediction below -- confirm whether they should be forwarded.
aggregate_method: Union[None, Dict[str, List[str]]] = None  # { 'max' : ['mel', 'bcc', 'akiec'], 'min' : ['nv'], 'mean' : ['other']}
threshold_dict_help: Union[None, OrderedDict[str, float]] = None  # OrderedDict([('mel',0.4), ('bcc', 0.4), ('akiec', 0.4)])
threshold_dict_hinder: Union[None, OrderedDict[str, float]] = None  # OrderedDict([('nv',0.6)])
votes_to_win_dict: Union[None, OrderedDict[str, int]] = None  # OrderedDict([('mel',1), ('bcc',1), ('akiec',1)])
label_codes: Dict[int, str] = instance.label_codes
prefix: Union[None, str] = 'prob_'

# Keyword arguments shared by both final_prediction calls.
prediction_settings = dict(
    threshold_dict_help=threshold_dict_help,
    threshold_dict_hinder=threshold_dict_hinder,
    votes_to_win_dict=votes_to_win_dict,
    label_codes=label_codes,
)

print_header("Validation set, one image per lesion: combining probabilities and making predictions")

instance.df_pred_val1 = final_prediction(
    raw_probabilities_df=raw_probabilities_df1,
    **prediction_settings,
)

# Identifying columns, then the probability columns, then the two prediction columns.
display_columns = ['lesion_id', 'image_id', 'dx']
display_columns += [col for col in instance.df_probabilities_val1.columns if col.startswith('prob')]
display_columns += ['pred', 'pred_final']
display(instance.df_pred_val1[display_columns].head())

print_header("Validation set, all images per lesion: combining probabilities, making predictions, and combining predictions")

instance.df_pred_val_a = final_prediction(
    raw_probabilities_df=raw_probabilities_df_a,
    **prediction_settings,
)

display(instance.df_pred_val_a[display_columns].head())


VALIDATION SET, ONE IMAGE PER LESION: COMBINING PROBABILITIES AND MAKING PREDICTIONS



Unnamed: 0,lesion_id,image_id,dx,prob_other,prob_akiec,prob_bcc,prob_mel,prob_nv,pred,pred_final
0,HAM_0002730,ISIC_0025661,bkl,0.047352,0.000851,0.000754,0.943291,0.007752,3,3
1,HAM_0002730,ISIC_0025661,bkl,0.33462,0.106245,0.034429,0.474315,0.050391,3,3
2,HAM_0002730,ISIC_0025661,bkl,0.646562,0.048694,0.043296,0.215025,0.046423,0,3
3,HAM_0001466,ISIC_0027850,bkl,0.638631,0.105183,0.008891,0.174365,0.072931,0,3
4,HAM_0001466,ISIC_0027850,bkl,0.467379,0.00014,0.000118,0.519765,0.012598,3,3



VALIDATION SET, ALL IMAGES PER LESION: COMBINING PROBABILITIES, MAKING PREDICTIONS, AND COMBINING PREDICTIONS



Unnamed: 0,lesion_id,image_id,dx,prob_other,prob_akiec,prob_bcc,prob_mel,prob_nv,pred,pred_final
0,HAM_0002730,ISIC_0026769,bkl,0.319865,0.489471,0.005783,0.173567,0.011314,1,1
1,HAM_0002730,ISIC_0026769,bkl,0.274868,0.546251,0.006499,0.165906,0.006476,1,1
2,HAM_0002730,ISIC_0025661,bkl,0.133562,0.000708,0.002278,0.837677,0.025775,3,1
3,HAM_0001466,ISIC_0031633,bkl,0.186304,0.084551,0.000807,0.72102,0.007318,3,3
4,HAM_0001466,ISIC_0027850,bkl,0.067095,1.8e-05,3.2e-05,0.894343,0.038511,3,3


In [10]:
# from utils import print_header
from evaluation import weighted_average_f, confusion_matrix_with_metric

instance = rn18_bal_ta_jit
map_labels = instance.label_codes

# Keep a single row per lesion_id (drop_duplicates keeps the first occurrence), then read
# the true label and the final prediction from that row.
lesion_rows1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')
target1 = lesion_rows1['label']
prediction1 = lesion_rows1['pred_final']

lesion_rows_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')
target_a = lesion_rows_a['label']
prediction_a = lesion_rows_a['pred_final']

# Target-by-prediction contingency tables, including the 'All' margins.
txp1 = pd.crosstab(target1, prediction1, margins=True, dropna=False)
txp_a = pd.crosstab(target_a, prediction_a, margins=True, dropna=False)

beta = 2
# Weights inversely proportional to relative class size in the training set, giving more importance to smaller classes.
# (Alternative noted in the original cell: None.)
class_shares = instance.df_train['label'].value_counts(normalize=True).sort_index()
weights = 1 / class_shares.values

# Keyword arguments shared by both confusion-matrix calls.
shared_cm_options = dict(
    lst=None,
    full_pad=True,
    func=weighted_average_f,
    beta=beta,
    weights=weights,
    percentage=False,
    map_labels=map_labels,
)

instance.cm1 = confusion_matrix_with_metric(AxB=txp1, **shared_cm_options)
instance.cm_a = confusion_matrix_with_metric(AxB=txp_a, **shared_cm_options)

# Missing entries are shown as '_' in the displayed tables only.
print_header("Confusion matrix: validation set, one image per lesion")
display(instance.cm1.fillna('_'))

print_header("Confusion matrix: validation set, all images per lesion")
display(instance.cm_a.fillna('_'))


CONFUSION MATRIX: VALIDATION SET, ONE IMAGE PER LESION



predicted,other,akiec,bcc,mel,nv,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,103.0,22.0,6.0,61.0,33.0,225,0.457778
akiec,7.0,35.0,3.0,7.0,5.0,57,0.614035
bcc,12.0,18.0,34.0,12.0,6.0,82,0.414634
mel,16.0,5.0,3.0,102.0,28.0,154,0.662338
nv,44.0,22.0,5.0,154.0,1126.0,1351,0.833457
All,182.0,102.0,51.0,336.0,1198.0,1869,_
precision,0.565934,0.343137,0.666667,0.303571,0.9399,_,0.568662



CONFUSION MATRIX: VALIDATION SET, ALL IMAGES PER LESION



predicted,other,akiec,bcc,mel,nv,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,96.0,25.0,7.0,71.0,26.0,225,0.426667
akiec,9.0,35.0,3.0,5.0,5.0,57,0.614035
bcc,17.0,16.0,35.0,7.0,7.0,82,0.426829
mel,13.0,6.0,4.0,104.0,27.0,154,0.675325
nv,55.0,19.0,7.0,145.0,1125.0,1351,0.832717
All,190.0,101.0,56.0,332.0,1190.0,1869,_
precision,0.505263,0.346535,0.625,0.313253,0.945378,_,0.565916


In [11]:
# from utils import print_header
from evaluation import metric_dictionary
# import pandas as pd

instance = rn18_bal_ta_jit

# One row per lesion_id (first occurrence) from the prediction and probability frames.
pred_rows1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')
prob_rows1 = instance.df_probabilities_val1.drop_duplicates(subset='lesion_id')
target1 = pred_rows1['label']
prediction1 = pred_rows1['pred_final']
probabilities1 = prob_rows1.filter(regex=r'^prob_')
agg_probabilities1 = pred_rows1.filter(regex=r'^prob_')

pred_rows_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')
prob_rows_a = instance.df_probabilities_val_a.drop_duplicates(subset='lesion_id')
target_a = pred_rows_a['label']
prediction_a = pred_rows_a['pred_final']
probabilities_a = prob_rows_a.filter(regex=r'^prob_')
agg_probabilities_a = pred_rows_a.filter(regex=r'^prob_')

# NOTE(review): agg_probabilities1, agg_probabilities_a, beta and weights are assigned here
# but not used in this cell -- confirm whether metric_dictionary was meant to receive them.
beta = 2
# Weights inversely proportional to relative class size, giving more importance to smaller classes.
# (Alternative noted in the original cell: None.)
class_shares = instance.df_train['label'].value_counts(normalize=True).sort_index()
weights = 1 / class_shares.values

print_header("Baseline model: other metrics")

instance.metric_dict1 = metric_dictionary(
    target=target1,
    prediction=prediction1,
    probabilities=probabilities1,
)

instance.metric_dict_a = metric_dictionary(
    target=target_a,
    prediction=prediction_a,
    probabilities=probabilities_a,
)

# Show each metric dictionary as a table.
print("\nOne image per lesion".upper())
display(pd.DataFrame(instance.metric_dict1))

print("\nAll images per lesion".upper())
display(pd.DataFrame(instance.metric_dict_a))


BASELINE MODEL: OTHER METRICS


ONE IMAGE PER LESION


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.749064,0.596448,0.563842,0.596448,0.553619,0.551497,0.568662,0.516344,0.898182,0.908291,0.861048



ALL IMAGES PER LESION


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.746388,0.595114,0.547086,0.595114,0.542033,0.545279,0.565916,0.513809,0.892117,0.903874,0.853041


In [12]:
from collections import OrderedDict
from typing import Dict, List
from multiclass_models import final_prediction

instance = rn18_bal_ta_jit

# Per-image class probabilities for the two validation variants.
raw_probabilities_df1: pd.DataFrame = instance.df_probabilities_val1
raw_probabilities_df_a: pd.DataFrame = instance.df_probabilities_val_a

# This run sets threshold_dict_help (0.4 for mel, bcc and akiec); the hinder thresholds and
# vote rules stay at None, with example values shown in the trailing comments.
# NOTE(review): aggregate_method is given a non-None value here, but neither it nor prefix
# is passed to final_prediction below, so as far as this cell shows it does not influence
# the calls -- confirm whether it should be forwarded.
aggregate_method: Union[None, Dict[str, List[str]]] = {
    'max' : ['mel', 'bcc', 'akiec'],
    'min' : ['nv'],
    'mean' : ['other'],
}
threshold_dict_help: Union[None, OrderedDict[str, float]] = OrderedDict(
    [('mel',0.4), ('bcc', 0.4), ('akiec', 0.4)]
)
threshold_dict_hinder: Union[None, OrderedDict[str, float]] = None  # OrderedDict([('nv',0.6)])
votes_to_win_dict: Union[None, OrderedDict[str, int]] = None  # OrderedDict([('mel',1), ('bcc',1), ('akiec',1)])
label_codes: Dict[int, str] = instance.label_codes
prefix: Union[None, str] = 'prob_'

# Keyword arguments shared by both final_prediction calls.
prediction_settings = dict(
    threshold_dict_help=threshold_dict_help,
    threshold_dict_hinder=threshold_dict_hinder,
    votes_to_win_dict=votes_to_win_dict,
    label_codes=label_codes,
)

print_header("Validation set, one image per lesion: combining probabilities and making predictions")

instance.df_pred_val1 = final_prediction(
    raw_probabilities_df=raw_probabilities_df1,
    **prediction_settings,
)

# Identifying columns, then the probability columns, then the two prediction columns.
display_columns = ['lesion_id', 'image_id', 'dx']
display_columns += [col for col in instance.df_probabilities_val1.columns if col.startswith('prob')]
display_columns += ['pred', 'pred_final']
display(instance.df_pred_val1[display_columns].head())

print_header("Validation set, all images per lesion: combining probabilities, making predictions, and combining predictions")

instance.df_pred_val_a = final_prediction(
    raw_probabilities_df=raw_probabilities_df_a,
    **prediction_settings,
)

display(instance.df_pred_val_a[display_columns].head())


VALIDATION SET, ONE IMAGE PER LESION: COMBINING PROBABILITIES AND MAKING PREDICTIONS



Unnamed: 0,lesion_id,image_id,dx,prob_other,prob_akiec,prob_bcc,prob_mel,prob_nv,pred,pred_final
0,HAM_0002730,ISIC_0025661,bkl,0.047352,0.000851,0.000754,0.943291,0.007752,3,3
1,HAM_0002730,ISIC_0025661,bkl,0.33462,0.106245,0.034429,0.474315,0.050391,3,3
2,HAM_0002730,ISIC_0025661,bkl,0.646562,0.048694,0.043296,0.215025,0.046423,0,3
3,HAM_0001466,ISIC_0027850,bkl,0.638631,0.105183,0.008891,0.174365,0.072931,0,3
4,HAM_0001466,ISIC_0027850,bkl,0.467379,0.00014,0.000118,0.519765,0.012598,3,3



VALIDATION SET, ALL IMAGES PER LESION: COMBINING PROBABILITIES, MAKING PREDICTIONS, AND COMBINING PREDICTIONS



Unnamed: 0,lesion_id,image_id,dx,prob_other,prob_akiec,prob_bcc,prob_mel,prob_nv,pred,pred_final
0,HAM_0002730,ISIC_0026769,bkl,0.319865,0.489471,0.005783,0.173567,0.011314,1,1
1,HAM_0002730,ISIC_0026769,bkl,0.274868,0.546251,0.006499,0.165906,0.006476,1,1
2,HAM_0002730,ISIC_0025661,bkl,0.133562,0.000708,0.002278,0.837677,0.025775,3,1
3,HAM_0001466,ISIC_0031633,bkl,0.186304,0.084551,0.000807,0.72102,0.007318,3,3
4,HAM_0001466,ISIC_0027850,bkl,0.067095,1.8e-05,3.2e-05,0.894343,0.038511,3,3


In [13]:
# from utils import print_header
from evaluation import weighted_average_f, confusion_matrix_with_metric

instance = rn18_bal_ta_jit
map_labels = instance.label_codes

# Keep a single row per lesion_id (drop_duplicates keeps the first occurrence), then read
# the true label and the final prediction from that row.
lesion_rows1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')
target1 = lesion_rows1['label']
prediction1 = lesion_rows1['pred_final']

lesion_rows_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')
target_a = lesion_rows_a['label']
prediction_a = lesion_rows_a['pred_final']

# Target-by-prediction contingency tables, including the 'All' margins.
txp1 = pd.crosstab(target1, prediction1, margins=True, dropna=False)
txp_a = pd.crosstab(target_a, prediction_a, margins=True, dropna=False)

beta = 2
# Weights inversely proportional to relative class size in the training set, giving more importance to smaller classes.
# (Alternative noted in the original cell: None.)
class_shares = instance.df_train['label'].value_counts(normalize=True).sort_index()
weights = 1 / class_shares.values

# Keyword arguments shared by both confusion-matrix calls.
shared_cm_options = dict(
    lst=None,
    full_pad=True,
    func=weighted_average_f,
    beta=beta,
    weights=weights,
    percentage=False,
    map_labels=map_labels,
)

instance.cm1 = confusion_matrix_with_metric(AxB=txp1, **shared_cm_options)
instance.cm_a = confusion_matrix_with_metric(AxB=txp_a, **shared_cm_options)

# Missing entries are shown as '_' in the displayed tables only.
print_header("Confusion matrix: validation set, one image per lesion")
display(instance.cm1.fillna('_'))

print_header("Confusion matrix: validation set, all images per lesion")
display(instance.cm_a.fillna('_'))


CONFUSION MATRIX: VALIDATION SET, ONE IMAGE PER LESION



predicted,other,akiec,bcc,mel,nv,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,96.0,22.0,5.0,73.0,29.0,225,0.426667
akiec,7.0,34.0,3.0,8.0,5.0,57,0.596491
bcc,10.0,18.0,35.0,13.0,6.0,82,0.426829
mel,13.0,5.0,3.0,112.0,21.0,154,0.727273
nv,43.0,23.0,6.0,198.0,1081.0,1351,0.800148
All,169.0,102.0,52.0,404.0,1142.0,1869,_
precision,0.568047,0.333333,0.673077,0.277228,0.946585,_,0.559882



CONFUSION MATRIX: VALIDATION SET, ALL IMAGES PER LESION



predicted,other,akiec,bcc,mel,nv,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,92.0,25.0,6.0,78.0,24.0,225,0.408889
akiec,7.0,35.0,3.0,7.0,5.0,57,0.614035
bcc,17.0,16.0,35.0,7.0,7.0,82,0.426829
mel,11.0,6.0,4.0,107.0,26.0,154,0.694805
nv,51.0,21.0,7.0,199.0,1073.0,1351,0.794226
All,178.0,103.0,55.0,398.0,1135.0,1869,_
precision,0.516854,0.339806,0.636364,0.268844,0.945374,_,0.552082


In [14]:
# from utils import print_header
from evaluation import metric_dictionary
# import pandas as pd

instance = rn18_bal_ta_jit

# One row per lesion_id (first occurrence) from the prediction and probability frames.
pred_rows1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')
prob_rows1 = instance.df_probabilities_val1.drop_duplicates(subset='lesion_id')
target1 = pred_rows1['label']
prediction1 = pred_rows1['pred_final']
probabilities1 = prob_rows1.filter(regex=r'^prob_')
agg_probabilities1 = pred_rows1.filter(regex=r'^prob_')

pred_rows_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')
prob_rows_a = instance.df_probabilities_val_a.drop_duplicates(subset='lesion_id')
target_a = pred_rows_a['label']
prediction_a = pred_rows_a['pred_final']
probabilities_a = prob_rows_a.filter(regex=r'^prob_')
agg_probabilities_a = pred_rows_a.filter(regex=r'^prob_')

# NOTE(review): agg_probabilities1, agg_probabilities_a, beta and weights are assigned here
# but not used in this cell -- confirm whether metric_dictionary was meant to receive them.
beta = 2
# Weights inversely proportional to relative class size, giving more importance to smaller classes.
# (Alternative noted in the original cell: None.)
class_shares = instance.df_train['label'].value_counts(normalize=True).sort_index()
weights = 1 / class_shares.values

# NOTE(review): the header still reads "Baseline model" although the predictions evaluated
# here come from the preceding prediction cell, which sets threshold_dict_help -- confirm
# the intended label.
print_header("Baseline model: other metrics")

instance.metric_dict1 = metric_dictionary(
    target=target1,
    prediction=prediction1,
    probabilities=probabilities1,
)

instance.metric_dict_a = metric_dictionary(
    target=target_a,
    prediction=prediction_a,
    probabilities=probabilities_a,
)

# Show each metric dictionary as a table.
print("\nOne image per lesion".upper())
display(pd.DataFrame(instance.metric_dict1))

print("\nAll images per lesion".upper())
display(pd.DataFrame(instance.metric_dict_a))


BASELINE MODEL: OTHER METRICS


ONE IMAGE PER LESION


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.726592,0.595482,0.559654,0.595482,0.546265,0.541207,0.559882,0.499232,0.898182,0.908291,0.861048



ALL IMAGES PER LESION


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.718031,0.587757,0.541448,0.587757,0.532132,0.531188,0.552082,0.48482,0.892117,0.903874,0.853041
