# Setup

In [1]:
import os
from pathlib import Path
import sys

# If we're using Google Colab, we set the environment variable to point to the relevant folder in our Google Drive:
if 'COLAB_GPU' in os.environ:
    from google.colab import drive
    drive.mount('/content/drive')
    os.environ['SKIN_LESION_CLASSIFICATION'] = '/content/drive/MyDrive/Colab Notebooks/skin-lesion-classification'

# Otherwise, we use the environment variable on our local system:
project_environment_variable = "SKIN_LESION_CLASSIFICATION"

# Path to the root directory of the project:
project_path = Path(os.environ.get(project_environment_variable))

# Relative path to /scripts (from where custom modules will be imported):
scripts_path = project_path.joinpath("scripts")

# Add this path to sys.path so that Python will look there for modules:
sys.path.append(str(scripts_path))

# Now import path_step from our custom utils module to create a dictionary to all subdirectories in our root directory:
from utils import path_setup
path = path_setup.subfolders(project_path)

path['project'] : D:\projects\skin-lesion-classification
path['images'] : D:\projects\skin-lesion-classification\images
path['models'] : D:\projects\skin-lesion-classification\models
path['expository'] : D:\projects\skin-lesion-classification\expository
path['literature'] : D:\projects\skin-lesion-classification\literature
path['notebooks'] : D:\projects\skin-lesion-classification\notebooks
path['presentation'] : D:\projects\skin-lesion-classification\presentation
path['scripts'] : D:\projects\skin-lesion-classification\scripts


<a id='contents'></a>
# Contents

* [Pipeline walk-through with explanations and examples](#pipeline_walk-through)
    * [Loading and processing metadata](#loading_and)
    * [Class distribution](#class_distribution)
    * [Train/val split](#trainval_split)
    * [Balancing the training set](#balancing)
    * [Expanding the validation set](#expanding)
    * [Fine-tuning EfficientNet or ResNet18](#fine-tuning)
    * [Small sample for testing code](#small_sample)
    * [Model architecture and state dictionary](#model_architecture)
    * [Inference: getting probabilities](#inference1)
    * [Inference: combining probabilities](#inference2)
    * [Inference: combining predictions](#inference3)
    * [Evaluation](#evaluation)
* [Trivial models](#trivial_models)
* [Baseline models](#baseline_models)
* [Models with balancing](#models_with)

<a id='pipeline_walk-through'></a>
# Pipeline walk-through with explanations and examples
↑↑ [Contents](#contents) ↓ [Loading and processing metadata](#loading_and)

<a id='loading_and'></a>
## Loading and processing metadata
↑↑ [Contents](#contents) ↑ [Pipeline walk-through with explanations and examples](#pipeline_walk-through) ↓ [Class distribution](#class_distribution)

In [2]:
from typing import Type, Union      # For type hints
from processing import process      # Custom module for processing metadata

data_dir: Path = path["images"]     # Path to directory containing metadata.csv file
csv_filename: str = "metadata.csv"  # The filename
    
restrict_to: Union[dict, None] = None                   # Remove all records *unless* column k lies in list v, for k : v in restrict_to dictionary.    
remove_if: Union[dict, None] = None                     # Remove all records if column k lies in list v, for k : v in remove_if dictionary.    
drop_row_if_missing_value_in: Union[list, None] = None  # We drop all rows for which there is a missing value in a column from this list.   
                                    
tvr: int = 3              # Ratio of training set to validation set. See discussion below for explanation.
seed: int = 0             # Random seed for parts of the process where randomness is called for.
keep_first: bool = False  # If False, then, for each lesion, we choose a random image to assign to our training set. 
stratified: bool = True   # If True, we stratify classes so that the proportions remain as stable as possible after train/val split. 
                          # If False, the proportions will be roughly similar.

to_classify: Union[list, dict] = ["mel",   # These are the lesion types we are interested in classifying. 
                                  "bcc",   # Any missing ones will be grouped together as the 0-label class: no need to write "other" here.
                                  "akiec", # If 'other' is not desired, use restrict_to attribute above
                                  "nv",]   # Can also be a dictionary, like { 'malignant' : ['mel', 'bcc'], 'benign' : ['nv', 'bkl']}

train_one_img_per_lesion: Union[bool, None] = False # If False, we take advantage of the (in some cases) multiple images of a lesion in our dataset
val_one_img_per_lesion: Union[bool, None] = False   # If False, we will validate our model by combining multiple predictions for a lesion (for multiple images of it) into a single prediction
val_expansion_factor: Union[int, None] = 3          # A random transformation may be applied to an image before making a prediction.
                                                    # For a given lesion, we may make multiple predictions (as specified here), and combine them into a single prediction.
    
sample_size: Union[None, dict] = {"mel": 2000,     # Handling class imbalance by upsampling minority classes/downsampling majority classes     
                                  "bcc": 2000,     # Specify how many images of each lesion diagnosis we want in our training set.
                                  "akiec": 2000, 
                                  "nv": 2000,
                                  "other" : 2000,} # Could also leave out "other" here, and include e.g. "df: 2000" if we wanted to.    

In [3]:
# Create an instance of the process class with attribute values as above.
demo = process(data_dir=data_dir,
               csv_filename=csv_filename,
               restrict_to=restrict_to,
               remove_if=remove_if,
               drop_row_if_missing_value_in=drop_row_if_missing_value_in,
               tvr=tvr,
               seed=seed,
               keep_first=keep_first,
               stratified=stratified,
               to_classify=to_classify,
               train_one_img_per_lesion=train_one_img_per_lesion,
               val_expansion_factor=val_expansion_factor,
               sample_size=sample_size,)

- Loaded file 'D:\projects\skin-lesion-classification\images\metadata.csv'.
- Inserted 'num_images' column in dataframe, to the right of 'lesion_id' column.
- Inserted 'label' column in dataframe, to the right of 'dx' column: 
  {'bkl': 0, 'df': 0, 'vasc': 0, 'mel': 1, 'akiec': 2, 'nv': 3, 'bcc': 4}
- Added 'set' column to dataframe, with values 't1', 'v1', 'ta', and 'va', to the right of 'localization' column.
- Basic, overall dataframe (pre-train/test split): self.df
- Balancing classes in training set.
- Balanced training set (uses as many different images per lesion as possible): self.df_train
- Expanding validation set: will combine 3 predictions into one, for each lesion in val set.
- Expanded validation set (one image per lesion, repeated 3 times): self.df_val1
- Expanded validation set (use up to 3 different images per lesion, if available): self.df_val_a
- Small sample dataframes for code testing: self._df_train_code_test, self._df_val1_code_test, self._df_val_a_code_test


<a id='class_distribution'></a>
## Class distribution
↑↑ [Contents](#contents) ↑ [Loading and processing metadata](#loading_and) ↓ [Train/val split](#trainval_split)

In [4]:
for across in ["lesions", "images"]:
    for subset in ["all", "train", "val"]:
        process.dx_dist(demo, subset = subset, across = across)


DISTRIBUTION OF LESIONS BY DIAGNOSIS: OVERALL



dx,nv,other,mel,bcc,akiec
freq,5403.0,898.0,614.0,327.0,228.0
%,72.33,12.02,8.22,4.38,3.05


Total lesions: 7470.


DISTRIBUTION OF LESIONS BY DIAGNOSIS: TRAIN



dx,nv,other,mel,bcc,akiec
freq,4052.0,673.0,460.0,245.0,171.0
%,72.34,12.02,8.21,4.37,3.05


Total lesions: 5601 (74.98% of all lesions).


DISTRIBUTION OF LESIONS BY DIAGNOSIS: VAL



dx,nv,other,mel,bcc,akiec
freq,1351.0,225.0,154.0,82.0,57.0
%,72.28,12.04,8.24,4.39,3.05


Total lesions: 1869 (25.02% of all lesions).


DISTRIBUTION OF IMAGES BY DIAGNOSIS: OVERALL



dx,nv,other,mel,bcc,akiec
freq,6705.0,1356.0,1113.0,514.0,327.0
%,66.95,13.54,11.11,5.13,3.27


Total images: 10015.


DISTRIBUTION OF IMAGES BY DIAGNOSIS: TRAIN



dx,nv,other,mel,bcc,akiec
freq,5007.0,1008.0,831.0,384.0,250.0
%,66.94,13.48,11.11,5.13,3.34


Total images: 7480 (74.69% of all images).


DISTRIBUTION OF IMAGES BY DIAGNOSIS: VAL



dx,nv,other,mel,bcc,akiec
freq,1698.0,348.0,282.0,130.0,77.0
%,66.98,13.73,11.12,5.13,3.04


Total images: 2535 (25.31% of all images).



<a id='trainval_split'></a>
## Train/val split
↑↑ [Contents](#contents) ↑ [Class distribution](#class_distribution) ↓ [Balancing the training set](#balancing)

<!-- <details>
    <summary><b><i>Train test split explanation: click here to expand/collapse</i></b></summary> -->
    
We partition our dataset based on ```lesion_id```, **not** on ```image_id```: that way, every lesion will be represented in training or in validation, but not both.

For each classification task, we will train a model by making use of
* **exactly one** image for every lesion in our training set;
* **all** images of every lesion in our training set.

In both cases, we will vaildate our model by making use of 
* **exactly one** image for every lesion in our validation set;
* **all** images of every lesion in our validation set (at least, _potentially_ all of them). 

**However**, we will make only one prediction per lesion (```lesion_id```) in our validation set: if there are multiple images of a lesion in the validation set, we will combine the predictions for the multiple images into a single prediction for the lesion.

Accordingly, we proceed as follows. We'll explain by example, assuming the dataset is not filtered before splitting (if it is, the number of distinct lesions will be less than $7470$, and the proportions will be different).
1. Randomly select (without replacement) a proportion of our $7470$ distinct ```lesion_id```s and label them with ```t``` (train). 
2. Label the remaining ```lesion_id```s with ```v``` (validate).
3. For each ```lesion_id``` labeled with a ```t```:
    * Select an ```image_id``` and label it ```t1```.
    * Label all (if any) remaining ```image_id```s corresponding to this ```lesion_id``` with ```ta```.
4.  For each ```lesion_id``` labeled with a ```v```:
    * Select an ```image_id``` and label it ```v1```.
    * Label all (if any) remaining ```image_id```s corresponding to this ```lesion_id``` with ```va```.

In Step 1, the number of ```lesion_id```s randomly selected to be labeled ```t``` will be such that the ratio of ```t```s to ```v```s is as close as possible to a specified ratio ```tvr``` (we default to $3$, i.e. $\approx75\%$ of lesions are represented in training). In Steps 3 and 4, the first substep can be done randomly (our default choice), or we can simply choose the "first" image in our table that corresponds to the lesion (see ```keep_first``` attribute of the ```process``` class). 

The four train/val scenarios we could consider are:
* ```t1v1```: train on precisely those images labeled ```t1``` and validate on precisely those labeled ```v1```.
* ```t1va```: train on precisely those images labeled ```t1``` and validate on precisely those labeled ```v1``` **or** ```va```.
* ```tav1```: train on precisely those images labeled ```t1``` **or** ```ta``` and validate on precisely those labeled ```v1```.
* ```tava```: train on precisely those images labeled ```t1``` **or** ```ta``` and validate on precisely those labeled ```v1``` ***or*** ```va```.

The mnemonic is ```t``` for training, ```v``` for validation, ```1``` for one-image-per-lesion, and ```a``` for all images.
<!-- </details> -->

In [5]:
# Let's have a look at our metadata dataframe, which is now just an attribute of the metadata instance of the process class.
from utils import print_header

instance = demo
df = instance.df

print_header("First five rows of metadata table")

to_print = ["Added columns\n".upper(), 
            "\'num_images\': number of images of lesion in dataset", 
            "\'label\': class to which lesion belongs",
            "\'set\': train/val assignment",
            "\'t*\': lesion is in the training set",
            "\'v*\': lesion is in the validation set",
            "\'t1\': we would train on this image if training a model on exactly one, or on all, image(s) per lesion in the training set",
            "If training set is balanced using one image per lesion, this one image would be re-used as many times as necessary.",
            "\'ta\': we would train on this image if training a model on all images of each lesion in the training set",
            "If training set is balanced using all images per lesion, images labeled ta would all be used before any image is repeated.",
            "\'v1': we\'d use this image if validating a model on exactly one, or on all, image(s) per lesion in the validation set",
            "If a validation expansion factor is given, this one image would be re-used that many times",
            "\'va': we\'d use this image if validating on all images of each lesion in the validation set" ,
            "If a validation expansion factor is given, iamges labeled va would all be used before any image is repeated.",
            "NB: if more than one image is used for any lesion in validation, the predictions will be combined into a single prediction"
           ]

print("\n- ".join(to_print))
display(df.head())


FIRST FIVE ROWS OF METADATA TABLE

ADDED COLUMNS

- 'num_images': number of images of lesion in dataset
- 'label': class to which lesion belongs
- 'set': train/val assignment
- 't*': lesion is in the training set
- 'v*': lesion is in the validation set
- 't1': we would train on this image if training a model on exactly one, or on all, image(s) per lesion in the training set
- If training set is balanced using one image per lesion, this one image would be re-used as many times as necessary.
- 'ta': we would train on this image if training a model on all images of each lesion in the training set
- If training set is balanced using all images per lesion, images labeled ta would all be used before any image is repeated.
- 'v1': we'd use this image if validating a model on exactly one, or on all, image(s) per lesion in the validation set
- If a validation expansion factor is given, this one image would be re-used that many times
- 'va': we'd use this image if validating on all images of ea

Unnamed: 0,lesion_id,num_images,image_id,dx,label,dx_type,age,sex,localization,set
0,HAM_0000118,2,ISIC_0027419,bkl,0,histo,80.0,male,scalp,ta
1,HAM_0000118,2,ISIC_0025030,bkl,0,histo,80.0,male,scalp,t1
2,HAM_0002730,2,ISIC_0026769,bkl,0,histo,80.0,male,scalp,va
3,HAM_0002730,2,ISIC_0025661,bkl,0,histo,80.0,male,scalp,v1
4,HAM_0001466,2,ISIC_0031633,bkl,0,histo,75.0,male,ear,va


<a id='balancing'></a>
## Balancing the training set
↑↑ [Contents](#contents) ↑ [Train/val split](#trainval_split) ↓ [Expanding the validation set](#expanding)

<!-- <details>
    <summary><b><i>Balancing/upsampling explanation: click here to expand/collapse</i></b></summary> -->

We explain the balancing procedure by way of example. (This is performed by the ```balance``` method of the ```process``` class in our ```processing``` module.) We assume the dataset has not been filtered, training to validation ratio is $3$, etc. There are $460$ distinct melanoma lesions represented in our training set. As most melanoma are represented by multiple distinct images, there are a total of $831$ distinct images of melanoma lesions in our training set. Suppose we want our training set to contain $2000$ melanoma images: each of the $460$ distinct melanoma lesions will be represented by $2000/460 \approx 4.35$ images on average. We do not merely sample with replacement.

The goal is to (a) have as little variance as possible in the number of times a lesion is represented, and (b) use as many distinct images as possible (taking advantage of the fact that there are multiple _distinct_ images of most melanoma). Thus, we note that $2000 = 4\times 460 + 160$, so we will use each of the $460$ distinct melanoma lesions four times, and make the remainder up by randomly sample $160$ distinct lesions from the $160$. In other words, exactly $300$ distinct lesions will each be represented by exactly four images, and exactly $160$ distinct lesions will each be represented by exactly five images: $2000 = 300 \times 4 + 160 \times 5$. 

How do we select the four images of each distinct melanoma lesion (plus another one image for $160$ of them)? Consider lesion id ```HAM_0000871``` for example: there are three distinct images of this lesion in our data set. Thus, if ```train_one_img_per_lesion``` is ```False```, we will use all three of them, and then randomly select one more (or two more if this particular lesion were to be one of the $160$ that are represented five times). See below. On the other hand, if ```train_one_img_per_lesion``` is ```True```, we have no choice but to use the one image (label ```t1```) four times.
    
<!-- </details> -->

In [6]:
# from utils import print_header

# The specific numbers in this example assume a certain choice for the attributes, including 
# sample_size: Union[None, dict] = {"mel": 2000,         
#                                   "bcc": 2000, 
#                                   "akiec": 2000, 
#                                   "nv": 2000,
#                                   "other" : 2000,}

instance = demo
df = demo.df_train

print_header("Eg: Representations of lesion HAM_0000871 in balanced training set")

to_print = ["HAM_0000871 represented by four images\n".upper(),
            "Three distinct images of this lesion to choose from: ISIC_0025964, ISIC_0030623, and ISIC_0025964",
            "Use ISIC_0025964 once, ISIC_0030623 twice, and ISIC_0025964 once",]

print("\n- ".join(to_print))

display(df[df['lesion_id'] == 'HAM_0000871'])


EG: REPRESENTATIONS OF LESION HAM_0000871 IN BALANCED TRAINING SET

HAM_0000871 REPRESENTED BY FOUR IMAGES

- Three distinct images of this lesion to choose from: ISIC_0025964, ISIC_0030623, and ISIC_0025964
- Use ISIC_0025964 once, ISIC_0030623 twice, and ISIC_0025964 once


Unnamed: 0,lesion_id,lesion_mult,num_images,image_id,img_mult,dx,label,dx_type,age,sex,localization,set
1773,HAM_0000871,4,3,ISIC_0025964,1,mel,1,histo,40.0,female,chest,ta
1774,HAM_0000871,4,3,ISIC_0030623,1,mel,1,histo,40.0,female,chest,t1
3086,HAM_0000871,4,3,ISIC_0026506,2,mel,1,histo,40.0,female,trunk,ta
3087,HAM_0000871,4,3,ISIC_0026506,2,mel,1,histo,40.0,female,trunk,ta


In [7]:
# from utils import print_header
# The specific numbers given in this example assume a certain choice for the attributes, including 
# sample_size: Union[None, dict] = {"mel": 2000,         
#                                   "bcc": 2000, 
#                                   "akiec": 2000, 
#                                   "nv": 2000,
#                                   "other" : 2000,}

instance = demo
df = demo.df_train
df = df[df['set'].isin(["ta", "t1"]) & (df['dx'] == 'mel')]

print_header("Eg: Melanoma in balanced training set")

to_print = ["Value counts for \'lesion_mult\' column\n".upper(),
            "300 distinct melanoma lesions each represented by four images: 300*4 = 1200",
            "160 distinct melanoma lesions each represented by five images: 160*5 = 800",]

print("\n- ".join(to_print[:3]))
display(df['lesion_mult'].value_counts())

print("\n- ".join(to_print[3:]))

display(df)


EG: MELANOMA IN BALANCED TRAINING SET

VALUE COUNTS FOR 'LESION_MULT' COLUMN

- 300 distinct melanoma lesions each represented by four images: 300*4 = 1200
- 160 distinct melanoma lesions each represented by five images: 160*5 = 800


4    1200
5     800
Name: lesion_mult, dtype: int64




Unnamed: 0,lesion_id,lesion_mult,num_images,image_id,img_mult,dx,label,dx_type,age,sex,localization,set
1773,HAM_0000871,4,3,ISIC_0025964,1,mel,1,histo,40.0,female,chest,ta
1774,HAM_0000871,4,3,ISIC_0030623,1,mel,1,histo,40.0,female,chest,t1
1775,HAM_0000040,5,1,ISIC_0027190,5,mel,1,histo,80.0,male,upper extremity,t1
1776,HAM_0000040,5,1,ISIC_0027190,5,mel,1,histo,80.0,male,upper extremity,t1
1777,HAM_0000040,5,1,ISIC_0027190,5,mel,1,histo,80.0,male,upper extremity,t1
...,...,...,...,...,...,...,...,...,...,...,...,...
7778,HAM_0002552,5,3,ISIC_0032936,2,mel,1,histo,25.0,male,upper extremity,ta
7784,HAM_0002552,5,3,ISIC_0033232,2,mel,1,histo,25.0,male,upper extremity,ta
7785,HAM_0002552,5,3,ISIC_0033232,2,mel,1,histo,25.0,male,upper extremity,ta
9998,HAM_0003521,5,2,ISIC_0032258,2,mel,1,histo,70.0,female,back,ta


<a id='expanding'></a>
## Expanding the validation set
↑↑ [Contents](#contents) ↑ [Balancing the training set](#balancing) ↓ [Fine-tuning EfficientNet or ResNet18](#fine-tuning)

As mentioned already, we make one prediction per lesion. However, we may have multiple images of a given lesion at our disposal: we could make a prediction for each of them and combine them somehow into a single prediction for the lesion. Even if there is only one image of a lesion, we could make multiple predictions on it: if a random transformation is applied to an image before our model makes a prediction on it, this would yield a different array of probabilities each time. Again, we could combine the results into a single prediction.

This is what the attribute ```val_expansion_factor``` of the ```process``` class is concerned with. Similarly to the way we balance the training set, we can replicate one single image per lesion in the validation set as many times as specified by ```val_expansion_factor```, as in ```self.df_val1```, or we can take advantage of other images of the lesion (if available), as in ```self.val_a```.

In [8]:
# from utils import print_header
# The specific numbers given in this example assume a certain choice for the attributes  

instance = demo

df = demo.df_val1
df = df[df['set'].isin(["va", "v1"]) & (df['dx'] == 'mel')]

print_header("Eg: Melanoma in expanded validation set (only one image per lesion used)")

to_print = [f"- Note that \'lesion_mult\' is always {instance.val_expansion_factor}",
            "HAM_0005678 represented by three images",
            "Two distinct images of this lesion: ISIC_0031023 and ISIC_0028086",
            f"However, only use ISIC_0031023 ({instance.val_expansion_factor} times)",]

print("\n- ".join(to_print))

display(df)

df = demo.df_val_a
df = df[df['set'].isin(["va", "v1"]) & (df['dx'] == 'mel')]

print_header("Eg: Melanoma in expanded validation set (all images used)")

to_print = [f"- Note that \'lesion_mult\' is always {instance.val_expansion_factor}",
            "HAM_0005678 represented by three images",
            "Two distinct images of this lesion to choose from: ISIC_0031023 and ISIC_0028086",
            "Use ISIC_0031023 once, and ISIC_0028086 twice",]

print("\n- ".join(to_print))

display(df)


EG: MELANOMA IN EXPANDED VALIDATION SET (ONLY ONE IMAGE PER LESION USED)

- Note that 'lesion_mult' is always 3
- HAM_0005678 represented by three images
- Two distinct images of this lesion: ISIC_0031023 and ISIC_0028086
- However, only use ISIC_0031023 (3 times)


Unnamed: 0,lesion_id,lesion_mult,num_images,image_id,img_mult,dx,label,dx_type,age,sex,localization,set
603,HAM_0005678,3,2,ISIC_0031023,3,mel,1,histo,60.0,male,chest,v1
604,HAM_0005678,3,2,ISIC_0031023,3,mel,1,histo,60.0,male,chest,v1
605,HAM_0005678,3,2,ISIC_0031023,3,mel,1,histo,60.0,male,chest,v1
606,HAM_0006722,3,2,ISIC_0031499,3,mel,1,histo,85.0,female,lower extremity,v1
607,HAM_0006722,3,2,ISIC_0031499,3,mel,1,histo,85.0,female,lower extremity,v1
...,...,...,...,...,...,...,...,...,...,...,...,...
1060,HAM_0004081,3,1,ISIC_0031957,3,mel,1,histo,70.0,female,lower extremity,v1
1061,HAM_0004081,3,1,ISIC_0031957,3,mel,1,histo,70.0,female,lower extremity,v1
1062,HAM_0004746,3,2,ISIC_0028764,3,mel,1,histo,65.0,female,back,v1
1063,HAM_0004746,3,2,ISIC_0028764,3,mel,1,histo,65.0,female,back,v1



EG: MELANOMA IN EXPANDED VALIDATION SET (ALL IMAGES USED)

- Note that 'lesion_mult' is always 3
- HAM_0005678 represented by three images
- Two distinct images of this lesion to choose from: ISIC_0031023 and ISIC_0028086
- Use ISIC_0031023 once, and ISIC_0028086 twice


Unnamed: 0,lesion_id,lesion_mult,num_images,image_id,img_mult,dx,label,dx_type,age,sex,localization,set
603,HAM_0005678,3,2,ISIC_0031023,1,mel,1,histo,60.0,male,chest,v1
604,HAM_0005678,3,2,ISIC_0028086,2,mel,1,histo,60.0,male,chest,va
605,HAM_0005678,3,2,ISIC_0028086,2,mel,1,histo,60.0,male,chest,va
606,HAM_0006722,3,2,ISIC_0030443,1,mel,1,histo,85.0,female,lower extremity,va
607,HAM_0006722,3,2,ISIC_0031499,2,mel,1,histo,85.0,female,lower extremity,v1
...,...,...,...,...,...,...,...,...,...,...,...,...
1060,HAM_0004746,3,2,ISIC_0028764,2,mel,1,histo,65.0,female,back,v1
1061,HAM_0004746,3,2,ISIC_0028764,2,mel,1,histo,65.0,female,back,v1
1062,HAM_0004746,3,2,ISIC_0029021,1,mel,1,histo,65.0,female,back,va
1063,HAM_0002525,3,2,ISIC_0025188,1,mel,1,histo,55.0,male,face,va


<a id='fine-tuning'></a>
## Fine-tuning EfficientNet or ResNet18
↑↑ [Contents](#contents) ↑ [Expanding the validation set](#expanding) ↓ [Small sample for testing code](#small_sample)

In [9]:
# Now let's set values for the attributes of our resnet18 class (the model we will use with out processed data).
# One of the attributes has to do with image transformations.

import torchvision.transforms as transforms

transform = transforms.Compose([
    
transforms.RandomCrop((300, 300)),
transforms.Resize((224,224)), # Resize images to fit ResNet input size
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize with ImageNet stats
])    

In [10]:
import pandas as pd
from typing import Union, List, Callable
import torchvision.models as models

source: Union[process, pd.DataFrame] = demo      # Processed data to be fed into model for training.
                                                 # Must either be an instance of the process class, or a dataframe of the same format as source.df if source were an instance of the process class.
model_dir: Path = path["models"]                 # Path to directory where models/model info/model results are stored.
transform: Union[None, 
                 transforms.Compose, 
                 List[Callable]] = transform     # Transform to be applied to images before feeding into neural network.
batch_size: Union[None, int] = 32                # Mini-batch size: default 32.
epochs: Union[None, int] = 10                    # Number of epochs (all layers unfrozen from the start): default 10.
base_learning_rate: Union[None, float] = 1/1000  # Learning rate to start with: default 1/1000. Using Adam optimizer.
filename_stem: Union[None, str] = "rn18"         # For saving model and related files. Default "rn18" (if ResNet model) or "EffNet" (if EfficientNet), or "cnn".
filename_suffix: Union[None, str] = "demo"       # Something descriptive and unique for future reference. Default empty string "".
overwrite: Union[None, bool] = True              # If False, any will generate an unused filename for saving .pth, .csv files etc., but appending a two-digit number.
                                                 # If None, will default to False. Only set to True if confident that training done on previous instances with same filename stem and suffix can be over-written.
code_test: Union[None, bool] = True
# model: Union[None, models.ResNet, models.EfficientNet] = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT) # Pre-trained model. Default: ResNet18.   
model: Union[None, models.ResNet, models.EfficientNet] = models.resnet18(weights="ResNet18_Weights.DEFAULT")  

In [11]:
# Create an instance of the resnet18 class with attribute values as above.
from multiclass_models import cnn

resnet_demo = cnn(                                   
    source=source,                                           
    model_dir=model_dir,
    transform=transform,
    batch_size=batch_size,
    epochs=epochs,                                          
    base_learning_rate=base_learning_rate,
    filename_stem=filename_stem,
    filename_suffix=filename_suffix,                         
    overwrite=overwrite,
    code_test=code_test,    
    model=model
)


CODE TEST MODE

- self.epochs set to 1
- self.Print set to True
- self.filename_suffix set to 'test'
- self.overwrite set to True
- self.df_train, self.df_val1, self.df_val_a replaced with a small number of records
- Change code_test attribute to False and re-create/create new cnn instance after testing is done.

Existing files will be overwritten. 
Base filename: rn18_ta_bal_test_1e_test_00
Attributes saved to file: D:\projects\skin-lesion-classification\models\rn18_ta_bal_test_1e_test_00_attributes.json


<a id='small_sample'></a>
## Small sample for testing code
↑↑ [Contents](#contents) ↑ [Fine-tuning EfficientNet or ResNet18](#fine-tuning) ↓ [Model architecture and state dictionary](#model_architecture)

In [12]:
# from utils import print_header

instance = resnet_demo

print_header("Code test: training set")
print(f"{instance.df_train.shape[0]} images".upper())
display(instance.df_train.head())

print_header(f"Code test: validation set (one image per lesion, repeated {instance.source.val_expansion_factor} times)")
print(f"{instance.df_val1.shape[0]} images".upper())
display(instance.df_val1.head())

print_header(f"Code test: validation set ({instance.source.val_expansion_factor} possibly different images per lesion)")
print(f"{instance.df_val_a.shape[0]} images".upper())
display(instance.df_val_a.head())


CODE TEST: TRAINING SET

170 IMAGES


Unnamed: 0,lesion_id,lesion_mult,num_images,image_id,img_mult,dx,label,dx_type,age,sex,localization,set
0,HAM_0004400,3,1,ISIC_0030026,3,bkl,0,histo,75.0,female,face,t1
1,HAM_0004400,3,1,ISIC_0030026,3,bkl,0,histo,75.0,female,face,t1
2,HAM_0004400,3,1,ISIC_0030026,3,bkl,0,histo,75.0,female,face,t1
3,HAM_0000744,3,3,ISIC_0032230,2,bkl,0,histo,70.0,male,face,t1
4,HAM_0000744,3,3,ISIC_0032230,2,bkl,0,histo,70.0,male,face,t1



CODE TEST: VALIDATION SET (ONE IMAGE PER LESION, REPEATED 3 TIMES)

42 IMAGES


Unnamed: 0,lesion_id,lesion_mult,num_images,image_id,img_mult,dx,label,dx_type,age,sex,localization,set
0,HAM_0003218,3,1,ISIC_0033305,3,bkl,0,consensus,75.0,male,back,v1
1,HAM_0003218,3,1,ISIC_0033305,3,bkl,0,consensus,75.0,male,back,v1
2,HAM_0003218,3,1,ISIC_0033305,3,bkl,0,consensus,75.0,male,back,v1
3,HAM_0000983,3,1,ISIC_0033490,3,bkl,0,consensus,,unknown,unknown,v1
4,HAM_0000983,3,1,ISIC_0033490,3,bkl,0,consensus,,unknown,unknown,v1



CODE TEST: VALIDATION SET (3 POSSIBLY DIFFERENT IMAGES PER LESION)

42 IMAGES


Unnamed: 0,lesion_id,lesion_mult,num_images,image_id,img_mult,dx,label,dx_type,age,sex,localization,set
0,HAM_0004406,3,2,ISIC_0034125,2,bkl,0,histo,80.0,male,back,va
1,HAM_0004406,3,2,ISIC_0034125,2,bkl,0,histo,80.0,male,back,va
2,HAM_0004406,3,2,ISIC_0033060,1,bkl,0,histo,80.0,male,back,v1
3,HAM_0003943,3,3,ISIC_0031078,1,bkl,0,histo,80.0,female,lower extremity,va
4,HAM_0003943,3,3,ISIC_0031464,1,bkl,0,histo,80.0,female,lower extremity,v1


In [13]:
# Train the model on the specified training data by calling the train method:
# from utils import print_header

instance = resnet_demo

print_header("Code test: training and validation")
instance.train()


CODE TEST: TRAINING AND VALIDATION

image_id, label, ohe-label: ISIC_0030344, 2, tensor([0., 0., 1., 0., 0.])
image_id, label, ohe-label: ISIC_0032114, 0, tensor([1., 0., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0025196, 2, tensor([0., 0., 1., 0., 0.])
image_id, label, ohe-label: ISIC_0029480, 1, tensor([0., 1., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0031272, 4, tensor([0., 0., 0., 0., 1.])
image_id, label, ohe-label: ISIC_0027770, 0, tensor([1., 0., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0032230, 0, tensor([1., 0., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0027149, 1, tensor([0., 1., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0032652, 4, tensor([0., 0., 0., 0., 1.])
image_id, label, ohe-label: ISIC_0029474, 0, tensor([1., 0., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0030344, 2, tensor([0., 0., 1., 0., 0.])
image_id, label, ohe-label: ISIC_0029099, 0, tensor([1., 0., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0025927, 1, tensor([0., 1., 0., 0., 0.])
i

image_id, label, ohe-label: ISIC_0025350, 2, tensor([0., 0., 1., 0., 0.])
image_id, label, ohe-label: ISIC_0031526, 4, tensor([0., 0., 0., 0., 1.])
image_id, label, ohe-label: ISIC_0024932, 1, tensor([0., 1., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0029713, 2, tensor([0., 0., 1., 0., 0.])
image_id, label, ohe-label: ISIC_0029099, 0, tensor([1., 0., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0030344, 2, tensor([0., 0., 1., 0., 0.])
image_id, label, ohe-label: ISIC_0030344, 2, tensor([0., 0., 1., 0., 0.])
image_id, label, ohe-label: ISIC_0031272, 4, tensor([0., 0., 0., 0., 1.])
image_id, label, ohe-label: ISIC_0028076, 2, tensor([0., 0., 1., 0., 0.])
image_id, label, ohe-label: ISIC_0030953, 2, tensor([0., 0., 1., 0., 0.])
image_id, label, ohe-label: ISIC_0025196, 2, tensor([0., 0., 1., 0., 0.])
image_id, label, ohe-label: ISIC_0030953, 2, tensor([0., 0., 1., 0., 0.])
image_id, label, ohe-label: ISIC_0028314, 2, tensor([0., 0., 1., 0., 0.])
outputs.shape: torch.Size([32, 5])
los

image_id, label, ohe-label: ISIC_0026598, 3, tensor([0., 0., 0., 1., 0.])
image_id, label, ohe-label: ISIC_0031372, 0, tensor([1., 0., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0026313, 0, tensor([1., 0., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0026313, 0, tensor([1., 0., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0026629, 0, tensor([1., 0., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0026629, 0, tensor([1., 0., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0026629, 0, tensor([1., 0., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0030443, 1, tensor([0., 1., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0031499, 1, tensor([0., 1., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0031499, 1, tensor([0., 1., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0033299, 1, tensor([0., 1., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0033299, 1, tensor([0., 1., 0., 0., 0.])
image_id, label, ohe-label: ISIC_0033299, 1, tensor([0., 1., 0., 0., 0.])
image_id, label, ohe-label: ISIC_00326

In [20]:
# from utils import print_header
from multiclass_models import load_dict

# Let's look at the training and validation loss for each epoch:
instance = resnet_demo

print_header("Code test")
print("Loss dictionary (training and validation loss from each epoch)".upper())
to_print = ["- Key \'val1_loss\' refers to validation set in which one image per lesion is used.",
         "Key \'val_a_los\' refers to validation set in which more than one image per lesion is potentially used."]
print("\n- ".join(to_print))

try:
    if instance.epoch_losses is not None:
        display(instance.epoch_losses)
    else:
        retrieved_epoch_losses = load_dict(instance.model_dir, instance._filename + "_epoch_losses")
        display(retrieved_epoch_losses)
except:
    retrieved_epoch_losses = load_dict(instance.model_dir, instance._filename + "_epoch_losses")
    display(retrieved_epoch_losses)


CODE TEST

LOSS DICTIONARY (TRAINING AND VALIDATION LOSS FROM EACH EPOCH)
- Key 'val1_loss' refers to validation set in which one image per lesion is used.
- Key 'val_a_los' refers to validation set in which more than one image per lesion is potentially used.


{'train_loss': array([1.00222969]),
 'val1_loss': array([4.04400718]),
 'val_a_loss': array([3.69006991])}

In [22]:
# from utils import print_header
from multiclass_models import load_dict

# Let's look at the training and validation loss for each epoch:
instance = resnet_demo

print_header("Code test")
print("If we didn't just train the model and the epoch losses dictionary is not in memory, we can load it from a file during training.")

display(load_dict(instance.model_dir, instance._filename + "_epoch_losses"))


CODE TEST

If we didn't just train the model and the epoch losses dictionary is not in memory, we can load it from a file during training.


{'train_loss': [1.0022296855847042],
 'val1_loss': [4.044007182121277],
 'val_a_loss': [3.6900699138641357]}

<a id='model_architecture'></a>
## Model architecture and state dictionary
↑↑ [Contents](#contents) ↑ [Small sample for testing code](#small_sample) ↓ [Inference: getting probabilities](#inference1)

In [25]:
# from utils import print_header
import torch
import torch.nn as nn

instance = resnet_demo

if instance.state_dict is None:
    print("Loading model and state dictionary from file\n".upper())
    file_path_pth = instance.model_dir.joinpath(instance._filename + ".pth")

    # model = models.efficientnet_b0()  
    model = models.resnet18()  
    if isinstance(model,models.ResNet):
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, len(instance.label_codes))
    elif isinstance(model,models.EfficientNet):
        num_ftrs = model.classifier[1].in_features
        model.classifier[1] = nn.Linear(num_ftrs, len(instance.label_codes))

    # Load the state dictionary into the model
    state_dict = torch.load(file_path_pth)
    model.load_state_dict(state_dict)

    instance.model = model
    instance.state_dict = state_dict
    
print_header("Code test: model architecture")
print(f"Note: \'out_features = {len(instance.label_codes)}\' at the end".upper())
display(instance.model)


CODE TEST: MODEL ARCHITECTURE AND STATE DICTIONARY


CODE TEST: MODEL ARCHITECTURE

NOTE: 'OUT_FEATURES = 5' AT THE END


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [26]:
print_header("Code test: model state dictionary")
print(str(instance.state_dict)[:1000], "\n ... LOTS OF PARAMETERS ...\n", str(instance.state_dict)[-1000:])


CODE TEST: MODEL STATE DICTIONARY

OrderedDict([('conv1.weight', tensor([[[[-9.4734e-03, -5.2116e-03, -5.9452e-04,  ...,  5.8054e-02,
            1.8560e-02, -1.1146e-02],
          [ 1.2014e-02,  1.0617e-02, -1.0889e-01,  ..., -2.7213e-01,
           -1.2776e-01,  3.0430e-03],
          [-5.9763e-03,  6.0046e-02,  2.9645e-01,  ...,  5.2089e-01,
            2.5754e-01,  6.2821e-02],
          ...,
          [-2.6718e-02,  1.7227e-02,  7.3482e-02,  ..., -3.3373e-01,
           -4.2156e-01, -2.5888e-01],
          [ 3.1190e-02,  4.2055e-02,  6.1787e-02,  ...,  4.1287e-01,
            3.9258e-01,  1.6504e-01],
          [-1.3263e-02, -4.8003e-03, -2.5220e-02,  ..., -1.5196e-01,
           -8.3439e-02, -6.6822e-03]],

         [[-8.4013e-03, -2.3473e-02, -3.1329e-02,  ...,  3.3832e-02,
            1.6109e-03, -2.4748e-02],
          [ 4.8986e-02,  3.6950e-02, -1.0112e-01,  ..., -3.1173e-01,
           -1.5991e-01, -2.9361e-04],
          [ 2.3979e-03,  1.0222e-01,  4.0576e-01,  ...,  7.08

In [121]:
# from utils import print_header
from multiclass_models import get_probabilities

instance = resnet_demo

# model = models.efficientnet_b0()  
# model = models.resnet18() 
# if isinstance(model,models.ResNet):
#     num_ftrs = model.fc.in_features
#     model.fc = nn.Linear(num_ftrs, len(instance.label_codes))
# elif isinstance(model,models.EfficientNet):
#     num_ftrs = model.classifier[1].in_features
#     model.classifier[1] = nn.Linear(num_ftrs, len(instance.label_codes))

instance.df_probabilities_val1 = get_probabilities(df=instance.df_val1,
                                                   data_dir=instance.data_dir,
                                                   model_dir=instance.model_dir,
                                                   model=instance.model,
                                                   filename=instance._filename,
                                                   label_codes=instance.label_codes,
                                                   transform=instance.transform,
                                                   batch_size=instance.batch_size,
                                                   Print=False,
                                                   save_as=instance._filename + "_val1",)

instance.df_probabilities_val_a = get_probabilities(df=instance.df_val_a,
                                                    data_dir=instance.data_dir,
                                                    model_dir=instance.model_dir,
                                                    model=instance.model,
                                                    filename=instance._filename,
                                                    label_codes=instance.label_codes,
                                                    transform=instance.transform,
                                                    batch_size=instance.batch_size,
                                                    Print=False,
                                                    save_as=instance._filename + "_val_a",)

print_header("Code test: probabilities, validation set (one image per lesion)")
display_columns = ['lesion_id', 'image_id', 'dx'] + [col for col in instance.df_probabilities_val1.columns if col.startswith('prob')]
display(instance.df_probabilities_val1[display_columns].head())

print_header("Code test: probabilities, validation set (more than one image per lesion)")
display(instance.df_probabilities_val_a[display_columns].head())

Saving probabilities: D:\projects\skin-lesion-classification\models\rn18_ta_bal_test_1e_test_00_val1_probabilities.csv
Saving probabilities: D:\projects\skin-lesion-classification\models\rn18_ta_bal_test_1e_test_00_val_a_probabilities.csv

CODE TEST: PROBABILITIES, VALIDATION SET (ONE IMAGE PER LESION)



Unnamed: 0,lesion_id,image_id,dx,prob_other,prob_mel,prob_akiec,prob_nv,prob_bcc
0,HAM_0003218,ISIC_0033305,bkl,0.000347,0.909989,0.067869,0.020783,0.001012
1,HAM_0003218,ISIC_0033305,bkl,5.2e-05,0.961752,0.013188,0.013692,0.011316
2,HAM_0003218,ISIC_0033305,bkl,0.000186,0.928205,0.053561,0.013748,0.004301
3,HAM_0000983,ISIC_0033490,bkl,1.2e-05,0.056986,4.7e-05,0.001958,0.940998
4,HAM_0000983,ISIC_0033490,bkl,8.3e-05,0.360215,0.001458,0.026391,0.611853



CODE TEST: PROBABILITIES, VALIDATION SET (MORE THAN ONE IMAGE PER LESION)



Unnamed: 0,lesion_id,image_id,dx,prob_other,prob_mel,prob_akiec,prob_nv,prob_bcc
0,HAM_0004406,ISIC_0034125,bkl,0.008977,0.721454,0.005207,0.263809,0.000552
1,HAM_0004406,ISIC_0034125,bkl,0.006242,0.664127,0.00418,0.325128,0.000323
2,HAM_0004406,ISIC_0033060,bkl,4.1e-05,0.983301,0.000597,0.010002,0.006059
3,HAM_0003943,ISIC_0031078,bkl,0.00414,0.012328,0.97918,0.003748,0.000604
4,HAM_0003943,ISIC_0031464,bkl,0.002361,0.137206,0.748915,0.010283,0.101236


<a id='inference1'></a>
## Inference: getting probabilities
↑↑ [Contents](#contents) ↑ [Model architecture and state dictionary](#model_architecture) ↓ [Inference: combining probabilities](#inference2)

In [38]:
# from utils import print_header
from multiclass_models import df_from_ids, get_probabilities     

instance = resnet_demo
df = instance.df

print_header("Code test: prediction on individual images or lesions")

to_print = ["- We can make predictions for individual images or lesions.",
            "We only require a dataframe with an \'image_id\' column.",
            "Given the filename of an image, the df_from_ids function will construct such a dataframe.",
            "We can then feed this dataframe into the get_probabilities function.",
            "Here is the result of passing \'filenames = [\'ISIC_0033305\',\'ISIC_0025661\']\' to df_from_ids:",
            "- And here are the corresponding probabilities:",
            "- If we have a lesion_id with associated image_ids, we can also construct a small dataframe.",
            "The df_from_ids function also takes arguments for the number of predictions we want to make for a given image/lesion.",
            "Here is the result of passing \'lesion_ids = \'HAM_0000118\', \'multiplicity = 3\', and \'one_img_per_lesion = False\' to df_from_ids:",
            "- We have filtered all columns except for lesion_id and image_id (knowing the diagnosis defeats the purpose).",
            "- Here are the associated probabilities:",
            "- Notice that the probabilities may vary with each execution of a prediction.",
            "This is because a random transformation may be applied to each image before our model makes a prediction on it."]

df_2img = df_from_ids(filenames=['ISIC_0033305','ISIC_0025661'], # can be a string or a list of strings
                       multiplicity=None,
                       lesion_ids=None,
                       df=df,
                       one_img_per_lesion=None,)

print("\n- ".join(to_print[:5]))

display(df_2img)

df_2img_prob = get_probabilities(df=df_2img,
                  data_dir=instance.data_dir,
                  model_dir=instance.model_dir,
                  model=instance.model,
                  filename=instance._filename,
                  label_codes=instance.label_codes,
                  transform=instance.transform,
                  batch_size=instance.batch_size,
                  Print=False,
                  save_as=None,)   

print(to_print[5])
display(df_2img_prob)

print("\n- ".join(to_print[6:9]))

df_1les = df_from_ids(filenames=None,
                       multiplicity=3,
                       lesion_ids='HAM_0000118', # can be a string or a list of strings
                       df=df,
                       one_img_per_lesion=False,)

display_columns = ['lesion_id', 'image_id'] 
display(df_1les[display_columns])

print("\n- ".join(to_print[9:10]))

df_1les_prob = get_probabilities(df=df_1les,
                  data_dir=instance.data_dir,
                  model_dir=instance.model_dir,
                  model=instance.model,
                  filename=instance._filename,
                  label_codes=instance.label_codes,
                  transform=instance.transform,
                  batch_size=instance.batch_size,
                  Print=False,
                  save_as=None,)   

print(to_print[10])
display_columns = ['lesion_id', 'image_id'] + [col for col in df_1les_prob if col.startswith('prob')]
display(df_1les_prob[display_columns])

print("\n- ".join(to_print[11:]))


CODE TEST: PREDICTION ON INDIVIDUAL IMAGES OR LESIONS

- We can make predictions for individual images or lesions.
- We only require a dataframe with an 'image_id' column.
- Given the filename of an image, the df_from_ids function will construct such a dataframe.
- We can then feed this dataframe into the get_probabilities function.
- Here is the result of passing 'filenames = ['ISIC_0033305','ISIC_0025661']' to df_from_ids:


Unnamed: 0,image_id
0,ISIC_0033305
1,ISIC_0025661


- And here are the corresponding probabilities:


Unnamed: 0,image_id,prob_other,prob_mel,prob_akiec,prob_nv,prob_bcc
0,ISIC_0033305,0.000393,0.924487,0.031356,0.031861,0.011903
1,ISIC_0025661,0.002455,0.351872,0.417792,0.158065,0.069817


- If we have a lesion_id with associated image_ids, we can also construct a small dataframe.
- The df_from_ids function also takes arguments for the number of predictions we want to make for a given image/lesion.
- Here is the result of passing 'lesion_ids = 'HAM_0000118', 'multiplicity = 3', and 'one_img_per_lesion = False' to df_from_ids:


Unnamed: 0,lesion_id,image_id
0,HAM_0000118,ISIC_0027419
1,HAM_0000118,ISIC_0025030
2,HAM_0000118,ISIC_0025030


- We have filtered all columns except for lesion_id and image_id (knowing the diagnosis defeats the purpose).
- Here are the associated probabilities:


Unnamed: 0,lesion_id,image_id,prob_other,prob_mel,prob_akiec,prob_nv,prob_bcc
0,HAM_0000118,ISIC_0027419,0.000247,0.942904,0.035084,0.019931,0.001834
1,HAM_0000118,ISIC_0025030,3.4e-05,0.982403,0.000663,0.003238,0.013661
2,HAM_0000118,ISIC_0025030,8.7e-05,0.911008,0.043588,0.010123,0.035194


- Notice that the probabilities may vary with each execution of a prediction.
- This is because a random transformation may be applied to each image before our model makes a prediction on it.


In [42]:
print("Here's another example of how we'd use df_from_ids on an arbitrary image from outside our dataset.")

display(df_from_ids(filenames='image_from_somewhere', multiplicity=3,))

print("If it were an actual image in the data_dir folder, we could feed this dataframe into the get_probabilities function.")

Here's another example of how we'd use df_from_ids on an arbitrary image from outside our dataset.


Unnamed: 0,image_id
0,image_from_somewhere
1,image_from_somewhere
2,image_from_somewhere


If it were an actual image in the data_dir folder, we could feed this dataframe into the get_probabilities function.


<a id='inference2'></a>
## Inference: combining probabilities
↑↑ [Contents](#contents) ↑ [Inference: getting probabilities](#inference1) ↓ [Inference: combining predictions](#inference3)

In [171]:
# from utils import print_header
from multiclass_models import aggregate_probabilities

print_header("Code test: combining probabilities")

method = { 'max' : ['mel'], 'min' : ['nv'], 'mean' : ['akiec', 'bcc'] }

print("- We can combine multiple probabilities for a single lesion, if available, by taking the maximum, minimum, or mean.")
print("- Here is the original dataframe:")
display(df_1les_prob)
print("- Here is the dataframe with max mel probability, minimum nv probability, mean akiec and bcc prob\'s, and \'other\' left alone:")
display(aggregate_probabilities(df_1les_prob, method=method))

print("- Here's another example (no lesion_id in this case):")
display(df_2img_prob)
display(aggregate_probabilities(df_2img_prob))


CODE TEST: COMBINING PROBABILITIES

- We can combine multiple probabilities for a single lesion, if available, by taking the maximum, minimum, or mean.
- Here is the original dataframe:


Unnamed: 0,lesion_id,num_images,image_id,dx,label,dx_type,age,sex,localization,set,prob_other,prob_mel,prob_akiec,prob_nv,prob_bcc
0,HAM_0000118,2,ISIC_0027419,bkl,0,histo,80.0,male,scalp,ta,0.000247,0.942904,0.035084,0.019931,0.001834
1,HAM_0000118,2,ISIC_0025030,bkl,0,histo,80.0,male,scalp,t1,3.4e-05,0.982403,0.000663,0.003238,0.013661
2,HAM_0000118,2,ISIC_0025030,bkl,0,histo,80.0,male,scalp,t1,8.7e-05,0.911008,0.043588,0.010123,0.035194


- Here is the dataframe with max mel probability, minimum nv probability, mean akiec and bcc prob's, and 'other' left alone:


Unnamed: 0,lesion_id,num_images,image_id,dx,label,dx_type,age,sex,localization,set,prob_other,prob_mel,prob_akiec,prob_nv,prob_bcc
0,HAM_0000118,2,ISIC_0027419,bkl,0,histo,80.0,male,scalp,ta,0.000247,0.982403,0.026445,0.003238,0.016897
1,HAM_0000118,2,ISIC_0025030,bkl,0,histo,80.0,male,scalp,t1,3.4e-05,0.982403,0.026445,0.003238,0.016897
2,HAM_0000118,2,ISIC_0025030,bkl,0,histo,80.0,male,scalp,t1,8.7e-05,0.982403,0.026445,0.003238,0.016897


- Here's another example (no lesion_id in this case):


Unnamed: 0,image_id,prob_other,prob_mel,prob_akiec,prob_nv,prob_bcc
0,ISIC_0033305,0.000393,0.924487,0.031356,0.031861,0.011903
1,ISIC_0025661,0.002455,0.351872,0.417792,0.158065,0.069817


Unnamed: 0,image_id,prob_other,prob_mel,prob_akiec,prob_nv,prob_bcc
0,ISIC_0033305,0.000393,0.924487,0.031356,0.031861,0.011903
1,ISIC_0025661,0.002455,0.351872,0.417792,0.158065,0.069817


<a id='inference3'></a>
## Inference: combining predictions
↑↑ [Contents](#contents) ↑ [Inference: combining probabilities](#inference2) ↓ [Evaluation](#evaluation)

In [189]:
# from utils import print_header
from multiclass_models import final_prediction

print_header("Code test: making predictions")

raw_probabilities_df: pd.DataFrame = df_1les_prob 
raw_probabilities_df_a: pd.DataFrame = instance.df_probabilities_val_a
aggregate_method: Union[None, Dict[str, List[str]]] = { 'max' : ['mel'], 'min' : ['nv'], 'mean' : ['akiec', 'bcc']}
threshold_dict_help: Union[None, OrderedDict[str, float]] = OrderedDict([('bcc',0.01), ('mel',0.4)])
threshold_dict_hinder: Union[None, OrderedDict[str, float]] = OrderedDict([('nv',0.6)])    
votes_to_win_dict: Union[None, OrderedDict[str, int]] = OrderedDict([('mel',1)])
label_codes: Dict[int, str] = instance.label_codes
prefix: Union[None, str] = 'prob_'
    
to_print = ["- There are various ways we can form a prediction based on the combined probabilities.",
            "For instance, we can immediately predict mel if the probability of mel is greater than 0.4.",
            "If that's not the case, we can continue down a list, e.g. predicting bcc if probability of bcc is at least 0.45.",
            "We can also require, e.g., the probability of nv to be at least 0.6 before predicting nv.",
            "Once we have predicted classes for each image of a lesion, we can then combined the predictions in various ways.",
            "For instance, we might want to make a final prediction of mel if that is a prediction for at least one of the images.",
            "If not, we might again go through an ordered list, voting e.g. for bcc if at least one image is predicted as bcc.",
            "It doesn't have to be \'at least one prediction\': we could say \'if at least two predictions are for bcc, then...\'.",
            "If we reach the end of this list, we'd proceed to select the most popular prediction as the final one for the lesion.",
            "We don't have to specify a priority list at all: in that case we just take the most popular prediction as the final one.",
            "We could do similar if we had no lesion_id but only image_ids repeated a number of times.",
            "Below, by way of illustration, we've stated that we want to predict bcc if the probability for bcc is at least 0.01.",
            "But then, we've stated that we want to make a final prediction of mel if at least one prediction is for mel.",
            f"The label codes are as follows: {instance.label_codes}",            
            ]

print("\n- ".join(to_print))

display(final_prediction(raw_probabilities_df=raw_probabilities_df, 
                 threshold_dict_help=threshold_dict_help,
                 threshold_dict_hinder=threshold_dict_hinder,
                 votes_to_win_dict=votes_to_win_dict,
                 label_codes=label_codes,))


CODE TEST: MAKING PREDICTIONS

- There are various ways we can form a prediction based on the combined probabilities.
- For instance, we can immediately predict mel if the probability of mel is greater than 0.4.
- If that's not the case, we can continue down a list, e.g. predicting bcc if probability of bcc is at least 0.45.
- We can also require, e.g., the probability of nv to be at least 0.6 before predicting nv.
- Once we have predicted classes for each image of a lesion, we can then combined the predictions in various ways.
- For instance, we might want to make a final prediction of mel if that is a prediction for at least one of the images.
- If not, we might again go through an ordered list, voting e.g. for bcc if at least one image is predicted as bcc.
- It doesn't have to be 'at least one prediction': we could say 'if at least two predictions are for bcc, then...'.
- If we reach the end of this list, we'd proceed to select the most popular prediction as the final one for the l

Unnamed: 0,lesion_id,num_images,image_id,dx,label,dx_type,age,sex,localization,set,prob_other,prob_mel,prob_akiec,prob_nv,prob_bcc,pred,pred_final
0,HAM_0000118,2,ISIC_0027419,bkl,0,histo,80.0,male,scalp,ta,0.000247,0.942904,0.035084,0.019931,0.001834,1,1
1,HAM_0000118,2,ISIC_0025030,bkl,0,histo,80.0,male,scalp,t1,3.4e-05,0.982403,0.000663,0.003238,0.013661,4,1
2,HAM_0000118,2,ISIC_0025030,bkl,0,histo,80.0,male,scalp,t1,8.7e-05,0.911008,0.043588,0.010123,0.035194,4,1


In [132]:
# Let's now apply this to our code test validation sets.
# We can reload the probabilities dataframes from csv files saved earlier, if they are not already in memory
file_path_val1 = instance.model_dir.joinpath(instance._filename + "_val1_probabilities.csv")
file_path_val_a = instance.model_dir.joinpath(instance._filename + "_val_a_probabilities.csv")
instance.df_val1_probabilities = pd.read_csv(file_path_val1,index_col=0)
instance.df_val_a_probabilities = pd.read_csv(file_path_val_a,index_col=0)

In [165]:
# from utils import print_header
from multiclass_models import final_prediction
from collections import OrderedDict
from typing import Dict, List

instance = resnet_demo

raw_probabilities_df1: pd.DataFrame = instance.df_probabilities_val1 
raw_probabilities_df_a: pd.DataFrame = instance.df_probabilities_val_a
aggregate_method: Union[None, Dict[str, List[str]]] = { 'max' : ['mel'], 'min' : ['nv'], 'mean' : ['bcc']}
threshold_dict_help: Union[None, OrderedDict[str, float]] = OrderedDict([('mel',0.4), ('bcc',0.45)])
threshold_dict_hinder: Union[None, OrderedDict[str, float]] = OrderedDict([('nv',0.6)])    
votes_to_win_dict: Union[None, OrderedDict[str, int]] = OrderedDict([('mel',1)])
label_codes: Dict[int, str] = instance.label_codes
prefix: Union[None, str] = 'prob_'

print_header("Code test: combining probabilities and making predictions")

print(f"Validation set: one image per lesion, repeated {instance.source.val_expansion_factor} times".upper())

instance.df_pred_val1 = final_prediction(raw_probabilities_df=raw_probabilities_df1, 
                                         threshold_dict_help=threshold_dict_help,
                                         threshold_dict_hinder=threshold_dict_hinder,
                                         votes_to_win_dict=votes_to_win_dict,
                                         label_codes=label_codes,)

display(instance.df_pred_val1.head())

print(f"\nValidation set: {instance.source.val_expansion_factor} images per lesion, using all images before repeating".upper())

instance.df_pred_val_a = final_prediction(raw_probabilities_df=raw_probabilities_df_a, 
                                         threshold_dict_help=threshold_dict_help,
                                         threshold_dict_hinder=threshold_dict_hinder,
                                         votes_to_win_dict=votes_to_win_dict,
                                         label_codes=label_codes,)

display(instance.df_pred_val_a.head())

print("\nNow we simply drop duplicates (lesion_id)...".upper())

display(instance.df_pred_val1.drop_duplicates(subset='lesion_id')[['lesion_id','label','pred_final']])

display(instance.df_pred_val_a.drop_duplicates(subset='lesion_id')[['lesion_id','label','pred_final']])


CODE TEST: COMBINING PROBABILITIES AND MAKING PREDICTIONS

VALIDATION SET: ONE IMAGE PER LESION, REPEATED 3 TIMES


Unnamed: 0,lesion_id,lesion_mult,num_images,image_id,img_mult,dx,label,dx_type,age,sex,localization,set,prob_other,prob_mel,prob_akiec,prob_nv,prob_bcc,pred,pred_final
0,HAM_0003218,3,1,ISIC_0033305,3,bkl,0,consensus,75.0,male,back,v1,0.000347,0.909989,0.067869,0.020783,0.001012,1,1
1,HAM_0003218,3,1,ISIC_0033305,3,bkl,0,consensus,75.0,male,back,v1,5.2e-05,0.961752,0.013188,0.013692,0.011316,1,1
2,HAM_0003218,3,1,ISIC_0033305,3,bkl,0,consensus,75.0,male,back,v1,0.000186,0.928205,0.053561,0.013748,0.004301,1,1
3,HAM_0000983,3,1,ISIC_0033490,3,bkl,0,consensus,,unknown,unknown,v1,1.2e-05,0.056986,4.7e-05,0.001958,0.940998,4,4
4,HAM_0000983,3,1,ISIC_0033490,3,bkl,0,consensus,,unknown,unknown,v1,8.3e-05,0.360215,0.001458,0.026391,0.611853,4,4



VALIDATION SET: 3 IMAGES PER LESION, USING ALL IMAGES BEFORE REPEATING


Unnamed: 0,lesion_id,lesion_mult,num_images,image_id,img_mult,dx,label,dx_type,age,sex,localization,set,prob_other,prob_mel,prob_akiec,prob_nv,prob_bcc,pred,pred_final
0,HAM_0004406,3,2,ISIC_0034125,2,bkl,0,histo,80.0,male,back,va,0.008977,0.721454,0.005207,0.263809,0.000552,1,1
1,HAM_0004406,3,2,ISIC_0034125,2,bkl,0,histo,80.0,male,back,va,0.006242,0.664127,0.00418,0.325128,0.000323,1,1
2,HAM_0004406,3,2,ISIC_0033060,1,bkl,0,histo,80.0,male,back,v1,4.1e-05,0.983301,0.000597,0.010002,0.006059,1,1
3,HAM_0003943,3,3,ISIC_0031078,1,bkl,0,histo,80.0,female,lower extremity,va,0.00414,0.012328,0.97918,0.003748,0.000604,2,2
4,HAM_0003943,3,3,ISIC_0031464,1,bkl,0,histo,80.0,female,lower extremity,v1,0.002361,0.137206,0.748915,0.010283,0.101236,2,2



NOW WE SIMPLY DROP DUPLICATES (LESION_ID)...


Unnamed: 0,lesion_id,label,pred_final
0,HAM_0003218,0,1
3,HAM_0000983,0,4
6,HAM_0003267,3,0
9,HAM_0006318,3,1
12,HAM_0005518,0,1
15,HAM_0005663,0,1
18,HAM_0001953,1,1
21,HAM_0002591,1,4
24,HAM_0005713,0,1
27,HAM_0007568,0,0


Unnamed: 0,lesion_id,label,pred_final
0,HAM_0004406,0,1
3,HAM_0003943,0,2
6,HAM_0001756,3,0
9,HAM_0006602,3,0
12,HAM_0007418,0,1
15,HAM_0004065,0,1
18,HAM_0006722,1,4
21,HAM_0000107,1,2
24,HAM_0000940,0,1
27,HAM_0007150,0,1


<a id='evaluation'></a>
## Evaluation
↑↑ [Contents](#contents) ↑ [Inference: combining predictions](#inference3) ↓ [Trivial models](#trivial_models)

In [216]:
# from utils import print_header
from evaluation import weighted_average_f, confusion_matrix_with_metric

instance = resnet_demo
map_labels = instance.label_codes

target1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['label'] 
prediction1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['pred_final'] 

target_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['label']  
prediction_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['pred_final']  

txp1 = pd.crosstab(target1,prediction1,margins=True,dropna=False)
txp_a = pd.crosstab(target_a,prediction_a,margins=True,dropna=False)

beta = 1
# Weights inversely proportional to relative class size in the training set, giving more importance to smaller classes.
weights = 1/instance.df_train['label'].value_counts(normalize=True).sort_index().values # None

instance.cm1 = confusion_matrix_with_metric(AxB=txp1,
                                            lst=None,
                                            full_pad=True,
                                            func=weighted_average_f,
                                            beta=beta,
                                            weights=weights,
                                            percentage=False,
                                            map_labels=map_labels)

instance.cm_a = confusion_matrix_with_metric(AxB=txp_a,
                                            lst=None,
                                            full_pad=True,
                                            func=weighted_average_f,
                                            beta=beta,
                                            weights=weights,
                                            percentage=False,
                                            map_labels=map_labels)

print_header("Code test: confusion matrix (validation set)")

to_print = ["- The overall evaluation metric would appear at the bottom right, if it were defined (this code test set is too small).",
            "It would be a class-wise weighted average fbeta score, beta and weights as specified (default values 1).",
            "One could also pass None, a float, or a different function to the func parameter in confusion_matrix_with_metric."]

print("\n- ".join(to_print))

print(f"\nOne image per lesion, repeated {instance.source.val_expansion_factor} times".upper())
display(instance.cm1.fillna('_'))

print(f"\n{instance.source.val_expansion_factor} images per lesion, using all available images before repeating".upper())
display(instance.cm_a.fillna('_'))


CODE TEST: CONFUSION MATRIX (VALIDATION SET)

- The overall evaluation metric would appear at the bottom right, if it were defined (this code test set is too small).
- It would be a class-wise weighted average fbeta score, beta and weights as specified (default values 1).
- One could also pass None, a float, or a different function to the func parameter in confusion_matrix_with_metric.

ONE IMAGE PER LESION, REPEATED 3 TIMES


predicted,other,mel,akiec,nv,bcc,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,1.0,4.0,0,0.0,1.0,6,0.166667
mel,0.0,1.0,0,0.0,1.0,2,0.5
akiec,0.0,0.0,0,0.0,2.0,2,0.0
nv,1.0,1.0,0,0.0,0.0,2,0.0
bcc,0.0,0.0,0,1.0,1.0,2,0.5
All,2.0,6.0,0,1.0,5.0,14,_
precision,0.5,0.166667,_,0.0,0.2,_,_



3 IMAGES PER LESION, USING ALL AVAILABLE IMAGES BEFORE REPEATING


predicted,other,mel,akiec,nv,bcc,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,0.0,5.0,1.0,0,0.0,6,0.0
mel,0.0,0.0,1.0,0,1.0,2,0.0
akiec,0.0,2.0,0.0,0,0.0,2,0.0
nv,2.0,0.0,0.0,0,0.0,2,0.0
bcc,0.0,2.0,0.0,0,0.0,2,0.0
All,2.0,9.0,2.0,0,1.0,14,_
precision,0.0,0.0,0.0,_,0.0,_,_


In [220]:
# from utils import print_header
from evaluation import metric_dictionary
# import pandas as pd

instance = resnet_demo

target1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['label'] 
prediction1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['pred_final'] 
probabilities1 = instance.df_probabilities_val1.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_')
agg_probabilities1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_') 

target_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['label']  
prediction_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['pred_final']  
probabilities_a = instance.df_probabilities_val_a.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_')
agg_probabilities_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_') 

beta = 1
# Weights inversely proportional to relative class size, giving more importance to smaller classes.
weights = 1/instance.df_train['label'].value_counts(normalize=True).sort_index().values # None

print_header("Code test: other metrics")

to_print = ["- ACC: accuracy",
            "BACC: balanced accuracy",
            "precision: macro-averaged precision (equal weight to each class)",
            "recal: macro-averaged recall (equal weight to each class)",
            "Fbeta: macro-averaged F_beta score (equal weight to each class)",
            "MCC: Matthews correlation coefficient",
            "ROC-AUC mac: macro-averaged ROC-AUC (equal weight to each class)",
            "ROC-AUC wt: weighted-average ROC-AUC (larger class -> more weight)",
            "ROC-AUC wt*: weighted-average ROC-AUC (larger class -> *less weight)",            
            ]

instance.metric_dict1 = metric_dictionary(target=target1, 
                                          prediction=prediction1, 
                                          probabilities=probabilities1)

instance.metric_dict_a = metric_dictionary(target=target_a, 
                                          prediction=prediction_a, 
                                          probabilities=probabilities_a)

print("\n- ".join(to_print))

print("\n One image per lesion".upper())
display(pd.DataFrame(instance.metric_dict1))

print("\n Possibly more than one image per lesion".upper())
display(pd.DataFrame(instance.metric_dict_a))


CODE TEST: OTHER METRICS

- ACC: accuracy
- BACC: balanced accuracy
- precision: macro-averaged precision (equal weight to each class)
- recal: macro-averaged recall (equal weight to each class)
- Fbeta: macro-averaged F_beta score (equal weight to each class)
- MCC: Matthews correlation coefficient
- ROC-AUC mac: macro-averaged ROC-AUC (equal weight to each class)
- ROC-AUC wt: weighted-average ROC-AUC (larger class -> more weight)
- ROC-AUC wt*: weighted-average ROC-AUC (larger class -> *less weight)

 ONE IMAGE PER LESION


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.214286,0.233333,0.216667,0.233333,0.155345,0.157143,0.186813,0.043853,0.616667,0.583333,0.616667



 POSSIBLY MORE THAN ONE IMAGE PER LESION


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.291386,0.425,0.434524,0.416667


<a id='trivial_models'></a>
# Trivial models
↑↑ [Contents](#contents) ↑ [Evaluation](#evaluation) ↓ [Baseline models](#baseline_models)

In [4]:
from typing import Type, Union      # For type hints
from processing import process      # Custom module for processing metadata

data_dir: Path = path["images"]     # Path to directory containing metadata.csv file
csv_filename: str = "metadata.csv"  # The filename
    
tvr: int = 3              # Ratio of training set to validation set. See discussion below for explanation.
seed: int = 0             # Random seed for parts of the process where randomness is called for.
keep_first: bool = False  # If False, then, for each lesion, we choose a random image to assign to our training set. 
stratified: bool = True   # If True, we stratify classes so that the proportions remain as stable as possible after train/val split. 
                          # If False, the proportions will be roughly similar.

to_classify: Union[list, dict] = ["mel",   # These are the lesion types we are interested in classifying. 
                                  "bcc",   # Any missing ones will be grouped together as the 0-label class: no need to write "other" here.
                                  "akiec", # If 'other' is not desired, use restrict_to attribute above
                                  "nv",]   # Can also be a dictionary, like { 'malignant' : ['mel', 'bcc'], 'benign' : ['nv', 'bkl']}

In [5]:
# Create an instance of the process class with attribute values as above.
trivial = process(data_dir=data_dir,
               csv_filename=csv_filename,
               tvr=tvr,
               seed=seed,
               keep_first=keep_first,
               stratified=stratified,
               to_classify=to_classify,)

- Loaded file 'D:\projects\skin-lesion-classification\images\metadata.csv'.
- Inserted 'num_images' column in dataframe, to the right of 'lesion_id' column.
- Inserted 'label' column in dataframe, to the right of 'dx' column: 
  {'df': 0, 'bkl': 0, 'vasc': 0, 'nv': 1, 'akiec': 2, 'mel': 3, 'bcc': 4}
- Added 'set' column to dataframe, with values 't1', 'v1', 'ta', and 'va', to the right of 'localization' column.
- Basic, overall dataframe (pre-train/test split): self.df
- Training set (not balanced, all images per lesion): self.df_train
- Validation set (not expanded, one image per lesion): self.df_val1
- Validation set (not expanded, use all images of each lesion): self.df_val_a
- Small sample dataframes for code testing: self._df_train_code_test, self._df_val1_code_test, self._df_val_a_code_test


In [57]:
from utils import print_header
import pandas as pd
from multiclass_models import trivial_prediction, final_prediction
from evaluation import weighted_average_f, confusion_matrix_with_metric, metric_dictionary

instance = trivial

y_train = instance.df_train['label']
y_val1 = instance.df_val1['label']
y_val_a = instance.df_val_a['label']
label_codes = instance.label_codes

_, prediction1, probabilities1 = trivial_prediction(y_train, 
                                                    num_preds=y_val1.shape[0],
                                                    label_codes=label_codes,
                                                    pos_label='majority_class',)

_, prediction_a, probabilities_a = trivial_prediction(y_train, 
                                                      num_preds=y_val_a.shape[0],
                                                      label_codes=label_codes,
                                                      pos_label='majority_class',)

instance.df_probabilities_val1 = instance.df_val1.copy()
instance.df_probabilities_val_a = instance.df_val_a.copy()
for i, dx in label_codes.items():
    instance.df_probabilities_val1['prob_' + dx] = probabilities1[:,i]
    instance.df_probabilities_val_a['prob_' + dx] = probabilities_a[:,i]
    
raw_probabilities_df1 = instance.df_probabilities_val1 
raw_probabilities_df_a = instance.df_probabilities_val_a

instance.df_pred_val1 = final_prediction(raw_probabilities_df=raw_probabilities_df1, 
                                         label_codes=label_codes,)   

instance.df_pred_val_a = final_prediction(raw_probabilities_df=raw_probabilities_df_a, 
                                         label_codes=label_codes,)  

map_labels = label_codes

target1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['label'] 
prediction1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['pred_final'] 

target_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['label']  
prediction_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['pred_final']  

txp1 = pd.crosstab(target1,prediction1,margins=True,dropna=False)
txp_a = pd.crosstab(target_a,prediction_a,margins=True,dropna=False)

beta = 2
# Weights inversely proportional to relative class size in the training set, giving more importance to smaller classes.
weights = 1/instance.df_train['label'].value_counts(normalize=True).sort_index().values # None

instance.cm1 = confusion_matrix_with_metric(AxB=txp1,
                                            lst=None,
                                            full_pad=True,
                                            func=weighted_average_f,
                                            beta=beta,
                                            weights=weights,
                                            percentage=False,
                                            map_labels=map_labels)

instance.cm_a = confusion_matrix_with_metric(AxB=txp_a,
                                            lst=None,
                                            full_pad=True,
                                            func=weighted_average_f,
                                            beta=beta,
                                            weights=weights,
                                            percentage=False,
                                            map_labels=map_labels)

target1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['label'] 
prediction1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['pred_final'] 
probabilities1 = instance.df_probabilities_val1.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_')
agg_probabilities1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_') 

target_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['label']  
prediction_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['pred_final']  
probabilities_a = instance.df_probabilities_val_a.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_')
agg_probabilities_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_') 

instance.metric_dict1 = metric_dictionary(target=target1, 
                                          prediction=prediction1, 
                                          probabilities=probabilities1)

instance.metric_dict_a = metric_dictionary(target=target_a, 
                                          prediction=prediction_a, 
                                          probabilities=probabilities_a)

print_header("Trivial prediction: all labels majority class")

print("\nOne image per lesion".upper())

display(instance.cm1.fillna('_'))
display(pd.DataFrame(instance.metric_dict1))

print("\nAll images per lesion".upper())
print("- This is the same as the one image per lesion case, because the validation sets are identical after dropping duplicates.")
print("- However, we include it here as a check.")
display(instance.cm_a.fillna('_'))
display(pd.DataFrame(instance.metric_dict_a))


TRIVIAL PREDICTION: ALL LABELS MAJORITY CLASS


 ONE IMAGE PER LESION


predicted,other,nv,akiec,mel,bcc,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,0,225.0,0,0,0,225,0.0
nv,0,1351.0,0,0,0,1351,1.0
akiec,0,57.0,0,0,0,57,0.0
mel,0,154.0,0,0,0,154,0.0
bcc,0,82.0,0,0,0,82,0.0
All,0,1869.0,0,0,0,1869,_
precision,_,0.722846,_,_,_,_,_


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.722846,0.2,0.722846,0.2,0.153053,0.167826,0.185756,0.0,0.5,0.5,0.5



 ALL IMAGES PER LESION

This is the same as the one image per lesion case, because the validation sets are identical after dropping duplicates.

However, we include it here as a check.


predicted,other,nv,akiec,mel,bcc,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,0,225.0,0,0,0,225,0.0
nv,0,1351.0,0,0,0,1351,1.0
akiec,0,57.0,0,0,0,57,0.0
mel,0,154.0,0,0,0,154,0.0
bcc,0,82.0,0,0,0,82,0.0
All,0,1869.0,0,0,0,1869,_
precision,_,0.722846,_,_,_,_,_


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.722846,0.2,0.722846,0.2,0.153053,0.167826,0.185756,0.0,0.5,0.5,0.5


In [58]:
from utils import print_header
import pandas as pd
from multiclass_models import trivial_prediction, final_prediction
from evaluation import weighted_average_f, confusion_matrix_with_metric, metric_dictionary

instance = trivial

y_train = instance.df_train['label']
y_val1 = instance.df_val1['label']
y_val_a = instance.df_val_a['label']
label_codes = instance.label_codes

_, prediction1, probabilities1 = trivial_prediction(y_train, 
                                                    num_preds=y_val1.shape[0],
                                                    label_codes=label_codes,
                                                    pos_label='minority_class',)

_, prediction_a, probabilities_a = trivial_prediction(y_train, 
                                                      num_preds=y_val_a.shape[0],
                                                      label_codes=label_codes,
                                                      pos_label='minority_class',)

instance.df_probabilities_val1 = instance.df_val1.copy()
instance.df_probabilities_val_a = instance.df_val_a.copy()
for i, dx in label_codes.items():
    instance.df_probabilities_val1['prob_' + dx] = probabilities1[:,i]
    instance.df_probabilities_val_a['prob_' + dx] = probabilities_a[:,i]
    
raw_probabilities_df1 = instance.df_probabilities_val1 
raw_probabilities_df_a = instance.df_probabilities_val_a

instance.df_pred_val1 = final_prediction(raw_probabilities_df=raw_probabilities_df1, 
                                         label_codes=label_codes,)   

instance.df_pred_val_a = final_prediction(raw_probabilities_df=raw_probabilities_df_a, 
                                         label_codes=label_codes,)  

map_labels = label_codes

target1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['label'] 
prediction1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['pred_final'] 

target_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['label']  
prediction_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['pred_final']  

txp1 = pd.crosstab(target1,prediction1,margins=True,dropna=False)
txp_a = pd.crosstab(target_a,prediction_a,margins=True,dropna=False)

beta = 2
# Weights inversely proportional to relative class size in the training set, giving more importance to smaller classes.
weights = 1/instance.df_train['label'].value_counts(normalize=True).sort_index().values # None

instance.cm1 = confusion_matrix_with_metric(AxB=txp1,
                                            lst=None,
                                            full_pad=True,
                                            func=weighted_average_f,
                                            beta=beta,
                                            weights=weights,
                                            percentage=False,
                                            map_labels=map_labels)

instance.cm_a = confusion_matrix_with_metric(AxB=txp_a,
                                            lst=None,
                                            full_pad=True,
                                            func=weighted_average_f,
                                            beta=beta,
                                            weights=weights,
                                            percentage=False,
                                            map_labels=map_labels)

target1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['label'] 
prediction1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['pred_final'] 
probabilities1 = instance.df_probabilities_val1.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_')
agg_probabilities1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_') 

target_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['label']  
prediction_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['pred_final']  
probabilities_a = instance.df_probabilities_val_a.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_')
agg_probabilities_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_') 

instance.metric_dict1 = metric_dictionary(target=target1, 
                                          prediction=prediction1, 
                                          probabilities=probabilities1)

instance.metric_dict_a = metric_dictionary(target=target_a, 
                                          prediction=prediction_a, 
                                          probabilities=probabilities_a)

print_header("Trivial prediction: all labels minority class")

print("\nOne image per lesion".upper())

display(instance.cm1.fillna('_'))
display(pd.DataFrame(instance.metric_dict1))

print("\nAll images per lesion".upper())
print("- This is the same as the one image per lesion case, because the validation sets are identical after dropping duplicates.")
print("- However, we include it here as a check.")
display(instance.cm_a.fillna('_'))
display(pd.DataFrame(instance.metric_dict_a))


TRIVIAL PREDICTION: ALL LABELS MINORITY CLASS


ONE IMAGE PER LESION


predicted,other,nv,akiec,mel,bcc,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,0,0,225.0,0,0,225,0.0
nv,0,0,1351.0,0,0,1351,0.0
akiec,0,0,57.0,0,0,57,1.0
mel,0,0,154.0,0,0,154,0.0
bcc,0,0,82.0,0,0,82,0.0
All,0,0,1869.0,0,0,1869,_
precision,_,_,0.030498,_,_,_,_


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.030498,0.2,0.030498,0.2,0.007567,0.011838,0.027182,0.0,0.5,0.5,0.5



ALL IMAGES PER LESION
- This is the same as the one image per lesion case, because the validation sets are identical after dropping duplicates.
- However, we include it here as a check.


predicted,other,nv,akiec,mel,bcc,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,0,0,225.0,0,0,225,0.0
nv,0,0,1351.0,0,0,1351,0.0
akiec,0,0,57.0,0,0,57,1.0
mel,0,0,154.0,0,0,154,0.0
bcc,0,0,82.0,0,0,82,0.0
All,0,0,1869.0,0,0,1869,_
precision,_,_,0.030498,_,_,_,_


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.030498,0.2,0.030498,0.2,0.007567,0.011838,0.027182,0.0,0.5,0.5,0.5


In [60]:
from utils import print_header
import pandas as pd
from multiclass_models import trivial_prediction, final_prediction
from evaluation import weighted_average_f, confusion_matrix_with_metric, metric_dictionary

instance = trivial

y_train = instance.df_train['label']
y_val1 = instance.df_val1['label']
y_val_a = instance.df_val_a['label']
label_codes = instance.label_codes

_, prediction1, probabilities1 = trivial_prediction(y_train, 
                                                    num_preds=y_val1.shape[0],
                                                    label_codes=label_codes,
                                                    pos_label='mel',)

_, prediction_a, probabilities_a = trivial_prediction(y_train, 
                                                      num_preds=y_val_a.shape[0],
                                                      label_codes=label_codes,
                                                      pos_label='mel',)

instance.df_probabilities_val1 = instance.df_val1.copy()
instance.df_probabilities_val_a = instance.df_val_a.copy()
for i, dx in label_codes.items():
    instance.df_probabilities_val1['prob_' + dx] = probabilities1[:,i]
    instance.df_probabilities_val_a['prob_' + dx] = probabilities_a[:,i]
    
raw_probabilities_df1 = instance.df_probabilities_val1 
raw_probabilities_df_a = instance.df_probabilities_val_a

instance.df_pred_val1 = final_prediction(raw_probabilities_df=raw_probabilities_df1, 
                                         label_codes=label_codes,)   

instance.df_pred_val_a = final_prediction(raw_probabilities_df=raw_probabilities_df_a, 
                                         label_codes=label_codes,)  

map_labels = label_codes

target1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['label'] 
prediction1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['pred_final'] 

target_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['label']  
prediction_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['pred_final']  

txp1 = pd.crosstab(target1,prediction1,margins=True,dropna=False)
txp_a = pd.crosstab(target_a,prediction_a,margins=True,dropna=False)

beta = 2
# Weights inversely proportional to relative class size in the training set, giving more importance to smaller classes.
weights = 1/instance.df_train['label'].value_counts(normalize=True).sort_index().values # None

instance.cm1 = confusion_matrix_with_metric(AxB=txp1,
                                            lst=None,
                                            full_pad=True,
                                            func=weighted_average_f,
                                            beta=beta,
                                            weights=weights,
                                            percentage=False,
                                            map_labels=map_labels)

instance.cm_a = confusion_matrix_with_metric(AxB=txp_a,
                                            lst=None,
                                            full_pad=True,
                                            func=weighted_average_f,
                                            beta=beta,
                                            weights=weights,
                                            percentage=False,
                                            map_labels=map_labels)

target1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['label'] 
prediction1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['pred_final'] 
probabilities1 = instance.df_probabilities_val1.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_')
agg_probabilities1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_') 

target_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['label']  
prediction_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['pred_final']  
probabilities_a = instance.df_probabilities_val_a.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_')
agg_probabilities_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_') 

instance.metric_dict1 = metric_dictionary(target=target1, 
                                          prediction=prediction1, 
                                          probabilities=probabilities1)

instance.metric_dict_a = metric_dictionary(target=target_a, 
                                          prediction=prediction_a, 
                                          probabilities=probabilities_a)

print_header("Trivial prediction: all labels mel")

print("\nOne image per lesion".upper())

display(instance.cm1.fillna('_'))
display(pd.DataFrame(instance.metric_dict1))

print("\nAll images per lesion".upper())
print("- This is the same as the one image per lesion case, because the validation sets are identical after dropping duplicates.")
print("- However, we include it here as a check.")
display(instance.cm_a.fillna('_'))
display(pd.DataFrame(instance.metric_dict_a))


TRIVIAL PREDICTION: ALL LABELS MEL


ONE IMAGE PER LESION


predicted,other,nv,akiec,mel,bcc,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,0,0,0,225.0,0,225,0.0
nv,0,0,0,1351.0,0,1351,0.0
akiec,0,0,0,57.0,0,57,0.0
mel,0,0,0,154.0,0,154,1.0
bcc,0,0,0,82.0,0,82,0.0
All,0,0,0,1869.0,0,1869,_
precision,_,_,_,0.082397,_,_,_


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.082397,0.2,0.082397,0.2,0.020183,0.03045,0.061972,0.0,0.5,0.5,0.5



ALL IMAGES PER LESION
- This is the same as the one image per lesion case, because the validation sets are identical after dropping duplicates.
- However, we include it here as a check.


predicted,other,nv,akiec,mel,bcc,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,0,0,0,225.0,0,225,0.0
nv,0,0,0,1351.0,0,1351,0.0
akiec,0,0,0,57.0,0,57,0.0
mel,0,0,0,154.0,0,154,1.0
bcc,0,0,0,82.0,0,82,0.0
All,0,0,0,1869.0,0,1869,_
precision,_,_,_,0.082397,_,_,_


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.082397,0.2,0.082397,0.2,0.020183,0.03045,0.061972,0.0,0.5,0.5,0.5


In [61]:
from utils import print_header
import pandas as pd
from multiclass_models import trivial_prediction, final_prediction
from evaluation import weighted_average_f, confusion_matrix_with_metric, metric_dictionary

instance = trivial

y_train = instance.df_train['label']
y_val1 = instance.df_val1['label']
y_val_a = instance.df_val_a['label']
label_codes = instance.label_codes

_, prediction1, probabilities1 = trivial_prediction(y_train, 
                                                    num_preds=y_val1.shape[0],
                                                    label_codes=label_codes,
                                                    pos_label='bcc',)

_, prediction_a, probabilities_a = trivial_prediction(y_train, 
                                                      num_preds=y_val_a.shape[0],
                                                      label_codes=label_codes,
                                                      pos_label='bcc',)

instance.df_probabilities_val1 = instance.df_val1.copy()
instance.df_probabilities_val_a = instance.df_val_a.copy()
for i, dx in label_codes.items():
    instance.df_probabilities_val1['prob_' + dx] = probabilities1[:,i]
    instance.df_probabilities_val_a['prob_' + dx] = probabilities_a[:,i]
    
raw_probabilities_df1 = instance.df_probabilities_val1 
raw_probabilities_df_a = instance.df_probabilities_val_a

instance.df_pred_val1 = final_prediction(raw_probabilities_df=raw_probabilities_df1, 
                                         label_codes=label_codes,)   

instance.df_pred_val_a = final_prediction(raw_probabilities_df=raw_probabilities_df_a, 
                                         label_codes=label_codes,)  

map_labels = label_codes

target1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['label'] 
prediction1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['pred_final'] 

target_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['label']  
prediction_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['pred_final']  

txp1 = pd.crosstab(target1,prediction1,margins=True,dropna=False)
txp_a = pd.crosstab(target_a,prediction_a,margins=True,dropna=False)

beta = 2
# Weights inversely proportional to relative class size in the training set, giving more importance to smaller classes.
weights = 1/instance.df_train['label'].value_counts(normalize=True).sort_index().values # None

instance.cm1 = confusion_matrix_with_metric(AxB=txp1,
                                            lst=None,
                                            full_pad=True,
                                            func=weighted_average_f,
                                            beta=beta,
                                            weights=weights,
                                            percentage=False,
                                            map_labels=map_labels)

instance.cm_a = confusion_matrix_with_metric(AxB=txp_a,
                                            lst=None,
                                            full_pad=True,
                                            func=weighted_average_f,
                                            beta=beta,
                                            weights=weights,
                                            percentage=False,
                                            map_labels=map_labels)

target1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['label'] 
prediction1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['pred_final'] 
probabilities1 = instance.df_probabilities_val1.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_')
agg_probabilities1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_') 

target_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['label']  
prediction_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['pred_final']  
probabilities_a = instance.df_probabilities_val_a.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_')
agg_probabilities_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_') 

instance.metric_dict1 = metric_dictionary(target=target1, 
                                          prediction=prediction1, 
                                          probabilities=probabilities1)

instance.metric_dict_a = metric_dictionary(target=target_a, 
                                          prediction=prediction_a, 
                                          probabilities=probabilities_a)

print_header("Trivial prediction: all labels bcc")

print("\nOne image per lesion".upper())

display(instance.cm1.fillna('_'))
display(pd.DataFrame(instance.metric_dict1))

print("\nAll images per lesion".upper())
print("- This is the same as the one image per lesion case, because the validation sets are identical after dropping duplicates.")
print("- However, we include it here as a check.")
display(instance.cm_a.fillna('_'))
display(pd.DataFrame(instance.metric_dict_a))


TRIVIAL PREDICTION: ALL LABELS BCC


ONE IMAGE PER LESION


predicted,other,nv,akiec,mel,bcc,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,0,0,0,0,225.0,225,0.0
nv,0,0,0,0,1351.0,1351,0.0
akiec,0,0,0,0,57.0,57,0.0
mel,0,0,0,0,154.0,154,0.0
bcc,0,0,0,0,82.0,82,1.0
All,0,0,0,0,1869.0,1869,_
precision,_,_,_,_,0.043874,_,_


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.043874,0.2,0.043874,0.2,0.010849,0.016812,0.037324,0.0,0.5,0.5,0.5



ALL IMAGES PER LESION
- This is the same as the one image per lesion case, because the validation sets are identical after dropping duplicates.
- However, we include it here as a check.


predicted,other,nv,akiec,mel,bcc,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,0,0,0,0,225.0,225,0.0
nv,0,0,0,0,1351.0,1351,0.0
akiec,0,0,0,0,57.0,57,0.0
mel,0,0,0,0,154.0,154,0.0
bcc,0,0,0,0,82.0,82,1.0
All,0,0,0,0,1869.0,1869,_
precision,_,_,_,_,0.043874,_,_


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.043874,0.2,0.043874,0.2,0.010849,0.016812,0.037324,0.0,0.5,0.5,0.5


In [62]:
from utils import print_header
import pandas as pd
from multiclass_models import trivial_prediction, final_prediction
from evaluation import weighted_average_f, confusion_matrix_with_metric, metric_dictionary

instance = trivial

y_train = instance.df_train['label']
y_val1 = instance.df_val1['label']
y_val_a = instance.df_val_a['label']
label_codes = instance.label_codes

_, prediction1, probabilities1 = trivial_prediction(y_train, 
                                                    num_preds=y_val1.shape[0],
                                                    label_codes=label_codes,
                                                    pos_label='other',)

_, prediction_a, probabilities_a = trivial_prediction(y_train, 
                                                      num_preds=y_val_a.shape[0],
                                                      label_codes=label_codes,
                                                      pos_label='other',)

instance.df_probabilities_val1 = instance.df_val1.copy()
instance.df_probabilities_val_a = instance.df_val_a.copy()
for i, dx in label_codes.items():
    instance.df_probabilities_val1['prob_' + dx] = probabilities1[:,i]
    instance.df_probabilities_val_a['prob_' + dx] = probabilities_a[:,i]
    
raw_probabilities_df1 = instance.df_probabilities_val1 
raw_probabilities_df_a = instance.df_probabilities_val_a

instance.df_pred_val1 = final_prediction(raw_probabilities_df=raw_probabilities_df1, 
                                         label_codes=label_codes,)   

instance.df_pred_val_a = final_prediction(raw_probabilities_df=raw_probabilities_df_a, 
                                         label_codes=label_codes,)  

map_labels = label_codes

target1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['label'] 
prediction1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['pred_final'] 

target_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['label']  
prediction_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['pred_final']  

txp1 = pd.crosstab(target1,prediction1,margins=True,dropna=False)
txp_a = pd.crosstab(target_a,prediction_a,margins=True,dropna=False)

beta = 2
# Weights inversely proportional to relative class size in the training set, giving more importance to smaller classes.
weights = 1/instance.df_train['label'].value_counts(normalize=True).sort_index().values # None

instance.cm1 = confusion_matrix_with_metric(AxB=txp1,
                                            lst=None,
                                            full_pad=True,
                                            func=weighted_average_f,
                                            beta=beta,
                                            weights=weights,
                                            percentage=False,
                                            map_labels=map_labels)

instance.cm_a = confusion_matrix_with_metric(AxB=txp_a,
                                            lst=None,
                                            full_pad=True,
                                            func=weighted_average_f,
                                            beta=beta,
                                            weights=weights,
                                            percentage=False,
                                            map_labels=map_labels)

target1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['label'] 
prediction1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')['pred_final'] 
probabilities1 = instance.df_probabilities_val1.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_')
agg_probabilities1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_') 

target_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['label']  
prediction_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')['pred_final']  
probabilities_a = instance.df_probabilities_val_a.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_')
agg_probabilities_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_') 

instance.metric_dict1 = metric_dictionary(target=target1, 
                                          prediction=prediction1, 
                                          probabilities=probabilities1)

instance.metric_dict_a = metric_dictionary(target=target_a, 
                                          prediction=prediction_a, 
                                          probabilities=probabilities_a)

print_header("Trivial prediction: all labels other")

print("\nOne image per lesion".upper())

display(instance.cm1.fillna('_'))
display(pd.DataFrame(instance.metric_dict1))

print("\nAll images per lesion".upper())
print("- This is the same as the one image per lesion case, because the validation sets are identical after dropping duplicates.")
print("- However, we include it here as a check.")
display(instance.cm_a.fillna('_'))
display(pd.DataFrame(instance.metric_dict_a))


TRIVIAL PREDICTION: ALL LABELS OTHER


ONE IMAGE PER LESION


predicted,other,nv,akiec,mel,bcc,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,225.0,0,0,0,0,225,1.0
nv,1351.0,0,0,0,0,1351,0.0
akiec,57.0,0,0,0,0,57,0.0
mel,154.0,0,0,0,0,154,0.0
bcc,82.0,0,0,0,0,82,0.0
All,1869.0,0,0,0,0,1869,_
precision,0.120385,_,_,_,_,_,_


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.120385,0.2,0.120385,0.2,0.029217,0.04298,0.081257,0.0,0.5,0.5,0.5



ALL IMAGES PER LESION
- This is the same as the one image per lesion case, because the validation sets are identical after dropping duplicates.
- However, we include it here as a check.


predicted,other,nv,akiec,mel,bcc,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,225.0,0,0,0,0,225,1.0
nv,1351.0,0,0,0,0,1351,0.0
akiec,57.0,0,0,0,0,57,0.0
mel,154.0,0,0,0,0,154,0.0
bcc,82.0,0,0,0,0,82,0.0
All,1869.0,0,0,0,0,1869,_
precision,0.120385,_,_,_,_,_,_


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.120385,0.2,0.120385,0.2,0.029217,0.04298,0.081257,0.0,0.5,0.5,0.5


<a id='baseline_models'></a>
# Baseline models
↑↑ [Contents](#contents) ↑ [Trivial models](#trivial_models) ↓ [Models with balancing](#models_with)

In [84]:
from typing import Type, Union      # For type hints
from processing import process      # Custom module for processing metadata

data_dir: Path = path["images"]     # Path to directory containing metadata.csv file
csv_filename: str = "metadata.csv"  # The filename
    
tvr: int = 3              # Ratio of training set to validation set. See discussion below for explanation.
seed: int = 0             # Random seed for parts of the process where randomness is called for.
keep_first: bool = False  # If False, then, for each lesion, we choose a random image to assign to our training set. 
stratified: bool = True   # If True, we stratify classes so that the proportions remain as stable as possible after train/val split. 
                          # If False, the proportions will be roughly similar.

to_classify: Union[list, dict] = ["mel",   # These are the lesion types we are interested in classifying. 
                                  "bcc",   # Any missing ones will be grouped together as the 0-label class: no need to write "other" here.
                                  "akiec", # If 'other' is not desired, use restrict_to attribute above
                                  "nv",]   # Can also be a dictionary, like { 'malignant' : ['mel', 'bcc'], 'benign' : ['nv', 'bkl']}
    
train_one_img_per_lesion: Union[None, bool] = True

In [90]:
# Create an instance of the process class with attribute values as above.
rn18_defaults = process(data_dir=data_dir,
                        csv_filename=csv_filename,
                        tvr=tvr,
                        seed=seed,
                        keep_first=keep_first,
                        stratified=stratified,
                        to_classify=to_classify,
                        train_one_img_per_lesion=train_one_img_per_lesion,)

- Loaded file 'D:\projects\skin-lesion-classification\images\metadata.csv'.
- Inserted 'num_images' column in dataframe, to the right of 'lesion_id' column.
- Inserted 'label' column in dataframe, to the right of 'dx' column: 
  {'df': 0, 'bkl': 0, 'vasc': 0, 'nv': 1, 'akiec': 2, 'mel': 3, 'bcc': 4}
- Added 'set' column to dataframe, with values 't1', 'v1', 'ta', and 'va', to the right of 'localization' column.
- Basic, overall dataframe (pre-train/test split): self.df
- Training set (not balanced, one image per lesion): self.df_train
- Validation set (not expanded, one image per lesion): self.df_val1
- Validation set (not expanded, use all images of each lesion): self.df_val_a
- Small sample dataframes for code testing: self._df_train_code_test, self._df_val1_code_test, self._df_val_a_code_test


In [91]:
import pandas as pd
from typing import Union, List, Callable
import torchvision.models as models
import torchvision.transforms as transforms

source: Union[process, pd.DataFrame] = rn18_defaults  # Processed data to be fed into model for training.
                                                      # Must either be an instance of the process class, or a dataframe of the same format as source.df if source were an instance of the process class.
model_dir: Path = path["models"]                      # Path to directory where models/model info/model results are stored.

transform: Union[None, 
                 transforms.Compose, 
                 List[Callable]] = None          # Transform to be applied to images before feeding into neural network.
    
filename_stem: Union[None, str] = "rn18"         # For saving model and related files. Default "rn18" (if ResNet model) or "EffNet" (if EfficientNet), or "cnn".
filename_suffix: Union[None, str] = "defaults"   # Something descriptive and unique for future reference. Default empty string "".

# model: Union[None, models.ResNet, models.EfficientNet] = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT) # Pre-trained model. Default: ResNet18.   
model: Union[None, models.ResNet, models.EfficientNet] = models.resnet18(weights="ResNet18_Weights.DEFAULT") 

In [92]:
# Create an instance of the resnet18 class with attribute values as above.
from multiclass_models import cnn

rn18_defaults = cnn(source=source,                                           
                    model_dir=model_dir,
                    transform=transform,
                    filename_stem=filename_stem,
                    filename_suffix=filename_suffix,                         
                    model=model,)

New files will be created. 
Base filename: rn18_t1_10e_defaults_00
Attributes saved to file: D:\projects\skin-lesion-classification\models\rn18_t1_10e_defaults_00_attributes.json


In [93]:
# from utils import print_header
import torch
import torch.nn as nn

instance = rn18_defaults

model = models.resnet18()  
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(instance.label_codes))

# model = models.efficientnet_b0()  
# num_ftrs = model.classifier[1].in_features
# model.classifier[1] = nn.Linear(num_ftrs, len(instance.label_codes))

instance.model = model

In [None]:
# from utils import print_header
from multiclass_models import get_probabilities

instance = rn18_defaults

instance.df_probabilities_val1 = get_probabilities(df=instance.df_val1,
                                                   data_dir=instance.data_dir,
                                                   model_dir=instance.model_dir,
                                                   model=instance.model,
                                                   filename=instance._filename,
                                                   label_codes=instance.label_codes,
                                                   transform=instance.transform,
                                                   batch_size=instance.batch_size,
                                                   Print=False,
                                                   save_as=instance._filename + "_val1",)

instance.df_probabilities_val_a = get_probabilities(df=instance.df_val_a,
                                                    data_dir=instance.data_dir,
                                                    model_dir=instance.model_dir,
                                                    model=instance.model,
                                                    filename=instance._filename,
                                                    label_codes=instance.label_codes,
                                                    transform=instance.transform,
                                                    batch_size=instance.batch_size,
                                                    Print=False,
                                                    save_as=instance._filename + "_val_a",)

In [None]:
print_header("...")
display_columns = ['lesion_id', 'image_id', 'dx'] + [col for col in instance.df_probabilities_val1.columns if col.startswith('prob')]
display(instance.df_probabilities_val1[display_columns].head())

print_header("...")
display(instance.df_probabilities_val_a[display_columns].head())

<a id='models_with'></a>
# Models with balancing
↑↑ [Contents](#contents) ↑ [Baseline models](#baseline_models) 