# Setup

In [1]:
import os
from pathlib import Path
import sys

# Name of the environment variable that holds the path to the root directory of the project.
# Defined first so that the Colab branch and the lookup below use the same name.
project_environment_variable = "SKIN_LESION_CLASSIFICATION"

# If we're using Google Colab, mount Google Drive and point the environment variable to the relevant folder there:
if 'COLAB_GPU' in os.environ:
    from google.colab import drive
    drive.mount('/content/drive')
    os.environ[project_environment_variable] = '/content/drive/MyDrive/Colab Notebooks/skin-lesion-classification'

# Otherwise, the environment variable must already be set on the local system.
project_root = os.environ.get(project_environment_variable)
if project_root is None:
    # Path(None) would raise an opaque TypeError; fail with an actionable message instead.
    raise RuntimeError(
        f"Environment variable {project_environment_variable!r} is not set. "
        "Set it to the root directory of the project before running this notebook."
    )

# Path to the root directory of the project:
project_path = Path(project_root)

# Path to /scripts (from where custom modules will be imported):
scripts_path = project_path.joinpath("scripts")

# Add this path to sys.path so that Python will look there for modules.
# Skipped when already present, so re-running this cell does not pile up duplicate entries.
if str(scripts_path) not in sys.path:
    sys.path.append(str(scripts_path))

# Now import path_setup from our custom utils module to create a dictionary of paths to the subdirectories of our root directory:
from utils import path_setup
path = path_setup.subfolders(project_path)

Mounted at /content/drive
path['project'] : /content/drive/MyDrive/Colab Notebooks/skin-lesion-classification
path['models'] : /content/drive/MyDrive/Colab Notebooks/skin-lesion-classification/models
path['notebooks'] : /content/drive/MyDrive/Colab Notebooks/skin-lesion-classification/notebooks
path['scripts'] : /content/drive/MyDrive/Colab Notebooks/skin-lesion-classification/scripts
path['images'] : /content/drive/MyDrive/Colab Notebooks/skin-lesion-classification/images


<a id='model_setup'></a>
# Model setup

In [2]:
from typing import Type, Union      # For type hints
from processing import process      # Custom module for processing metadata

# Directory containing the metadata.csv file, and the name of that file.
data_dir: Path = path["images"]
csv_filename: str = "metadata.csv"

# Ratio of training set to validation set. See discussion below for explanation.
tvr: int = 3

# Random seed for parts of the process where randomness is called for.
seed: int = 0

# If False, then, for each lesion, we choose a random image to assign to our training set.
keep_first: bool = False

# If True, we stratify classes so that the proportions remain as stable as possible after train/val split.
# If False, the proportions will be roughly similar.
stratified: bool = True

# These are the lesion types we are interested in classifying.
# Any missing ones will be grouped together as the 0-label class: no need to write "other" here.
# Can also be a dictionary, like { 'malignant' : ['mel', 'bcc'], 'benign' : ['nv', 'bkl']}
# NOTE(review): the original note said "If 'other' is not desired, use restrict_to attribute above",
# but no restrict_to setting appears in this cell -- presumably an optional argument of process; confirm.
to_classify: Union[list, dict] = [
    "mel",
    "bcc",
    "akiec",
    "nv",
]

# NOTE(review): not documented in the original cell. With True, the recorded output of process
# reports a training set with one image per lesion; confirm the meaning of None/False.
train_one_img_per_lesion: Union[None, bool] = True

In [3]:
# Collect the settings defined above, then create an instance of the process class from them.
process_settings = dict(
    data_dir=data_dir,
    csv_filename=csv_filename,
    tvr=tvr,
    seed=seed,
    keep_first=keep_first,
    stratified=stratified,
    to_classify=to_classify,
    train_one_img_per_lesion=train_one_img_per_lesion,
)
base_t1 = process(**process_settings)

- Loaded file '/content/drive/MyDrive/Colab Notebooks/skin-lesion-classification/images/metadata.csv'.
- Inserted 'num_images' column in dataframe, to the right of 'lesion_id' column.
- Inserted 'label' column in dataframe, to the right of 'dx' column: 
  {'bkl': 0, 'vasc': 0, 'df': 0, 'nv': 1, 'bcc': 2, 'mel': 3, 'akiec': 4}
- Added 'set' column to dataframe, with values 't1', 'v1', 'ta', and 'va', to the right of 'localization' column.
- Basic, overall dataframe (pre-train/test split): self.df
- Training set (not balanced, one image per lesion): self.df_train
- Validation set (not expanded, one image per lesion): self.df_val1
- Validation set (not expanded, use all images of each lesion): self.df_val_a
- Small sample dataframes for code testing: self._df_train_code_test, self._df_val1_code_test, self._df_val_a_code_test


In [5]:
import torchvision.transforms as transforms
# Preprocessing pipeline applied to each image before it is fed to the network.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

preprocessing_steps = [
    transforms.Resize((224,224)),                                # Resize images to fit ResNet input size
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),  # Normalize with ImageNet stats
]
transform = transforms.Compose(preprocessing_steps)

In [8]:
import pandas as pd
from typing import Union, List, Callable
import torchvision.models as models

# Processed data to be fed into model for training.
# Must either be an instance of the process class, or a dataframe of the same format
# as source.df if source were an instance of the process class.
source: Union[process, pd.DataFrame] = base_t1

# Path to directory where models/model info/model results are stored.
model_dir: Path = path["models"]

# Transform to be applied to images before feeding into neural network
# (re-binds the transform defined in the earlier cell; only the annotation is new).
transform: Union[None, transforms.Compose, List[Callable]] = transform

# For saving model and related files. Default "rn18" (if ResNet model) or "EffNet" (if EfficientNet), or "cnn".
filename_stem: Union[None, str] = "rn18"

# Something descriptive and unique for future reference. Default empty string "".
filename_suffix: Union[None, str] = "base"

# Pre-trained model. Default: ResNet18.
# Alternative: models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)
model: Union[None, models.ResNet, models.EfficientNet] = models.resnet18(weights="ResNet18_Weights.DEFAULT")

# With True, the recorded output of the next cell reports "Existing files will be overwritten."
overwrite: bool = True

In [9]:
# Create an instance of the cnn class with attribute values as above.
from multiclass_models import cnn

# Bundle the settings from the configuration cell and hand them to cnn in one go.
cnn_settings = dict(
    source=source,
    model_dir=model_dir,
    transform=transform,
    filename_stem=filename_stem,
    filename_suffix=filename_suffix,
    model=model,
    overwrite=overwrite,
)
rn18_base_t1 = cnn(**cnn_settings)

Existing files will be overwritten. 
Base filename: rn18_t1_10e_base_00
Attributes saved to file: /content/drive/MyDrive/Colab Notebooks/skin-lesion-classification/models/rn18_t1_10e_base_00_attributes.json


In [None]:
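# Training call left commented out; the log below appears to have been recorded in an earlier run.
# Uncomment to retrain.
# rn18_base_t1.train()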

Epoch 1/10, Training Loss: 0.7301, Validation Loss 1: 0.6909, Validation Loss a: 0.8033
Epoch 2/10, Training Loss: 0.5956, Validation Loss 1: 0.7271, Validation Loss a: 0.8400
Epoch 3/10, Training Loss: 0.5618, Validation Loss 1: 0.7750, Validation Loss a: 0.9342
Epoch 4/10, Training Loss: 0.5287, Validation Loss 1: 0.6612, Validation Loss a: 0.7672
Epoch 5/10, Training Loss: 0.5126, Validation Loss 1: 0.6427, Validation Loss a: 0.7499
Epoch 6/10, Training Loss: 0.4898, Validation Loss 1: 0.5901, Validation Loss a: 0.7056
Epoch 7/10, Training Loss: 0.4918, Validation Loss 1: 0.7818, Validation Loss a: 0.8400
Epoch 8/10, Training Loss: 0.4406, Validation Loss 1: 0.7234, Validation Loss a: 0.8485
Epoch 9/10, Training Loss: 0.3782, Validation Loss 1: 0.6184, Validation Loss a: 0.7478
Epoch 10/10, Training Loss: 0.3489, Validation Loss 1: 0.6339, Validation Loss a: 0.7746
Saving model.state_dict() as /content/drive/MyDrive/Colab Notebooks/skin-lesion-classification/models/rn18_t1_10e_base_

In [12]:
from utils import print_header
import torch
import torch.nn as nn

instance = rn18_base_t1

# Rebuild the ResNet-18 architecture (no pretrained weights requested) and replace the final
# layer so that its output size equals the number of label codes, matching the saved state_dict.
model = models.resnet18()
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(instance.label_codes))

# Load the saved weights. map_location="cpu" lets a checkpoint that was saved from a GPU session
# be loaded on a CPU-only machine; the freshly built model lives on the CPU anyway.
file_path_pth = instance.model_dir.joinpath(instance._filename + ".pth")
state_dict = torch.load(file_path_pth, map_location="cpu")
model.load_state_dict(state_dict)

# This cell prepares the model for inference, so switch BatchNorm layers to evaluation mode.
model.eval()

# model = models.efficientnet_b0()
# num_ftrs = model.classifier[1].in_features
# model.classifier[1] = nn.Linear(num_ftrs, len(instance.label_codes))

instance.model = model
instance.state_dict = state_dict

# NOTE(review): the label codes printed when process was instantiated in the recorded run
# ({'nv': 1, 'bcc': 2, 'mel': 3, 'akiec': 4}) do not match the coding visible in the recorded
# evaluation output (prob_bcc precedes prob_nv, and a row whose largest probability is prob_bcc
# gets pred 1). If label codes can differ between sessions, weights trained under one coding
# are evaluated under another -- verify that instance.label_codes matches the coding in use
# when the .pth file was trained.

print_header("Model architecture")
print(f"Note: \'out_features = {len(instance.label_codes)}\' at the end".upper())
display(instance.model)


MODEL ARCHITECTURE

NOTE: 'OUT_FEATURES = 5' AT THE END


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [13]:
# from utils import print_header
from multiclass_models import get_probabilities

instance = rn18_base_t1

# Arguments that are identical for both validation sets.
shared_probability_args = dict(
    data_dir=instance.data_dir,
    model_dir=instance.model_dir,
    model=instance.model,
    filename=instance._filename,
    label_codes=instance.label_codes,
    transform=instance.transform,
    batch_size=instance.batch_size,
    Print=False,
)

# Validation set with one image per lesion.
instance.df_probabilities_val1 = get_probabilities(
    df=instance.df_val1,
    save_as=instance._filename + "_val1",
    **shared_probability_args,
)

# Validation set with all images of each lesion.
instance.df_probabilities_val_a = get_probabilities(
    df=instance.df_val_a,
    save_as=instance._filename + "_val_a",
    **shared_probability_args,
)

Saving probabilities: /content/drive/MyDrive/Colab Notebooks/skin-lesion-classification/models/rn18_t1_10e_base_00_val1_probabilities.csv
Saving probabilities: /content/drive/MyDrive/Colab Notebooks/skin-lesion-classification/models/rn18_t1_10e_base_00_val_a_probabilities.csv


In [None]:
# Optional: reload previously saved probabilities from CSV instead of recomputing them
# (the filenames below look like placeholders and need to be adjusted).
# instance = rn18_base_t1

# file_path1 = instance.model_dir.joinpath("_xxx_val1_probabilities.csv")
# file_path_a = instance.model_dir.joinpath("_10e_xxx_val_a_probabilities.csv")

# instance.df_probabilities_val1 = pd.read_csv(file_path1, index_col=0)
# instance.df_probabilities_val_a = pd.read_csv(file_path_a, index_col=0)

In [14]:
instance = rn18_base_t1

# Show the identifying columns followed by every probability column.
identifier_columns = ['lesion_id', 'image_id', 'dx']
probability_columns = [
    name for name in instance.df_probabilities_val1.columns
    if name.startswith('prob')
]
display_columns = identifier_columns + probability_columns

print_header("Validation set: one image per lesion")
preview_val1 = instance.df_probabilities_val1[display_columns].head()
display(preview_val1)

print_header("Validation set: all images per lesion")
preview_val_a = instance.df_probabilities_val_a[display_columns].head()
display(preview_val_a)


VALIDATION SET: ONE IMAGE PER LESION



Unnamed: 0,lesion_id,image_id,dx,prob_other,prob_bcc,prob_nv,prob_mel,prob_akiec
0,HAM_0002730,ISIC_0025661,bkl,0.001487,0.161692,0.009482,0.826675,0.000664
1,HAM_0001466,ISIC_0027850,bkl,0.263979,0.585804,0.045473,0.081285,0.023459
2,HAM_0002761,ISIC_0029068,bkl,0.00362,0.876243,0.00031,0.004395,0.115432
3,HAM_0004234,ISIC_0029396,bkl,0.000106,0.001429,0.009952,0.000385,0.988128
4,HAM_0001949,ISIC_0025767,bkl,0.159607,0.085461,0.013533,0.007519,0.73388



VALIDATION SET: ALL IMAGES PER LESION



Unnamed: 0,lesion_id,image_id,dx,prob_other,prob_bcc,prob_nv,prob_mel,prob_akiec
0,HAM_0002730,ISIC_0026769,bkl,0.000516,0.360468,0.00126,0.637447,0.000308
1,HAM_0002730,ISIC_0025661,bkl,0.001487,0.161692,0.009482,0.826675,0.000664
2,HAM_0001466,ISIC_0031633,bkl,0.388564,0.070754,0.319496,0.16492,0.056266
3,HAM_0001466,ISIC_0027850,bkl,0.263979,0.585804,0.045473,0.081285,0.023459
4,HAM_0002761,ISIC_0029176,bkl,0.002928,0.982828,3e-05,0.012453,0.00176


In [16]:
from collections import OrderedDict
from typing import Dict, List
from multiclass_models import final_prediction

instance = rn18_base_t1

# Per-image probabilities computed earlier for the two validation sets.
raw_probabilities_df1: pd.DataFrame = instance.df_probabilities_val1
raw_probabilities_df_a: pd.DataFrame = instance.df_probabilities_val_a

# Optional settings, all left at None here. Example values kept for reference:
#   aggregate_method:      { 'max' : ['mel', 'bcc', 'akiec'], 'min' : ['nv'], 'mean' : ['other']}
#   threshold_dict_help:   OrderedDict([('mel',0.4), ('bcc', 0.4), ('akiec', 0.4)])
#   threshold_dict_hinder: OrderedDict([('nv',0.6)])
#   votes_to_win_dict:     OrderedDict([('mel',1), ('bcc',1), ('akiec',1)])
# NOTE(review): aggregate_method and prefix are defined in this cell but are not passed to
# final_prediction below -- confirm whether that is intended.
aggregate_method: Union[None, Dict[str, List[str]]] = None
threshold_dict_help: Union[None, OrderedDict[str, float]] = None
threshold_dict_hinder: Union[None, OrderedDict[str, float]] = None
votes_to_win_dict: Union[None, OrderedDict[str, int]] = None
label_codes: Dict[int, str] = instance.label_codes
prefix: Union[None, str] = 'prob_'

# Settings shared by both final_prediction calls.
prediction_settings = dict(
    threshold_dict_help=threshold_dict_help,
    threshold_dict_hinder=threshold_dict_hinder,
    votes_to_win_dict=votes_to_win_dict,
    label_codes=label_codes,
)

print_header("Validation set, one image per lesion: combining probabilities and making predictions")
instance.df_pred_val1 = final_prediction(
    raw_probabilities_df=raw_probabilities_df1,
    **prediction_settings,
)
display(instance.df_pred_val1)

print_header("Validation set, all images per lesion: combining probabilities, making predictions, and combining predictions")
instance.df_pred_val_a = final_prediction(
    raw_probabilities_df=raw_probabilities_df_a,
    **prediction_settings,
)
display(instance.df_pred_val_a)


VALIDATION SET, ONE IMAGE PER LESION: COMBINING PROBABILITIES AND MAKING PREDICTIONS



Unnamed: 0,lesion_id,num_images,image_id,dx,label,dx_type,age,sex,localization,set,prob_other,prob_bcc,prob_nv,prob_mel,prob_akiec,pred,pred_final
0,HAM_0002730,2,ISIC_0025661,bkl,0,histo,80.0,male,scalp,v1,0.001487,0.161692,9.481674e-03,0.826675,0.000664,3,3
1,HAM_0001466,2,ISIC_0027850,bkl,0,histo,75.0,male,ear,v1,0.263979,0.585804,4.547286e-02,0.081285,0.023459,1,1
2,HAM_0002761,2,ISIC_0029068,bkl,0,histo,60.0,male,face,v1,0.003620,0.876243,3.095461e-04,0.004395,0.115432,1,1
3,HAM_0004234,2,ISIC_0029396,bkl,0,histo,85.0,female,chest,v1,0.000106,0.001429,9.952085e-03,0.000385,0.988128,4,4
4,HAM_0001949,2,ISIC_0025767,bkl,0,histo,70.0,male,trunk,v1,0.159607,0.085461,1.353307e-02,0.007519,0.733880,4,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1864,HAM_0003949,1,ISIC_0027254,akiec,4,histo,75.0,male,face,v1,0.000222,0.996716,7.912158e-07,0.002864,0.000197,1,1
1865,HAM_0003910,1,ISIC_0026194,akiec,4,histo,30.0,male,face,v1,0.013885,0.884097,9.407729e-05,0.101797,0.000126,1,1
1866,HAM_0001474,1,ISIC_0026765,akiec,4,histo,55.0,female,chest,v1,0.003909,0.896007,1.058043e-04,0.099592,0.000387,1,1
1867,HAM_0001152,1,ISIC_0030133,akiec,4,histo,65.0,male,face,v1,0.040546,0.804073,9.019514e-04,0.151423,0.003055,1,1



VALIDATION SET, ALL IMAGES PER LESION: COMBINING PROBABILITIES, MAKING PREDICTIONS, AND COMBINING PREDICTIONS



Unnamed: 0,lesion_id,num_images,image_id,dx,label,dx_type,age,sex,localization,set,prob_other,prob_bcc,prob_nv,prob_mel,prob_akiec,pred,pred_final
0,HAM_0002730,2,ISIC_0026769,bkl,0,histo,80.0,male,scalp,va,0.000516,0.360468,0.001260,0.637447,0.000308,3,3
1,HAM_0002730,2,ISIC_0025661,bkl,0,histo,80.0,male,scalp,v1,0.001487,0.161692,0.009482,0.826675,0.000664,3,3
2,HAM_0001466,2,ISIC_0031633,bkl,0,histo,75.0,male,ear,va,0.388564,0.070754,0.319496,0.164920,0.056266,0,0
3,HAM_0001466,2,ISIC_0027850,bkl,0,histo,75.0,male,ear,v1,0.263979,0.585804,0.045473,0.081285,0.023459,1,0
4,HAM_0002761,2,ISIC_0029176,bkl,0,histo,60.0,male,face,va,0.002928,0.982828,0.000030,0.012453,0.001760,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2527,HAM_0001474,1,ISIC_0026765,akiec,4,histo,55.0,female,chest,v1,0.003909,0.896007,0.000106,0.099592,0.000387,1,1
2528,HAM_0001152,1,ISIC_0030133,akiec,4,histo,65.0,male,face,v1,0.040546,0.804073,0.000902,0.151423,0.003055,1,1
2529,HAM_0004282,3,ISIC_0033811,akiec,4,histo,65.0,female,face,va,0.764199,0.030113,0.066984,0.014511,0.124193,0,0
2531,HAM_0004282,3,ISIC_0033358,akiec,4,histo,65.0,female,face,va,0.101574,0.331277,0.032219,0.471452,0.063479,3,0


In [19]:
# from utils import print_header
from evaluation import weighted_average_f, confusion_matrix_with_metric

instance = rn18_base_t1
map_labels = instance.label_codes

# Reduce each prediction frame to one row per lesion (first row of each lesion_id is kept),
# then read off the true label and the final prediction.
lesions_val1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')
target1 = lesions_val1['label']
prediction1 = lesions_val1['pred_final']

lesions_val_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')
target_a = lesions_val_a['label']
prediction_a = lesions_val_a['pred_final']

# Actual-vs-predicted contingency tables, including the 'All' margins.
txp1 = pd.crosstab(target1, prediction1, margins=True, dropna=False)
txp_a = pd.crosstab(target_a, prediction_a, margins=True, dropna=False)

beta = 2
# Weights inversely proportional to relative class size in the training set, giving more importance to smaller classes.
# (The original cell notes None as the alternative value.)
class_frequencies = instance.df_train['label'].value_counts(normalize=True).sort_index().values
weights = 1/class_frequencies

# Settings shared by both confusion matrices.
confusion_matrix_settings = dict(
    lst=None,
    full_pad=True,
    func=weighted_average_f,
    beta=beta,
    weights=weights,
    percentage=False,
    map_labels=map_labels,
)

instance.cm1 = confusion_matrix_with_metric(AxB=txp1, **confusion_matrix_settings)
instance.cm_a = confusion_matrix_with_metric(AxB=txp_a, **confusion_matrix_settings)

# NOTE(review): in the recorded output most 'nv' lesions are predicted as 'akiec'; check that
# the label coding used here matches the one the weights were trained with.
print_header("Confusion matrix: validation set, one image per lesion")
display(instance.cm1.fillna('_'))

print_header("Confusion matrix: validation set, all images per lesion")
display(instance.cm_a.fillna('_'))


CONFUSION MATRIX: VALIDATION SET, ONE IMAGE PER LESION



predicted,other,bcc,nv,mel,akiec,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,112.0,22.0,11.0,4.0,76.0,225,0.497778
bcc,12.0,53.0,2.0,6.0,9.0,82,0.646341
nv,45.0,16.0,13.0,3.0,1274.0,1351,0.009623
mel,19.0,2.0,41.0,4.0,88.0,154,0.025974
akiec,13.0,32.0,0.0,7.0,5.0,57,0.087719
All,201.0,125.0,67.0,24.0,1452.0,1869,_
precision,0.557214,0.424,0.19403,0.166667,0.003444,_,0.238604



CONFUSION MATRIX: VALIDATION SET, ALL IMAGES PER LESION



predicted,other,bcc,nv,mel,akiec,All,recall
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
other,131.0,19.0,12.0,4.0,59.0,225,0.582222
bcc,16.0,55.0,2.0,3.0,6.0,82,0.670732
nv,47.0,21.0,26.0,3.0,1254.0,1351,0.019245
mel,27.0,4.0,44.0,4.0,75.0,154,0.025974
akiec,15.0,32.0,0.0,6.0,4.0,57,0.070175
All,236.0,131.0,84.0,20.0,1398.0,1869,_
precision,0.555085,0.419847,0.309524,0.2,0.002861,_,0.249223


In [18]:
# from utils import print_header
from evaluation import metric_dictionary
# import pandas as pd

instance = rn18_base_t1

# One row per lesion (first row of each lesion_id is kept) for the one-image-per-lesion set.
pred_lesions1 = instance.df_pred_val1.drop_duplicates(subset='lesion_id')
target1 = pred_lesions1['label']
prediction1 = pred_lesions1['pred_final']
probabilities1 = instance.df_probabilities_val1.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_')
agg_probabilities1 = pred_lesions1.filter(regex=r'^prob_')

# Same reduction for the all-images-per-lesion set.
pred_lesions_a = instance.df_pred_val_a.drop_duplicates(subset='lesion_id')
target_a = pred_lesions_a['label']
prediction_a = pred_lesions_a['pred_final']
probabilities_a = instance.df_probabilities_val_a.drop_duplicates(subset='lesion_id').filter(regex=r'^prob_')
agg_probabilities_a = pred_lesions_a.filter(regex=r'^prob_')

beta = 2
# Weights inversely proportional to relative class size, giving more importance to smaller classes.
# (The original cell notes None as the alternative value.)
class_frequencies = instance.df_train['label'].value_counts(normalize=True).sort_index().values
weights = 1/class_frequencies

# NOTE(review): agg_probabilities1, agg_probabilities_a, beta and weights are computed in this
# cell but are not passed to metric_dictionary below -- confirm whether that is intended.

print_header("Baseline model: other metrics")

instance.metric_dict1 = metric_dictionary(
    target=target1,
    prediction=prediction1,
    probabilities=probabilities1,
)

instance.metric_dict_a = metric_dictionary(
    target=target_a,
    prediction=prediction_a,
    probabilities=probabilities_a,
)

print("\nOne image per lesion".upper())
metrics_table1 = pd.DataFrame(instance.metric_dict1)
display(metrics_table1)

print("\nAll images per lesion".upper())
metrics_table_a = pd.DataFrame(instance.metric_dict_a)
display(metrics_table_a)


BASELINE MODEL: OTHER METRICS


ONE IMAGE PER LESION


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.100054,0.253487,,,0.224791,0.221561,0.230326,0.080843,0.580959,0.424714,0.523276



ALL IMAGES PER LESION


Unnamed: 0,ACC,BACC,precision,recall,F1/2,F1,F2,MCC,ROC-AUC mac,ROC-AUC wt,ROC-AUC wt*
0,0.11771,0.27367,,,0.236035,0.234495,0.24863,0.099416,0.580421,0.428102,0.524548


## Further training

In [10]:
from utils import print_header
import torch
import torch.nn as nn

instance = rn18_base_t1

# Rebuild the ResNet-18 architecture (no pretrained weights requested) and replace the final
# layer so that its output size equals the number of label codes, matching the saved state_dict.
model = models.resnet18()
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(instance.label_codes))

# Load the weights saved by the previous training run. map_location="cpu" lets a checkpoint
# that was saved from a GPU session be loaded on a CPU-only machine; the freshly built model
# lives on the CPU anyway.
file_path_pth = instance.model_dir.joinpath(instance._filename + ".pth")
state_dict = torch.load(file_path_pth, map_location="cpu")
model.load_state_dict(state_dict)

# model = models.efficientnet_b0()
# num_ftrs = model.classifier[1].in_features
# model.classifier[1] = nn.Linear(num_ftrs, len(instance.label_codes))

# Attach the reloaded model and its weights to the instance; the configuration cell further
# down reads instance.model as the starting point for further training.
instance.model = model
instance.state_dict = state_dict

In [5]:
import torchvision.transforms as transforms
# Preprocessing pipeline applied to each image before it is fed to the network.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

preprocessing_steps = [
    transforms.Resize((224,224)),                                # Resize images to fit ResNet input size
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),  # Normalize with ImageNet stats
]
transform = transforms.Compose(preprocessing_steps)

In [11]:
import pandas as pd
from typing import Union, List, Callable
import torchvision.models as models

# Processed data to be fed into model for training.
# Must either be an instance of the process class, or a dataframe of the same format
# as source.df if source were an instance of the process class.
source: Union[process, pd.DataFrame] = base_t1

# Path to directory where models/model info/model results are stored.
model_dir: Path = path["models"]

# Transform to be applied to images before feeding into neural network
# (re-binds the transform defined in the earlier cell; only the annotation is new).
transform: Union[None, transforms.Compose, List[Callable]] = transform

# For saving model and related files. Default "rn18" (if ResNet model) or "EffNet" (if EfficientNet), or "cnn".
filename_stem: Union[None, str] = "rn18"

# Something descriptive and unique for future reference. Default empty string "".
filename_suffix: Union[None, str] = "base"

# Start from the model currently attached to instance (set in an earlier cell), rather than
# from a freshly downloaded pre-trained model.
# Alternative: models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)
model: Union[None, models.ResNet, models.EfficientNet] = instance.model

In [12]:
# Create a new instance of the cnn class with attribute values as above.
from multiclass_models import cnn

# Bundle the settings from the configuration cell and hand them to cnn in one go.
# Note that this re-binds rn18_base_t1 to the new instance.
further_training_settings = dict(
    source=source,
    model_dir=model_dir,
    transform=transform,
    filename_stem=filename_stem,
    filename_suffix=filename_suffix,
    model=model,
)
rn18_base_t1 = cnn(**further_training_settings)

New files will be created. 
Base filename: rn18_t1_10e_base_01
Attributes saved to file: /content/drive/MyDrive/Colab Notebooks/skin-lesion-classification/models/rn18_t1_10e_base_01_attributes.json


In [13]:
# Train the new instance, which was created from the reloaded model.
# NOTE(review): in the recorded log below, training loss keeps falling (0.3399 -> 0.0516) while
# both validation losses rise (0.8675 -> 1.2336 and 1.0090 -> 1.5168). This looks like
# overfitting -- consider fewer epochs or early stopping.
rn18_base_t1.train()

Epoch 1/10, Training Loss: 0.3399, Validation Loss 1: 0.8675, Validation Loss a: 1.0090
Epoch 2/10, Training Loss: 0.2342, Validation Loss 1: 0.7417, Validation Loss a: 0.8978
Epoch 3/10, Training Loss: 0.1638, Validation Loss 1: 0.9038, Validation Loss a: 1.1286
Epoch 4/10, Training Loss: 0.1114, Validation Loss 1: 0.9566, Validation Loss a: 1.1344
Epoch 5/10, Training Loss: 0.0906, Validation Loss 1: 1.2134, Validation Loss a: 1.4496
Epoch 6/10, Training Loss: 0.0906, Validation Loss 1: 1.2993, Validation Loss a: 1.5861
Epoch 7/10, Training Loss: 0.0626, Validation Loss 1: 1.0957, Validation Loss a: 1.3406
Epoch 8/10, Training Loss: 0.0514, Validation Loss 1: 1.2012, Validation Loss a: 1.4794
Epoch 9/10, Training Loss: 0.0681, Validation Loss 1: 1.1314, Validation Loss a: 1.4241
Epoch 10/10, Training Loss: 0.0516, Validation Loss 1: 1.2336, Validation Loss a: 1.5168
Saving model.state_dict() as /content/drive/MyDrive/Colab Notebooks/skin-lesion-classification/models/rn18_t1_10e_base_