# Set Up

In [None]:
# The following magics are used every time the fastai library is used.
# They tell the notebook to automatically reload any changes made to imported libraries.
# They also ensure that any plotted graphs are shown inline in this notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from fastai.vision.all import *
from fastai.metrics import *
import pandas as pd
from pathlib import Path

In [None]:
# Read the HAM10000 metadata CSV into a DataFrame.
csv_path = "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv"
skin_df = pd.read_csv(csv_path)
# Preview the rows ordered by image id. sort_values returns a new frame,
# so skin_df itself keeps its original row order.
skin_df.sort_values("image_id")

In [None]:
# Root folder of the dataset.
# NOTE(review): this is relative to the working directory, unlike the absolute
# /kaggle/input/... paths used in the other cells of this notebook.
path = Path('../input/skin-cancer-mnist-ham10000')
# Store the dataset root on the Path class itself (presumably so fastai shows
# paths relative to it — confirm against the fastai docs).
Path.BASE_PATH = path
# List the contents of the dataset folder. `ls` is not a stdlib pathlib method;
# presumably it is added by the fastai star-import above.
path.ls()

## Rename Variables

In [None]:
# Lookup table from the short diagnosis code to its full, readable name.
short_to_full_name_dict = {
    # very early form of skin cancer
    "akiec": "Bowen's disease",
    # basal-cell cancer or white skin cancer
    "bcc": "basal cell carcinoma",
    # non-cancerous skin tumour
    "bkl": "benign keratosis-like lesions",
    # non-cancerous rounded bumps
    "df": "dermatofibroma",
    # black skin cancer
    "mel": "melanoma",
    # mole, non-cancerous
    "nv": "melanocytic nevi",
    # skin condition
    "vasc": "vascular lesions",
}

# Get Images from file

In [None]:
# Build the image id -> full diagnosis name mapping in one pass: walk the
# `image_id` and `dx` columns of the metadata row by row and translate each
# short `dx` code through short_to_full_name_dict (an unknown code raises KeyError).
img_to_class_dict = {
    image_id: short_to_full_name_dict[dx_code]
    for image_id, dx_code in zip(skin_df["image_id"], skin_df["dx"])
}
# Peek at the first five (image id, diagnosis name) pairs.
list(img_to_class_dict.items())[:5]

In [None]:
def get_label_from_dict(path):
    """Return the diagnosis name recorded for the image file at `path`.

    The lookup key is `path.stem` (the file name without its suffix), which
    must be an image id present in `img_to_class_dict`; an unknown id raises
    KeyError.
    """
    image_id = path.stem
    return img_to_class_dict[image_id]

# Constructing a DataBlock

In [None]:
def get_unique_image_files(source):
    """Collect the image files under `source`, keeping one file per file stem.

    Labels are looked up by stem (the image id), so two files sharing a stem
    are the same labelled image. Keeping both would let copies of one image
    land on both sides of the random train/validation split and inflate the
    validation accuracy. With no duplicate stems this returns the same files
    as `get_image_files`.

    NOTE(review): the Kaggle copy of HAM10000 reportedly ships each image
    folder twice — confirm by comparing len(get_image_files(img_path)) with
    the number of rows in the metadata CSV.
    """
    first_file_by_stem = {}
    for img_file in get_image_files(source):
        # setdefault keeps the first file seen for each image id
        first_file_by_stem.setdefault(img_file.stem, img_file)
    return list(first_file_by_stem.values())


dblock = DataBlock(
    # Independent variable: an image; dependent variable: a single category
    blocks = (ImageBlock, CategoryBlock),
    # List the image files under the source path, one per image id
    get_items = get_unique_image_files,
    # Split training and validation sets randomly (20% validation, fixed seed)
    splitter = RandomSplitter(valid_pct=0.2, seed=42),
    # Function fastai calls to create the label (the dependent variable) for each file
    get_y = get_label_from_dict,
    # Per item: resize to 448, then DihedralItem picks one of the 8 dihedral
    # orientations (4 rotations by 90 degrees x 2 flips)
    item_tfms=[Resize(448), DihedralItem()],
    # Per batch: random crop covering 75-100% of the image area, resized to 224.
    # RandomResizedCropGPU is the batch (tensor) version of the transform; the
    # item-level RandomResizedCrop only handles PIL images, so as a batch
    # transform it passes the image tensors through unchanged.
    batch_tfms=RandomResizedCropGPU(size=224, min_scale=0.75, max_scale=1.0))

img_path = "/kaggle/input/skin-cancer-mnist-ham10000"
# create dataloaders using img_path
dls = dblock.dataloaders(img_path, bs=64) # bs = batch size

# Display the images

In [None]:
# Display up to 15 samples from a batch as a visual check of images and labels.
dls.show_batch(max_n=15)

Observations from these images will be noted below. First, let's confirm that our categories are exactly the seven expected classes: "Bowen's disease", "basal cell carcinoma", "benign keratosis-like lesions", "dermatofibroma", "melanocytic nevi", "melanoma" and "vascular lesions":

In [None]:
# Print the category vocabulary held by the DataLoaders.
print(dls.vocab)

Let's check the sizes of our training and validation sets:

In [None]:
# (number of training items, number of validation items)
len(dls.train_ds), len(dls.valid_ds)

# Train a simple model

In [None]:
# Baseline: a ResNet-18 learner on the DataLoaders, reporting accuracy as the metric.
learn = vision_learner(dls, resnet18, metrics=accuracy)
# Fine-tune it for 4 epochs.
learn.fine_tune(4)

In [None]:
# Run the learning-rate finder and keep the two suggestions it returns, one per
# suggestion function (`minimum`, `steep`).
# NOTE(review): this runs after fine_tune, and `learn` is re-created before any
# further training, so these values are only printed, never used.
lr_min,lr_steep = learn.lr_find(suggest_funcs=(minimum, steep))

In [None]:
# Report both suggestions in scientific notation.
# NOTE(review): the "Minimum/10" label presumably means the learning rate at the
# minimum loss divided by 10 — confirm against the fastai docs for `minimum`.
print(f"Minimum/10: {lr_min:.2e}, steepest point: {lr_steep:.2e}")

**Train with fit_one_cycle for 3 epochs to get an idea of how accurate the model would be with resnet34.**

In [None]:
# Replace the learner with a fresh ResNet-34 one (the ResNet-18 `learn` is discarded).
learn = vision_learner(dls, resnet34, metrics=accuracy)
# Three one-cycle epochs with a maximum learning rate of 1e-2.
learn.fit_one_cycle(3, lr_max=1e-2)

# Unfreezing and Transfer Learning

In [None]:
# Unfreeze the learner so that later training is not limited to the layers
# trained so far (presumably all layer groups become trainable — see fastai docs).
learn.unfreeze()

In [None]:
# Re-run the learning-rate finder on the unfrozen learner and keep the two
# suggestions it returns, one per suggestion function (`minimum`, `steep`).
lr_min,lr_steep = learn.lr_find(suggest_funcs=(minimum, steep))

In [None]:
# Report both suggestions in scientific notation.
# NOTE(review): the "Minimum/10" label presumably means the learning rate at the
# minimum loss divided by 10 — confirm against the fastai docs for `minimum`.
print(f"Minimum/10: {lr_min:.2e}, steepest point: {lr_steep:.2e}")

In [None]:
# Train the unfrozen learner for 30 one-cycle epochs with a sliced maximum
# learning rate from 1e-4 to 1e-2.
# NOTE(review): the range is hard-coded; the lr_find suggestions computed for
# the unfrozen learner are not used here.
learn.fit_one_cycle(30, lr_max=slice(1e-4, 1e-2))

In [None]:
# Plot the losses the recorder collected during training.
learn.recorder.plot_loss()

### saving the model

In [None]:
# Save the learner's current state under the name 'model1'
# (presumably written to the learner's models directory — confirm the location).
learn.save('model1')

# Model Interpretation

In [None]:
# Build an interpretation object from the trained learner.
interp = ClassificationInterpretation.from_learner(learn)
# Plot the confusion matrix as a 6x6 figure at 60 dpi.
interp.plot_confusion_matrix(figsize=(6,6), dpi=60)

# Top 6 losses

In [None]:
# Show the 6 items with the top losses, laid out in 2 rows.
interp.plot_top_losses(6, nrows=2)

# Reference

[HAM10000 Vision ResNet18](https://www.kaggle.com/code/leonblum/ham10000-vision-resnet18-97-7-accuracy)

#### I hope you like it