# Deep Learning - Melanoma - V2

Plan:
- clean the ground-truth CSV into a proper CSV with two columns: image file name, label
- get photos into ImageNet-style folder format (one sub-folder per class)
    - training dataset
    - testing dataset
- apply basic resnet learner
- start modifying data for unbalanced dataset
    - SMOTE? oversampling? 5-fold cross-validation
- apply resnet + training
- add optimizations
- test predictions & results

In [4]:
# (Re)load the autoreload extension; mode 2 re-imports edited modules before each cell runs.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
from fastai.vision import *
import pandas as pd
import numpy as np
import seaborn as sns

In [4]:
# Shell escape: print the kernel's current working directory.
! pwd

/home/luis/code-projects/dl-melanoma-env/dl-melanoma-env-2


In [22]:
# Project root, taken from the kernel's working directory instead of a hard-coded,
# machine-specific absolute path that was duplicated in two literals.
# NOTE: the notebook must be started from the project directory; the relative
# data paths used elsewhere in this notebook already assume that.
currPath = Path.cwd()
# String form kept for cells that build file paths by string concatenation.
currPathStr = str(currPath)
print(currPath)

/home/luis/code-projects/dl-melanoma-env/dl-melanoma-env-2


## Create clean csv

In [6]:
# Load the ground-truth label table; the path is relative to the working directory.
groundTruthDf = pd.read_csv('ISIC2018_Task3_Training_GroundTruth.csv')
# Preview the first rows (an `image` id column followed by one numeric column per class).
groundTruthDf.head()

Unnamed: 0,image,MEL,NV,BCC,AKIEC,BKL,DF,VASC
0,ISIC_0024306,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,ISIC_0024307,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,ISIC_0024308,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,ISIC_0024309,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,ISIC_0024310,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
def createNewLabelDataframe(df):
    """Convert a per-class score table into an (image_filename, label) table.

    Parameters
    ----------
    df : pandas.DataFrame
        Label table whose FIRST column is ``image`` (image ids as strings,
        without file extension). Every remaining column is treated as one
        class, and its column name is used as the class abbreviation.

    Returns
    -------
    pandas.DataFrame
        Two columns:
        ``image_filename`` - the image id with ".jpg" appended;
        ``label`` - name of the class column holding the largest value in
        that row (ties resolve to the left-most class column).
        The input ``df`` is not modified.

    Raises
    ------
    KeyError
        If ``df`` has no ``image`` column.
    ValueError
        If ``df`` has rows but no class columns to take a maximum over.
    """
    imageNames = [imageName + ".jpg" for imageName in df['image'].tolist()]
    classLabels = df.columns[1:].tolist()

    # One vectorised argmax over the whole class block replaces a Python loop
    # doing a pandas `.iloc` lookup per row, which is very slow on large tables.
    classScores = df.iloc[:, 1:].to_numpy()
    # An empty table has nothing to label; skip argmax so it cannot raise.
    maxIndices = classScores.argmax(axis=1) if len(classScores) else []

    # Map each winning column position back to its class abbreviation.
    labelArray = np.array([classLabels[maxIndex] for maxIndex in maxIndices])
    return pd.DataFrame({'image_filename': imageNames, 'label': labelArray})

In [17]:
# Collapse the per-class columns of the ground truth into one label per image file.
newLabelsDf = createNewLabelDataframe(groundTruthDf)
# Preview the resulting (image_filename, label) rows.
newLabelsDf.head()

Unnamed: 0,image_filename,label
0,ISIC_0024306.jpg,NV
1,ISIC_0024307.jpg,NV
2,ISIC_0024308.jpg,NV
3,ISIC_0024309.jpg,NV
4,ISIC_0024310.jpg,MEL


In [25]:
# Write the cleaned labels into the project directory (currPathStr);
# index=False keeps the DataFrame's row index out of the CSV.
newLabelsDf.to_csv(currPathStr + "/csv_labels.csv", index=False)

## Create folders for image classes

In [24]:
# Class names are every ground-truth column after the first (`image`) column.
classLabels = groundTruthDf.columns[1:].tolist()
classLabels

['MEL', 'NV', 'BCC', 'AKIEC', 'BKL', 'DF', 'VASC']

In [58]:
# Root folders of the train / validation splits (relative to the working directory).
trainDatasetPath = Path('clean_data/train/skin_cancer')
validDatasetPath = Path('clean_data/valid/skin_cancer')

# Plain-string versions of the same locations.
trainDatasetPathStr = str(trainDatasetPath)
validDatasetPathStr = str(validDatasetPath)

# Create both roots, including missing parents; re-running the cell is harmless.
trainDatasetPath.mkdir(parents=True, exist_ok=True)
validDatasetPath.mkdir(parents=True, exist_ok=True)

In [59]:
# Shell escape: list the training root before the class folders are created.
! ls -la {trainDatasetPath}

total 8
drwxrwxr-x 2 luis luis 4096 Mar 10 23:40 .
drwxrwxr-x 3 luis luis 4096 Mar 10 23:40 ..


In [60]:
# Shell escape: list the validation root before the class folders are created.
! ls -la {validDatasetPath}

total 8
drwxrwxr-x 2 luis luis 4096 Mar 10 23:41 .
drwxrwxr-x 3 luis luis 4096 Mar 10 23:41 ..


In [61]:
# Give every class its own sub-folder under both the training and the validation root.
for classLabel in classLabels:
    for datasetRoot in (trainDatasetPath, validDatasetPath):
        # exist_ok=True makes the cell safe to re-run.
        (datasetRoot/classLabel).mkdir(parents=True, exist_ok=True)

In [62]:
# Shell escape: confirm the per-class folders now exist under the training root.
! ls -la {trainDatasetPath}

total 36
drwxrwxr-x 9 luis luis 4096 Mar 10 23:42 .
drwxrwxr-x 3 luis luis 4096 Mar 10 23:40 ..
drwxrwxr-x 2 luis luis 4096 Mar 10 23:42 AKIEC
drwxrwxr-x 2 luis luis 4096 Mar 10 23:42 BCC
drwxrwxr-x 2 luis luis 4096 Mar 10 23:42 BKL
drwxrwxr-x 2 luis luis 4096 Mar 10 23:42 DF
drwxrwxr-x 2 luis luis 4096 Mar 10 23:42 MEL
drwxrwxr-x 2 luis luis 4096 Mar 10 23:42 NV
drwxrwxr-x 2 luis luis 4096 Mar 10 23:42 VASC


In [63]:
# Shell escape: confirm the per-class folders now exist under the validation root.
! ls -la {validDatasetPath}

total 36
drwxrwxr-x 9 luis luis 4096 Mar 10 23:42 .
drwxrwxr-x 3 luis luis 4096 Mar 10 23:41 ..
drwxrwxr-x 2 luis luis 4096 Mar 10 23:42 AKIEC
drwxrwxr-x 2 luis luis 4096 Mar 10 23:42 BCC
drwxrwxr-x 2 luis luis 4096 Mar 10 23:42 BKL
drwxrwxr-x 2 luis luis 4096 Mar 10 23:42 DF
drwxrwxr-x 2 luis luis 4096 Mar 10 23:42 MEL
drwxrwxr-x 2 luis luis 4096 Mar 10 23:42 NV
drwxrwxr-x 2 luis luis 4096 Mar 10 23:42 VASC


In [None]:
# the following function will take a path of images and split them into their proper labels
# into the destination folder
# based on a certain labelled csv
def imageNetFormat()