# Downloading data

In [None]:
#mkdir train valid test 

def download(url, destination_folder='.'):
  """Fetch `url` into `destination_folder` by shelling out to `wget`.

  The body is an IPython shell escape (`!`), so this function only works
  inside an IPython/Jupyter session; `$url` and `$destination_folder` are
  filled in from the Python arguments before the command runs.

  Args:
    url: Address of the file to download.
    destination_folder: Directory prefix handed to `wget -P`; defaults to
      the current directory.
  """
  # -nc: skip the download when the file already exists locally.
  # -q --show-progress: silence wget's log output but keep the progress bar.
  !wget -nc -q --show-progress $url -P $destination_folder

# The dataset is published as one zip archive per split.
phases = ['train', 'valid', 'test']
for phase in phases:
    # Each archive is stored in a folder named after its split.
    download(
        f'https://s3-us-west-1.amazonaws.com/udacity-dlnfd/datasets/skin-cancer/{phase}.zip',
        destination_folder=phase,
    )

In [None]:
import os  # needed for os.path.join below; no other cell imports it
import zipfile

# tqdm.notebook.tqdm is the supported spelling of the deprecated tqdm_notebook alias.
from tqdm.notebook import tqdm

# Each archive was downloaded to <phase>/<phase>.zip; unpack it next to itself,
# one member at a time so the progress bar advances per extracted file.
for phase in phases:
    archive_path = os.path.join(phase, f'{phase}.zip')
    with zipfile.ZipFile(archive_path, 'r') as myzip:
        for member in tqdm(myzip.namelist(), desc=f'Extracting {phase}.zip'):
            myzip.extract(member=member, path=phase)

In [None]:
import os  # this cell can be run without the download cells, so import locally

# Dataset splits; re-declared here so this cell does not depend on the download cell.
phases = ['train', 'valid', 'test']

# Everything lives under the notebook's current working directory.
data_root = os.getcwd()
# Each zip was extracted inside its own folder, hence the doubled <phase>/<phase>.
data_dir = {phase: os.path.join(data_root, phase, phase) for phase in phases}

In [None]:
# Display the resolved directory of every split (index with e.g. data_dir['test'] to inspect one).
data_dir

In [None]:
# There are three different classes.
# Each name is also used as the sub-folder name when counting a class's images.
# NOTE(review): image_dataset_from_directory presumably assigns integer labels in
# alphanumeric folder order, which would match the order of this list — confirm.

import glob
classes = ['melanoma', 'nevus', 'seborrheic_keratosis']
classes

In [None]:
import seaborn as sns
import warnings
import pandas as pd
import numpy as np

def print_images_distribution(plot=False, directories=None, class_names=None):
  """Count the ``*.jpg`` images per split and per class.

  Args:
    plot: When true, also draw a grouped bar chart with seaborn (one group of
      bars per class, one hue per split).
    directories: Mapping whose values are the split directories to scan; each
      directory is expected to hold one sub-folder per class. Defaults to the
      notebook-level ``data_dir``.
    class_names: Names of the class sub-folders to count. Defaults to the
      notebook-level ``classes``.

  Returns:
    A DataFrame with one row per split (labelled with the last path component
    of its directory) followed by a ``TOTAL`` row, one column per class
    followed by a ``TOTAL`` column, and a ``Ratio`` column holding each row's
    share of all images rounded to two decimals (NaN when no image is found).
  """
  import os  # no cell of the notebook imports os at top level

  if directories is None:
    directories = data_dir
  if class_names is None:
    class_names = classes
  class_names = list(class_names)

  phase_names = []
  rows = []      # one list of per-class counts per split, in class_names order
  records = []   # long-format copy of the same counts, for seaborn
  for directory in directories.values():
    phase = os.path.basename(directory)
    # Scan the directory itself rather than rebuilding its path from the phase name.
    row = [len(glob.glob(os.path.join(directory, disease, '*.jpg')))
           for disease in class_names]
    phase_names.append(phase)
    rows.append(row)
    records.extend({'Class': disease, 'Phase': phase, 'Count': count}
                   for disease, count in zip(class_names, row))

  # Build both frames in one go: DataFrame.append no longer exists in pandas 2,
  # and cell-by-cell chained assignment does not write through under Copy-on-Write.
  column_totals = [sum(row[i] for row in rows) for i in range(len(class_names))]
  image_repartition = pd.DataFrame(rows + [column_totals],
                                   index=phase_names + ['TOTAL'],
                                   columns=class_names)
  plot_data = pd.DataFrame(records, columns=['Class', 'Phase', 'Count'])

  image_repartition['TOTAL'] = image_repartition.sum(axis=1).astype(int)
  # The last row is the TOTAL row, so its TOTAL cell is the overall image count.
  grand_total = image_repartition['TOTAL'].iloc[-1]
  image_repartition['Ratio'] = np.round(image_repartition['TOTAL'] / grand_total, 2)

  if plot:
    sns.set_style("whitegrid")
    sns.barplot(x='Class', y='Count', hue='Phase', data=plot_data, palette='Blues')

  return image_repartition

In [None]:
print_images_distribution(True)

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing import image_dataset_from_directory

# All three splits are loaded identically: shuffled batches of 32 images,
# each image resized to 150x150.
loader_options = dict(shuffle=True, batch_size=32, image_size=(150, 150))

train_set = image_dataset_from_directory(data_dir['train'], **loader_options)
val_set = image_dataset_from_directory(data_dir['valid'], **loader_options)
test_set = image_dataset_from_directory(data_dir['test'], **loader_options)



In [None]:
# Augmentation stage: a random horizontal flip followed by a random rotation
# (factor 0.2), packaged as a small Sequential model.
data_augmentation = keras.Sequential([
    keras.layers.experimental.preprocessing.RandomFlip("horizontal"),
    keras.layers.experimental.preprocessing.RandomRotation(0.2),
])

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Preview the augmentation: draw 12 random variants of the first image of one
# validation batch in a 3x4 grid.
for images, labels in val_set.take(1):
    plt.figure(figsize=(12, 12))
    # The augmentation layers work on batches, so wrap the image in a batch of
    # one. This does not change between iterations, so build it once.
    single_image_batch = tf.expand_dims(images[0], 0)
    for i in range(12):
        plt.subplot(3, 4, i + 1)
        # Request training mode explicitly so the random layers are active here
        # regardless of how the installed Keras version resolves a missing
        # `training` argument.
        augmented_image = data_augmentation(single_image_batch, training=True)
        plt.imshow(augmented_image[0].numpy().astype("int32"))
        plt.axis("off")

In [None]:
# Show the first nine images of one validation batch in a 3x3 grid, each
# titled with its integer label.
plt.figure(figsize=(10, 10))
for images, labels in val_set.take(1):
    for cell in range(9):
        ax = plt.subplot(3, 3, cell + 1)
        pixels = images[cell].numpy().astype("uint8")
        plt.imshow(pixels)
        plt.title(int(labels[cell]))
        plt.axis("off")

In [None]:
# In the following, we take the same approach as in the Dog_breed project.
# Xception backbone with ImageNet weights and without its classification head,
# sized for 150x150 RGB inputs.
base_model = keras.applications.Xception(
    include_top=False,
    weights='imagenet',
    input_shape=(150, 150, 3),
)

In [None]:
# Freeze the pretrained backbone: its weights are marked non-trainable.
base_model.trainable = False
# Symbolic model input, 150x150 RGB to match the image_size used when loading the datasets.
inputs = keras.Input(shape=(150, 150, 3))
# First stage of the graph: random augmentation. `x` carries the running
# tensor and is rebound by the following cells.
x = data_augmentation(inputs) 


In [None]:
# Apply Xception's own input preprocessing to the augmented tensor.
# NOTE(review): presumably rescales 0-255 pixel values to the [-1, 1] range the
# pretrained weights expect — confirm against the Keras documentation.
x = tf.keras.applications.xception.preprocess_input(x)


In [None]:
# Classification head on top of the backbone. Each line rebinds `x`, so
# re-running this cell on its own feeds its previous output back in; re-run
# from the cell that creates `inputs` instead.
# training=False calls the backbone in inference mode.
# NOTE(review): presumably to keep its BatchNorm statistics fixed — confirm.
x = base_model(x, training=False)
# Pool the backbone's feature maps into one vector per image.
x = keras.layers.GlobalAveragePooling2D()(x)
# Dropout with rate 0.2 ahead of the classifier.
x = keras.layers.Dropout(0.2)(x)  
# Three-way softmax output, one unit per class.
outputs = keras.layers.Dense(3, activation='softmax')(x)
model = keras.Model(inputs, outputs)

In [None]:
# The loss is the sparse variant of categorical cross-entropy; accuracy is tracked as the metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train for five epochs, evaluating on the validation split.
model.fit(train_set, epochs=5, validation_data=val_set)