In [0]:
import numpy as np
import tensorflow as tf
import pandas as pd
from tensorflow import keras
import matplotlib.pyplot as plt
import os
import IPython.display as display
import random
from sklearn.preprocessing import LabelEncoder
import pathlib
import math
% matplotlib inline

In [2]:
tf.__version__

'1.13.1'

In [0]:
tf.enable_eager_execution()

# **Importing Data**

In [4]:
# Start google drive connection with notebook
from google.colab import drive

drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [5]:
print(os.getcwd())

/content


In [6]:
# Check current directory
dir_path = os.getcwd()
if dir_path == '/content':
  %cd ./gdrive/My\ Drive/SkinCancer

/content/gdrive/My Drive/SkinCancer


In [7]:
# read dataset csv file with image id's and mole types
dataset = pd.read_csv('./dataset/HAM10000_metadata.csv')
dataset['image_path'] = ['./dataset/images/' + path +'.jpg' for path in dataset['image_id']]
dataset.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,image_path
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,./dataset/images/ISIC_0027419.jpg
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,./dataset/images/ISIC_0025030.jpg
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,./dataset/images/ISIC_0026769.jpg
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,./dataset/images/ISIC_0025661.jpg
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,./dataset/images/ISIC_0031633.jpg


In [0]:
dataset.columns

Index(['lesion_id', 'image_id', 'dx', 'dx_type', 'age', 'sex', 'localization',
       'image_path'],
      dtype='object')

In [0]:
#import IPython.display as display

#display.display(display.Image(img_path))

# Data Processing

In [0]:
BATCH_SIZE = 128

image_count = len(dataset)
image_shape = [128,128]
input_sp = (128,128,3)
EPOCHS = 15
dropout_rate = 0.25
split_ratio = 0.8

AUTOTUNE = tf.data.experimental.AUTOTUNE

In [0]:
#Loading training image paths from directory...
train_data_root = pathlib.Path('./train_dir')

train_image_paths = list(train_data_root.glob('*/*'))
train_image_paths = [str(path) for path in train_image_paths]
train_image_paths = train_image_paths[:38569]
print(train_image_paths)
random.shuffle(train_image_paths)

#Loading validation image paths from val directory...
val_data_root = pathlib.Path('./val_dir')
  
val_image_paths = list(val_data_root.glob('*/*'))
val_image_paths = [str(path) for path in val_image_paths]
random.shuffle(val_image_paths)
len(val_image_paths)

['train_dir/nv/ISIC_0025112.jpg', 'train_dir/nv/ISIC_0028507.jpg', 'train_dir/nv/ISIC_0029773.jpg', 'train_dir/nv/ISIC_0029203.jpg', 'train_dir/nv/ISIC_0026712.jpg', 'train_dir/nv/ISIC_0032444.jpg', 'train_dir/nv/ISIC_0028305.jpg', 'train_dir/nv/ISIC_0026452.jpg', 'train_dir/nv/ISIC_0027007.jpg', 'train_dir/nv/ISIC_0029645.jpg', 'train_dir/nv/ISIC_0026398.jpg', 'train_dir/nv/ISIC_0026655.jpg', 'train_dir/nv/ISIC_0026672.jpg', 'train_dir/nv/ISIC_0028186.jpg', 'train_dir/nv/ISIC_0026099.jpg', 'train_dir/nv/ISIC_0024836.jpg', 'train_dir/nv/ISIC_0030416.jpg', 'train_dir/nv/ISIC_0030180.jpg', 'train_dir/nv/ISIC_0026043.jpg', 'train_dir/nv/ISIC_0025074.jpg', 'train_dir/nv/ISIC_0029544.jpg', 'train_dir/nv/ISIC_0031612.jpg', 'train_dir/nv/ISIC_0028529.jpg', 'train_dir/nv/ISIC_0026588.jpg', 'train_dir/nv/ISIC_0030115.jpg', 'train_dir/nv/ISIC_0029898.jpg', 'train_dir/nv/ISIC_0028364.jpg', 'train_dir/nv/ISIC_0024599.jpg', 'train_dir/nv/ISIC_0024352.jpg', 'train_dir/nv/ISIC_0026885.jpg', 'train_di

938

In [0]:
lbl_names = os.listdir('./train_dir')
lbl_names.remove('normal')
lbl_to_idx = dict((name, index) for index, name in enumerate(lbl_names))

train_image_labels = [lbl_to_idx[pathlib.Path(path).parent.name] for path in train_image_paths]
print(train_image_paths[0])
val_image_labels = [lbl_to_idx[pathlib.Path(path).parent.name] for path in val_image_paths]
print(val_image_labels[:10])

train_dir/bkl/_50_5373492.jpg
[0, 0, 0, 2, 0, 2, 0, 2, 2, 0]


In [0]:
def preprocess_image(path, label):
  image = tf.io.read_file(path)
  image = tf.image.decode_jpeg(image,channels = 3)
  image = tf.image.convert_image_dtype(image, tf.float32)
  image = tf.image.resize(image,image_shape)
  image /= 255.0
  return image, label

#features_placeholder = tf.placeholder(features.dtype, features.shape)
#labels_placeholder = tf.placeholder(labels.dtype, labels.shape)
  
def load_dataset(image_paths,image_labels):
  label_dataset = tf.cast(image_labels, tf.int32)
  ds_tensor = tf.data.Dataset.from_tensor_slices((image_paths,label_dataset))
  ds_mapped = ds_tensor.map(preprocess_image, AUTOTUNE)
  return ds_mapped


In [0]:
def load_with_records(all_paths,all_labels):
  if 'images.tfrec' not in os.listdir():
    image_ds = tf.data.Dataset.from_tensor_slices(all_paths).map(tf.read_file)
    tfrec = tf.data.experimental.TFRecordWriter('images.tfrec')
    tfrec.write(image_ds)

  image_ds = tf.data.TFRecordDataset('images.tfrec').map(preprocess_image)
  label_ds = tf.data.Dataset.from_tensor_slices(all_labels)
  
  ds = tf.data.Dataset.zip((image_ds, label_ds))
  return ds

In [0]:
def get_dataset(image_file, label_file, batch_size):
    dataset = load_dataset(image_file, label_file)
    dataset = dataset.cache()  # this small dataset can be entirely cached in RAM, for TPU this is important to get good performance from such a small dataset    
    dataset = dataset.repeat() # Mandatory for Keras for now
    dataset = dataset.batch(batch_size,drop_remainder=True) # drop_remainder is important on TPU, batch size must be fixed
    dataset = dataset.prefetch(buffer_size=AUTOTUNE)  # fetch next batches while training on the current one
    return dataset
  
# instantiate the datasets
#X_placeholder = tf.placeholder(tf.float32, shape = [None, 120, 120, 3])
#Y_placeholder = tf.placeholder(tf.float32, shape = [None])

training_dataset = get_dataset(train_image_paths, train_image_labels, BATCH_SIZE)
validation_dataset = get_dataset(val_image_paths, val_image_labels, BATCH_SIZE)

# For TPU, we will need a function that returns the dataset with batches

def training_input_fn():
   return get_dataset(train_image_paths, train_image_labels, BATCH_SIZE)
def validation_input_fn():
  return get_dataset(val_image_paths, val_image_labels,BATCH_SIZE )

NameError: ignored

In [0]:
training_dataset

<DatasetV1Adapter shapes: ((128, 128, 128, 3), (128,)), types: (tf.float32, tf.int32)>

# Training

In [0]:
import keras
from tensorflow.keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, Dropout, GlobalAveragePooling2D
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from keras import backend as K
#import keras.applications.resnet.ResNet152
import time


Using TensorFlow backend.


In [0]:


#tf.get_default_graph()
#K.clear_session()

help(tf.keras.applications.resnet50.preprocess_input)

Help on function wrapper in module tensorflow.python.keras.applications:

wrapper(*args, **kwargs)



In [0]:
"""img_placeholder = tf.placeholder(tf.string, shape=[None])
labels_placeholder = tf.placeholder(tf.int32, [None])


ds_tensor = tf.data.Dataset.from_tensor_slices((img_placeholder,labels_placeholder))
dataset = ds_tensor.map(preprocess_image, AUTOTUNE)
dataset.cache().repeat().batch(BATCH_SIZE,drop_remainder=True).prefetch(buffer_size=AUTOTUNE)


iterator = batched_dataset.take(5).make_one_shot_iterator()
next_element = iterator.get_next()

# You can feed the initializer with the appropriate filenames for the current
# phase of execution, e.g. training vs. validation.

# Initialize `iterator` with training data.
with tf.Session() as sess:
  sess.run(iterator.initializer, feed_dict={img_placeholder: train_image_paths[:10],labels_placeholder:train_image_labels[:10]})"""

'img_placeholder = tf.placeholder(tf.string, shape=[None])\nlabels_placeholder = tf.placeholder(tf.int32, [None])\n\n\nds_tensor = tf.data.Dataset.from_tensor_slices((img_placeholder,labels_placeholder))\ndataset = ds_tensor.map(preprocess_image, AUTOTUNE)\ndataset.cache().repeat().batch(BATCH_SIZE,drop_remainder=True).prefetch(buffer_size=AUTOTUNE)\n\n\niterator = batched_dataset.take(5).make_one_shot_iterator()\nnext_element = iterator.get_next()\n\n# You can feed the initializer with the appropriate filenames for the current\n# phase of execution, e.g. training vs. validation.\n\n# Initialize `iterator` with training data.\nwith tf.Session() as sess:\n  sess.run(iterator.initializer, feed_dict={img_placeholder: train_image_paths[:10],labels_placeholder:train_image_labels[:10]})'

In [0]:
# Sequential Model

def create_mobilenet(input_sp):
  return MobileNetV2(input_shape=input_sp,include_top=False,weights='imagenet')

def create_res(input_sp):
  resnet = ResNet50(input_shape=input_sp,include_top=False,weights='imagenet')
  return resnet

def create_inception(input_sp):
  incept_res = InceptionResNetV2(input_shape=input_sp,include_top=False,weights='imagenet')
  return incept_res

def create_seq_model(input_shape):
  tf.keras.backend.clear_session()
  
  base_model = create_mobilenet(input_shape)
  model = Sequential()
  model.add(base_model)
  model.add(GlobalAveragePooling2D())
  model.add(Dense(2048,activation= 'relu',kernel_regularizer=tf.keras.regularizers.l2(0.01)))
  model.add(Dropout(dropout_rate))
  model.add(Dense(1024,activation= 'relu',kernel_regularizer=tf.keras.regularizers.l2(0.01)))
  model.add(Dropout(dropout_rate))
  model.add(Dense(7,activation='softmax'))
  
  model.compile(optimizer=tf.keras.optimizers.Adam(),
               loss='sparse_categorical_crossentropy',
               metrics=['accuracy'])

 
  return model


In [0]:
steps_per_epoch= int((len(train_image_labels))/BATCH_SIZE)
print("training epoch: ", steps_per_epoch)

step = math.ceil(len(val_image_labels) / BATCH_SIZE)
step

training epoch:  301


8

In [0]:
checkpoint_path = "training_checkpoints/cp.ckpt"

if os.path.exists('./training_checkpoints'):
  checkpoint_dir = os.path.dirname(checkpoint_path)  
else:
  os.mkdir('./training_checkpoints')
  checkpoint_dir = os.path.dirname(checkpoint_path)  

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True)

In [0]:
model = create_seq_model(input_sp)
#if os.path.exists('./training_checkpoints/' + 'cp.ckpt.data-00000-of-00001'):
  #print('Weights Loaded')
  #model.load_weights(checkpoint_path)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [0]:
lr_decay = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 0.0001 + 0.02 * math.pow(0.5, 1+epoch), verbose=True)
model.fit(training_dataset, steps_per_epoch=steps_per_epoch, epochs=15, callbacks=[checkpoint_callback,lr_decay],validation_data=validation_dataset,validation_steps=step)


Epoch 00001: LearningRateScheduler reducing learning rate to 0.0101.
Epoch 1/15
Instructions for updating:
Use tf.cast instead.

In [0]:
"""lr_decay = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 0.0001 + 0.02 * math.pow(0.5, 1+epoch), verbose=True)
try: # TPU detection
  tpu = tf.contrib.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR']) # Picks up a connected TPU on Google's Colab, ML Engine, Kubernetes and Deep Learning VMs accessed through the 'ctpu up' utility
  #tpu = tf.contrib.cluster_resolver.TPUClusterResolver('MY_TPU_NAME') # If auto-detection does not work, you can pass the name of the TPU explicitly (tip: on a VM created with "ctpu up" the TPU has the same name as the VM)
except ValueError:
  tpu = False
  print('Training on GPU/CPU')
  
if tpu: # TPU training
  strategy = tf.contrib.tpu.TPUDistributionStrategy(tpu)
  trained_model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=strategy)
  # Work in progress: reading directly from dataset object not yet implemented
  # for Keras/TPU. Keras/TPU needs a function that returns a dataset.
  trained_model.compile(
    optimizer=tf.train.AdamOptimizer(),
    loss= 'sparse_categorical_crossentropy')

  history = trained_model.fit(training_input_fn, steps_per_epoch=10, epochs=EPOCHS,
                             validation_data=validation_input_fn, validation_steps=step)
else: # GPU/CPU training
  history = model.fit(training_dataset, steps_per_epoch=10, epochs=EPOCHS)"""

In [0]:
model.evaluate(validation_dataset,steps=step)

In [0]:
model_json = model.to_json()
with open("model.json", "w") as json_file:
  json_file.write(model_json)

model.save_weights("model.h5")

In [0]:
tf.keras.utils.plot_model(model,to_file = 'model.png')

In [0]:
plt.plot(model.history.history['acc'])

In [0]:
plt.plot(model.history.history['loss'])

In [0]:
import tensorflowjs as tfjs

In [0]:
os.mkdir('tfjs')

In [0]:
tfjs.converters.save_keras_model(model, 'tfjs_dir')