In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
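# Prerequisites (Kaggle notebook settings):
#   1. Turn on the GPU accelerator.
#   2. Add the image dataset (jpeg-melanoma-256x256) as an input.
# We use the pre-resized images.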

In [None]:
# import all required libraries
import os
import cv2
import numpy as np
import random as rn
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

In [None]:
# import all tensorflow library
import tensorflow as tf
import keras
from keras import initializers
from keras import regularizers
from keras import constraints
from keras import backend as k
from keras.activations import elu
from tensorflow.keras.optimizers import Adam
from keras.models import Sequential
from tensorflow.keras.layers import Layer, InputSpec
from keras.utils.generic_utils import get_custom_objects
from keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau
from keras.layers import Dense, Conv2D, Flatten, GlobalAveragePooling2D, Dropout, MaxPooling2D, BatchNormalization, GlobalMaxPooling2D


In [None]:
# set seed for reproducability
seed=1234
rn.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
os.environ["PYTHONHASHSEED"]=str(seed)

In [None]:
# Download the pretrained backbone: MobileNetV2 without its classification
# head, initialised with ImageNet weights and built for 256x256 RGB inputs.
base_model = tf.keras.applications.MobileNetV2(
    weights="imagenet",
    include_top=False,
    input_shape=(256, 256, 3),
)

# Trade-off: a bigger model gives more accuracy but a lower frame rate;
# a smaller model gives less accuracy but a higher frame rate.



In [None]:
# Mark every layer of the backbone as trainable so the whole network is fine-tuned.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = True


In [None]:
# Build the model to match the task output (binary: 0 or 1).
def build_model(backbone=None, dropout_rate=0.3, learning_rate=0.00005):
    """Assemble and compile the binary classifier on top of a pretrained backbone.

    Args:
        backbone: Feature extractor placed first in the stack. Defaults to the
            notebook-level ``base_model``.
        dropout_rate: Dropout rate applied after global average pooling;
            tune it to trade under- against over-fitting.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The compiled model (binary cross-entropy loss, AUC metric).
    """
    if backbone is None:
        backbone = base_model

    # Use tf.keras for every layer so the head comes from the same Keras
    # implementation as the tf.keras.applications backbone.
    model = tf.keras.Sequential([
        backbone,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(1, activation="sigmoid"),  # single probability output
    ])

    # Adam is used here; another optimizer could be substituted.
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=learning_rate,
        beta_1=0.9,
        beta_2=0.999,
        amsgrad=False,
    )

    # Positive images are far fewer than negative ones, so AUC is tracked
    # instead of accuracy. `metrics` is passed as a list, as compile() documents.
    model.compile(
        loss="binary_crossentropy",
        optimizer=optimizer,
        metrics=[tf.keras.metrics.AUC(name="auc")],
    )

    # summary() prints the table itself and returns None, so it is not wrapped in print().
    model.summary()
    return model


model = build_model()


In [None]:
# Load the training metadata table from the dataset's train.csv.
df = pd.read_csv(
    "../input/jpeg-melanoma-256x256/train.csv"
)

In [None]:
df     # display the table; only the image_name and target columns are used below

# The dataset contains 33126 images in total

In [None]:
# Count the negative and positive images:
# a holds the distinct target values, b the number of rows for each of them.
a, b = np.unique(
    df["target"],
    return_counts=True,
)

In [None]:
a  # distinct values found in the target column

In [None]:
b    # 32542 negative images and only 584 positive images

# This imbalance is why the model is compiled with
# metrics = tf.keras.metrics.AUC(name="auc"):
# the number of negative images is much greater than the number of positive ones.

In [None]:
# Split the training dataframe into training and validation sets

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for validation.
# Stratify on the label so the rare positive class keeps the same proportion
# in both parts, and pin random_state so the split does not depend on how much
# of NumPy's global random stream earlier cells have consumed.
train, valid = train_test_split(
    df,
    test_size=0.2,
    stratify=df["target"],
    random_state=seed,
)


In [None]:
# Now split 1% off the training rows to keep a few images for testing.
# Stratified and seeded for the same reasons as the train/validation split.
# NOTE: this rebinds `train`, so re-running the cell shrinks it again.
train, test = train_test_split(
    train,
    test_size=.01,
    stratify=train["target"],
    random_state=seed,
)

In [None]:
train  # display the training rows

In [None]:
test  # display the small held-out test rows

In [None]:
valid  # display the validation rows

In [None]:
# Add a new column to the train, test and valid tables containing image name + ".jpg".
# .assign() returns a new frame instead of writing into the frames produced by
# train_test_split, which pandas may flag with SettingWithCopyWarning.
train = train.assign(image_name_jpg=train["image_name"] + ".jpg")
test = test.assign(image_name_jpg=test["image_name"] + ".jpg")
valid = valid.assign(image_name_jpg=valid["image_name"] + ".jpg")

In [None]:
# Convert the target column to strings (the generators below are created with
# class_mode="binary" and read the label from this column).
# .assign() returns a new frame instead of writing into the frames produced by
# train_test_split, which pandas may flag with SettingWithCopyWarning.
train = train.assign(target=train["target"].astype(str))
test = test.assign(target=test["target"].astype(str))
valid = valid.assign(target=valid["target"].astype(str))

In [None]:
# Image pipeline used for training: multiply pixel values by 1/255 and apply
# random horizontal flips. Other augmentations (vertical_flip, random cropping,
# etc.) could be added here to try to improve the results.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(horizontal_flip=True, rescale=1/255)

In [None]:
# Validation/test pipeline: only the 1/255 rescaling, no augmentation.
# The division by 255 is kept for testing in the Android app
# (presumably the app applies the same scaling -- confirm).
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1/255,
)
 

In [None]:
# Stream training batches from disk using the file names listed in `train`.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train,
    directory="../input/jpeg-melanoma-256x256/train/",
    x_col="image_name_jpg",    # name + ".jpg"
    y_col="target",
    # Must match the model's input_shape=(256, 256, 3); the previous (255, 255)
    # disagreed with it and forced a pointless resize of the images.
    target_size=(256, 256),
    batch_size=32,
    class_mode="binary",
)

In [None]:
# Stream validation batches from disk using the file names listed in `valid`
# (rescaling only, no augmentation).
validation_generator = test_datagen.flow_from_dataframe(
    dataframe=valid,
    directory="../input/jpeg-melanoma-256x256/train/",
    x_col="image_name_jpg",
    y_col="target",
    # Must match the model's input_shape=(256, 256, 3); the previous (255, 255)
    # disagreed with it and forced a pointless resize of the images.
    target_size=(256, 256),
    batch_size=32,
    class_mode="binary",
)

In [None]:
# Train the model.
# Model.fit accepts generators directly; fit_generator is deprecated in favour
# of it. The returned History object is kept so the per-epoch loss/AUC can be
# inspected afterwards.
history = model.fit(
    train_generator,
    epochs=3,   # set this explicitly or use early stopping to stop before overfitting
    shuffle=True,
    validation_data=validation_generator,
)

In [None]:
# The model starts to overfit after 3 epochs, so it is trained for 3 epochs only.
# Once training is done, convert the model to TFLite format, which (per the
# original note) is faster at some cost in accuracy.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to disk.
with open("model.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)