In [None]:
import numpy as np
import pandas as pd
import os
import gc
import cv2
import tensorflow as tf
import seaborn as sn
import matplotlib.pyplot as plt
import keras
from keras.layers import *
from keras.models import Model
from keras.utils import plot_model
from keras.applications.resnet_v2 import ResNet50V2
from keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm
from sklearn.model_selection import train_test_split


# Show which TensorFlow version this notebook is running against.
print(tf.__version__)

In [None]:
# Read the training metadata CSV and preview its first ten rows.
train_csv_path = '../input/siim-isic-melanoma-classification/train.csv'
training = pd.read_csv(train_csv_path)
print(training.head(10))

In [None]:
# Count how many rows carry each value of the "target" column.
target_counts = training["target"].value_counts()
print(target_counts)

We keep every malignant image and randomly sample 3,000 of the benign images, so that the benign class no longer overwhelms the malignant one.

In [None]:
# Build the working dataframe: every malignant row (target == 1) followed by
# a random subsample of 3000 benign rows (target == 0).
m = training[training["target"] == 1]
# A fixed random_state makes the benign subsample identical on every run;
# without it each Restart & Run All trains on a different dataset.
b = training[training["target"] == 0].sample(3000, random_state=888)

# Malignant rows first, then benign rows, renumbered 0..n-1.
df = pd.concat([m, b], ignore_index=True)
# Drop the metadata columns that the rest of the notebook does not read.
df = df.drop(
    columns=[
        "patient_id",
        "sex",
        "age_approx",
        "anatom_site_general_challenge",
        "diagnosis",
        "benign_malignant",
    ]
)
# Turn each bare image id into the path of its JPEG file on disk.
df["image_name"] = (
    "../input/images-siim-512x512/train/train_512x512/"
    + df["image_name"].astype(str)
    + ".jpg"
)
df.head()

In [None]:
# Load every image listed in `df` into memory as a 300x300 RGB array,
# collecting the matching label alongside it.
imgs = []
labels = []
# Zipping the two columns avoids building a Series per row (iterrows), and
# total= gives tqdm a length so it can draw an actual progress bar.
for path, target in tqdm(zip(df["image_name"], df["target"]), total=len(df)):
    img = cv2.imread(str(path))
    # cv2.imread does not raise on a missing or unreadable file; it returns
    # None, which would otherwise surface as an obscure cv2.resize error.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    img = cv2.resize(img, (300, 300))
    # OpenCV decodes images in BGR channel order; convert to RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    imgs.append(img)
    labels.append(target)
imgs = np.array(imgs)
labels = np.array(labels)
print(imgs.shape)
print(labels.shape)

In [None]:
# Cache the image array on disk as a compressed .npz archive under the key "images".
arrays_to_save = {"images": imgs}
with open("res_imgs4k.npz", "wb") as npz_out:
    np.savez_compressed(npz_out, **arrays_to_save)

In [None]:
# Read the cached image array back from the .npz archive.
with open("./res_imgs4k.npz", "rb") as npz_in:
    archive = np.load(npz_in)
    imgs = archive["images"]

Note that we set the `random_state` of the train-test split to 888, so the split is the same on every run. 888 is also a lucky number, which means the model will be very lucky.

In [None]:
# Take the labels from the dataframe that fixed the image order (malignant
# rows first, then benign), rather than hard-coding the class counts
# (584 and 3000), which silently breaks if the data or sample size changes.
labels = df["target"].to_numpy()
# Fail fast if the cached image array and the labels are out of sync.
assert imgs.shape[0] == labels.shape[0], (
    "image/label count mismatch: {} images vs {} labels".format(
        imgs.shape[0], labels.shape[0]
    )
)
print(imgs.shape)
print(labels.shape)

In [None]:
# Hold out 20% of the samples for validation, using a fixed seed.
trainX, valX, trainY, valY = train_test_split(
    imgs,
    labels,
    test_size=0.2,
    random_state=888,
)
# Show the first training image.
plt.imshow(trainX[0])
# The full arrays are no longer needed once the split exists; release them.
del imgs
del labels
gc.collect()

In [None]:
# trainX, valX, trainY, valY = train_test_split(
#     df["image_name"], 
#     df["target"],
#     test_size = 0.2, 
#     random_state = 888
# )
# train = list(zip(trainX, trainY))
# train = pd.DataFrame(train, columns = ["images", "target"])
# val = list(zip(valX, valY))
# val = pd.DataFrame(val, columns = ["images", "target"])

# train.head()

In [None]:
# Both generators scale pixel values by 1/255.
pixel_scale = 1./255

# Random geometric augmentations, applied to the training images only.
augmentation_kwargs = dict(
    rotation_range=80,
    width_shift_range=0.25,
    height_shift_range=0.25,
    shear_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
train_aug = ImageDataGenerator(rescale=pixel_scale, **augmentation_kwargs)

# Validation images are only rescaled, never augmented.
val_aug = ImageDataGenerator(rescale=pixel_scale)

In [None]:
# Batch generators over the in-memory arrays: training batches are shuffled,
# validation batches keep their original order.
batch_size = 12
train_gen = train_aug.flow(x=trainX, y=trainY, batch_size=batch_size, shuffle=True)
val_gen = val_aug.flow(x=valX, y=valY, batch_size=batch_size, shuffle=False)

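`flow_from_dataframe` reads images from disk given their file paths (stored in the "images" column). The cell below keeps this alternative commented out; the in-memory `flow` generators above are the ones actually used.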

In [None]:
# train_gen = train_aug.flow_from_dataframe(train, x_col="images", y_col="target", batch_size = 8, target_size=(224,224),shuffle = True, class_mode="raw")
# val_gen = val_aug.flow_from_dataframe(val, x_col="images", y_col="target", batch_size = 8, target_size=(224,224),shuffle = False, class_mode="raw")

In [None]:
# Take the callbacks from the same `keras` package that builds the model
# (`keras.models.Model`, `efficientnet.keras`). Mixing `tensorflow.keras`
# callbacks with a standalone-Keras model is not guaranteed to work.
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop training once val_loss has gone 2 epochs without improving.
early_stop = EarlyStopping(monitor='val_loss', patience=2)
# Write a checkpoint, named after the val_loss and epoch, each time val_loss
# reaches a new minimum.
checkpoint = ModelCheckpoint(
    "{val_loss:.2f}-{epoch:02d}.hdf5",
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)

callbacks = [early_stop, checkpoint]

In [None]:
# `learning_rate` is the current keyword and the one the Adamax optimizer
# further down already uses; `lr` is a deprecated alias.
# NOTE(review): neither `optimizer` nor `auc` is passed to the compile()
# calls visible in this notebook — confirm whether they were meant to be.
optimizer = keras.optimizers.Adam(learning_rate=1e-3)
auc = keras.metrics.AUC()

EfficientNet-B3 with our own top layers. The EfficientNet backbone is frozen.

In [None]:
# Install the `efficientnet` package straight from its GitHub repository.
# No tag or commit is given, so this installs whatever the default branch
# contains at run time.
!pip install git+https://github.com/qubvel/efficientnet

In [None]:
import efficientnet.keras as efn 

In [None]:
# ImageNet-pretrained EfficientNetB3 used as a feature extractor.
# include_top=False removes the original 1000-class ImageNet classifier, so
# `effnet.output` is the final convolutional feature map. With
# include_top=True the custom head built below would be trained on ImageNet
# class probabilities instead of image features.
effnet = efn.EfficientNetB3(
    include_top=False,
    weights="imagenet",
    input_shape=(300, 300, 3),
)


In [None]:
# Freeze the pretrained network so that only the new head is trained.
effnet.trainable = False

# Classification head: flatten -> dropout -> 256-unit ReLU layer with L2
# weight regularisation -> dropout -> single sigmoid output.
flat = Flatten()(effnet.output)
# gap = GlobalAveragePooling2D()(resnet.output)
head = Dropout(0.2)(flat)
head = Dense(
    256,
    activation='relu',
    kernel_regularizer=keras.regularizers.l2(0.001),
)(head)
head = Dropout(0.2)(head)
final = Dense(1, activation="sigmoid")(head)

model = Model(inputs=effnet.input, outputs=final)
model.summary()

In [None]:
# Compile with binary cross-entropy and the default Adam optimizer, then
# train for 3 epochs.
model.compile(loss="binary_crossentropy", metrics=["accuracy"], optimizer="adam")

# `fit` accepts generators directly; `fit_generator` is deprecated.
# The step counts read the batch size from the generators themselves, so
# they stay correct if the batch size is changed where the generators are built.
history = model.fit(
    train_gen,
    steps_per_epoch=trainX.shape[0] // train_gen.batch_size,
    epochs=3,
    validation_data=val_gen,
    validation_steps=valX.shape[0] // val_gen.batch_size,
)

# model.save('resnet2.h5')

In [None]:
# Make every backbone layer from index 120 onwards trainable again.
first_trainable_index = 120
for position, layer in enumerate(effnet.layers):
    if position >= first_trainable_index:
        layer.trainable = True

# List each layer's index, name and trainable flag.
for i, layer in enumerate(effnet.layers):
    print(i, layer.name, layer.trainable)

In [None]:
# Adamax optimizer used when the model is recompiled for fine-tuning.
fine_tune_learning_rate = 1e-3
opt = keras.optimizers.Adamax(learning_rate=fine_tune_learning_rate)

In [None]:
# Recompile so the changed `trainable` flags take effect, now using the
# Adamax optimizer, and train for 5 epochs.
model.compile(loss="binary_crossentropy", metrics=["accuracy"], optimizer=opt)

# `fit` accepts generators directly; `fit_generator` is deprecated.
# The step counts read the batch size from the generators themselves, so
# they stay correct if the batch size is changed where the generators are built.
history = model.fit(
    train_gen,
    steps_per_epoch=trainX.shape[0] // train_gen.batch_size,
    epochs=5,
    validation_data=val_gen,
    validation_steps=valX.shape[0] // val_gen.batch_size,
)

In [None]:
# Continue training for up to 10 more epochs, this time with the early-stopping
# and checkpoint callbacks attached.
# `fit` accepts generators directly; `fit_generator` is deprecated.
# The step counts read the batch size from the generators themselves, so
# they stay correct if the batch size is changed where the generators are built.
history = model.fit(
    train_gen,
    steps_per_epoch=trainX.shape[0] // train_gen.batch_size,
    epochs=10,
    validation_data=val_gen,
    validation_steps=valX.shape[0] // val_gen.batch_size,
    callbacks=callbacks,
)

In [None]:
# Put the per-epoch metrics of the most recent training call into a table.
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch

# One figure per metric:
# (y-axis label, train column, train legend, val column, val legend, y-limits)
curve_specs = [
    ('Loss', 'loss', 'Train Error', 'val_loss', 'Val Error', [0, 0.5]),
    ('accuracy', 'accuracy', 'Train Acc', 'val_accuracy', 'Val Acc', [0, 1]),
]
for y_label, train_col, train_legend, val_col, val_legend, y_limits in curve_specs:
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.plot(hist['epoch'], hist[train_col], label=train_legend)
    plt.plot(hist['epoch'], hist[val_col], label=val_legend)
    plt.ylim(y_limits)
    plt.legend()
plt.show()