In [3]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras_preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers, optimizers
import pandas as pd
import numpy as np

def append_ext(fn):
    return fn+".jpg"

traindf=pd.read_csv('D:\\UNI\\5º Ano - 2º Semestre - 2020-2021 - TESE\\Dataset\\ISIC-2017_Training_Part3_GroundTruth.csv',dtype=str)
testdf=pd.read_csv('D:\\UNI\\5º Ano - 2º Semestre - 2020-2021 - TESE\\Dataset\\ISIC-2017_Test_v2_Part3_GroundTruth.csv',dtype=str)
traindf['image_id']=traindf['image_id'].apply(append_ext)
testdf['image_id']=testdf['image_id'].apply(append_ext)

In [4]:
#create a label column
label=[0]*traindf.shape[0]
for i in range(traindf.shape[0]):
    if traindf['melanoma'][i] == '1.0':
        label[i]='1'
    else:
        label[i]='0'
traindf['label']=label    

label=[0]*testdf.shape[0]
for i in range(testdf.shape[0]):
    if testdf['melanoma'][i] == '1.0':
        label[i]='1'
    else:
        label[i]='0'
testdf['label']=label

In [5]:
X = traindf.loc[:,'image_id']
y = traindf.loc[:,'label']

In [6]:
from sklearn.model_selection import train_test_split
train_x, val_x, train_y, val_y = train_test_split(X, y, 
                                                  test_size = 0.25, 
                                                  random_state = 27, 
                                                  stratify=y)

In [7]:
df_train = pd.DataFrame(columns=['image_id','label'])
df_train['image_id'] = train_x
df_train['label'] = train_y

df_val= pd.DataFrame(columns=['image_id','label'])
df_val['image_id'] = val_x
df_val['label'] = val_y

In [8]:
df_train['label'].value_counts()

0    1220
1     280
Name: label, dtype: int64

In [9]:
from sklearn.utils import resample
non_melanoma = df_train[df_train['label']=='0']
melanoma = df_train[df_train['label']=='1']

# upsample minority -> some samples of melanoma will be duplicated
melanoma_upsampled = resample(melanoma,
                          replace=True, # sample with replacement
                          n_samples=len(non_melanoma), # match number in majority class
                          random_state=27) # reproducible results


# combine majority and upsampled minority alternately
# by reseting indexes, both dataframes now start at 0, 1, 2, etc. and are then sorted
df_train_upsampled = pd.concat([non_melanoma.reset_index(), melanoma_upsampled.reset_index()]).sort_index()
print(df_train_upsampled)


df_train_upsampled['label'].value_counts()

      index          image_id label
0      1864  ISIC_0014658.jpg     0
0       387  ISIC_0000482.jpg     1
1       689  ISIC_0010016.jpg     1
1      1324  ISIC_0012986.jpg     0
2      1078  ISIC_0012182.jpg     0
...     ...               ...   ...
1217   1055  ISIC_0012102.jpg     0
1218   1361  ISIC_0013079.jpg     1
1218    658  ISIC_0009963.jpg     0
1219   1996  ISIC_0015233.jpg     0
1219   1031  ISIC_0011348.jpg     1

[2440 rows x 3 columns]


0    1220
1    1220
Name: label, dtype: int64

In [10]:
#uncomment according to the architecture being used
# IMG_SIZE = 224 #B0
# IMG_SIZE = 240 #B1
# IMG_SIZE = 260 #B2
IMG_SIZE = 300 #B3
# IMG_SIZE = 380 #B4
# IMG_SIZE = 456 #B5
# IMG_SIZE = 528 #B6
# IMG_SIZE = 600 #B7

In [11]:
train_datagen=ImageDataGenerator(rotation_range=5,  # rotation
                                 width_shift_range=0.2,  # horizontal shift
                                 zoom_range=0.2,  # zoom
                                 horizontal_flip=True,  # horizontal flip
                                 brightness_range=[0.2,0.8] # brightness
                                 #rescale=1./255.
                                )  

train_generator=train_datagen.flow_from_dataframe(dataframe=df_train_upsampled,
                                            directory='D:\\UNI\\5º Ano - 2º Semestre - 2020-2021 - TESE\\Dataset\\ISIC-2017_Training_Data\\',
                                            x_col='image_id',
                                            y_col='label',
                                            batch_size=16,
                                            seed=42,
                                            shuffle=False,
                                            class_mode='binary',
                                            target_size=(IMG_SIZE,IMG_SIZE)
                                           )

valid_datagen=ImageDataGenerator()#rescale=1./255.)
valid_generator=valid_datagen.flow_from_dataframe(dataframe=df_val,
                                            directory='D:\\UNI\\5º Ano - 2º Semestre - 2020-2021 - TESE\\Dataset\\ISIC-2017_Training_Data\\',
                                            x_col='image_id',
                                            y_col='label',
                                            batch_size=16,
                                            seed=42,
                                            shuffle=False,
                                            class_mode='binary',
                                            target_size=(IMG_SIZE,IMG_SIZE)
                                           )

test_datagen=ImageDataGenerator()#rescale=1./255.)
test_generator=test_datagen.flow_from_dataframe(dataframe=testdf,
                                                directory='D:\\UNI\\5º Ano - 2º Semestre - 2020-2021 - TESE\\Dataset\\ISIC-2017_Test_v2_Data\\',
                                                x_col='image_id',
                                                y_col=None,
                                                batch_size=30,
                                                seed=40,
                                                shuffle=False,
                                                class_mode=None,
                                                target_size=(IMG_SIZE,IMG_SIZE)
                                               )

Found 2440 validated image filenames belonging to 2 classes.
Found 500 validated image filenames belonging to 2 classes.
Found 600 validated image filenames.


In [13]:
from tensorflow.keras.applications import EfficientNetB3
# Create the base model from the pre-trained model EfficientNet
base_model = EfficientNetB3(include_top=False, weights="imagenet")

In [15]:
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
len(base_model.layers)

384

In [21]:
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

IMG_SIZE=300
top_model = Sequential()
top_model.add(Flatten(input_shape=(IMG_SIZE, IMG_SIZE, 3)))
top_model.add(Dropout(0.5))
top_model.add(Dense(1, activation='sigmoid'))

# note that it is necessary to start with a fully-trained
# classifier, including the top classifier,
# in order to successfully do fine-tuning
top_model.load_weights(top_model_weights_path)

# add the model on top of the convolutional base
base_model.add(top_model)

# set the first 25 layers (up to the last conv block)
# to non-trainable (weights will not be updated)
for layer in base_model.layers[:384]:
    layer.trainable = False

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
base_model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])

base_model.summary()

NameError: name 'top_model_weights_path' is not defined

In [13]:
for l in model.layers:
    print(l.name, l.trainable)

input_2 True
efficientnetb3 False
global_average_pooling2d True
batch_normalization True
top_dropout True
pred True


In [12]:
#how this model performs on this data before fitting
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
model.evaluate(valid_generator,
               steps=STEP_SIZE_VALID)



[0.7263209223747253, 0.1854838728904724]

In [13]:
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size

model.fit(train_generator,
          epochs=15,
          validation_data=valid_generator,
          verbose=1
         )

Epoch 1/15
Epoch 2/15
Epoch 3/15

KeyboardInterrupt: 

In [None]:
#how this model performs after fitting
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
model.evaluate(valid_generator,
               steps=STEP_SIZE_VALID)

In [None]:
test_generator.reset()

STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
pred=model.predict(test_generator,
                   steps=STEP_SIZE_TEST,
                   verbose=1)

In [None]:
labels_pred=np.where(pred > 0.5, 1, 0)

In [None]:
from keras.preprocessing import image
import matplotlib.pyplot as plt
x = test_generator.next() #this unpacks the ImageDataGenerator which is composed by x(image) and y(label). In the case of test, there are not labels
for i in range(5):
    image = x[i]
    plt.title('Predicted Label: ' + str(labels_pred[i]))
    plt.imshow(image.astype(np.uint8))
    plt.show()

In [None]:
import sklearn
sklearn.metrics.accuracy_score(testdf['label'].astype(int).to_numpy(), labels_pred)

In [None]:
confusion_matrix=sklearn.metrics.confusion_matrix(testdf['label'].astype(int).to_numpy(), labels_pred)

print('Confusion Matrix: \n' + str(confusion_matrix))

In [None]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

fpr, tpr, threshold = roc_curve(testdf['label'].astype(int).to_numpy(),labels_pred)
roc_auc = metrics.auc(fpr, tpr)
print('AUC Score: ' + str(roc_auc))

# plot the roc curve for the model
plt.plot(fpr, tpr, linestyle='-', label='EfficientNet B3')
# axis labels
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# show the legend
plt.legend()
# show the plot
plt.show()

In [None]:
jac=sklearn.metrics.jaccard_score(testdf['label'].astype(int).to_numpy(), labels_pred)
print('Jaccard Score: ' + str(jac))

In [None]:
model.save("EfficientNetB3_sigmoid.h5")
print("Saved model to disk")

# load model
#model = tf.keras.models.load_model('EfficientNetB3_sigmoid.h5')
#model.summary()