In [12]:
import tensorflow as tf
import numpy as np
import tensorflow.keras
import tensorflow.keras.backend as K
import cv2
import os
import math
import scipy as sp
import PIL

#import scikitplot as askplt
import matplotlib.pyplot as plt
#import efficientnet.tfkeras as efn

from tensorflow.keras import models, layers, Model, regularizers
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Dropout, Flatten, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from tensorflow.keras import optimizers
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import EfficientNetB4, EfficientNetB6, ResNet50V2

from PIL import Image
import pandas as pd
import os

from sklearn.metrics import roc_auc_score

## Hyper parameter setting

In [13]:
# --- Training hyper-parameters -------------------------------------------
# batch_size is handed to every flow_from_dataframe() generator in this notebook.
batch_size = 21
# NOTE(review): epochs, LearningRate and Decay are not referenced by the visible
# cells (model.fit hard-codes epochs=3, compile() uses the 'adam' string) — confirm intent.
epochs = 20
LearningRate, Decay = 3e-3, 1e-6
# Square input resolution used for target_size and for the network input shape.
img_width = img_height = 224

In [14]:
# Output locations for model checkpoints and TensorBoard logs.
# os.path.join(name, "") appends the platform's own path separator, so the later
# `MODEL_DIRECTORY + filename` concatenation points into a real sub-directory on
# every OS (a hard-coded "\\" becomes part of the file name on POSIX systems).
MODEL_DIRECTORY = os.path.join("MODEL", "")
TENSB_DIRECTORY = os.path.join("TENSORBOARD", "")

## Read metafile (dataframe)
sample 함수를 활용해서 dataframe row를 shuffle하는 이유: 한 batch 안의 label이 모두 동일하면 학습이 잘못된 방향으로 진행됨

In [15]:
# Fixed seed for the row shuffle. The generators built from `Data` take their
# training/validation subsets by row position, so an unseeded shuffle would move
# images between the two subsets on every run.
SEED = 42

# Load the HAM10000 metadata table and turn image ids into file names.
Data = pd.read_csv("HAM10000_metadata.csv")
Data['image_id'] = Data['image_id'] + '.jpg'
# Shuffle all rows so consecutive rows (and therefore batches) mix the classes.
Data = Data.sample(frac=1, random_state=SEED)
Data.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
5524,HAM_0003085,ISIC_0027809.jpg,nv,follow_up,45.0,female,trunk
3008,HAM_0000699,ISIC_0031575.jpg,nv,follow_up,50.0,male,lower extremity
4665,HAM_0001571,ISIC_0029156.jpg,nv,follow_up,55.0,female,lower extremity
6945,HAM_0007295,ISIC_0031380.jpg,nv,histo,60.0,female,chest
803,HAM_0004224,ISIC_0026248.jpg,bkl,confocal,80.0,male,face


## Image Generator

In [16]:
# Random augmentations applied on the fly to images drawn from DATAGEN_TRAIN.
_TRAIN_AUGMENTATION = {
    "rotation_range": 15,
    "width_shift_range": 0.13,
    "height_shift_range": 0.13,
    "shear_range": 0.13,
    "zoom_range": 0.13,
    "horizontal_flip": True,
    "vertical_flip": True,
    "brightness_range": [0.2, 1.2],
}

# Pixel values are scaled by 1/255, and 10% of the rows are set aside so that
# flow_from_dataframe(subset=...) can serve 'training' and 'validation' parts.
DATAGEN_TRAIN = ImageDataGenerator(
    rescale=1./255,
    data_format="channels_last",
    validation_split=0.10,
    **_TRAIN_AUGMENTATION)

In [17]:
# Options for the training stream: file names come from 'image_id', labels from
# 'dx' (one-hot encoded because class_mode is 'categorical').
_train_flow_options = dict(
    directory='DATA_TRAIN',
    x_col='image_id',
    y_col='dx',
    target_size=(img_height, img_width),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=True,
    subset='training',
)

# Training part of the DATAGEN_TRAIN split.
TRAIN_GENERATOR = DATAGEN_TRAIN.flow_from_dataframe(Data, **_train_flow_options)

Found 4500 validated image filenames belonging to 7 classes.




In [18]:
# Validation images must not be randomly augmented: val_loss / val_acc drive the
# checkpoint and learning-rate callbacks and should be measured on undistorted
# images. This generator only rescales, and it uses the same validation_split as
# DATAGEN_TRAIN so that subset='validation' selects the same 10% of `Data` that
# the training generator leaves out.
DATAGEN_VALID = ImageDataGenerator(
    rescale=1./255,
    data_format="channels_last",
    validation_split=0.10)

VALID_GENERATOR = DATAGEN_VALID.flow_from_dataframe(
    Data,
    directory='DATA_TRAIN',
    x_col='image_id',
    y_col='dx',
    target_size=(img_height, img_width),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=True,
    subset='validation')

Found 500 validated image filenames belonging to 7 classes.


In [19]:
# Test-time pipeline: rescale only, no augmentation.
DATAGEN_TEST = ImageDataGenerator(data_format="channels_last", rescale=1./255)

# Same column mapping as the training stream, but files are looked up in
# 'DATA_TEST' and the order is fixed (shuffle=False).
_test_flow_options = {
    "directory": 'DATA_TEST',
    "x_col": 'image_id',
    "y_col": 'dx',
    "target_size": (img_height, img_width),
    "color_mode": 'rgb',
    "class_mode": 'categorical',
    "batch_size": batch_size,
    "shuffle": False,
}
TEST_GENERATOR = DATAGEN_TEST.flow_from_dataframe(Data, **_test_flow_options)

Found 5015 validated image filenames belonging to 7 classes.




'binary'      :   [0] or   [1] # Sigmoid & binary_crossentropy -> Binary classification or Multi-label classification <br>
'categorical' : [1,0] or [0,1] # Softmax & categorical_crossentropy -> Multi-class classification <br>

## 임의의 batch로 이미지와 label을 가져와서, 분포를 확인
(분포를 확인하지 않으면 한 batch 안의 label이 모두 같은 class일 수 있다 -> 이러면 학습에 문제가 발생)

In [20]:
# Pull the first batch (index 0) from the validation generator to inspect its labels.
Images, Labels = VALID_GENERATOR[0]

In [21]:
# Display the one-hot label rows of the sampled batch to eyeball the class mix.
Labels

array([[0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0.]], dtype=float32)

## Model architecture & Fine-tuning

7가지 class 중 하나를 선택 해야하는 Multi-class problem<br>
-> 따라서 activation function: softmax<br>
-> unit = 7개<br>
<br>
compile loss: categorical_crossentropy

In [22]:
# Backbone: ImageNet-pretrained ResNet50 without its classification head.
# Keras expects input_shape as (height, width, channels).
ResNet50 = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=(img_height, img_width, 3),
    pooling=None)

# Head: global average pooling followed by a 7-way softmax.
# The Dense layer is named explicitly because the CAM cells look it up with
# get_layer('dense'); an auto-generated name changes ('dense_1', ...) whenever
# this cell is re-run in the same kernel.
x = GlobalAveragePooling2D()(ResNet50.output)
Prediction = Dense(7, activation='softmax', name='dense')(x)
model = Model(inputs=ResNet50.input, outputs=Prediction)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [23]:
# Multi-class setup: categorical cross-entropy on the softmax output.
# The metric is registered under the name 'acc', which is the key the checkpoint
# file-name template ({acc}, {val_acc}) relies on.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

In [24]:
# Checkpoint file name encodes epoch, loss, acc, val_loss and val_acc.
_checkpoint_pattern = MODEL_DIRECTORY + (
    'ResNet50-RESNET-Softmax-{epoch:03d}-{loss:.4f}-{acc:.4f}-{val_loss:.4f}-{val_acc:.4f}.hdf5')

# Save a checkpoint only when val_loss improves (lower is better).
CP = ModelCheckpoint(
    filepath=_checkpoint_pattern,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1)

# NOTE(review): the very large profile_batch presumably keeps the profiler from
# ever being triggered — confirm against the installed TensorFlow version.
TB = TensorBoard(
    log_dir=TENSB_DIRECTORY,
    write_graph=True,
    write_images=True,
    profile_batch=100000000)

# Halve the learning rate after 5 epochs without val_loss improvement, never below 5e-5.
LR = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=0.00005,
    verbose=1)

CALLBACK = [CP, TB, LR]

In [None]:
# Train for 3 epochs on the training stream, validating on VALID_GENERATOR after
# each epoch; the callbacks handle checkpointing, TensorBoard logging and LR decay.
model.fit(TRAIN_GENERATOR, epochs=3, validation_data=VALID_GENERATOR,
          shuffle=True, callbacks=CALLBACK)

Epoch 1/3
  7/215 [..............................] - ETA: 1:02:39 - loss: 1.4092 - acc: 0.5986

#### 강사가 보내준 완벽하게 학습되지 않은 애매한 모델

In [None]:
# Load previously trained weights into `model` from a checkpoint file on disk.
# NOTE(review): the numbers in the file name presumably follow the checkpoint
# template order (epoch, loss, acc, val_loss, val_acc) — confirm with whoever
# produced this file; its prefix differs from the template used in this notebook.
model.load_weights("MODEL/ResNet50-Softmax-028-0.4848-0.8207-0.8499-0.7000.hdf5")

In [None]:
# Score the model on the test generator using the loss and 'acc' metric set in compile().
model.evaluate(TEST_GENERATOR)

# Class Activation Map

Conv output value를 가져오고자 하는 layer: conv5_block3_out

In [None]:
# Print the layer listing; used here to find the name of the last convolutional
# output ('conv5_block3_out') that the CAM model taps.
model.summary()

conv5_block3_out에서 출력되는 값을 가져오기 위한 모델, CAM_MODEL 정의

In [None]:
# Two-output model sharing the trained network's input: the feature maps of
# 'conv5_block3_out' and the class probabilities.
_last_conv_features = model.get_layer('conv5_block3_out').output
CAM_MODEL = Model(inputs=model.input,
                  outputs=[_last_conv_features, model.output])

In [None]:
# First batch (index 0) of the test generator; its first image is used for the CAM.
Images, Labels = TEST_GENERATOR[0]

In [None]:
# Run only the first image of the batch through the two-output CAM model.
ConvOutput, Predicted = CAM_MODEL.predict(Images[:1])
print(Predicted.round(3))
# Drop the batch axis so ConvOutput holds the feature maps of that single image.
ConvOutput = ConvOutput[0, :, :, :]
# Build the map for the class the model actually predicted for this image,
# rather than for a fixed class index that may not match the prediction.
TargetClass = int(np.argmax(Predicted[0]))
# Column `TargetClass` of the Dense kernel: one weight per feature-map channel.
Weights = CAM_MODEL.get_layer('dense').get_weights()[0][:, TargetClass]

In [None]:
# Sanity check: feature-map shape on the first line, class-weight shape on the second.
print(ConvOutput.shape, Weights.shape, sep='\n')

##### Heatmap = (Conv feature map * weight) 의 합

In [None]:
# Class activation map: weighted sum of the feature maps over the channel axis.
# tensordot contracts ConvOutput's last axis with Weights, so the accumulator
# starts from a true zero (np.ndarray(...) returns uninitialized memory) and the
# spatial size and channel count no longer need to be hard-coded.
Heatmap = np.tensordot(ConvOutput, Weights, axes=([-1], [0])).astype('float32')

In [None]:
# Upsample the coarse map to the network input size; cv2.resize takes (width, height).
Heatmap = cv2.resize(Heatmap, (img_width, img_height))

# Min-max scale to [0, 1]. Dividing by max() alone would flip the map when the
# maximum is negative and divide by zero when it is 0; a flat map becomes all zeros.
_heat_min, _heat_max = Heatmap.min(), Heatmap.max()
_heat_range = _heat_max - _heat_min
if _heat_range > 0:
    Heatmap = (Heatmap - _heat_min) / _heat_range
else:
    Heatmap = np.zeros_like(Heatmap)

In [None]:
# One figure with two side-by-side panels.
fig, axes = plt.subplots(nrows=1, ncols=2)
fig.set_size_inches(6, 3.0)

# Left panel: the input image on its own.
ax = axes[0]
ax.imshow(Images[0])

# Right panel: the same image with the heatmap drawn on top, semi-transparent.
ax = axes[1]
ax.imshow(Images[0])
ax.imshow(Heatmap, cmap=plt.cm.jet, alpha=0.3, interpolation='nearest')