In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras import models, layers
from keras.preprocessing import image
from keras.applications import DenseNet121
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, cohen_kappa_score
import keras
import warnings
warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [0]:
df = pd.read_csv('/content/train.csv')

In [0]:
df['img_path'] = df['id_code'].apply(lambda x: x + '.png')
df['diagnosis'] = df['diagnosis'].apply(lambda x: str(x))

In [0]:
df_x, df_y = df[['id_code', 'img_path']], df['diagnosis']

In [0]:
x_train, x_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.15, stratify= df_y, random_state=2019)

In [6]:
train = pd.concat([x_train, y_train], axis=1)
display(train.head())

test = pd.concat([x_test, y_test], axis=1)
display(test.head())

Unnamed: 0,id_code,img_path,diagnosis
345,194814669fee,194814669fee.png,1
645,2d870833c0c9,2d870833c0c9.png,2
3399,ebe0175e530c,ebe0175e530c.png,4
3290,e499434242cc,e499434242cc.png,2
2812,c2a58b2cfd0b,c2a58b2cfd0b.png,0


Unnamed: 0,id_code,img_path,diagnosis
73,05b1bb2bdb81,05b1bb2bdb81.png,0
2811,c280730cc211,c280730cc211.png,2
3656,ff8a0b45c789,ff8a0b45c789.png,4
1795,7ed4128b2a4e,7ed4128b2a4e.png,0
1682,76e589911303,76e589911303.png,1


In [7]:
rows = 5
cols = 5
fig = plt.figure(figsize=(cols * 5, rows * 5))
for i in range(rows * cols):
    img_path = '/content/train_images/'
    img_index = train['img_path'].iloc[i, ]
    full_img_path = img_path + img_index
    img = image.load_img(full_img_path, target_size = (224,224))
    fig.add_subplot(cols, rows, i + 1)
    plt.imshow(img)

Output hidden; open in https://colab.research.google.com to view.

In [0]:
train_gen = image.ImageDataGenerator(rescale = 1./255,
                                    width_shift_range = 0.2,
                                    height_shift_range = 0.2,
                                    shear_range = 0.2,
                                    zoom_range = 0.2,
                                    horizontal_flip = True,
                                    vertical_flip = True)

test_gen = image.ImageDataGenerator(rescale = 1./255)

In [9]:
img_directory = '/content/train_images'
batch_size = 20
train_generator = train_gen.flow_from_dataframe(train, 
                                                directory = img_directory, 
                                                x_col = 'img_path', 
                                                y_col = 'diagnosis', 
                                                target_size = (224,224), 
                                                class_mode = 'categorical', 
                                                seed = 2019, 
                                                shuffle = False,
                                                batch_size = batch_size)

test_generator = test_gen.flow_from_dataframe(test,
                                             directory = img_directory, 
                                             x_col = 'img_path', 
                                             y_col = 'diagnosis', 
                                             target_size=(224,224),
                                             class_mode = 'categorical', 
                                             seed= 2019,
                                             shuffle = False,
                                             batch_size = batch_size)

Found 3112 validated image filenames belonging to 5 classes.
Found 550 validated image filenames belonging to 5 classes.


In [10]:
test_generator_for_eval = test_gen.flow_from_dataframe(test,
                                     directory = img_directory, 
                                     x_col = 'img_path', 
                                     y_col = 'diagnosis', 
                                     target_size=(224,224),
                                     class_mode = 'categorical', 
                                     seed= 2019,
                                     shuffle = False,
                                     batch_size = 1)

Found 550 validated image filenames belonging to 5 classes.


In [0]:
def get_preds_and_label(model, generator = test_generator_for_eval):

  
    preds = []
    labels = []
    
    generator.reset()
    preds_ohe = model.predict_generator(generator, steps = generator.n // generator.batch_size)
    predict = np.argmax(preds_ohe, axis=1)
    true_label = np.array(generator.labels)
    
    preds.append(predict)
    labels.append(true_label)
    
    return(preds[0], labels[0])

In [0]:
class kappa_metrics(keras.callbacks.Callback):
       
    
    def on_train_begin(self, logs ={}):
        self.val_kappa = []
        self.val_acc = []

    def on_epoch_end(self, epoch, logs={}):
        
        y_pred, labels = get_preds_and_label(model)
        
        _val_kappa = cohen_kappa_score(labels, y_pred, weights='quadratic')
        _val_acc = accuracy_score(labels, y_pred)
        
        self.val_kappa.append(_val_kappa)
        self.val_acc.append(_val_acc)
        
        print(f"val_kapp : {_val_kappa}")
        print(f"val_acc : {_val_acc}")
        
        if _val_kappa == max(self.val_kappa):
            print('validation kapp has improved, Saving Model')
            self.model.save('kappa_model.h5')
        return

In [13]:
dense_net = DenseNet121(include_top = False, input_shape = (224,224,3), weights=None)











In [14]:
dense_net.summary()

Model: "densenet121"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 112, 112, 64) 9408        zero_padding2d_1[0][0]           
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1/conv[0][0]                 
________________________________________________________________________________________

In [15]:
model = models.Sequential()
model.add(dense_net)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dropout(0.4))
model.add(layers.Dense(1024, activation='relu'))
model.add(layers.Dropout(0.4))
model.add(layers.Dense(5, activation='softmax'))

model_opt = keras.optimizers.Adam(lr = 0.0008, beta_1= 0.5)

model.compile(loss='categorical_crossentropy', optimizer=model_opt, metrics=['acc'])

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.



In [0]:
# Difine call back
call_kappa = kappa_metrics()
call_lr_reduce = keras.callbacks.ReduceLROnPlateau(monitor = 'val_loss', factor = .5, patience=4, verbose = 1, min_lr = 0)

In [0]:
model.fit_generator(train_generator, 
                    steps_per_epoch = np.ceil(train_generator.n // train_generator.batch_size),
                    epochs = 300,
                    callbacks = [call_lr_reduce],
                    validation_data = test_generator,
                    validation_steps = np.ceil(test_generator.n // test_generator.batch_size),
                    shuffle = False)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.00039999998989515007.
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.00019999999494757503.
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300

Epoch 00017: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-05.
Epoch 18/300
 18/155 [==>...........................] - ETA: 4:07 - loss: 0.7722 - acc: 0.7222