In [43]:
!pip install keras==2.2.4

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [1]:
import torch
import torchvision
import torchvision.models as models
from PIL import Image, ImageFile
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import gc; gc.enable()
import pandas as pd
from torch.utils.data import Dataset
import tensorflow as tf
from tensorflow import keras
from torchvision import transforms

In [2]:
import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)
# keras imports
from keras.models import Model
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers import Input
# other imports
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import cohen_kappa_score
import numpy as np
import glob
import h5py
import json
import datetime
import time

Using TensorFlow backend.


In [3]:
import efficientnet.keras as efn 

f7 = efn.EfficientNetB7(include_top = False, weights='imagenet')

Instructions for updating:
Shapes are always computed; don't use the compute_shapes as it has no effect.


In [4]:
from tensorflow.keras.callbacks import Callback
# Specify title of our final model
SAVED_MODEL_NAME = 'effnet_modelB7.h5'
def get_preds_and_labels(model, generator):
    """
    Get predictions and labels from the generator
    """
    preds = []
    labels = []
    for _ in range(int(np.ceil(generator.samples / batch_size))):
        x, y = next(generator)
        preds.append(model.predict(x))
        labels.append(y)
    # Flatten list of numpy arrays
    return np.concatenate(preds).ravel(), np.concatenate(labels).ravel()

class Metrics(Callback):
    """
    A custom Keras callback for saving the best model
    according to the Quadratic Weighted Kappa (QWK) metric
    """
    def on_train_begin(self, logs={}):
        """
        Initialize list of QWK scores on validation data
        """
        self.val_kappas = []

    def on_epoch_end(self, epoch, logs={}):
        """
        Gets QWK score on the validation data
        """
        # Get predictions and convert to integers
        y_pred, labels = get_preds_and_labels(model, val_generator)
        y_pred = np.rint(y_pred).astype(np.uint8).clip(0, 4)
        # We can use sklearns implementation of QWK straight out of the box
        # as long as we specify weights as 'quadratic'
        _val_kappa = cohen_kappa_score(labels, y_pred, weights='quadratic')
        self.val_kappas.append(_val_kappa)
        print(f"val_kappa: {round(_val_kappa, 4)}")
        if _val_kappa == max(self.val_kappas):
            print("Validation Kappa has improved. Saving model.")
            self.model.save(SAVED_MODEL_NAME)
        return

In [6]:
gc.collect()

0

In [5]:
train_dir = '/Users/flatironschool/Documents/Kaggle/Kaggle-DR-detection/data/'
img_dir = train_dir + 'train/'

In [6]:
train_df = pd.read_csv(train_dir+'dr15labels_2.csv')
train_df_2 = pd.read_csv('/Users/flatironschool/Documents/Kaggle/Kaggle-DR-detection/drlabels.csv')

In [7]:
sub = train_df.sample(frac = 0.2)
sub2 = train_df_2.sample(frac = 0.2)
subs = pd.concat([sub,sub2])
val_sub = train_df.sample(frac = 0.05)
val_sub2 = train_df_2.sample(frac = 0.05)
val_subs = pd.concat([val_sub,val_sub2])

In [8]:
subs.columns = ['id_code','diagnosis']
subs['diagnosis'] = subs['diagnosis'].astype(str)
for ind in subs.index:
    diag = subs['diagnosis'][ind]
    subs['id_code'][ind] = img_dir+ str(diag) +'/'+ subs['id_code'][ind] +'.png'

In [9]:
set_trainable = False
for layer in f7.layers:
    if 'block3' in layer.name:
        set_trainable = True
    if set_trainable:
        layer.trainable = True
    else:
        layer.trainable = False

In [53]:
val_subs.columns = ['id_code','diagnosis']
val_subs['diagnosis'] = val_subs['diagnosis'].astype(str)
for ind in val_subs.index:
    diag = val_subs['diagnosis'][ind]
    val_subs['id_code'][ind] = img_dir+ str(diag) +'/'+ val_subs['id_code'][ind] +'.png'

In [63]:
train = pd.concat([train_df,train_df_2])
train.columns = ['id_code','diagnosis']
train.reindex()

In [66]:
for ind in train.index:
    diag = train['diagnosis'][ind]
    train['id_code'][ind] = img_dir+ str(diag) +'/'+ train['id_code'][ind] +'.png'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


KeyboardInterrupt: 

In [10]:
from keras import layers
def build_model():
    """
    A custom implementation of EfficientNetB7
    for the APTOS 2019 competition
    """
    model = Sequential()
    model.add(f7)
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(512))
    model.add(layers.Dense(5, activation='elu'))
#     model.add(layers.Dense(1, activation="linear"))
#     print(model.summary())
    return model

# Initialize model
model = build_model()

In [13]:
pd.set_option('max_colwidth', -1)

layers = [(layer, layer.name, layer.trainable) for layer in f7.layers]
df_layers = pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])
df_layers

Unnamed: 0,Layer Type,Layer Name,Layer Trainable
0,<keras.engine.input_layer.InputLayer object at 0x14116da58>,input_1,False
1,<keras.layers.convolutional.Conv2D object at 0x1437077b8>,stem_conv,False
2,<keras.layers.normalization.BatchNormalization object at 0x143fb8518>,stem_bn,False
3,<keras.layers.core.Activation object at 0x143fb8da0>,stem_activation,False
4,<keras.layers.convolutional.DepthwiseConv2D object at 0x143fb8eb8>,block1a_dwconv,False
5,<keras.layers.normalization.BatchNormalization object at 0x144470198>,block1a_bn,False
6,<keras.layers.core.Activation object at 0x144470588>,block1a_activation,False
7,<keras.layers.pooling.GlobalAveragePooling2D object at 0x144394898>,block1a_se_squeeze,False
8,<keras.layers.core.Reshape object at 0x144470390>,block1a_se_reshape,False
9,<keras.layers.convolutional.Conv2D object at 0x144470400>,block1a_se_reduce,False


In [None]:
# training_generator = BalancedBatchGenerator(X_train, y_train,
#                                                 batch_size=1000,
#                                                 random_state=42)
#     model.fit_generator(generator=training_generator, epochs=5, verbose=1)
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras import optimizers
datagen = keras.preprocessing.image.ImageDataGenerator(validation_split = 0.15)
batch_size = 32
train_steps_per_epoch = len(subs)//batch_size
adm = optimizers.Adam(lr = 0.0001)

train_generator = datagen.flow_from_dataframe(subs, 
                                              x_col = 'id_code', 
                                              y_col = 'diagnosis',
                                              target_size = (224,224),
                                              batch_size = batch_size,
                                              class_mode ='categorical',
                                              subset = 'training') 
val_generator = datagen.flow_from_dataframe(subs, 
                                            x_col='id_code', 
                                            y_col='diagnosis',
                                            target_size=(224, 224),
                                            batch_size=batch_size,
                                            class_mode='categorical',
                                            subset = 'validation')
# For tracking Quadratic Weighted Kappa score
kappa_metrics = Metrics()
es = EarlyStopping(monitor='val_loss', mode='auto', verbose=1, patience=10)
rlr = ReduceLROnPlateau(monitor='val_loss', 
                        factor=0.5, 
                        patience=3, 
                        verbose=1, 
                        mode='auto', 
                        epsilon=0.0001)

model.compile(optimizer = adm, loss = 'categorical_crossentropy')
history = model.fit_generator(train_generator,
                              steps_per_epoch=train_steps_per_epoch, 
                              validation_data=val_generator,
                              validation_steps = val_generator.samples // batch_size,
                              epochs=150, verbose=1, callbacks=[kappa_metrics, es, rlr])

Found 4248 validated image filenames belonging to 5 classes.
Found 749 validated image filenames belonging to 5 classes.


  .format(n_invalid, x_col)


Epoch 1/150

In [None]:
plt.plot(history.history['loss'])
plt.savefig('loss_f7')

In [None]:
model_json = model.to_json()
with open("f7.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("f7.h5")
print("Saved model to disk")

In [None]:
get_last_pretrained_layer = keras.backend.function([model.layers[0].input],
                                  [model.layers[131].output])

In [None]:
train_features = np.zeros(shape=(21972, 7, 7, 2048))
train_labels = np.zeros(shape=(21972,5))

In [None]:
i = 0
for inputs_batch, labels_batch in train_generator:
    features_batch = get_last_pretrained_layer(inputs_batch)[0]
    train_features[i : i + len(inputs_batch)] = features_batch
    train_labels[i : i +len(inputs_batch)] = labels_batch
    i += len(inputs_batch)
    print(i)
    if i+2 > len(train_features):
        break
         
train_features = np.reshape(train_features, (21972, 7 * 7 * 2048))

In [None]:
import gc; gc.enable()
del model, xception, train_generator; gc.collect()

In [None]:
from sklearn.model_selection import train_test_split
feat_train, feat_test, labels_train, labels_test = train_test_split(train_features, train_labels, test_size = 0.2)

In [None]:
oof_preds = np.zeros(train.shape[0])
sub_preds = np.zeros(test.shape[0])

feature_importance_df = pd.DataFrame()

In [None]:
from sklearn.model_selection import StratifiedKFold, KFold
import xgboost as xgb
params = {
    'objective': 'multi:softprob',  # error evaluation for multiclass training
    'num_class': 5,
}

In [None]:
for n_fold, (trn_idx, val_idx) in enumerate(folds.split(train, y_train)):
    
    trn_x, trn_y = train[train_cols].iloc[trn_idx], y_train.iloc[trn_idx]
    val_x, val_y = train[train_cols].iloc[val_idx], y_train.iloc[val_idx]
    gc.collect()
    dtrain = xgb.DMatrix(trn_x, trn_y, feature_names=trn_x.columns)
    dval = xgb.DMatrix(val_x, val_y, feature_names=val_x.columns)
    gc.collect()
    
    clf = xgb.train(params=params, dtrain=dtrain, num_boost_round=1500, evals=[(dtrain, "Train"), (dval, "Val")],
        verbose_eval= 250, early_stopping_rounds=100) 
    gc.collect()
    
    oof_preds[val_idx] = clf.predict(xgb.DMatrix(val_x))
    sub_preds += clf.predict(xgb.DMatrix(test[train_cols])) / folds.n_splits
    gc.collect()
    
    xgbfir.saveXgbFI(clf, feature_names=trn_x.columns, OutputXlsxFile='ieee_xgbfir_%sFold.xlsx'%str(n_fold+1), MaxInteractionDepth=9, MaxHistograms=15)
    gc.collect()
    fold_importance_df = pd.DataFrame()
    fold_importance_df["feature"] = pd.DataFrame.from_dict(data=clf.get_fscore(), orient="index", columns=["FScore"])["FScore"].index
    fold_importance_df["fscore"] = pd.DataFrame.from_dict(data=clf.get_fscore(), orient="index", columns=["FScore"])["FScore"].values
    fold_importance_df["fold"] = n_fold + 1
    feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
    gc.collect()
    
    print('\nFold %2d AUC %.6f & std %.6f' %(n_fold + 1, roc_auc_score(val_y, oof_preds[val_idx]), np.std([oof_preds[val_idx]])))
    gc.collect()

print('\nCV AUC score %.6f & std %.6f' % (roc_auc_score(y_train, oof_preds), np.std((oof_preds))))


In [None]:
X = feat_train
bleh, y = np.where(labels_train)

xgb_model = xgb.XGBClassifier(objective="multi:softprob", random_state=42)
xgb_model.fit(X, y)

y_pred = xgb_model.predict(feat_test)

In [None]:
from sklearn.metrics import confusion_matrix, cohen_kappa_score
bleh, y_test = np.where(labels_test)
print(cohen_kappa_score(y_test, y_pred))