In [None]:
import json
import math
import os

import cv2
from PIL import Image
import numpy as np
import seaborn as sns
from keras import layers
from keras.applications import DenseNet121, MobileNetV2
from keras.callbacks import Callback, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras.optimizers import Adam
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, accuracy_score, confusion_matrix
import scipy
import tensorflow as tf
from tqdm import tqdm

%matplotlib inline

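Set random seed for reproducibility. (Note: no global seed is set in the cells below; only the class-resampling cell fixes a `random_state`.)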

# Loading & Exploration

In [None]:
# Read the pre-split train/validation tables and the competition test table.
train_df = pd.read_csv('../input/valid-and-test-ta/x_train_8.csv')
valid_df = pd.read_csv('../input/valid-and-test-ta/x_valid_8.csv')
test_df = pd.read_csv('../input/aptos2019-blindness-detection/test.csv')

# Print (rows, columns) of each table, then preview the first training rows.
for frame in (train_df, valid_df, test_df):
    print(frame.shape)
train_df.head()

In [None]:
# Rebalance the training table to a fixed number of rows per diagnosis grade.
# NOTE: this cell overwrites train_df, so re-running it without re-running the
# loading cell resamples the already balanced table.
from sklearn.utils import resample

SAMPLES_PER_CLASS = 700  # number of rows drawn for every grade
RESAMPLE_SEED = 2020     # fixed seed so the drawn rows (and their order) repeat

# diagnosis grade -> draw with replacement?
# Grades drawn with replacement may contain repeated rows. Grades drawn without
# replacement must have at least SAMPLES_PER_CLASS rows in train_df, otherwise
# resample() raises a ValueError.
REPLACE_BY_GRADE = {
    0: False,  # normal
    1: True,   # mild
    2: False,  # moderate
    3: True,   # severe
    4: True,   # pdr
}

X = train_df
normal, mild, moderate, severe, pdr = [
    resample(
        X[X.diagnosis == grade],
        replace=with_replacement,
        n_samples=SAMPLES_PER_CLASS,
        random_state=RESAMPLE_SEED,
    )
    for grade, with_replacement in REPLACE_BY_GRADE.items()
]

# Stack the per-grade samples back into one table.
sampled = pd.concat([normal, mild, moderate, severe, pdr])

# Shuffle so the grades are interleaved; seeded so a re-run gives the same order.
train_df = sampled.sample(frac=1, random_state=RESAMPLE_SEED).reset_index(drop=True)

# Display the per-grade counts (each should equal SAMPLES_PER_CLASS).
train_df.diagnosis.value_counts()

In [None]:
# Check that the table sizes are as expected.
# The second line reports valid_df, so it is labelled as validation (it was
# previously mislabelled as "test"); the real test count is printed separately.
print('Number of train samples: ', train_df.shape[0])
print('Number of validation samples: ', valid_df.shape[0])
print('Number of test samples: ', test_df.shape[0])

train_df.head()

In [None]:
# Number of validation rows per diagnosis grade.
valid_df.diagnosis.value_counts()

# Resize Images

We will resize the images to 224x224, then create a single numpy array to hold the data.

In [None]:
def preprocess_image(image_path, desired_size=224):
    """Load an image file and return it as a square RGB image.

    Args:
        image_path: Path of the image file to open.
        desired_size: Width and height, in pixels, of the returned image.

    Returns:
        A PIL image in mode 'RGB' with size (desired_size, desired_size),
        resampled with bilinear interpolation. The aspect ratio of the
        source image is not preserved.
    """
    # The context manager closes the underlying file as soon as the pixels
    # have been read; convert() returns an independent in-memory image.
    with Image.open(image_path) as im:
        # Force three channels so grayscale, palette or RGBA files still fit
        # the (H, W, 3) arrays that the callers fill.
        rgb = im.convert('RGB')

    return rgb.resize((desired_size, )*2, resample=Image.BILINEAR)

In [None]:
# Allocate the training image tensor, then fill it one resized image at a time.
N = len(train_df)
x_train = np.empty((N, 224, 224, 3), dtype=np.float32)

for row, image_id in enumerate(tqdm(train_df['id_code'])):
    image_path = f'../input/aptos2019-blindness-detection/train_images/{image_id}.png'
    x_train[row] = preprocess_image(image_path)

In [None]:
# Allocate the validation image tensor, then fill it one resized image at a time.
# The validation ids are read from the same train_images folder.
N = len(valid_df)
x_val = np.empty((N, 224, 224, 3), dtype=np.float32)

for row, image_id in enumerate(tqdm(valid_df['id_code'])):
    image_path = f'../input/aptos2019-blindness-detection/train_images/{image_id}.png'
    x_val[row] = preprocess_image(image_path)

In [None]:
# Allocate the test image tensor, then fill it one resized image at a time.
N = len(test_df)
x_test = np.empty((N, 224, 224, 3), dtype=np.float32)

for row, image_id in enumerate(tqdm(test_df['id_code'])):
    image_path = f'../input/aptos2019-blindness-detection/test_images/{image_id}.png'
    x_test[row] = preprocess_image(image_path)

In [None]:
# Diagnosis grades for the training and validation images.
y_train, y_val = train_df['diagnosis'], valid_df['diagnosis']

# Print the shape of every image tensor and label vector built so far.
for arr in (x_train, y_train, x_val, y_val, x_test):
    print(arr.shape)

# Model: MobileNetV2

In [None]:
# Load the saved Keras model from disk and print its layer summary.
model_path = '../input/my-best/model_8.h5'
model = load_model(model_path)
model.summary()

## Submit

In [None]:
# Raw model output on the training images. train_pred is kept untouched
# because later cells use the continuous values.
train_pred = model.predict(x_train)

# Turn the continuous output into a grade 0..4: clip to the valid range, then
# round to the NEAREST integer. A bare astype(int) truncates toward zero, so
# e.g. 2.9 would be counted as grade 2.
# Assumes the model emits one value per image, hence ravel() -- TODO confirm.
y_train_pred = np.rint(np.clip(train_pred, 0, 4)).astype(int).ravel()

labels = ['0 - No DR', '1 - Mild', '2 - Moderate', '3 - Severe', '4 - Proliferative DR']
# Pass the grade set explicitly so the matrix is always 5x5, even when a grade
# is absent from both the truth and the predictions.
cnf_matrix = confusion_matrix(
    train_df['diagnosis'].astype('int'),
    y_train_pred,
    labels=list(range(len(labels))),
)
df_cm = pd.DataFrame(cnf_matrix, index=labels, columns=labels)
plt.figure(figsize=(16, 7))
# fmt='d' prints whole counts; the default format would show 700 as 7e+02.
sns.heatmap(df_cm, annot=True, fmt='d', cmap="Blues")
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

In [None]:
# Quadratic-weighted Cohen's kappa between the true training grades and the
# discretised training predictions (stored under the name `kappa_val`).
true_train_grades = train_df['diagnosis'].astype('int')
kappa_val = cohen_kappa_score(true_train_grades, y_train_pred, weights='quadratic')
kappa_val

In [None]:
# Raw model output on the validation images. val_pred is kept untouched
# because later cells use the continuous values.
val_pred = model.predict(x_val)

# Turn the continuous output into a grade 0..4: clip to the valid range, then
# round to the NEAREST integer. A bare astype(int) truncates toward zero, so
# e.g. 2.9 would be counted as grade 2.
# Assumes the model emits one value per image, hence ravel() -- TODO confirm.
y_val_pred = np.rint(np.clip(val_pred, 0, 4)).astype(int).ravel()

labels = ['0 - No DR', '1 - Mild', '2 - Moderate', '3 - Severe', '4 - Proliferative DR']
# Pass the grade set explicitly so the matrix is always 5x5, even when a grade
# is absent from both the truth and the predictions.
cnf_matrix = confusion_matrix(
    valid_df['diagnosis'].astype('int'),
    y_val_pred,
    labels=list(range(len(labels))),
)
df_cm = pd.DataFrame(cnf_matrix, index=labels, columns=labels)
plt.figure(figsize=(16, 7))
# fmt='d' prints whole counts; the default format would show 700 as 7e+02.
sns.heatmap(df_cm, annot=True, fmt='d', cmap="Blues")
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

In [None]:
# Quadratic-weighted Cohen's kappa between the true validation grades and the
# discretised validation predictions.
true_valid_grades = valid_df['diagnosis'].astype('int')
kappa_val = cohen_kappa_score(true_valid_grades, y_val_pred, weights='quadratic')
kappa_val

In [None]:
from sklearn.metrics import classification_report

# Per-grade precision / recall / F1 of the discretised validation predictions.
target_names = ['0 - No DR', '1 - Mild', '2 - Moderate', '3 - Severe', '4 - Proliferative DR']
# labels is given explicitly so the five target_names always line up with
# grades 0..4; without it the report raises when a grade is missing from both
# the truth and the predictions.
print(classification_report(
    valid_df['diagnosis'].astype('int'),
    y_val_pred,
    labels=list(range(len(target_names))),
    target_names=target_names,
))

In [None]:
# Pair each true grade with the raw (unrounded) model output, one table per
# split, for the density plots and the GaussianNB mapping further down.
train_data = pd.DataFrame({'trainLabel': train_df['diagnosis']})
train_data['trainPred'] = train_pred

valid_data = pd.DataFrame({'validLabel': valid_df['diagnosis']})
valid_data['validPred'] = val_pred

In [None]:
labels = [0,1,2,3,4]

# One density curve of the raw model output per true grade (training set).
for label in labels:
    # Rows whose true grade equals `label`
    subset = train_data[train_data['trainLabel'] == label]

    # kdeplot draws the KDE-only curve that the deprecated
    # distplot(hist=False, kde=True) call used to produce.
    sns.kdeplot(subset['trainPred'], linewidth=3, label=label)

# Plot formatting
plt.legend(prop={'size': 10}, title = 'Kelas')
plt.title('Density Plot with Multiple Classes')
plt.xlabel('Prediksi')
plt.ylabel('Density')
plt.show()

In [None]:
labels = [0,1,2,3,4]

# One density curve of the raw model output per true grade (validation set).
for label in labels:
    # Rows whose true grade equals `label`
    subset = valid_data[valid_data['validLabel'] == label]

    # kdeplot draws the KDE-only curve that the deprecated
    # distplot(hist=False, kde=True) call used to produce.
    sns.kdeplot(subset['validPred'], linewidth=3, label=label)

# Plot formatting
plt.legend(prop={'size': 10}, title = 'Kelas')
plt.title('Density Plot with Multiple Classes')
plt.xlabel('Prediksi')
plt.ylabel('Density')
plt.show()

In [None]:
from sklearn.naive_bayes import GaussianNB

# Raw model output for the test images.
test_pred = model.predict(x_test)

# Fit a Gaussian naive Bayes mapping from raw validation output to validation
# grade, then apply that mapping to the raw test output.
gnb = GaussianNB()
gnb.fit(val_pred, valid_data['validLabel'])
y_test_pred = gnb.predict(test_pred)

# Write the predicted grades as the submission file.
test_df['diagnosis'] = y_test_pred
test_df.to_csv('submission.csv',index=False)

In [None]:
# Disabled alternative submission: discretise the raw test output directly
# (clip + integer cast) instead of using the GaussianNB mapping.
# It was disabled with an unterminated ''' string, which makes the cell a
# SyntaxError; it is kept as comments so the notebook runs and submission.csv
# is not overwritten.
#
# pred = model.predict(x_test)
# y_val_pred = pred
# y_val_pred = np.clip(y_val_pred,0,4)
# y_val_pred = y_val_pred.astype(int)
#
# test_df['diagnosis'] = y_val_pred
# test_df.to_csv('submission.csv',index=False)

In [None]:
import keras

# Name of the layer whose output is used as the feature vector.
layer_name = 'dense_2'

# Sub-model that shares the loaded network's input and stops at that layer.
feature_layer = model.get_layer(layer_name)
intermediate_layer_model = keras.Model(
    inputs=model.input,
    outputs=feature_layer.output,
)
intermediate_layer_model.summary()

In [None]:
# Run every image set through the truncated network.
# Despite the *_pred names these are intermediate-layer outputs, and this cell
# overwrites the y_train_pred / y_test_pred values assigned in earlier cells.
y_train_pred, y_valid_pred, y_test_pred = [
    intermediate_layer_model.predict(images)
    for images in (x_train, x_val, x_test)
]

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
# scipy.interp was only a deprecated alias of numpy.interp and is not available
# in recent SciPy releases; import the NumPy function under the same name.
from numpy import interp
from sklearn.metrics import roc_auc_score
from sklearn.multiclass import OneVsOneClassifier

In [None]:
# One-hot encode the validation grades (one column per grade 0..4).
# Built from the source column rather than from y_val itself, so re-running
# this cell does not binarize an already binarized array.
y_val = label_binarize(valid_df['diagnosis'], classes=[0,1,2,3,4])

In [None]:
# Number of diagnosis grades (0-4); bounds the per-class ROC loops below.
n_classes = 5

In [None]:
# classifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# One-vs-one ensemble whose base estimator standardises the features and then
# fits an SVC.
clf = OneVsOneClassifier(
    make_pipeline(StandardScaler(), SVC(probability=True))
)

# Fit on the training features and score the validation features.
# y_train_pred / y_valid_pred are expected to hold the intermediate-layer
# outputs computed in the feature-extraction cell.
clf.fit(y_train_pred, y_train)
y_score = clf.decision_function(y_valid_pred)

# y_val must already be one-hot encoded here: both sides are reduced to a
# grade with argmax over the class axis.
true_grades = np.argmax(y_val,axis=1)
predicted_grades = np.argmax(y_score,axis=1)
kappa_val = cohen_kappa_score(true_grades, predicted_grades, weights='quadratic')

print(kappa_val)

In [None]:
# Per-class ROC curves of the classifier scores plus their macro average.
from itertools import cycle

# Compute ROC curve and ROC area for each class
# (column i of the one-hot y_val against column i of y_score).
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_val[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Compute micro-average ROC curve and ROC area (stored only; not drawn below)
fpr["micro"], tpr["micro"], _ = roc_curve(y_val.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

lw = 2
# Union of every false-positive rate at which some class curve has a point
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

# Then interpolate all ROC curves at these points.
# np.interp is the function the deprecated scipy.interp alias pointed to.
mean_tpr = np.zeros_like(all_fpr)
for i in range(n_classes):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

# Finally average it and compute AUC
mean_tpr /= n_classes

fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# Plot all ROC curves
plt.figure()
plt.plot(fpr["macro"], tpr["macro"],
         label='macro-average ROC curve (area = {0:0.2f})'
               ''.format(roc_auc["macro"]),
         color='navy', linestyle=':', linewidth=4)

# Five distinct colours so each of the five class curves is identifiable;
# with only three, classes 3 and 4 reused the colours of classes 0 and 1.
colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'forestgreen', 'crimson'])
for i, color in zip(range(n_classes), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=lw,
             label='ROC curve of class {0} (area = {1:0.2f})'
             ''.format(i, roc_auc[i]))

# Diagonal reference line (true positive rate equal to false positive rate)
plt.plot([0, 1], [0, 1], 'k--', lw=lw)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Some extension of Receiver operating characteristic to multi-class')
plt.legend(loc="lower right")
plt.show()


In [None]:
# Confusion matrix of the one-vs-one SVC on the validation set.
labels = ['0 - No DR', '1 - Mild', '2 - Moderate', '3 - Severe', '4 - Proliferative DR']
# Both the one-hot truth and the score matrix are reduced to a grade with
# argmax. The grade set is passed explicitly so the matrix is always 5x5, even
# when a grade is absent from both the truth and the predictions.
cnf_matrix = confusion_matrix(
    np.argmax(y_val,axis=1),
    np.argmax(y_score,axis=1),
    labels=list(range(len(labels))),
)
df_cm = pd.DataFrame(cnf_matrix, index=labels, columns=labels)
plt.figure(figsize=(16, 7))
# fmt='d' prints whole counts; the default format abbreviates large counts
# in scientific notation.
sns.heatmap(df_cm, annot=True, fmt='d', cmap="Blues")
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Per-grade precision / recall / F1 of the one-vs-one SVC on the validation set.
target_names = ['0 - No DR', '1 - Mild', '2 - Moderate', '3 - Severe', '4 - Proliferative DR']
# labels is given explicitly so the five target_names always line up with
# grades 0..4; without it the report raises when a grade is missing from both
# the truth and the predictions.
print(classification_report(
    np.argmax(y_val,axis=1),
    np.argmax(y_score,axis=1),
    labels=list(range(len(target_names))),
    target_names=target_names,
))

In [None]:
# Disabled alternative submission: grade the test features with the
# one-vs-one SVC (argmax over its decision scores).
# It was disabled with an unterminated ''' string, which makes the cell a
# SyntaxError; it is kept as comments so the notebook runs and submission.csv
# is not overwritten.
#
# y_score = clf.decision_function(y_test_pred)
# test_df['diagnosis'] = np.argmax(y_score,axis=1)
# test_df.to_csv('submission.csv',index=False)