## Import modules

In [None]:
RANDOM_SEED=2019

from functools import reduce


import pandas as pd
import numpy as np
np.random.seed(RANDOM_SEED)

from skimage.transform import resize
from scipy.stats.mstats import gmean

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.framework import dtypes
from tensorflow import set_random_seed

from keras import backend as K
from keras import metrics
from keras.utils import *
from keras.models import *
from keras.layers import *
from keras.layers.convolutional import *
from keras.layers.merge import *
from keras.layers.normalization import *
from keras.regularizers import *
from keras.optimizers import *
from keras.losses import *
from keras.callbacks import *
from keras.activations import *
from keras.applications import *
from keras.applications import mobilenetv2
from keras.preprocessing.image import ImageDataGenerator
from keras.legacy import interfaces

from numba import jit

import cv2

from io import BytesIO
import warnings
warnings.filterwarnings("ignore")

import IPython
from IPython.display import display
from IPython.display import clear_output
from IPython.core.display import display, HTML 
from IPython.display import Image, display_png

import PIL

from glob import glob
from tqdm import tqdm_notebook
from multiprocessing import Process

import os
import sys
sys.path.append("../input")

import json
import math
import string
import collections
import time

from six.moves import xrange

from efficientnet import *

## Define L2 Normalized Optimizer

In [None]:
from keras import optimizers

def l2_norm(grad):
    """Return the L2 norm of ``grad`` plus the backend epsilon.

    Computes ``sqrt(sum(grad ** 2)) + K.epsilon()``. The epsilon term keeps
    the result strictly positive so it can be used as a divisor.
    """
    squared_sum = K.sum(K.square(grad))
    return K.sqrt(squared_sum) + K.epsilon()

class OptimizerWrapper(optimizers.Optimizer):
    """Wrap a Keras optimizer so that its gradient computation can be overridden.

    The wrapper delegates weights, updates and (de)serialisation to the
    wrapped optimizer. While ``get_updates`` runs, the wrapped optimizer's
    ``get_gradients`` is temporarily replaced by this object's
    ``get_gradients``, which subclasses override to post-process gradients.
    """

    def __init__(self, optimizer):
        """Store the wrapped optimizer.

        Args:
            optimizer: anything accepted by ``optimizers.get`` (for example an
                optimizer instance or identifier).
        """
        # NOTE(review): the base ``Optimizer.__init__`` is intentionally not
        # called; ``weights`` and ``updates`` are read-only properties on this
        # class, so a base initialiser that assigns them would presumably
        # fail -- confirm against the Keras version in use.
        self.optimizer = optimizers.get(optimizer)

        # Keep a handle on the original `get_gradients` so it can be restored
        # after the temporary monkey patch in `get_updates`.
        self._optimizer_get_gradients = self.optimizer.get_gradients

    def get_gradients(self, loss, params):
        """Return the wrapped optimizer's gradients of ``loss`` w.r.t. ``params``."""
        return self._optimizer_get_gradients(loss, params)

    @interfaces.legacy_get_updates_support
    def get_updates(self, loss, params):
        """Build the update ops using this wrapper's ``get_gradients``.

        Returns:
            The wrapped optimizer's ``updates``.
        """
        # Monkey patch `get_gradients` for the duration of the call only.
        self.optimizer.get_gradients = self.get_gradients
        try:
            self.optimizer.get_updates(loss, params)
        finally:
            # Always undo the patch, even when `get_updates` raises, so the
            # wrapped optimizer is never left in the patched state.
            self.optimizer.get_gradients = self._optimizer_get_gradients

        return self.updates

    def set_weights(self, weights):
        """Forward ``weights`` to the wrapped optimizer."""
        self.optimizer.set_weights(weights)

    def get_weights(self):
        """Return the wrapped optimizer's weights."""
        return self.optimizer.get_weights()

    def get_config(self):
        """Return a config holding the wrapped optimizer's name and config.

        Keys: ``'optimizer_config'`` (the wrapped optimizer's own config) and
        ``'optimizer_name'`` (its lower-cased class name).
        """
        return {
            'optimizer_config': self.optimizer.get_config(),
            'optimizer_name': self.optimizer.__class__.__name__.lower(),
        }

    @property
    def weights(self):
        """Weights of the wrapped optimizer."""
        return self.optimizer.weights

    @property
    def updates(self):
        """Update ops of the wrapped optimizer."""
        return self.optimizer.updates

    @classmethod
    def from_config(cls, config):
        """Not supported on the base wrapper; subclasses must implement it."""
        raise NotImplementedError

    @classmethod
    def set_normalization_function(cls, name, func):
        """Register ``func`` under ``name`` in the module-level ``_NORMS`` registry."""
        _NORMS[name] = func

    @classmethod
    def get_normalization_functions(cls):
        """Return the sorted list of registered normalization names."""
        return sorted(_NORMS)


class NormalizedOptimizer(OptimizerWrapper):
    """Optimizer wrapper that divides every gradient by a norm of itself.

    The norm function is looked up by name in the module-level ``_NORMS``
    registry.
    """

    def __init__(self, optimizer, normalization='l2'):
        """Wrap ``optimizer`` and select the normalization function.

        Raises:
            ValueError: if ``normalization`` is not a key of ``_NORMS``.
        """
        super(NormalizedOptimizer, self).__init__(optimizer)

        if normalization not in _NORMS:
            known = str(sorted(list(_NORMS.keys())))
            raise ValueError('`normalization` must be one of %s.\n'
                             'Provided was "%s".' % (known, normalization))

        self.normalization = normalization
        self.normalization_fn = _NORMS[normalization]
        # NOTE(review): this variable is created independently of the wrapped
        # optimizer; nothing in this class links it to the wrapped
        # optimizer's own learning rate -- confirm that is intended.
        self.lr = K.variable(1e-3, name='lr')

    def get_gradients(self, loss, params):
        """Return the parent's gradients, each divided by its own norm."""
        raw_grads = super(NormalizedOptimizer, self).get_gradients(loss, params)
        normalized = []
        for g in raw_grads:
            normalized.append(g / self.normalization_fn(g))
        return normalized

    def get_config(self):
        """Return the parent config extended with the ``'normalization'`` name."""
        merged = dict(super(NormalizedOptimizer, self).get_config())
        merged['normalization'] = self.normalization
        return merged

    @classmethod
    def from_config(cls, config):
        """Rebuild the wrapper from a dict produced by ``get_config``."""
        inner = optimizers.get({
            'class_name': config['optimizer_name'],
            'config': config['optimizer_config'],
        })
        return cls(inner, normalization=config['normalization'])


# Registry of gradient-normalization functions, keyed by the name accepted by
# NormalizedOptimizer's `normalization` argument.
_NORMS = {
    'l2': l2_norm,
}

# Register this optimizer in the global Keras custom objects so it can be
# referenced by the name 'NormalizedOptimizer'.
get_custom_objects().update({'NormalizedOptimizer': NormalizedOptimizer})

In [None]:
class HorizontalDisplay:
    """Show several rich objects side by side in one notebook output.

    Every positional argument must expose a ``_repr_html_()`` method.
    """

    def __init__(self, *args):
        self.args = args

    def _repr_html_(self):
        """Return each argument's HTML in a left-floated ``<div>``, joined by newlines."""
        cell = '<div style="float: left; padding: 10px;">{0}</div>'
        rendered = []
        for item in self.args:
            rendered.append(cell.format(item._repr_html_()))
        return "\n".join(rendered)

In [None]:
def showarray(a, fmt='png'):
    """Display an array inline as an image.

    ``a`` is cast to ``uint8``, encoded by PIL in format ``fmt`` into an
    in-memory buffer, and shown through ``IPython.display``.
    """
    buffer = BytesIO()
    pixels = np.uint8(a)
    PIL.Image.fromarray(pixels).save(buffer, fmt)
    encoded = buffer.getvalue()
    IPython.display.display(IPython.display.Image(data=encoded))

In [None]:
!ls ../input/all-train-csv

## Load data list

In [None]:
# Number of diagnosis grades; later cells select them as the strings "0".."4".
NUM_CLASSES = 5

# Label table with columns `id_code` and `diagnosis`. Both are read as
# strings so the grade can be compared against str(i) later on.
tr = pd.read_csv(
    '../input/all-train-csv/train.csv',
    dtype={
        'id_code': str, 
        'diagnosis': str
    }
)

# Label table with columns `image` and `level` (read as strings).
tr15 = pd.read_csv(
    '../input/all-train-csv/trainLabels.csv',
    dtype={
        'image': str, 
        'level': str
    }
)

# Second `image` / `level` table (read as strings).
ts15 = pd.read_csv(
    '../input/all-train-csv/retinopathy_solution.csv',
    dtype={
        'image': str, 
        'level': str
    }
)

# Preview the three tables and the per-level row counts side by side.
HorizontalDisplay(tr.head(), tr15.head(), ts15.head(), tr15.groupby("level").count(), ts15.groupby("level").count())

## Remove duplicate images

In [None]:
# Keep only the rows of `tr` whose id_code is listed in the MD5 table with
# strMd5_train_count == 1.
# NOTE(review): presumably that count is the number of training images that
# share the same MD5 hash -- confirm against the aptosmd5 dataset.
md5s = pd.read_csv("../input/aptosmd5/strMd5.csv")
tr=tr[tr.id_code.isin(md5s[1 == md5s.strMd5_train_count].id_code.values)]

In [None]:
# Turn the bare image ids into full paths of the pre-cropped 480x480 JPEGs.
# After this cell the id/image columns hold file paths, not ids.
tr.id_code = '../input/aptos-1519-hard-crop-480x480/cropped_resized_train_19_480/' + tr.id_code + '.jpg'
tr15.image = '../input/aptos-1519-hard-crop-480x480/cropped_resized_train_15_480/' + tr15.image + '.jpg'
ts15.image = '../input/aptos-1519-hard-crop-480x480/cropped_resized_test_15_480/' + ts15.image + '.jpg'
HorizontalDisplay(tr.head(), tr15.head(), ts15.head())

## Show individual class images

In [None]:
# Load one sample image and display it inline.
# NOTE(review): OpenCV is commonly documented as loading BGR while PIL (used
# by showarray) assumes RGB, so colours may appear swapped -- confirm.
img = cv2.imread("../input/aptos-1519-hard-crop-480x480/cropped_resized_train_19_480/002c21358ce6.jpg")
showarray(img)

In [None]:
# Load another sample image; it is not shown because the display call is disabled.
img = cv2.imread("../input/aptos-1519-hard-crop-480x480/cropped_resized_train_19_480/18b06f56ab27.jpg")
#showarray(img)

In [None]:
# Load another sample image; it is not shown because the display call is disabled.
img = cv2.imread("../input/aptos-1519-hard-crop-480x480/cropped_resized_train_19_480/ffcf7b45f213.jpg")
###showarray(img)

In [None]:
# Load another sample image; it is not shown because the display call is disabled.
img = cv2.imread("../input/aptos-1519-hard-crop-480x480/cropped_resized_train_19_480/03c85870824c.jpg")
#showarray(img)

In [None]:
# Load another sample image; it is not shown because the display call is disabled.
# Note: the module-level name `img` stays bound to this image afterwards.
img = cv2.imread("../input/aptos-1519-hard-crop-480x480/cropped_resized_train_19_480/eb1d37b71fd1.jpg")
#showarray(img)

## Undersampling

In [None]:
# Balance `tr15` by undersampling every grade to the size of the rarest one.
nsmpl = tr15.groupby("level").count().image.min()

# Sample WITHOUT replacement: `nsmpl` is the smallest class size, so every
# grade has at least that many rows, and drawing with replacement would only
# introduce duplicate rows while dropping real ones.
undersampled = [
    tr15[tr15.level == str(i)].sample(nsmpl, replace=False)
    for i in range(NUM_CLASSES)
]

# Concatenate the per-grade samples and shuffle the rows.
train = pd.concat(undersampled).sample(frac=1)
HorizontalDisplay(tr15.groupby("level").count(), train.groupby("level").count())

In [None]:
# Balance `ts15` the same way and merge the result into `train`.
nsmpl = ts15.groupby("level").count().image.min()

# Sample WITHOUT replacement: `nsmpl` is the smallest class size, so every
# grade has at least that many rows, and drawing with replacement would only
# introduce duplicate rows while dropping real ones.
undersampled = [
    ts15[ts15.level == str(i)].sample(nsmpl, replace=False)
    for i in range(NUM_CLASSES)
]

# Append the previously built `train` rows and reshuffle everything.
train = pd.concat(undersampled+[train]).sample(frac=1)
HorizontalDisplay(ts15.groupby("level").count(), train.groupby("level").count())

## Define model

In [None]:
# Training hyper-parameters shared by the cells below.
epochs = 40
batch_size = 8
steps_per_epoch=100
# NOTE(review): `embedding_size` is not referenced by the model-building code
# visible in this notebook (the embedding layer there is sized 64) -- confirm.
embedding_size = 128
# Height, width, channels of the network input.
input_image_shape = (480, 480, 3)
#input_image_shape = (28, 28, 1)

### Quadruplet Loss

In [None]:
def quadruplet_loss(alpha=1., beta=.5, batch_size=128):
    """Return a Keras loss built from two summed hinge terms over ``y_pred``.

    NOTE: a later cell in this file defines another ``quadruplet_loss``; that
    later definition is the one in effect once that cell has run.

    Args:
        alpha: margin of the first hinge term (uses ``y_pred[:, 1]``).
        beta: margin of the second hinge term (uses ``y_pred[:, 2]``).
        batch_size: accepted for signature compatibility; unused.

    Returns:
        A function ``(y_true, y_pred) -> scalar tensor``; ``y_true`` is ignored.
    """

    def _quadruplet_loss(y_true, y_pred):
        del y_true

        first_sq = tf.square(y_pred[:, 0])
        strong_hinge = tf.maximum(
            first_sq - (1. - tf.square(y_pred[:, 1])) + alpha, 0)
        weak_hinge = tf.maximum(
            first_sq - (1. -  tf.square(y_pred[:, 2])) + beta, 0)

        return tf.reduce_sum(strong_hinge) + tf.reduce_sum(weak_hinge)

    return _quadruplet_loss

In [None]:
# Scratch check: a flat vector of five ones followed by five zeros.
np.concatenate([[1]*5, [0]*5]).reshape(-1)

### EfficientNetB0 backend

In [None]:
#_efficientnet = EfficientNetB0(
#    include_top=False, 
#    weights=None, 
#    input_tensor=None, 
#    input_shape=input_image_shape, 
#    pooling='max', 
#    classes=5
#)
#_efficientnet.load_weights("../input/efficientnet-keras-weights-b0b5/efficientnet-b0_imagenet_1000_notop.h5")

In [None]:
#_efficientnet.layers[-1]

In [None]:
#_efficientnet = Model(inputs=_efficientnet.inputs, output=_efficientnet.layers[-1].output)
#_efficientnet.layers.pop(0)

In [None]:
#_out = concatenate([GlobalMaxPooling2D(_efficientnet.layers[-3]), GlobalAveragePooling2D(_efficientnet.layers[-3])])
#_efficientnet = Model(inputs=_efficientnet.inputs, output=_out)
#_efficientnet.layers.pop(0)

In [None]:
#for i in range(len(_efficientnet.layers)-50):
#    _efficientnet.layers[i].trainable = False

In [None]:
def augmentation_perimage(img):
    """Randomly flip one image left/right, then take a random 400x400x3 crop."""
    flipped = tf.image.random_flip_left_right(img)
    return tf.random_crop(flipped, [400,400,3])

def standard_augmentation(inputs):
    """Apply in-graph augmentation to a batch of images.

    Each image is randomly flipped and cropped by ``augmentation_perimage``.
    The batch is then resized to 480x480 with bicubic interpolation and every
    image is rotated by its own random angle.

    Args:
        inputs: batch of images (assumed rank 4, batch first -- TODO confirm).

    Returns:
        The augmented batch tensor.
    """
    augmented = tf.map_fn(augmentation_perimage, inputs)
    resized = tf.image.resize_bicubic(augmented, [480, 480])

    # Sample the angles inside the graph so they are re-drawn on every
    # evaluation and follow the actual batch size. Drawing them with NumPy
    # would freeze one fixed set of angles when the graph is built and tie
    # the layer to the global `batch_size`.
    # NOTE(review): angles are sampled in radians over a full turn, matching
    # the unit documented for tf.contrib.image.rotate -- confirm against the
    # installed TensorFlow version.
    angles = tf.random_uniform(
        tf.shape(resized)[:1], minval=0., maxval=2. * np.pi)

    return tf.contrib.image.rotate(resized, angles, interpolation='BILINEAR')

## PeleeNet backend

In [None]:
import keras
import keras.layers as layers

def conv_bn_relu(input_tensor, ch, kernel, padding="same", strides=1, weight_decay=5e-4):
    """Apply Conv2D -> BatchNormalization -> ReLU to ``input_tensor``.

    Args:
        input_tensor: tensor fed to the convolution.
        ch: number of convolution filters.
        kernel: convolution kernel size.
        padding: convolution padding mode.
        strides: convolution strides.
        weight_decay: L2 regularisation factor for the convolution kernel.
    """
    conv = layers.Conv2D(
        ch,
        kernel,
        padding=padding,
        strides=strides,
        kernel_regularizer=keras.regularizers.l2(weight_decay),
    )
    features = conv(input_tensor)
    features = layers.BatchNormalization()(features)
    return layers.Activation("relu")(features)

def stem_block(input_tensor):
    """Two-branch stem: a strided conv followed by parallel conv and max-pool paths.

    A 3x3 stride-2 convolution feeds (a) a 1x1 then 3x3 stride-2 convolution
    branch and (b) a 2x2 max-pool branch. The two are concatenated and fused
    by a 1x1 convolution with 32 filters.
    """
    trunk = conv_bn_relu(input_tensor, 32, 3, strides=2)

    conv_path = conv_bn_relu(trunk, 16, 1)
    conv_path = conv_bn_relu(conv_path, 32, 3, strides=2)

    pool_path = layers.MaxPool2D(2)(trunk)

    merged = layers.Concatenate()([conv_path, pool_path])
    return conv_bn_relu(merged, 32, 1)

def dense_block(input_tensor, num_layers, growth_rate, bottleneck_width):
    """Stack ``num_layers`` two-way dense layers on ``input_tensor``.

    Each layer runs two bottlenecked branches (one 3x3 conv and two stacked
    3x3 convs) on the running feature map and concatenates both outputs onto
    it. Every branch emits ``growth_rate / 2`` channels.
    """
    half_growth = int(growth_rate / 2)
    # Bottleneck width, rounded down to a multiple of 4.
    bottleneck_ch = int(half_growth*bottleneck_width/4) * 4

    features = input_tensor
    for _ in range(num_layers):
        short_path = conv_bn_relu(features, bottleneck_ch, 1)
        short_path = conv_bn_relu(short_path, half_growth, 3)

        long_path = conv_bn_relu(features, bottleneck_ch, 1)
        long_path = conv_bn_relu(long_path, half_growth, 3)
        long_path = conv_bn_relu(long_path, half_growth, 3)

        features = layers.Concatenate()([features, short_path, long_path])
    return features

def transition_layer(input_tensor, k, use_pooling=True):
    """Apply a 1x1 conv to ``k`` channels, optionally followed by 2x2 average pooling."""
    squeezed = conv_bn_relu(input_tensor, k, 1)
    if not use_pooling:
        return squeezed
    return layers.AveragePooling2D(2)(squeezed)

def PeleeNet(input_shape=(224,224,3), use_stem_block=True, n_classes=1000, interrupt=False, is_train=False):
    """Build a PeleeNet-style Keras model.

    Args:
        input_shape: shape of one input image.
        use_stem_block: prepend ``stem_block`` when true.
        n_classes: size of the softmax head (unused when ``interrupt``).
        interrupt: when true, return the model right after the last
            transition layer, without pooling or the classifier head.
        is_train: when true, insert a ``Lambda(standard_augmentation)`` layer
            directly after the input.

    Returns:
        A ``keras.models.Model``.
    """
    growth_rate = 32
    # Per stage: (dense layers, bottleneck width, transition output channels).
    stage_specs = list(zip([3,4,8,6], [1,2,4,4], [128,256,512,704]))
    final_stage = len(stage_specs) - 1

    model_input = layers.Input(input_shape)
    x = model_input
    if is_train:
        # NOTE(review): the declared output shape comes from the module-level
        # `input_image_shape`, not from the `input_shape` argument -- confirm
        # this coupling is intended.
        x = Lambda(standard_augmentation, output_shape=input_image_shape)(x)
    if use_stem_block:
        x = stem_block(x)

    for stage, (depth, width, channels) in enumerate(stage_specs):
        x = dense_block(x, depth, growth_rate, width)
        # Every stage except the last one is followed by pooling.
        x = transition_layer(x, channels, use_pooling=stage < final_stage)

    if interrupt:
        return keras.models.Model(model_input, x)

    x = layers.GlobalMaxPooling2D()(x)
    x = layers.Dense(n_classes, activation="softmax")(x)
    return keras.models.Model(model_input, x)

In [None]:
# Build the headless training backbone (augmentation layer included).
_peleenet = PeleeNet(input_image_shape, n_classes=NUM_CLASSES, interrupt=True, is_train=True)
_peleenet.layers.pop(0)
# Re-wrap the graph from its inputs to the output of its last layer.
# `outputs=` is the supported keyword; `output=` is a deprecated alias.
_peleenet = Model(inputs=_peleenet.inputs, outputs=_peleenet.layers[-1].output)

In [None]:
def _swish(x, beta=1.0):
    """Swish activation: ``x * sigmoid(beta * x)``."""
    gate = K.sigmoid(beta * x)
    return x * gate

In [None]:
class swish(Layer):
    """Keras layer applying ``x * sigmoid(beta * x)`` element-wise.

    The output shape equals the input shape.
    """

    def __init__(self, beta=1.0, **kwargs):
        super(swish, self).__init__(**kwargs)
        self.supports_masking = True
        # Stored in the backend float type.
        self.beta = K.cast_to_floatx(beta)

    def call(self, inputs):
        """Apply the activation to ``inputs``."""
        return _swish(inputs, beta=self.beta)

    def get_config(self):
        """Return the base layer config extended with ``'beta'`` as a Python float."""
        beta_value = float(self.beta)
        merged = dict(super(swish, self).get_config())
        merged['beta'] = beta_value
        return merged

    def compute_output_shape(self, input_shape):
        """The activation does not change the shape."""
        return input_shape

### Quadruplet Deep Network (ReLU1)

In [None]:
# Four image inputs of the quadruplet: reference, positive, negative, second negative.
_in0 = (Input(input_image_shape))
_in1 = (Input(input_image_shape))
_in2 = (Input(input_image_shape))
_in3 = (Input(input_image_shape))

# Embedding head: flatten the backbone feature map and project it to 64 dimensions.
embedding = Sequential(name='embedding')
embedding.add(Flatten())
embedding.add(Dense(128, activation='elu', name='fc_0'))
embedding.add(Dense(64, name='embedding_space'))

# The same backbone object is applied to every input, so its weights are shared.
ref = _peleenet(_in0)
pos = _peleenet(_in1)
neg = _peleenet(_in2)
neg2 = _peleenet(_in3)

# The same embedding head is shared across the four branches as well.
ref = embedding(ref)
pos = embedding(pos)
neg = embedding(neg)
neg2 = embedding(neg2)

# Pairs scored by the learned metric: (ref, pos), (ref, neg) and (neg2, neg).
positive_pair = concatenate([ref,pos])
negetive_pair = concatenate([ref,neg])
negetive_pair_2 = concatenate([neg2,neg])

# Learned metric: one linear unit whose output is clipped to [0, 1] by ReLU(max_value=1.).
metric = Sequential(name='metric')
metric.add(Dense(1, name='diff_or_same'))
metric.add(ReLU(max_value=1.))

positive_pair_distance = metric(positive_pair)
negetive_pair_distance = metric(negetive_pair)
negetive_pair_2_distance = metric(negetive_pair_2)

# Give every score a trailing unit axis, then stack the three scores along axis 1.
# NOTE(review): the resulting `probs` tensor is presumably (batch, 3, 1) -- confirm.
_out0 = Reshape((-1, 1))(positive_pair_distance)
_out1 = Reshape((-1, 1))(negetive_pair_distance)
_out2 = Reshape((-1, 1))(negetive_pair_2_distance)

probs = concatenate([_out0, _out1, _out2], axis=1, name='probs')

# The model's single output is `probs`.
model = Model(inputs=[_in0, _in1, _in2, _in3], outputs=probs)

In [None]:
# https://www.pyimagesearch.com/2018/06/04/keras-multiple-outputs-and-multiple-losses/
# losses = {
# 	"category_output": "categorical_crossentropy",
# 	"color_output": "categorical_crossentropy",
# }
# lossWeights = {"category_output": 1.0, "color_output": 1.0}
#  
# # initialize the optimizer and compile the model
# print("[INFO] compiling model...")
# opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
# model.compile(optimizer=opt, loss=losses, loss_weights=lossWeights,
# metrics=["accuracy"])

In [None]:
def quadruplet_loss_with_binary_cross_entropy(alpha=.2, beta=.1, batch_size=128):
    """Return a Keras loss combining a quadruplet hinge with binary cross-entropy.

    ``y_pred`` is expected to be shaped (batch, 3, 1), holding the scores of
    the positive pair, the first negative pair and the second negative pair.
    The hinge terms push the positive score towards 1 and the negative scores
    towards 0. The cross-entropy term uses the first ``batch_size // 2``
    samples, labelling their positive-pair scores 1 and their first
    negative-pair scores 0.

    Args:
        alpha: margin between the positive and the first negative pair.
        beta: margin between the positive and the second negative pair.
        batch_size: batch size the loss is evaluated on.

    Returns:
        A function ``(y_true, y_pred) -> tensor``; ``y_true`` is ignored.
    """
    half = batch_size // 2
    # Targets: `half` ones (positive pairs) followed by `half` zeros (negative pairs).
    targets = np.concatenate([np.ones((half, 1)), np.zeros((half, 1))])

    def _quadruplet_loss(y_true, y_pred):
        del y_true

        positive_gap = tf.square(1. - y_pred[:, 0])
        quadruplet = \
            tf.maximum(positive_gap - tf.square(1. - y_pred[:, 1]) + alpha, 0) \
            + tf.maximum(positive_gap - tf.square(1. - y_pred[:, 2]) + beta, 0)

        # (half, 2, 1) -> (2, half, 1) -> (2 * half, 1): all positive scores
        # first, then all negative scores, matching the order of `targets`.
        # Reshaping without the transpose would interleave positive and
        # negative scores and misalign them with the labels.
        pair_scores = tf.reshape(
            tf.transpose(y_pred[:half, :2], [1, 0, 2]), (2 * half, 1))

        cross_entropy = binary_crossentropy(
            tf.convert_to_tensor(targets, dtype=tf.float32), pair_scores)

        # Reduce the cross-entropy to a scalar before adding it, so the sum
        # keeps the per-sample shape of the hinge term instead of broadcasting
        # (batch, 1) against (batch,).
        return quadruplet + tf.reduce_mean(cross_entropy)

    return _quadruplet_loss

In [None]:
def quadruplet_loss(alpha=.5, beta=.2, batch_size=128):
    """Return a Keras loss made of two hinge terms over ``y_pred``.

    Per sample the loss is
    ``max((1 - p0)^2 - (1 - p1)^2 + alpha, 0) + max((1 - p0)^2 - (1 - p2)^2 + beta, 0)``
    where ``p0, p1, p2`` are ``y_pred[:, 0]``, ``y_pred[:, 1]`` and
    ``y_pred[:, 2]``.

    Args:
        alpha: margin of the first hinge term.
        beta: margin of the second hinge term.
        batch_size: accepted for signature compatibility; unused.

    Returns:
        A function ``(y_true, y_pred) -> tensor``; ``y_true`` is ignored.
    """
    # y_pred layout: bs , 3 , 1
    def _quadruplet_loss(y_true, y_pred):
        del y_true

        positive_gap = tf.square(1. - y_pred[:, 0])
        first_hinge = tf.maximum(
            positive_gap - tf.square(1. - y_pred[:, 1]) + alpha, 0)
        second_hinge = tf.maximum(
            positive_gap - tf.square(1. - y_pred[:, 2]) + beta, 0)

        return first_hinge + second_hinge

    return _quadruplet_loss

In [None]:
# NOTE(review): `sgd`, `losses` and `lossWeights` are built here but none of
# them is passed to `model.compile` below -- confirm whether the normalized
# optimizer and the combined loss were meant to be used.
sgd = SGD(1e-3, momentum=0.9, nesterov=True)
sgd = NormalizedOptimizer(sgd, normalization='l2')

losses = {
  #  "logits": "kullback_leibler_divergence", #lambda y_true, y_pred: mean_squared_error(y_true, K.clip(y_pred, 0., 4.)), #tf.nn.softmax_cross_entropy_with_logits_v2,
    "probs": quadruplet_loss_with_binary_cross_entropy(alpha=.2, beta=.1,batch_size=batch_size),
}
# NOTE(review): "logits" has no matching entry in `losses` above.
lossWeights = {"logits": .5, "probs": 1.}

# The model is compiled with a plain Nesterov SGD (lr 1e-5) and the
# quadruplet hinge loss.
model.compile(
    optimizer= SGD(1e-5, momentum=0.9, nesterov=True),
    loss=quadruplet_loss(alpha=.5, beta=.2,batch_size=batch_size),
)

In [None]:
#sgd = SGD(1e-5, momentum=0.9, nesterov=True)
#sgd = NormalizedOptimizer(sgd, normalization='l2')
#
#model.compile(
#    optimizer=sgd,
#    loss=quadruplet_loss(alpha=.8, beta=.6,batch_size=batch_size)
#)

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
#RAdamW(
#        lr=1e-6, beta_1=0.9, beta_2=0.999,
#        epsilon=None, decay=0., weight_decay=0.025, amsgrad=True,
#        total_steps=steps_per_epoch*epochs, warmup_proportion=0.1, min_lr=1e-7, 
#        batch_size=batch_size, samples_per_epoch=epochs*batch_size,
#        epochs=epochs, eta=1.
#    )numpy.random.random()

## Define data generator

In [None]:
def preprocess(im, erase_rate=0.2, src_width=480, src_height=480):
    """Subtract the local background of an image and randomly erase one rectangle.

    The image is blended with its median-blurred version
    (``4 * im - 4 * blur + 128``), then one randomly placed rectangle of the
    result is overwritten with uniform noise.

    No ``@jit`` decorator is applied: the body consists of OpenCV and NumPy
    calls only, which numba's ``jit`` has nothing to compile.

    Args:
        im: image array as returned by ``cv2.imread``
            (assumed H x W x C, uint8 -- TODO confirm).
        erase_rate: upper bound of the erased rectangle's side lengths, as a
            fraction of ``src_width`` / ``src_height``.
        src_width: image width in pixels used to place the rectangle.
        src_height: image height in pixels used to place the rectangle.

    Returns:
        The processed image; the erase is applied to the blended result, not
        to ``im``.
    """
    # Subtract the median-blurred background. The kernel size is odd and
    # scales with the longest image side.
    k = np.max(im.shape)//20*2+1
    bg = cv2.medianBlur(im, k)
    result = cv2.addWeighted(im, 4, bg, -4, 128)

    # Random erase. `max(1, ...)` keeps randint's upper bound valid when the
    # rate is so small that the maximum side length rounds down to zero.
    mask_width = np.random.randint(0, max(1, int(src_width * erase_rate)))
    mask_height = np.random.randint(0, max(1, int(src_height * erase_rate)))
    offset_x = np.random.randint(0, src_width - mask_width + 1)
    offset_y = np.random.randint(0, src_height - mask_height + 1)

    # Rows index the height (y) and columns the width (x).
    region = result[offset_y:offset_y + mask_height,
                    offset_x:offset_x + mask_width]
    # Fill with noise on the image's own 0..255 scale. Noise drawn from
    # [0, 1) would collapse to a constant when stored in 8-bit pixels.
    region[...] = np.random.randint(0, 256, size=region.shape)

    return result

In [None]:
def quadra_generator(df, file_name_column, label_column, batch_size, input_shape):
    """Endlessly yield batches of (reference, positive, negative, negative2) images.

    For every sample three distinct labels are drawn. The reference and
    positive images are two different files of the first label; the two
    negatives come from the second and third label.

    Args:
        df: DataFrame listing image files and their labels.
        file_name_column: column holding the image file paths.
        label_column: column holding the labels (convertible with ``int``).
        batch_size: number of quadruplets per batch.
        input_shape: shape of one preprocessed image.

    Yields:
        ``([ref, pos, neg, neg2], dummy_targets)`` where each image array is
        shaped ``(batch_size, *input_shape)`` and scaled to [0, 1], and
        ``dummy_targets`` is a zero vector of length ``batch_size``
        (placeholder targets only).

    Raises:
        FileNotFoundError: if an image file cannot be read.
    """
    # Key the file lists by the integer label itself, so a lookup never
    # depends on the order in which labels first appear in `df`.
    files_by_label = {
        int(label): group[file_name_column].tolist()
        for label, group in df.groupby(label_column)
    }
    labels = sorted(files_by_label)

    def _load(path):
        # cv2.imread signals an unreadable file by returning None.
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError("Could not read image: %s" % path)
        return preprocess(image)

    while True:
        # float32 halves the batch memory compared with NumPy's default float64.
        x = np.empty((4, batch_size, *input_shape), dtype=np.float32)

        for i in range(batch_size):
            anchor_label, neg_label, neg2_label = np.random.choice(
                labels, size=3, replace=False)

            ref_file_name, pos_file_name = np.random.choice(
                files_by_label[int(anchor_label)], size=2, replace=False)

            quadruplet_files = (
                ref_file_name,
                pos_file_name,
                np.random.choice(files_by_label[int(neg_label)]),
                np.random.choice(files_by_label[int(neg2_label)]),
            )
            for slot, file_name in enumerate(quadruplet_files):
                x[slot, i] = _load(file_name)

        x /= 255.

        # Zero-filled placeholder targets rather than uninitialised memory.
        yield [x[0], x[1], x[2], x[3]], np.zeros(batch_size)

## Training

In [None]:
# Train on quadruplets drawn from the balanced `train` table and validate on
# quadruplets drawn from `tr`. Both generators are endless, so the epoch and
# validation lengths are fixed by `steps_per_epoch` / `validation_steps`.
history = model.fit_generator(
    quadra_generator(train, "image", "level", batch_size, input_image_shape),
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=quadra_generator(tr, "id_code", "diagnosis", batch_size, input_image_shape),
    validation_steps=16,
#    callbacks=[checkpoint, lr_decay]
)

In [None]:
!ls

In [None]:
model.save_weights("quadruplet.h5")
######################################################################

# Capture the trained backbone weights before `_peleenet` is rebound. The
# freshly built backbone below receives new auto-generated layer names, so a
# name-based weight load cannot be relied on to restore them.
_trained_backbone_weights = _peleenet.get_weights()

# Rebuild the backbone without the augmentation layer for inference.
_peleenet = PeleeNet(input_image_shape, n_classes=NUM_CLASSES, interrupt=True, is_train=False)
_peleenet.layers.pop(0)
_peleenet = Model(inputs=_peleenet.inputs, outputs=_peleenet.layers[-1].output)

# NOTE(review): this assumes the augmentation Lambda layer holds no weights,
# so both backbones list their weights in the same order -- confirm.
_peleenet.set_weights(_trained_backbone_weights)

# Single-image embedding model: backbone followed by the trained embedding head.
ref = _peleenet(_in0)
ref = embedding(ref)
model = Model(inputs=_in0, outputs=ref)

In [None]:
#_in = Input(input_image_shape)
#
#ref = _efficientnet(_in)
#
#embedding = Sequential(name='embedding')
##embedding.add(Flatten())
#embedding.add(Dense(512, name='embedding_layer_1'))
#embedding.add(Dropout(rate=0.1))
#embedding.add(Dense(64, name='embedding_layer_2'))
#_out = embedding(ref)
#
#model = Model(inputs=_in, outputs=_out)
#model.compile(
#    optimizer="adam", 
#    loss=quadruplet_loss(alpha=1., beta=0.5, batch_size=batch_size)
#)

In [None]:
# Load the saved weights into the layers of `model` whose names match those
# stored in the file.
# NOTE(review): with by_name=True, layers without a same-named entry in the
# file are presumably left untouched -- confirm every intended layer matches.
model.load_weights("quadruplet.h5", by_name=True)

In [None]:
# Re-read the `image` / `level` label table so that `image` holds bare ids
# again (as strings).
tr15 = pd.read_csv(
    '../input/all-train-csv/trainLabels.csv',
    dtype={
        'image': str, 
        'level': str
    }
)
tr15.columns

In [None]:
# Read the `id_code` / `diagnosis` label table (as strings) into `tr19`.
tr19 = pd.read_csv(
    '../input/all-train-csv/train.csv',
    dtype={
        'id_code': str, 
        'diagnosis': str
    }
)

In [None]:
# Balance `tr15` by undersampling every grade to the size of the rarest one.
nsmpl = tr15.groupby("level").count().image.min()

# Sample WITHOUT replacement: `nsmpl` is the smallest class size, so every
# grade has at least that many rows, and drawing with replacement would only
# introduce duplicate rows while dropping real ones.
undersampled = [
    tr15[tr15.level == str(i)].sample(nsmpl, replace=False)
    for i in range(NUM_CLASSES)
]

# Concatenate the per-grade samples and shuffle the rows.
train = pd.concat(undersampled).sample(frac=1)

In [None]:
# Turn the bare image ids into file names by appending the extension.
train.image = train.image + '.jpg'

## Prediction

In [None]:
# Generator that only rescales pixel values to [0, 1]; no augmentation.
# NOTE(review): training batches were read with cv2.imread and passed through
# `preprocess`, whereas this generator only rescales -- confirm that the
# inference inputs are meant to differ from the training inputs.
test_datagen=ImageDataGenerator(
    rescale=1./255 #,
#    shear_range=0.,
#    zoom_range=0.2,
#    horizontal_flip=False,
#    vertical_flip = True,
#    rotation_range=360
)

# Stream the images listed in `train`, one per batch and unshuffled, so the
# prediction order follows the row order of the DataFrame.
test_generator=test_datagen.flow_from_dataframe(
    dataframe=train,
    directory="../input/aptos-1519-hard-crop-480x480/cropped_resized_train_15_480",
    x_col='image',
    y_col=None,
    target_size=(480, 480),
    batch_size=1,
    class_mode=None,
    shuffle=False
)

In [None]:
# Restart the generator so predictions begin at the first row.
test_generator.reset()

# Compute one embedding per image; the step count covers the generator once.
pred=model.predict_generator(
   test_generator,
   steps=test_generator.n//test_generator.batch_size,
   verbose=1
)

pred.shape

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.svm import SVC

import matplotlib.cm as cm

import umap

In [None]:
# Project the embeddings with UMAP and colour the points by grade.
# Note: this rebinds the name `embedding`, which earlier referred to the
# Keras embedding head.
embedding = umap.UMAP().fit_transform(pred)

plt.scatter(embedding[:,0],embedding[:,1],c=train.level.astype(int),cmap=cm.tab10)
plt.colorbar()

In [None]:
# Project the embeddings to 2-D with t-SNE and colour the points by grade.
X_reduced = TSNE(n_components=2).fit_transform(pred)

plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=train.level.astype(int),cmap=cm.tab10)
plt.colorbar()

In [None]:
# Project the embeddings to 2-D with PCA and colour the points by grade.
X_reduced = PCA(n_components=2).fit_transform(pred)

plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=train.level.astype(int),cmap=cm.tab10)
plt.colorbar()

In [None]:
# Fit a support-vector classifier on the embeddings, using the grade as target.
svm = SVC(gamma='auto')
svm.fit(pred, train.level)
#svm.score(pred,train.level)

In [None]:
# Re-read the `id_code` / `diagnosis` label table (as strings).
tr19 = pd.read_csv(
    '../input/all-train-csv/train.csv',
    dtype={
        'id_code': str,
        'diagnosis': str
    }
)

# Turn the bare image ids into file names.
tr19.id_code = tr19.id_code + '.jpg'

# Balance `tr19` by undersampling every grade to the size of the rarest one.
nsmpl = tr19.groupby("diagnosis").count().id_code.min()

# Sample WITHOUT replacement: `nsmpl` is the smallest class size, so every
# grade has at least that many rows, and drawing with replacement would only
# introduce duplicate rows while dropping real ones.
undersampled = [
    tr19[tr19.diagnosis == str(i)].sample(nsmpl, replace=False)
    for i in range(NUM_CLASSES)
]

# Concatenate the per-grade samples and shuffle the rows.
train = pd.concat(undersampled).sample(frac=1)

In [None]:
# Generator that only rescales pixel values to [0, 1]; no augmentation.
# NOTE(review): training batches were read with cv2.imread and passed through
# `preprocess`, whereas this generator only rescales -- confirm that the
# inference inputs are meant to differ from the training inputs.
test_datagen=ImageDataGenerator(
    rescale=1./255 #,
#    shear_range=0.,
#    zoom_range=0.2,
#    horizontal_flip=False,
#    vertical_flip = True,
#    rotation_range=360
)

# Stream the images listed in `train`, one per batch and unshuffled, so the
# prediction order follows the row order of the DataFrame.
test_generator=test_datagen.flow_from_dataframe(
    dataframe=train,
    directory="../input/aptos-1519-hard-crop-480x480/cropped_resized_train_19_480",
    x_col='id_code',
    y_col=None,
    target_size=(480, 480),
    batch_size=1,
    class_mode=None,
    shuffle=False
)

In [None]:
# Restart the generator so predictions begin at the first row.
test_generator.reset()

# Compute one embedding per image; the step count covers the generator once.
pred=model.predict_generator(
   test_generator,
   steps=test_generator.n//test_generator.batch_size,
   verbose=1
)

pred.shape