In [None]:
import os
import shutil


import openslide

import skimage.io

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import PIL
from IPython.display import Image, display
from collections import Counter

import cv2
import skimage.io
from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer

from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

In [None]:
import tensorflow as tf

# `tf.test.is_gpu_available()` is deprecated; listing the physical GPU devices is
# the supported way to check for an accelerator. The cell still evaluates to a
# bool so the displayed output keeps the same meaning (True = GPU visible).
len(tf.config.list_physical_devices('GPU')) > 0

In [None]:
# Directory the training PNGs are read from
dataDir = '../input/panda-resized-train-data-512x512/train_images/train_images/'

# Label tables from the competition data, indexed by image_id
trainLabels = pd.read_csv(
    '/kaggle/input/prostate-cancer-grade-assessment/train.csv',
    index_col='image_id',
)
testDF = pd.read_csv(
    '/kaggle/input/prostate-cancer-grade-assessment/test.csv',
    index_col='image_id',
)

# Output cropped images
#cropDir = '/kaggle/working/train_images/'

# Image shape (height, width, channels) and number of training epochs
inputShape = (224, 224, 3)
epochs = 30

In [None]:
# Number of images fed to the network per batch.
# Keep this smaller than the total number of images being analyzed.
batchSize = 16

# Initial learning rate passed to the optimizer
INIT_LR = 1e-4

In [None]:
# The pretrained Xception weights expect pixels scaled to [-1, 1]. Without a
# preprocessing step the generator yields raw 0-255 values, which the frozen
# base network was never trained on. `preprocess_input` applies that scaling
# after augmentation, to every image this generator produces (both subsets).
from keras.applications.xception import preprocess_input

trainDatagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    # Random augmentation
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
    # Fraction of rows reserved for the "validation" subset
    validation_split=0.20,
)

In [None]:
# Two-column frame consumed by flow_from_dataframe:
#   x_col - image file name (image_id + ".png")
#   y_col - isup_grade converted to a string label
fileNames = (trainLabels.index + ".png").tolist()
gradeStrings = trainLabels.isup_grade.astype(str).tolist()
trainDF = pd.DataFrame({'x_col': fileNames, 'y_col': gradeStrings})

# Optional sanity check: uncomment to confirm every listed file exists in dataDir
# for x in trainDF.x_col:
#     assert x in os.listdir(dataDir)

In [None]:
# Batches of augmented images with one-hot grade labels, drawn from the
# "training" portion of trainDatagen's validation split.
trainGenerator = trainDatagen.flow_from_dataframe(
    dataframe=trainDF,
    directory=dataDir,  # folder the x_col file names are resolved against
    x_col="x_col",
    y_col="y_col",
    target_size=inputShape[:2],
    class_mode="categorical",
    batch_size=batchSize,
    subset="training",
)

In [None]:
# Validation has to score the model on unaltered images, so it gets its own
# generator with no rotation/shift/zoom/flip. Pixel-level preprocessing is copied
# from trainDatagen so both subsets are scaled identically.
valDatagen = ImageDataGenerator(
    rescale=trainDatagen.rescale,
    preprocessing_function=trainDatagen.preprocessing_function,
)

# Every row that the training subset did not claim belongs to validation.
valDF = trainDF[~trainDF.x_col.isin(trainGenerator.filenames)]

valGenerator = valDatagen.flow_from_dataframe(
    valDF, x_col="x_col", y_col="y_col",
    directory=dataDir,  # folder the x_col file names are resolved against
    # Reuse the training label order so one-hot columns mean the same grade
    classes=list(trainGenerator.class_indices),
    batch_size=batchSize,
    class_mode="categorical",
    # Fixed order: every epoch is validated on the same batches
    shuffle=False,
    target_size=(inputShape[0], inputShape[1]))

In [None]:
from keras.applications import Xception
from keras.models import Sequential
from keras.layers import Dense, Flatten, GlobalAveragePooling2D

In [None]:
# Number of distinct ISUP grades present in the labels; sizes the softmax head
numClasses = len(set(trainLabels.isup_grade))
weightFile = "/kaggle/input/keras-pretrained-models/xception_weights_tf_dim_ordering_tf_kernels_notop.h5"

# Pretrained Xception without its classifier; pooling='avg' yields one feature
# vector per image.
xceptionBase = Xception(include_top=False, pooling='avg', weights=weightFile)
# The base is already trained: freeze it so only the new head is fitted.
xceptionBase.trainable = False

myModel = Sequential()
myModel.add(xceptionBase)
# Head width follows numClasses instead of a hard-coded 6, so the model stays
# consistent with the labels it is trained on.
myModel.add(Dense(numClasses, activation='softmax'))

In [None]:
# Show the layer-by-layer summary of myModel (Xception base + Dense softmax head)
myModel.summary()

In [None]:
# Optimization function: Adam with the learning rate decayed over the run.
# `learning_rate` replaces the deprecated `lr` keyword.
# NOTE(review): `decay` is only accepted by the legacy Keras optimizers - confirm
# against the installed Keras version before upgrading.
opt = Adam(learning_rate=INIT_LR, decay=INIT_LR / epochs)

# The head is a multi-class softmax fed one-hot labels (class_mode="categorical"),
# so the matching loss is categorical cross-entropy. With "binary_crossentropy"
# Keras also resolved "accuracy" to element-wise binary accuracy, which overstates
# how often the predicted grade is actually correct.
myModel.compile(loss="categorical_crossentropy",
              optimizer=opt,
              metrics=["accuracy"])

In [None]:
# Batches drawn per epoch, capped at what each generator can supply in one pass
# so a smaller dataset cannot leave an epoch short of data.
trainSteps = min(128, len(trainGenerator))
valSteps = min(128, len(valGenerator))

# `fit` accepts generators directly; `fit_generator` is deprecated.
# H keeps the per-epoch loss/metric history for plotting.
H = myModel.fit(trainGenerator,
                steps_per_epoch=trainSteps,
                epochs=epochs,
                validation_data=valGenerator,
                validation_steps=valSteps,
                verbose=1)

In [None]:
# One row per epoch, one column per recorded loss/metric
H_df = pd.DataFrame(H.history)

# Training vs. validation loss
lossAx = H_df[['loss', 'val_loss']].plot(title='Loss per epoch')
lossAx.set_xlabel('epoch')
lossAx.set_ylabel('loss')

# Training vs. validation accuracy
accAx = H_df[['accuracy', 'val_accuracy']].plot(title='Accuracy per epoch')
accAx.set_xlabel('epoch')
accAx.set_ylabel('accuracy');

In [None]:
# Persist the trained network; the epoch count is embedded in the file name
modelPath = f'/kaggle/working/Xception_{epochs}.model'
myModel.save(modelPath)

In [None]:
# trainGenerator shuffles its rows and applies random augmentation, so predicting
# on it returns rows in an unknown order that cannot be matched back to labels.
# Predict instead on the same training rows in fixed order, without augmentation.
trainSubsetDF = trainDF[trainDF.x_col.isin(trainGenerator.filenames)]

# Copy only the pixel preprocessing from trainDatagen so inputs are scaled the
# same way they were during training.
evalDatagen = ImageDataGenerator(
    rescale=trainDatagen.rescale,
    preprocessing_function=trainDatagen.preprocessing_function,
)
evalGenerator = evalDatagen.flow_from_dataframe(
    trainSubsetDF, x_col="x_col", y_col="y_col",
    directory=dataDir,
    batch_size=batchSize,
    class_mode=None,   # images only; labels are not needed for inference
    shuffle=False,     # keep row order stable
    target_size=(inputShape[0], inputShape[1]))

# predictions[i] is the softmax output for evalGenerator.filenames[i]
predictions = myModel.predict(evalGenerator)

In [None]:
# Display the array returned by myModel.predict
predictions

In [None]:
# `pred` was never assigned in this notebook, so this cell raised NameError on a
# fresh Restart & Run All. Assumed intent: the predicted class per row, i.e. the
# column index with the highest softmax probability.
# trainGenerator.class_indices maps each grade label to its column index.
pred = predictions.argmax(axis=1)
pred

In [None]:
# Display the file-name / grade-label frame that feeds the generators
trainDF