In [None]:
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from keras.models import load_model
from keras.applications.xception import Xception
import time
import seaborn as sns
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.model_selection import *
from keras.preprocessing.image import ImageDataGenerator 
from keras.applications.vgg16 import VGG16
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout, BatchNormalization, Input
from keras.models import Sequential, Model
import numpy as np
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import confusion_matrix
import keras.backend as K
import tensorflow as tf
from keras.optimizers import SGD
import matplotlib.pyplot as plt

from numpy.random import seed
seed(1)
import pandas as pd
import openslide

In [None]:
import pathlib

def create_df(data_dir, labels_df=None):
    """Build a table of image paths and Gleason scores for one image folder.

    Every entry of ``data_dir`` becomes one row. The first 32 characters of
    the file name are taken as the ``image_id`` and looked up in the label
    table to obtain the matching ``gleason_score``.

    Args:
        data_dir: folder containing the image files (with or without a
            trailing path separator).
        labels_df: DataFrame with ``image_id`` and ``gleason_score`` columns.
            Defaults to the notebook-level ``train`` table, which therefore
            must be loaded before calling this function without the argument.

    Returns:
        DataFrame with the columns ``images`` (file path) and
        ``gleason_score``; empty (but with both columns) for an empty folder.

    Raises:
        KeyError: if a file's image id has no row in the label table.
    """
    import os  # local import: only needed here to join paths safely

    if labels_df is None:
        labels_df = train  # notebook-level label table read from train.csv

    # One lookup table instead of filtering the whole frame once per file.
    # Keeping the first row per id mirrors the original ``unique()[0]``.
    score_by_id = (
        labels_df.drop_duplicates(subset='image_id')
        .set_index('image_id')['gleason_score']
        .to_dict()
    )

    images = []
    labels = []
    # Sorted so the row order does not depend on filesystem listing order.
    for fichero in sorted(pathlib.Path(data_dir).iterdir()):
        image_id = fichero.name[:32]
        if image_id not in score_by_id:
            raise KeyError(
                "No gleason_score found for image id {!r} (file {!r})".format(
                    image_id, fichero.name))
        images.append(os.path.join(data_dir, fichero.name))
        labels.append(score_by_id[image_id])

    # Explicit columns keep the schema stable even when the folder is empty.
    return pd.DataFrame({'images': images, 'gleason_score': labels})
    

In [None]:
# Load the label table from the competition CSV; create_df() reads the
# 'image_id' and 'gleason_score' columns of this frame.
train=pd.read_csv('/kaggle/input/prostate-cancer-grade-assessment/train.csv')
train.head()

# Eliminación de elementos mal etiquetados

In [None]:
# Remove the row with index label 7273. Reassigning with errors='ignore'
# (instead of inplace=True) keeps this cell safe to re-run: a second
# execution no longer raises KeyError because the label is already gone.
train = train.drop(index=[7273], errors='ignore')

# Rewrite the "negative" label as "0+0" so every score shares the "X+Y" format.
train['gleason_score'] = train['gleason_score'].replace("negative", "0+0")

In [None]:
# Count how many rows of the label table carry each gleason_score value.
train['gleason_score'].value_counts()

# Dataset de entrenamiento creado

**mypandadataset** contiene las imágenes de entrenamiento, validación y prueba, obtenidas al balancear las imágenes del dataset **prostate-cancer-grade-assessment** de modo que haya un mínimo de 43 imágenes por clase. El número total de clases es de 10, una por cada categoría de Gleason.

In [None]:
# Build the training table (image path + gleason_score) from the files in
# the train folder and preview its first rows.
train_dir = '../input/mypandadataset/train/'
train_df = create_df(train_dir)
train_df.head()

In [None]:
# Number of training images per gleason_score class.
train_df['gleason_score'].value_counts()

# Dataset de validacion creado

In [None]:
# Build the validation table (image path + gleason_score) from the files in
# the val folder and preview its first rows.
val_dir = '../input/mypandadataset/val/'
val_df = create_df(val_dir)
val_df.head()

In [None]:
# Number of validation images per gleason_score class.
val_df['gleason_score'].value_counts()

# Creación de los generadores de imágenes para los datos de entrenamiento y validación

In [None]:
# Training images are rescaled to [0, 1] and randomly flipped (augmentation).
train_datagen = ImageDataGenerator(rescale=1./255,
                                   horizontal_flip=True,
                                   vertical_flip=True)

# Validation images get the same rescaling but NO augmentation: the original
# cell aliased val_datagen to train_datagen, so validation images were also
# randomly flipped and the validation metrics changed from pass to pass.
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='images',
    y_col='gleason_score',
    target_size=(224, 224),
    batch_size=8,
    shuffle=True,
    class_mode='categorical')

# shuffle=False keeps batches in dataframe order, so predictions made with
# this generator line up with validation_generator.classes.
validation_generator = val_datagen.flow_from_dataframe(
    val_df,
    x_col='images',
    y_col='gleason_score',
    target_size=(224, 224),
    batch_size=8,
    shuffle=False,
    class_mode='categorical')

In [None]:
def vgg16_model( num_classes=None):
    """Build a VGG16-based image classifier with a softmax head.

    The VGG16 convolutional base (ImageNet weights, no top layers, 224x224x3
    input) is followed by a Flatten layer and a single Dense softmax layer.

    Args:
        num_classes: number of output classes; must be a positive integer.
            The ``None`` default is kept for interface compatibility only and
            is rejected explicitly.

    Returns:
        An uncompiled Keras ``Model`` mapping the VGG16 input to the softmax
        output.

    Raises:
        ValueError: if ``num_classes`` is None or smaller than 1.
    """
    # Fail early with a clear message instead of an obscure error raised
    # from inside the Dense layer constructor.
    if num_classes is None or num_classes < 1:
        raise ValueError(
            "num_classes must be a positive integer, got {!r}".format(num_classes))

    # Offline alternative: pass the path of a local '*_notop.h5' weights file
    # as `weights` instead of 'imagenet'.
    base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    features = Flatten()(base.output)
    output = Dense(num_classes, activation='softmax')(features)
    return Model(base.input, output)

# Instantiate the VGG16-based classifier with a 10-unit softmax output.
vgg_conv=vgg16_model(10)

# Print the layer-by-layer summary of the assembled model.
vgg_conv.summary()

In [None]:
from keras.applications.vgg19 import VGG19
def vgg19_model(num_classes = None):
    """Build a VGG19-based image classifier with a small regularised head.

    The VGG19 convolutional base (ImageNet weights, no top layers, 224x224x3
    input) is followed by Dropout(0.3), Flatten, Dense(32, relu),
    Dropout(0.2) and a Dense softmax output layer.

    Args:
        num_classes: number of output classes; must be a positive integer.
            The ``None`` default is kept for interface compatibility only and
            is rejected explicitly.

    Returns:
        An uncompiled Keras ``Model`` mapping the VGG19 input to the softmax
        output.

    Raises:
        ValueError: if ``num_classes`` is None or smaller than 1.
    """
    # Fail early with a clear message instead of an obscure error raised
    # from inside the Dense layer constructor.
    if num_classes is None or num_classes < 1:
        raise ValueError(
            "num_classes must be a positive integer, got {!r}".format(num_classes))

    # Offline alternative: pass the path of a local VGG19 weights file as
    # `weights` instead of 'imagenet'.
    base = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = Dropout(0.3)(base.output)
    x = Flatten()(x)
    x = Dense(32, activation='relu')(x)
    x = Dropout(0.2)(x)
    output = Dense(num_classes, activation='softmax')(x)
    return Model(base.input, output)

#vgg19_conv = vgg19_model(10)
#vgg19_conv.summary()

In [None]:
# Plain SGD with a learning rate of 1e-4. `learning_rate` is the supported
# argument name; the older `lr` alias is deprecated and rejected by newer
# Keras releases.
opt = SGD(learning_rate=1e-4)
vgg_conv.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
#vgg19_conv.compile(loss='categorical_crossentropy',optimizer=opt ,metrics=['accuracy'])

In [None]:
epochs = 60
batch_size=8#16  # kept for reference; the generators define the effective batch size

# len(generator) is the number of batches needed to see every image once,
# including the final partial batch. The previous floor division
# (n_images // batch_size) silently skipped up to batch_size-1 images per
# pass and could drift from the batch size configured on the generators.
nb_train_steps = len(train_generator)
nb_val_steps = len(validation_generator)
#nb_train_steps = 10#128
#nb_val_steps = 5#64
print("Number of training and validation steps: {} and {}".format(nb_train_steps,nb_val_steps))

# Entrenamiento de la red VGG16 durante 60 épocas con un batch size de 8

In [None]:
# Train the VGG16 classifier. Model.fit accepts generators directly;
# fit_generator is deprecated and no longer available in recent Keras.
vgg_hist = vgg_conv.fit(
    train_generator,
    steps_per_epoch=nb_train_steps,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_val_steps
)

# VGG19 alternative (kept as a comment, not as a string literal, so it is
# not echoed as the cell output):
#vgg_hist = vgg19_conv.fit(
#    train_generator,
#    steps_per_epoch=nb_train_steps,
#    epochs=epochs,
#    validation_data=validation_generator,
#    validation_steps=nb_val_steps
#)

In [None]:
# Write vgg_conv to 'VGG16_Baseline.h5'; the evaluation cell reloads this file.
# NOTE(review): Model.save() is not expected to return the model, so
# vgg_baseline is presumably None — confirm before relying on this name.
vgg_baseline = vgg_conv.save('VGG16_Baseline.h5')
#vgg_baseline = vgg19_conv.save('VGG19_Baseline.h5')

In [None]:
# Write only the weights of vgg_conv to 'vgg16_baseline_weights.h5'.
# NOTE(review): save_weights() is not expected to return the weights, so
# vgg_baseline_weights is presumably None — confirm before relying on it.
vgg_baseline_weights = vgg_conv.save_weights('vgg16_baseline_weights.h5')
#vgg_baseline_weights = vgg19_conv.save_weights('vgg19_baseline_weights.h5')

In [None]:
def plotTraining(hist, epochs, typeData):
    """Plot one curve of a training history on a shared matplotlib figure.

    Loss curves ("loss", "val_loss") are drawn on figure 1 and accuracy
    curves ("accuracy", "val_accuracy") on figure 2, so calling the function
    once per curve overlays training and validation on the same axes.
    Accuracy values are shown as percentages.

    Args:
        hist: object exposing a ``history`` dict that maps metric names to
            per-epoch lists (e.g. the value returned by ``Model.fit``).
        epochs: maximum number of epochs to plot. Fewer points are drawn when
            the history is shorter (for example after early stopping).
        typeData: one of "loss", "accuracy", "val_loss", "val_accuracy".

    Raises:
        ValueError: if ``typeData`` is not one of the supported curves.
    """
    # typeData -> (figure number, y label, show as percent, line format, legend label)
    curve_specs = {
        "loss": (1, 'Loss', False, '-r', 'Loss Training'),
        "accuracy": (2, 'Accuracy (%)', True, '-r', 'Accuracy Training'),
        "val_loss": (1, 'Loss', False, '--b', 'Loss Validate'),
        "val_accuracy": (2, 'Accuracy (%)', True, '--b', 'Accuracy Validate'),
    }
    if typeData not in curve_specs:
        raise ValueError(
            "typeData must be one of {}, got {!r}".format(
                sorted(curve_specs), typeData))
    fig_num, ylabel, as_percent, line_fmt, legend_label = curve_specs[typeData]

    # Work on a copy: scaling hist.history in place multiplied the stored
    # accuracies by 100 again on every call.
    yc = list(hist.history[typeData])[:epochs]
    if as_percent:
        yc = [100 * value for value in yc]
    # Derive x from the data so a history shorter than `epochs` still plots.
    xc = range(len(yc))

    # Tick sizes are read when the axes are created, so they must be
    # configured before the figure is first drawn.
    plt.rc('xtick',labelsize=24)
    plt.rc('ytick',labelsize=24)
    plt.rc('legend', fontsize=18)

    plt.figure(fig_num,figsize=(10,5))
    plt.ylabel(ylabel, fontsize=24)
    plt.plot(xc,yc,line_fmt,label=legend_label)

    plt.legend()
    plt.xlabel('Number of Epochs',fontsize=24)
    plt.grid(True)

# Gráficas de entrenamiento y validación para Loss y Accuracy

In [None]:
# Draw the four history curves in the same order as before: training
# curves first, then the validation curves on top of the matching figures.
for curve in ("loss", "accuracy", "val_loss", "val_accuracy"):
    plotTraining(vgg_hist, epochs, curve)

# Dataset de prueba creado

In [None]:
# Build the test table (image path + gleason_score) from the files in the
# test folder and preview its first rows.
test_dir = '../input/mypandadataset/test/'
test_df = create_df(test_dir)
test_df.head()

# Matriz de confusión, precisión, recall y f1-score

In [None]:
from sklearn.metrics import confusion_matrix, f1_score, roc_curve, precision_score, recall_score, accuracy_score, roc_auc_score
from sklearn import metrics
from mlxtend.plotting import plot_confusion_matrix
from keras.models import load_model
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

width_shape = 224
height_shape = 224

# The model was trained on images rescaled to [0, 1], so the test images
# need the same preprocessing (and no augmentation).
test_datagen = ImageDataGenerator(rescale=1./255)

# shuffle=False is essential: predictions come back in generator order and
# are compared with test_generator.classes, which is in dataframe order.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    x_col='images',
    y_col='gleason_score',
    target_size=(height_shape, width_shape),
    batch_size=8,
    shuffle=False,
    class_mode='categorical')

# Display names in class-index order ("3+4" -> "gleason 34"). Deriving them
# from the generator avoids a hand-written list whose order can differ from
# the indices the generator actually assigned.
names = ['gleason ' + label.replace('+', '')
         for label, _ in sorted(test_generator.class_indices.items(),
                                key=lambda item: item[1])]

custom_Model= load_model("VGG16_Baseline.h5")
#custom_Model= load_model("VGG19_Baseline.h5")

# Model.predict accepts generators; predict_generator is deprecated.
predictions = custom_Model.predict(test_generator)
#predictions = custom_Model.predict(validation_generator)

# NOTE(review): this assumes the test split contains the same classes as the
# training split, so both generators assign identical class indices.
assert predictions.shape[1] == len(names), (
    "Model outputs {} classes but the test set contains {}".format(
        predictions.shape[1], len(names)))

y_pred = np.argmax(predictions, axis=1)
y_real = test_generator.classes
#y_real = validation_generator.classes

# Fix the label set so the matrix and report always have one row per class,
# even when a class is never predicted.
class_ids = list(range(len(names)))

matc=confusion_matrix(y_real, y_pred, labels=class_ids)

plot_confusion_matrix(conf_mat=matc, figsize=(9,9), class_names = names, show_normed=False)
plt.tight_layout()

print(metrics.classification_report(y_real, y_pred, labels=class_ids,
                                    target_names=names, digits = 4))