# Cross validation

## Load the libraries

Using the GPU: Running on a GPU can significantly speed up model training.

In [None]:
import tensorflow as tf
# List the GPUs TensorFlow can see and report how many there are.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Number of GPUs Available: ", len(physical_devices))
# Enable on-demand memory allocation on every detected GPU. Iterating (rather
# than indexing element 0) keeps this cell working on CPU-only machines, where
# the list is empty, and applies the same setting to all devices.
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
from preprocess import *
from model import *
from utils import *
import random
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
import tensorflow as tf
from sklearn.metrics import confusion_matrix
import seaborn as sns


np.random.seed(2)
from keras.models import Sequential, load_model
from keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Dropout, MaxPooling2D, UpSampling2D, Input, Concatenate, Conv2DTranspose, BatchNormalization,Reshape
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator


from PIL import Image, ImageChops, ImageEnhance,ImageFilter, ImageOps
from io import BytesIO
import cv2
from scipy.fftpack import dct
from scipy import ndimage
from scipy import fftpack
import optuna

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

In [None]:
# Glob patterns for the two image classes; they are expanded with glob.glob
# when the data is loaded.
# NOTE(review): "Au"/"Tp" are presumably the CASIA2 authentic/tampered folders — confirm.
real_images_path = '../data/dataset/data/CASIA2/Au/*.*'
fake_images_path = '../data/dataset/data/CASIA2/Tp/*.*'
# (height, width) passed to the image preparation helper and to build_model.
image_size = (128, 128)

In [None]:
def build_model(image_size=(128, 128)):
    """Assemble the uncompiled CNN used to classify images into two classes.

    The network has three convolutional stages with 32, 64 and 128 filters.
    Each stage is two 3x3 same-padded ReLU convolutions, a 2x2 max-pool and
    a 0.25 dropout. The stages are followed by Flatten, Dense(512, relu),
    Dropout(0.5) and a 2-unit softmax output. The model summary is printed
    before returning.

    Args:
        image_size: (height, width) of the 3-channel input images.

    Returns:
        The assembled keras Sequential model (not compiled).
    """
    base_filters = 32
    stage_dropout = 0.25
    conv_options = dict(kernel_size=(3, 3), padding='same', activation='relu')
    input_shape = (image_size[0], image_size[1], 3)

    model = Sequential()
    for stage_index, multiplier in enumerate((1, 2, 4)):
        n_filters = multiplier * base_filters
        if stage_index == 0:
            # Only the very first layer declares the input shape.
            model.add(Conv2D(filters=n_filters, input_shape=input_shape, **conv_options))
        else:
            model.add(Conv2D(filters=n_filters, **conv_options))
        model.add(Conv2D(filters=n_filters, **conv_options))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(stage_dropout))

    # Classification head.
    for head_layer in (
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(2, activation='softmax'),
    ):
        model.add(head_layer)

    model.summary()

    return model

def train_model(model, train_generator, val_generator, batch_size, epochs):
    """Compile and fit ``model``, returning it with its best validation weights.

    The model is compiled with Adam (initial learning rate 1e-4, decay
    ``init_lr / epochs``) and categorical cross-entropy, then trained with
    early stopping on validation accuracy. The best epoch is also saved to
    ``../models/model.h5``.

    Args:
        model: Keras model to compile and train (modified in place).
        train_generator: Batch iterator of training data; its length is used
            as the number of steps per epoch.
        val_generator: Batch iterator of validation data; its length is used
            as the number of validation steps.
        batch_size: Not used by this function; kept so existing callers keep
            working (batching is already fixed by the generators passed in).
        epochs: Maximum number of epochs, also used to derive the decay rate.

    Returns:
        The same ``model`` object after training.
    """
    init_lr = 1e-4
    checkpoint_path = '../models/model.h5'

    opt = Adam(learning_rate=init_lr, decay=init_lr / epochs)

    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

    # restore_best_weights asks EarlyStopping to roll the model back to the
    # best-scoring epoch when it halts training; without it the returned model
    # carries the weights from up to `patience` epochs after the best one.
    early_stopping = EarlyStopping(
        monitor='val_accuracy',
        min_delta=0,
        patience=5,
        verbose=0,
        mode='max',
        restore_best_weights=True,
    )

    # Make sure the checkpoint directory exists before the first save.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
    checkpoint = ModelCheckpoint(
        checkpoint_path,
        monitor='val_accuracy',
        verbose=1,
        save_best_only=True,
        mode='max',
    )

    model.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=epochs,
        validation_data=val_generator,
        validation_steps=len(val_generator),
        callbacks=[early_stopping, checkpoint]
    )
    return model

In [None]:
if __name__ == '__main__':
    # Load up to 100 images per class, run 5-fold cross-validation on a
    # development split, and finally score on a test split that the
    # cross-validation never touches.
    batch_size = 32
    epochs = 50
    X = []
    y = []

    print("Cargando datos de imágenes reales...")
    for file_path in glob.glob(real_images_path)[0:100]:
        X.append(preparete_image_ela(file_path, image_size))
        y.append(0)

    print("Cargando datos de imágenes falsificadas...")
    for file_path in glob.glob(fake_images_path)[0:100]:
        X.append(preparete_image_ela(file_path, image_size))
        y.append(1)

    X = np.array(X).reshape(-1, image_size[0], image_size[1], 3)
    y = np.array(y)
    y = to_categorical(y, num_classes=2)  # Convert the labels to one-hot encoding

    # Hold out the final test set BEFORE cross-validation so that no fold ever
    # trains or validates on it.
    X_cv, X_test, y_cv, y_test = train_test_split(X, y, test_size=0.1, random_state=5)

    # Apply cross-validation on the remaining data.
    kfold = KFold(n_splits=5, shuffle=True, random_state=7)
    cvscores = []
    metrics = []
    for fold_train_idx, fold_test_idx in kfold.split(X_cv, y_cv):
        # Carve the early-stopping validation set out of this fold's training
        # portion only, so the fold's evaluation samples stay unseen.
        X_fit, X_val, y_fit, y_val = train_test_split(
            X_cv[fold_train_idx], y_cv[fold_train_idx], test_size=0.1, random_state=5
        )
        train_generator = ImageDataGenerator().flow(X_fit, y_fit, batch_size=batch_size)
        val_generator = ImageDataGenerator().flow(X_val, y_val, batch_size=batch_size)

        # A fresh model per fold keeps the folds independent of each other.
        model = build_model(image_size)
        model = train_model(model, train_generator, val_generator, batch_size, epochs)
        scores = model.evaluate(X_cv[fold_test_idx], y_cv[fold_test_idx], verbose=0)
        metrics.append(model.metrics_names[1])
        cvscores.append(scores[1] * 100)

    print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))
    for metric in metrics:
        print(metric)

    # Plot the score obtained in each cross-validation fold.
    plt.plot(cvscores)
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Validation number')
    plt.show()

    for i, fold_score in enumerate(cvscores):
        print("Validación cruzada: ", i, " Accuracy: ", fold_score)

    # Score the model trained in the last fold on the held-out test set.
    score = model.evaluate(X_test, y_test)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])