In [None]:
# Colab-only setup cell: mounts Google Drive and extracts the dataset archive.
from google.colab import drive
# Mount Drive at /content/drive, requesting a forced remount.
drive.mount('/content/drive', force_remount=True)
# Extract the archive stored on Drive into /content (the archive path is given without a file extension).
!unzip "/content/drive/MyDrive/ACA_FDS/Datasets" -d "/content"

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow.keras
from tensorflow.keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Accuracy, Precision, Recall, AUC
from tensorflow.keras.models import Sequential, load_model, save_model

from sklearn.metrics import classification_report,confusion_matrix

import tensorflow as tf

import cv2
import os

from tensorflow.keras.preprocessing import image
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd

# Working directory at the time this cell is executed.
DIR = os.getcwd()

# Target image size and colour mode used whenever images are loaded.
IMG_HEIGHT = IMG_WIDTH = 28
IMG_PIXELS = IMG_WIDTH * IMG_HEIGHT
COLOR_MODE = 'grayscale'

# Dataset folders, expressed relative to the working directory.
PATH_DATASET_TRAIN = "Datasets/dataset_train/"
PATH_DATASET_TEST = 'Datasets/dataset_test1/'

# Names of the evaluation metrics; used as the columns of the result tables.
metrics = [
    'accuracy',
    'precision',
    'recall',
    'roc_auc',
]


In [None]:
def loadDataSet():
    """Load the training images of both classes as NumPy arrays.

    Every file listed in the ``real/`` and ``fake/`` sub-folders of
    ``PATH_DATASET_TRAIN`` is opened with ``image.load_img`` using
    ``target_size=(IMG_HEIGHT, IMG_WIDTH)`` and ``color_mode=COLOR_MODE``,
    then converted to an array of which only the first channel is kept.

    Returns:
        tuple: ``(real_array, fake_array)`` -- for each class, the per-image
        2-D arrays stacked with ``np.array``.
    """
    def load_class(subdir):
        # Open every file of one class folder; files come in os.listdir order.
        folder = PATH_DATASET_TRAIN + subdir
        pictures = []
        for file_name in os.listdir(folder):
            pictures.append(
                image.load_img(
                    folder + file_name,
                    target_size=(IMG_HEIGHT, IMG_WIDTH),
                    color_mode=COLOR_MODE,
                )
            )
        return pictures

    def to_array(pictures):
        # img_to_array yields (height, width, channels); keep channel 0 only.
        return np.array([image.img_to_array(picture)[:, :, 0] for picture in pictures])

    real_images = load_class("real/")
    fake_images = load_class("fake/")
    return to_array(real_images), to_array(fake_images)

# Load both classes and assemble the feature matrix and the label vector.
real, fake = loadDataSet()

X = np.append(real, fake, axis=0)
# Labels: 1 = real, 0 = fake. Sized from the arrays that were actually loaded
# instead of a hard-coded 20000 per class, so X and y always stay aligned.
y = np.append(np.ones(len(real)), np.zeros(len(fake)))
n_samples = len(y)

# 60/20/20 split: hold out 40% first, then halve the hold-out into test and
# validation. Both calls use a fixed random_state so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.4, random_state=13)
X_test, X_val, y_test, y_val = train_test_split(X_val, y_val, test_size=0.5, random_state=13)
print("Train -> {} [{}%] ".format(len(y_train), len(y_train)/n_samples*100))
print("Val   -> {} [{}%] ".format(len(y_val), len(y_val)/n_samples*100))
print("Test  -> {} [{}%] ".format(len(y_test), len(y_test)/n_samples*100))

# Add the trailing channel axis expected by Conv2D and scale pixels to [0, 1].
X_train = X_train.reshape(-1, IMG_HEIGHT, IMG_WIDTH, 1) / 255
X_val = X_val.reshape(-1, IMG_HEIGHT, IMG_WIDTH, 1) / 255
X_test = X_test.reshape(-1, IMG_HEIGHT, IMG_WIDTH, 1) / 255

In [None]:
# Sanity check of the training split: feature shape, label shape, then the labels.
print(X_train.shape, y_train.shape, y_train, sep="\n")

In [None]:
# Hyper-parameter grid explored by the CNN search.
filters = [16, 32]
dropouts = [0.5, 0.6]

# Training length, plus the matching x-axis values for the learning curves.
n_epochs = 100
epochs = list(range(n_epochs))

# Running counter used to name the models (CNN1, CNN2, ...) and the table
# that collects their scores (one column per metric name).
numCNN = 1
df_cnns = pd.DataFrame(columns=metrics)

In [None]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,roc_auc_score

def getMetrics(pred,real):
    """Score binary predictions against the ground-truth labels.

    Args:
        pred: predicted class labels.
        real: ground-truth class labels.

    Returns:
        list: ``[accuracy, precision, recall, roc_auc]``.
    """
    # scikit-learn metric functions take (y_true, y_pred) in that order.
    # Passing the predictions first swaps precision with recall and scores
    # ROC AUC against the wrong reference, so the truth goes first here.
    accuracy = accuracy_score(real, pred)
    precision = precision_score(real, pred)
    recall = recall_score(real, pred)
    roc_auc = roc_auc_score(real, pred)

    return [accuracy, precision, recall, roc_auc]

In [None]:
# Grid search over the number of convolution filters and the dropout rate.
# One CNN is trained per (filt, dropout) pair; its validation metrics become a
# row of df_cnns and its learning curves are saved as PNG files.
for filt in filters:
    for dropout in dropouts:
        model_name = f'CNN{numCNN}'
        print(f'filter:{filt}\tDropout{dropout}')

        cnn_model = Sequential()
        cnn_model.add(Conv2D(filt, kernel_size=5, activation='relu', input_shape=(28, 28, 1)))
        cnn_model.add(Conv2D(filt, kernel_size=5, activation='relu'))
        cnn_model.add(Dropout(dropout))
        cnn_model.add(Flatten())
        cnn_model.add(Dense(128, activation='relu'))
        cnn_model.add(Dense(84, activation='relu'))
        cnn_model.add(Dense(1, activation='sigmoid'))
        cnn_model.compile(
            optimizer="adam",
            loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
            metrics=['accuracy'],
        )

        cnn_model_training = cnn_model.fit(
            X_train,
            y_train,
            epochs=n_epochs,
            validation_data=(X_val, y_val),
        )

        cnn_accuracy = cnn_model_training.history['accuracy']
        cnn_val_accuracy = cnn_model_training.history['val_accuracy']
        loss = cnn_model_training.history['loss']
        val_loss = cnn_model_training.history['val_loss']

        # predict() returns sigmoid probabilities. A bare astype(int) truncates
        # every value below 1.0 to 0, so threshold at 0.5 to get class labels.
        pred = (cnn_model.predict(X_val) > 0.5).astype(int).ravel()
        # df_cnns has one column per metric, so each model is stored as a row.
        df_cnns.loc[model_name] = getMetrics(pred, y_val)

        numCNN += 1

        plt.figure()
        plt.plot(epochs, cnn_accuracy, 'b--', label='Training accuracy')
        plt.plot(epochs, cnn_val_accuracy, 'orange', label='Validation accuracy')
        plt.title('Training and validation accuracy')
        # The legend must be drawn before savefig or the PNG is written without it.
        plt.legend()
        plt.savefig(f'{model_name}_accuracy.png')
        plt.show()

        plt.figure()
        plt.plot(epochs, loss, 'b--', label='Training loss')
        plt.plot(epochs, val_loss, 'orange', label='Validation loss')
        plt.title('Training and validation loss')
        plt.legend()
        plt.savefig(f'{model_name}_loss.png')
        plt.show()

df_cnns.to_csv('cnns.csv')

In [None]:
# Display the validation metrics collected for the CNN configurations.
df_cnns

In [None]:
# NOTE(review): no cell visible in this notebook writes 'test_results.csv', and
# df_test_results is reassigned in a later cell -- confirm the file exists before
# a fresh Run All, or whether this cell is still needed.
df_test_results = pd.read_csv('test_results.csv')

In [None]:
# Retrain the selected configuration (CNN3: 32 filters, dropout 0.5) and score
# it on the held-out test split.
filt = 32
dropout = 0.5
model_name = "CNN3"

cnn_model = Sequential()
cnn_model.add(Conv2D(filt, kernel_size=5, activation='relu', input_shape=(28, 28, 1)))
cnn_model.add(Conv2D(filt, kernel_size=5, activation='relu'))
cnn_model.add(Dropout(dropout))
cnn_model.add(Flatten())
cnn_model.add(Dense(128, activation='relu'))
cnn_model.add(Dense(84, activation='relu'))
cnn_model.add(Dense(1, activation='sigmoid'))
cnn_model.compile(
    optimizer="adam",
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

cnn_model_training = cnn_model.fit(
    X_train,
    y_train,
    epochs=n_epochs,
    validation_data=(X_val, y_val),
)

# predict() returns sigmoid probabilities. A bare astype(int) truncates every
# value below 1.0 to 0, so threshold at 0.5 to obtain class labels.
pred = (cnn_model.predict(X_test) > 0.5).astype(int).ravel()
cnn_metrics = getMetrics(pred, y_test)
cnn_metrics

In [None]:
# Hard-coded scores of the other models, listed in the order given by `metrics`
# (presumably copied from earlier experiments -- TODO confirm their provenance).
svm_metrics = [0.976750, 0.975113, 0.978064, 0.976761]
mlp_metrics = [0.97375, 0.966817, 0.980122, 0.973869]
voting_metrics = [0.95375, 0.946455, 0.959969, 0.953869]
rf_metrics = [0.910875, 0.943690, 0.884751, 0.912565]

# One row per model (the CNN row comes from cnn_metrics), one column per metric.
final_data = [
    svm_metrics,
    mlp_metrics,
    voting_metrics,
    rf_metrics,
    cnn_metrics,
]
df_test_results = pd.DataFrame(
    data=final_data,
    index=("SVM7", "MLP15", "Majority Weighted Voting", "RF8", "CNN3"),
    columns=metrics,
)
df_test_results.to_csv("final_data.csv")

In [None]:
from tensorflow.keras.preprocessing import image
# Load the competition test images.
# shuffle=False keeps the files in a deterministic (alphanumeric) order; the
# default shuffles them, which would attach the sequential submission Ids to
# randomly ordered images.
# TODO confirm that the expected Id numbering matches this file order.
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    PATH_DATASET_TEST,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    color_mode=COLOR_MODE,
    shuffle=False,
)
test_ds = test_ds.unbatch()

# Stack every image (labels are ignored), scale pixels to [0, 1] and make sure
# the array has the (N, height, width, 1) layout the CNN was trained on.
test_data = np.array([img for img, _label in test_ds.as_numpy_iterator()]) / 255
test_data = test_data.reshape(-1, IMG_HEIGHT, IMG_WIDTH, 1)

In [None]:
# Build the submission table: one sequential Id per test image plus its
# predicted class.
# predict() returns sigmoid probabilities. A bare astype(int) truncates every
# value below 1.0 to 0, so threshold at 0.5; ravel() flattens the (N, 1) output
# into a plain column.
pred = (cnn_model.predict(test_data) > 0.5).astype(int).ravel()
df_submission = pd.DataFrame({
    'Id': np.arange(test_data.shape[0]),
    'Category': pred,
})

In [None]:
# Display the submission table.
df_submission

In [None]:
# Write the submission to "Submission.csv" without the DataFrame index column.
df_submission.to_csv("Submission.csv", index=False)