# CNN Classifiers

### Import Libraries

In [None]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import cv2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Resizing, Conv2D, MaxPooling2D, Dense, Flatten, Rescaling, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import image_dataset_from_directory
from keras.preprocessing.image import ImageDataGenerator
from skimage import exposure
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

#### Model training was done on a CPU. If CUDA and cuDNN are set up, a GPU can be used instead to speed up training.

In [None]:
# Report the installed TensorFlow release, then every device TensorFlow can see.
tf_version = tf.__version__
print(f'TensorFlow Version: {tf_version}')

visible_devices = tf.config.experimental.list_physical_devices()
print(visible_devices)

### Global Variables

In [None]:
# Notebook-wide settings shared by the cells below.
data_directory: str = 'data'  # NOTE(review): not referenced by any cell visible in this notebook
batch_size: int = 32  # NOTE(review): the generators below pass their own literal batch sizes (64 and 1)
image_height: int = 256  # Target image height in pixels used for resizing and model input shapes
image_width: int = 256  # Target image width in pixels used for resizing and model input shapes
random_state: int = 111  # Seed used for NumPy sampling and train_test_split

# True balances dataset to 10k of each class, false uses 20k generated images and 10k real images
# NOTE(review): no visible cell reads this flag — confirm whether the balancing cell should honour it
balance_dataset_TF: bool = True

### Creating Training and Test Sets

In [None]:
#Defining Helper Functions
def plot_images(directory: str) -> None:
    """Show a 3x3 grid of randomly chosen images from ``directory``.

    Nine file names are drawn with replacement, so a file can appear more than
    once. Each image is titled with its file name.

    Args:
        directory: Path of a folder that contains image files.

    Raises:
        IndexError: If ``directory`` contains no entries (raised by
            ``random.choices`` on an empty list).
    """
    sample_count = 9
    grid_side = int(np.sqrt(sample_count))

    files = os.listdir(directory)
    random_images = random.choices(files, k=sample_count)

    fig = plt.figure()

    for position, file_name in enumerate(random_images, start=1):
        image_path = os.path.join(directory, file_name)
        current_image = cv2.imread(image_path)

        ax = fig.add_subplot(grid_side, grid_side, position)
        ax.axis('off')

        # Display filename with the image
        ax.set_title(file_name, fontsize=6, pad=1, wrap=True)

        # cv2.imread returns None for files it cannot decode; leave that cell
        # empty instead of crashing inside imshow.
        if current_image is None:
            continue

        # OpenCV loads pixels as BGR while matplotlib expects RGB, so swap the
        # channel order to display true colours.
        ax.imshow(cv2.cvtColor(current_image, cv2.COLOR_BGR2RGB))

    plt.show()

In [None]:
# Training split: sub-folders whose name starts with 'AI_' hold generated
# images; every other entry is treated as real.
train_dir = 'Y:/BACKUP Real_AI_SD_LD_Dataset/train'
all_directories = os.listdir(train_dir)

train_ai = [os.path.join(train_dir, name)
            for name in all_directories if name.startswith('AI_')]
train_real = [os.path.join(train_dir, name)
              for name in all_directories if not name.startswith('AI_')]

# Test split: same naming rule as the training split.
test_dir = 'Y:/BACKUP Real_AI_SD_LD_Dataset/test'
all_directories = os.listdir(test_dir)

test_ai = [os.path.join(test_dir, name)
           for name in all_directories if name.startswith('AI_')]
test_real = [os.path.join(test_dir, name)
             for name in all_directories if not name.startswith('AI_')]


#### Displaying Real Images

In [None]:
# Preview a random 3x3 sample from the fourth real-image training folder (index 3).
plot_images(train_real[3])

#### Displaying Artificial Images

In [None]:
# Preview a random 3x3 sample from the fourth AI-image training folder (index 3).
plot_images(train_ai[3])

#### Create Data Frames with Directory Paths and Labels for Training and Testing

##### Training Set

In [None]:
# Build the training table: one row per file with its path and class label.
filepaths = []
labels = []

# Real folders are listed first, then AI folders, so row order stays the same
# as when the two label groups were walked one after the other.
for label, directories in (("real", train_real), ("AI", train_ai)):
    for directory in directories:
        for file in os.listdir(directory):
            filepaths.append(os.path.join(directory, file))
            labels.append(label)

# Construct the frame directly; concatenating onto an empty placeholder frame
# adds nothing and relies on pandas' handling of empty concat inputs.
train_data = pd.DataFrame({'filepath': filepaths, 'label': labels})

##### Testing Set

In [None]:
# Build the testing table: one row per file with its path and class label.
filepaths = []
labels = []

# Real folders are listed first, then AI folders, so row order stays the same
# as when the two label groups were walked one after the other.
for label, directories in (("real", test_real), ("AI", test_ai)):
    for directory in directories:
        for file in os.listdir(directory):
            filepaths.append(os.path.join(directory, file))
            labels.append(label)

# Construct the frame directly; concatenating onto an empty placeholder frame
# adds nothing and relies on pandas' handling of empty concat inputs.
test_data = pd.DataFrame({'filepath': filepaths, 'label': labels})

#### Drop Random AI-Images to Balance the Dataset

In [None]:
# Randomly discard surplus AI rows so both classes have the same number of
# training samples. The seed makes the selection reproducible.
np.random.seed(random_state)

ai_indices = train_data[train_data['label'] == 'AI'].index
num_real = int((train_data['label'] == 'real').sum())

# Derive the surplus from the data instead of hard-coding it, so the cell also
# works on other dataset sizes and is a no-op when re-run on balanced data.
num_to_drop = max(len(ai_indices) - num_real, 0)

if num_to_drop > 0:
    indices_to_drop = np.random.choice(ai_indices, num_to_drop, replace=False)
    train_data = train_data.drop(indices_to_drop)

train_data.reset_index(drop=True, inplace=True)

#### Create the Training Set and Test Sets

In [None]:
# Both iterators resize every image to the same (height, width).
target_shape = (image_height, image_width)

# Training iterator: pixel values are scaled by 1/255 and batches are shuffled.
# No augmentation (rotation, flip, zoom) is configured.
training_generator = ImageDataGenerator(rescale=1./255)
train_dataset = training_generator.flow_from_dataframe(
    dataframe=train_data,
    x_col='filepath',
    y_col='label',
    target_size=target_shape,
    batch_size=64,
    class_mode='categorical',
    shuffle=True,
)

# Testing iterator: same scaling, one image per batch, and a fixed order so
# each prediction can be matched to its expected label.
test_generator = ImageDataGenerator(rescale=1./255)
test_dataset = test_generator.flow_from_dataframe(
    dataframe=test_data,
    x_col='filepath',
    y_col='label',
    target_size=target_shape,
    batch_size=1,
    class_mode='categorical',
    shuffle=False,
)

# Show the label -> class index mapping chosen by each iterator.
print(train_dataset.class_indices)
print(test_dataset.class_indices)

### Create CNN Model Using the Keras Sequential API

In [None]:
# Custom architecture for baseline testing.
# The Flatten -> Dense head fixes the input size, so images must be resized to
# (image_height, image_width) with 3 colour channels before reaching the model.

model = Sequential([
    Conv2D(filters=64, kernel_size=3, activation='relu',
           input_shape=(image_height, image_width, 3)),
    MaxPooling2D(),
    Conv2D(filters=64, kernel_size=3, activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(units=64, activation='relu'),
    # One softmax output per class, matching the one-hot labels produced by
    # class_mode='categorical'.
    Dense(units=2, activation='softmax')
])

# With two softmax outputs and one-hot labels, binary cross-entropy has the
# same value as categorical cross-entropy, so the loss is kept as is.
model.compile(optimizer='Adam',
              loss=tf.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

model.summary()

In [None]:
# Mimicking the LiNet CNN Architecture
# Input is a single-channel image of size (image_height, image_width).
# NOTE(review): the generators in this notebook use class_mode='categorical'
# and do not request grayscale images; this model has one output and one input
# channel, so it presumably needs its own input pipeline — confirm before training.

LiNet = Sequential([
    # Layer 1
    Conv2D(32, (7, 7), padding='same', activation='relu',
           input_shape=(image_height, image_width, 1)),

    # Layer 2
    Conv2D(64, (7, 7), strides=2, padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling2D(3, strides=2, padding='same'),

    # Layer 3
    Conv2D(48, (5, 5), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling2D((3, 3), strides=2, padding='same'),

    # Layer 4
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling2D((3, 3), strides=2, padding='same'),

    # Fully Connected Layers
    Flatten(),
    Dense(4096, activation='relu'),
    Dropout(0.5),
    Dense(4096, activation='relu'),
    Dropout(0.5),
    # A single output unit needs sigmoid: softmax over one value is always 1.0,
    # which would make the prediction constant and the model untrainable.
    Dense(1, activation='sigmoid')
])

LiNet.compile(optimizer='adam',
              loss=tf.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

LiNet.summary()

In [None]:
# Mimicking the AlexNet CNN Architecture
# Input is a single-channel (grayscale) image of size (image_height, image_width).
# NOTE(review): the model rescales pixels by 1/255 itself, and the generators in
# this notebook also rescale by 1/255 — feeding those generators into this model
# would scale the pixels twice; confirm the intended input pipeline.

AlexNet = Sequential([
    # The first layer declares the input shape; later layers infer theirs.
    Rescaling(1./255, input_shape=(image_height, image_width, 1)),

    # Layer 1
    Conv2D(96, (11, 11), strides=(4, 4), activation='relu'),
    MaxPooling2D((3, 3), strides=(2, 2)),
    BatchNormalization(),

    # Layer 2
    Conv2D(256, (5, 5), padding='same', activation='relu'),
    MaxPooling2D((3, 3), strides=(2, 2)),
    BatchNormalization(),

    # Layer 3
    Conv2D(384, (3, 3), padding='same', activation='relu'),
    Conv2D(384, (3, 3), padding='same', activation='relu'),
    Conv2D(256, (3, 3), padding='same', activation='relu'),
    MaxPooling2D((3, 3), strides=(2, 2)),
    BatchNormalization(),
    Flatten(),

    # Fully connected layers
    Dense(4096, activation='relu'),
    Dropout(0.5),
    Dense(4096, activation='relu'),
    Dropout(0.5),
    # A single output unit needs sigmoid: softmax over one value is always 1.0,
    # which would make the prediction constant and the model untrainable.
    Dense(1, activation='sigmoid')
])

AlexNet.compile(optimizer='adam',
                loss=tf.losses.BinaryCrossentropy(),
                metrics=['accuracy'])

AlexNet.summary()

In [None]:
# Using a pretrained model from TensorFlow called ResNet50
# The convolutional base is built for 3-channel images of size
# (image_height, image_width).

from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D

# include_top=False drops the ImageNet classifier head, so no class count is
# passed here; the number of outputs is set by the Dense layer added below.
resnet_base = ResNet50(input_shape=(image_height, image_width, 3),
                       include_top=False,
                       weights='imagenet')

# Prevent the pretrained layers from being overwritten
resnet_base.trainable = False

# Establish the ResNet model with custom Dense layer
resnet_model = Sequential([
    resnet_base,
    GlobalAveragePooling2D(),
    # Only this layer is trained. A single output unit needs sigmoid: softmax
    # over one value is always 1.0, which would make the prediction constant.
    Dense(1, activation='sigmoid')
])

resnet_model.compile(optimizer='adam',
                     loss=tf.losses.BinaryCrossentropy(),
                     metrics=['accuracy'])

# Images need to be in a specific format for the ResNet model
# NOTE(review): `images` is not defined in any cell visible in this notebook,
# and `labels` was last filled with the testing-set label strings — confirm
# where the image array and its matching labels are meant to come from.
resnet_input_images = preprocess_input(images)

X_train, X_test, y_train, y_test = train_test_split(
    resnet_input_images, labels, test_size=0.3, random_state=random_state)

resnet_model.summary()

In [None]:
# Train the baseline model for 10 epochs, validating on the test iterator.
# Training events are written to the 'logs' folder through a TensorBoard callback.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='logs')

history = model.fit(
    train_dataset,
    epochs=10,
    validation_data=test_dataset,
    callbacks=[tensorboard_callback],
)

In [None]:
# Training vs. validation loss per epoch.
fig = plt.figure()
for curve_name, curve_color in (('loss', 'teal'), ('val_loss', 'orange')):
    plt.plot(history.history[curve_name], color=curve_color, label=curve_name)
fig.suptitle('Loss', fontsize=20)
plt.legend(loc="upper left")
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
fig = plt.figure()
for curve_name, curve_color in (('accuracy', 'teal'), ('val_accuracy', 'orange')):
    plt.plot(history.history[curve_name], color=curve_color, label=curve_name)
fig.suptitle('Accuracy', fontsize=20)
plt.legend(loc="upper left")
plt.show()

In [None]:
from tensorflow.keras.metrics import Precision, Recall, BinaryAccuracy

# Precision, recall and accuracy of the baseline model on the test iterator,
# treating "real" as the positive class.
pre = Precision()
re = Recall()
acc = BinaryAccuracy()

# Column of the one-hot labels / softmax outputs that stands for "real".
real_index = test_dataset.class_indices['real']

# Index the iterator batch by batch: len() gives the number of batches in one
# pass, whereas looping over a Keras image iterator directly never terminates.
for batch_index in range(len(test_dataset)):
    X, y = test_dataset[batch_index]
    yhat = model.predict(X, verbose=0)

    y_real = y[:, real_index]
    yhat_real = yhat[:, real_index]
    pre.update_state(y_real, yhat_real)
    re.update_state(y_real, yhat_real)
    acc.update_state(y_real, yhat_real)


print(pre.result(), re.result(), acc.result())

In [None]:
# Load a single sample image from disk and display it.
img = cv2.imread('test-fake5.jpg')

# cv2.imread signals a missing or unreadable file by returning None.
if img is None:
    raise FileNotFoundError("Could not read image 'test-fake5.jpg'")

# `img` stays in OpenCV's BGR channel order; convert only for display, because
# matplotlib expects RGB.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.show()

In [None]:
# Classify the sample image loaded with cv2.imread using the baseline model.

# OpenCV delivers BGR pixels; the Keras image iterators used for training load
# RGB by default, so the channel order is swapped before prediction.
rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Resize to the spatial size the model was built with and scale to [0, 1],
# mirroring the rescale=1./255 applied by the training generator.
target_height, target_width = model.input_shape[1:3]
resize = tf.image.resize(rgb_image, (target_height, target_width))
yhat = model.predict(np.expand_dims(resize/255, 0))

# yhat holds one probability per class for the single image, so pick the
# "real" column explicitly instead of comparing the whole array to 0.5.
real_index = train_dataset.class_indices['real']
real_probability = float(yhat[0][real_index])

if real_probability > 0.5:
    print(f'Predicted class is Real: {yhat}')
else:
    print(f'Predicted class is Fake: {yhat}')