In [1]:
# Import necessary libraries
import os
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2

In [16]:
# Function to load and normalize an image
def load_and_normalize_image(file_path):
    image = Image.open(file_path)
    image_array = np.array(image)
    image_array = image_array / 255.0  # Normalize to [0, 1]
    return image_array

# Load the CSV file from your local directory
csv_file_path = 'C:\\Users\\jbane\\Documents\\cda_fall2023_project\\data\\output\\combined_data_mtx.csv'  # Replace with your actual path to the CSV file
combined_data_mtx = pd.read_csv(csv_file_path)

# Correct the file paths in the dataframe, assuming the 'file_path' column contains paths like 'data\\real_and_fake_face\\...'
combined_data_mtx['file_path'] = combined_data_mtx['file_path'].str.replace(r'data\\', '', regex=True)

# Base directory for images
base_dir = 'C:\\Users\\jbane\\Documents\\cda_fall2023_project\\data\\'  # Replace with your actual base directory

# Load and normalize images using the corrected paths
combined_data_mtx['img_array'] = combined_data_mtx['file_path'].apply(
    lambda x: load_and_normalize_image(os.path.join(base_dir, x.replace('\\', os.sep))))

# Prepare labels
y = combined_data_mtx['label'].apply(lambda x: 1 if x == 'real' else 0).values

# Split the dataset into training and testing sets
X = np.array(combined_data_mtx['img_array'].tolist())  # Convert list of arrays to a numpy array
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# CNN Model Architecture
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(X_train.shape[1:])),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 1.1423999071121216
Test Accuracy: 0.5721271634101868


In [None]:
# Adjusted CNN Model Architecture with Dropout and Regularization
model2 = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(X_train.shape[1:])),
    MaxPooling2D(2, 2),
    Dropout(0.25),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Early Stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='min')

# Compile the model with a potentially smaller learning rate
model2.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model with Early Stopping
history = model2.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

In [None]:
import matplotlib.pyplot as plt

# Plot training & validation accuracy values
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()
