## Create new directories for the small train, validation and test datasets

In [10]:
import os
import shutil
import random

# Source folders of the full Kaggle training split, one per class.
real_images_dir = '/kaggle/input/140k-real-and-fake-faces/real_vs_fake/real-vs-fake/train/real'
fake_images_dir = '/kaggle/input/140k-real-and-fake-faces/real_vs_fake/real-vs-fake/train/fake'

# Destination: a small, class-balanced training subset in the writable working dir.
new_train_dir = '/kaggle/working/train_n'
new_real_dir = os.path.join(new_train_dir, 'real')
new_fake_dir = os.path.join(new_train_dir, 'fake')

# Start from an empty folder so re-running the cell never accumulates extra files
# on top of a previous sample.
shutil.rmtree(new_train_dir, ignore_errors=True)
os.makedirs(new_real_dir, exist_ok=True)
os.makedirs(new_fake_dir, exist_ok=True)

# os.listdir returns entries in arbitrary order; sorting makes the seeded
# sampling below pick the same files on every run.
real_images = sorted(os.listdir(real_images_dir))
fake_images = sorted(os.listdir(fake_images_dir))

# Dedicated seeded RNG: reproducible subset without touching the global
# `random` state used by other cells.
train_rng = random.Random(42)
# sample() raises ValueError if a class folder holds fewer than 1000 files.
selected_real_images = train_rng.sample(real_images, 1000)
selected_fake_images = train_rng.sample(fake_images, 1000)

for img in selected_real_images:
    shutil.copy(os.path.join(real_images_dir, img), os.path.join(new_real_dir, img))

for img in selected_fake_images:
    shutil.copy(os.path.join(fake_images_dir, img), os.path.join(new_fake_dir, img))

# The message now names the directory that was actually written.
print("Images copied successfully to 'train_n' directory!")

In [11]:
# Validation images are drawn from the dataset's own `valid` split rather than
# from the training folders, so they cannot overlap with the training subset.
# Separate names are used so `real_images_dir` / `fake_images_dir` keep
# pointing at the training split for any cell that still reads them.
valid_real_images_dir = '/kaggle/input/140k-real-and-fake-faces/real_vs_fake/real-vs-fake/valid/real'
valid_fake_images_dir = '/kaggle/input/140k-real-and-fake-faces/real_vs_fake/real-vs-fake/valid/fake'

# NOTE: the variable names are kept from the original cell; here they hold the
# *validation* destination folders.
new_train_dir = '/kaggle/working/valid_n'
new_real_dir = os.path.join(new_train_dir, 'real')
new_fake_dir = os.path.join(new_train_dir, 'fake')

# Start from an empty folder so re-runs (or files left by an earlier version of
# this cell) never pollute the validation subset.
shutil.rmtree(new_train_dir, ignore_errors=True)
os.makedirs(new_real_dir, exist_ok=True)
os.makedirs(new_fake_dir, exist_ok=True)

# Sorted listing + seeded RNG => the same 200 files per class on every run.
real_images = sorted(os.listdir(valid_real_images_dir))
fake_images = sorted(os.listdir(valid_fake_images_dir))

valid_rng = random.Random(43)
selected_real_images = valid_rng.sample(real_images, 200)
selected_fake_images = valid_rng.sample(fake_images, 200)

for img in selected_real_images:
    shutil.copy(os.path.join(valid_real_images_dir, img), os.path.join(new_real_dir, img))

for img in selected_fake_images:
    shutil.copy(os.path.join(valid_fake_images_dir, img), os.path.join(new_fake_dir, img))

# The message now names the directory that was actually written.
print("Images copied successfully to 'valid_n' directory!")

In [12]:
# Test images are drawn from the dataset's held-out `test` split rather than
# from the training folders, so the reported test accuracy is not inflated by
# images the model was trained on.
# NOTE(review): assumes the dataset ships `test/real` and `test/fake` next to
# `train` and `valid` - confirm against the Kaggle dataset listing.
test_real_images_dir = '/kaggle/input/140k-real-and-fake-faces/real_vs_fake/real-vs-fake/test/real'
test_fake_images_dir = '/kaggle/input/140k-real-and-fake-faces/real_vs_fake/real-vs-fake/test/fake'

# NOTE: the variable names are kept from the original cell; here they hold the
# *test* destination folders.
new_train_dir = '/kaggle/working/test_n'
new_real_dir = os.path.join(new_train_dir, 'real')
new_fake_dir = os.path.join(new_train_dir, 'fake')

# Create the new directories from scratch so re-runs (or files left by an
# earlier version of this cell) never pollute the test subset.
shutil.rmtree(new_train_dir, ignore_errors=True)
os.makedirs(new_real_dir, exist_ok=True)
os.makedirs(new_fake_dir, exist_ok=True)

# Sorted listing + seeded RNG => the same 200 files per class on every run.
real_images = sorted(os.listdir(test_real_images_dir))
fake_images = sorted(os.listdir(test_fake_images_dir))

test_rng = random.Random(44)
selected_real_images = test_rng.sample(real_images, 200)
selected_fake_images = test_rng.sample(fake_images, 200)

for img in selected_real_images:
    shutil.copy(os.path.join(test_real_images_dir, img), os.path.join(new_real_dir, img))

for img in selected_fake_images:
    shutil.copy(os.path.join(test_fake_images_dir, img), os.path.join(new_fake_dir, img))

print("Images copied successfully to 'test_n' directory!")

### Data Augmentation 

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.efficientnet import preprocess_input

# NOTE(review): `preprocess_input` here is the EfficientNet variant, which
# Keras documents as a pass-through, so batches stay as raw 0-255 RGB floats.
# The models trained in this notebook are VGG19-based - confirm that the
# normalisation they expect is applied somewhere.

# Training pipeline: random geometric augmentation to reduce overfitting on
# the small training subset.
prep_train = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=30,
    width_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation pipeline: preprocessing only. Random rotations/shifts/zooms/flips
# on validation images make `val_loss` / `val_accuracy` change from epoch to
# epoch for reasons unrelated to the model, which also destabilises early
# stopping, so no augmentation is applied here.
prep_val = ImageDataGenerator(preprocessing_function=preprocess_input)

In [5]:
import tensorflow as tf
from tensorflow.keras import layers

# Training batches are streamed from the small subset on disk through the
# augmenting generator; class labels are inferred from the sub-folder names.
train_flow_options = dict(
    target_size=(224, 224),  # every image is resized to 224x224
    batch_size=32,
    class_mode='binary',     # one 0/1 label per image
    shuffle=True,
)
train_dataset = prep_train.flow_from_directory('/kaggle/working/train_n', **train_flow_options)


In [9]:
# Validation batches are streamed from the small validation subset on disk;
# class labels are inferred from the sub-folder names.
val_dataset = prep_val.flow_from_directory(
    '/kaggle/working/valid_n',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    # Shuffling brings nothing for validation metrics, and a fixed order keeps
    # the batches aligned with `val_dataset.classes` / `val_dataset.filenames`
    # if predictions are ever compared against them.
    shuffle=False,
)


### Sample Images plot

In [4]:
import matplotlib.pyplot as plt
import numpy as np

# Pull one batch from the training iterator and show its first 15 images.
images, labels = next(iter(train_dataset))

# 3 x 5 grid of axes on a single 5x5-inch figure.
fig, axes = plt.subplots(3, 5, figsize=(5, 5))

for idx, axis in enumerate(axes.flat):
    axis.imshow(images[idx].astype("uint8"))  # cast to uint8 for display
    # Label 0 is shown as "Fake", anything else as "Real".
    axis.set_title("Fake" if labels[idx] == 0 else "Real")
    axis.axis("off")

fig.tight_layout()
plt.show()

In [32]:
# Total number of images the training iterator discovered on disk.
n_train_samples = train_dataset.samples
print(f"Number of training samples: {n_train_samples}")

Number of training samples: 2000


### VGG19 with the last 20 layers fine-tuned

In [3]:
import tensorflow as tf
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.applications.vgg19 import preprocess_input as vgg19_preprocess_input
from tensorflow.keras.layers import Input, Lambda

# Load the VGG19 convolutional base (ImageNet weights) without its classifier head.
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Start from a fully frozen base, then unfreeze only the last 20 layers so
# they are fine-tuned together with the new head.
for layer in base_model.layers:
    layer.trainable = False
for layer in base_model.layers[-20:]:
    layer.trainable = True

# The data generators in this notebook use the EfficientNet `preprocess_input`,
# which is a pass-through, so the model receives raw 0-255 RGB images. The
# VGG19 ImageNet weights expect VGG-style inputs (RGB->BGR plus ImageNet mean
# subtraction), so that step is applied inside the model. Do not also apply
# VGG19 preprocessing in the generators, or it would run twice.
inputs = Input(shape=(224, 224, 3))
x = Lambda(vgg19_preprocess_input, name='vgg19_preprocess')(inputs)
x = base_model(x)

# Custom classification head on top of the VGG19 features.
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)  # 1st dense layer
x = Dropout(0.5)(x)                    # 1st dropout for regularization
x = Dense(512, activation='relu')(x)   # 2nd dense layer
x = Dropout(0.3)(x)                    # 2nd dropout
x = Dense(256, activation='relu')(x)   # 3rd dense layer
x = Dropout(0.2)(x)                    # 3rd dropout
predictions = Dense(1, activation='sigmoid')(x)  # single sigmoid unit for binary classification

ft_model = Model(inputs=inputs, outputs=predictions)

# Compile after the trainable flags are final so they take effect.
ft_model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])

# Summary of the model (optional)
# ft_model.summary()


In [6]:
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow.keras.callbacks import EarlyStopping

# Stop when validation accuracy has not improved for 5 consecutive epochs and
# roll the model back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)

history = ft_model.fit(
    train_dataset,
    epochs=25,  # upper bound; early stopping usually ends training sooner
    validation_data=val_dataset,
    callbacks=[early_stopping],
)

# One row per completed epoch, one column per logged metric.
history_df = pd.DataFrame(history.history)

# The x-axis follows the number of epochs actually run instead of a fixed 20,
# so late epochs are not cut off (and short runs are not squeezed to the left).
last_epoch = max(len(history_df) - 1, 1)
history_df.plot(
    figsize=(8, 5), xlim=[0, last_epoch], ylim=[0, 1], grid=True, xlabel="Epoch",
    style=["r--", "r--.", "b-", "b-*"])
plt.show()


In [8]:
# Test pipeline: preprocessing only, no augmentation, so every evaluation of
# the test set sees exactly the same images.
prep_test = ImageDataGenerator(
    preprocessing_function=preprocess_input,
)

# Build the iterator from `prep_test` (it was previously built from `prep_val`,
# leaving `prep_test` unused).
test_dataset = prep_test.flow_from_directory(
    '/kaggle/working/test_n',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False,  # fixed order keeps predictions aligned with test_dataset.classes
)

In [2]:
# Evaluate the fine-tuned model on the test iterator; with the compile
# settings used for `ft_model` the result is [loss, accuracy].
ft_test_metrics = ft_model.evaluate(test_dataset)
test_loss, test_accuracy = ft_test_metrics

### VGG19 Base Model

In [42]:
import tensorflow as tf
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.applications.vgg19 import preprocess_input as vgg19_preprocess_input
from tensorflow.keras.layers import Input, Lambda

# Load the VGG19 convolutional base (ImageNet weights) without its classifier head.
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Baseline: every layer of the base is trained.
for layer in base_model.layers:
    layer.trainable = True

# The data generators in this notebook use the EfficientNet `preprocess_input`,
# which is a pass-through, so the model receives raw 0-255 RGB images. The
# VGG19 ImageNet weights expect VGG-style inputs (RGB->BGR plus ImageNet mean
# subtraction), so that step is applied inside the model. Do not also apply
# VGG19 preprocessing in the generators, or it would run twice.
inputs = Input(shape=(224, 224, 3))
x = Lambda(vgg19_preprocess_input, name='vgg19_preprocess')(inputs)
x = base_model(x)

# Minimal head: global average pooling followed by one sigmoid unit.
x = GlobalAveragePooling2D()(x)
predictions = Dense(1, activation='sigmoid')(x)

vggbase_model = Model(inputs=inputs, outputs=predictions)

vggbase_model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])

# Summary of the model (optional)
# vggbase_model.summary()


In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow.keras.callbacks import EarlyStopping

# Stop when validation accuracy has not improved for 5 consecutive epochs and
# roll the model back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)

history = vggbase_model.fit(
    train_dataset,
    epochs=25,  # upper bound; early stopping usually ends training sooner
    validation_data=val_dataset,
    callbacks=[early_stopping],
)

# The per-epoch metrics live in `history.history` (a dict of lists).
# `vggbase_model.history` is the History callback object itself, which
# pd.DataFrame cannot be built from.
history_df = pd.DataFrame(history.history)

# The x-axis follows the number of epochs actually run instead of a fixed 20,
# so late epochs are not cut off (and short runs are not squeezed to the left).
last_epoch = max(len(history_df) - 1, 1)
history_df.plot(
    figsize=(8, 5), xlim=[0, last_epoch], ylim=[0, 1], grid=True, xlabel="Epoch",
    style=["r--", "r--.", "b-", "b-*"])
plt.show()

In [None]:

# Evaluate the baseline model on the test iterator; with the compile settings
# used for `vggbase_model` the result is [loss, accuracy].
# Note: this rebinds `test_loss` / `test_accuracy`.
vggbase_test_metrics = vggbase_model.evaluate(test_dataset)
test_loss, test_accuracy = vggbase_test_metrics

### Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


# True labels in iterator order. This is only aligned with the predictions
# below because `test_dataset` is created with shuffle=False.
y_true = test_dataset.classes
y_pred = vggbase_model.predict(test_dataset)  # sigmoid probabilities, one column per sample

# The model has a single sigmoid output, so the class comes from thresholding
# the probability at 0.5. (argmax over a single column is always 0, which
# would label every sample as class 0.)
y_pred_classes = (np.asarray(y_pred).ravel() > 0.5).astype(int)

# Tick labels in class-index order, taken from the iterator itself so they
# cannot drift from the folder-name -> index mapping.
class_names = [
    name.capitalize()
    for name, _ in sorted(test_dataset.class_indices.items(), key=lambda item: item[1])
]

# `labels=` pins the matrix to one row/column per class even if a class is
# never predicted.
cm = confusion_matrix(y_true, y_pred_classes, labels=list(range(len(class_names))))

# Plotting the confusion matrix: rows are true classes, columns are predictions.
plt.figure(figsize=(6, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()
