In [None]:
# Install the Kaggle command-line client quietly (-q) into the Colab runtime.
!pip install -q kaggle

from google.colab import files
from google.colab import drive
# Mount Google Drive inside the runtime at /content/drive.
# NOTE(review): nothing in the visible cells reads from /content/drive -
# confirm the mount is still needed.
drive.mount('/content/drive')

In [None]:
# Prompt user to upload kaggle.json file
uploaded = files.upload()

# Check if file is uploaded
if 'kaggle.json' not in uploaded:
  print("Error: Upload the kaggle.json file for authentication!")
else:
  # Local import so this cell runs on its own.
  from pathlib import Path

  # files.upload() maps each uploaded file name to that file's raw bytes, so
  # the credentials are written straight from the mapping. (Indexing the bytes
  # with ['name'], as the old shell copy did, can never yield a path.)
  kaggle_dir = Path.home() / '.kaggle'
  kaggle_dir.mkdir(parents=True, exist_ok=True)
  credentials_path = kaggle_dir / 'kaggle.json'
  credentials_path.write_bytes(uploaded['kaggle.json'])

  # Owner read/write only: the file is used for authentication.
  credentials_path.chmod(0o600)

  # Print confirmation message
  print("kaggle.json uploaded successfully!")

In [None]:
# Download data from kaggle
# Runs the Kaggle CLI for the tusharpadhy/deepfake-dataset dataset; the
# download lands in the kernel's current working directory unless the CLI is
# configured otherwise.
# NOTE(review): no --unzip flag or extraction step is visible here, while the
# later cells read images from ../dataset_2/... - confirm how the downloaded
# files end up in those directories.
print("Downloading data from Kaggle...")
!kaggle datasets download -d tusharpadhy/deepfake-dataset

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from keras import backend as K

In [None]:
# Define constants
IMAGE_SIZE = (128, 128)
BATCH_SIZE = 4
NUM_CLASSES = 2
EPOCHS = 20

In [None]:
# Define directories
train_dir = '../dataset_2/train'
valid_dir = '../dataset_2/valid'
test_dir = '../dataset_2/test'

In [None]:
# Use ImageDataGenerator for data augmentation and normalization
# Factor applied to raw pixel values by every generator below.
pixel_scale = 1./255

# Random perturbations used for the training images only.
train_augmentation = {
    'rotation_range': 20,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
}
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=pixel_scale,
    **train_augmentation
)

# Validation and test images are rescaled but otherwise left untouched.
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=pixel_scale)
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=pixel_scale)

In [None]:
# Flow images from directories
# Training batches: shuffling stays at its default (enabled) so each epoch
# sees the images in a different order.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# Validation and test batches are produced in directory order (shuffle=False).
# Later cells compare model.predict() output with the iterator's `.classes`
# array, which is only valid when the iteration order is deterministic.
valid_generator = valid_datagen.flow_from_directory(
    valid_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

Found 240002 images belonging to 2 classes.
Found 59428 images belonging to 2 classes.
Found 30905 images belonging to 2 classes.


In [None]:
# Define the model
# Four convolution + max-pooling stages followed by a dense classifier head.
# The input shape is derived from IMAGE_SIZE (plus 3 colour channels) so the
# network stays in step with the size the generators resize images to.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=tuple(IMAGE_SIZE) + (3,)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    # Dropout is applied to the flattened features, before the dense layers.
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(512, activation='relu'),
    # One softmax output per class, matching class_mode='categorical' labels.
    tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')
])

In [None]:
def recall_m(y_true, y_pred):
    """Batch recall: rounded true positives divided by rounded actual positives.

    Both counts come from K.sum without an axis, so they pool every element of
    the inputs. K.epsilon() in the denominator avoids division by zero when
    the batch contains no positives.

    NOTE(review): with one-hot labels and a multi-column softmax this pools
    all class columns together - confirm that is the intended averaging.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(actual_mask) + K.epsilon())

def precision_m(y_true, y_pred):
    """Batch precision: rounded true positives divided by rounded predicted positives.

    Both counts come from K.sum without an axis, so they pool every element of
    the inputs. K.epsilon() in the denominator avoids division by zero when
    nothing is predicted positive.

    NOTE(review): with one-hot labels and a multi-column softmax this pools
    all class columns together - confirm that is the intended averaging.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hit_mask) / (K.sum(predicted_mask) + K.epsilon())

def f1_m(y_true, y_pred):
    """Batch F1: harmonic mean of precision_m and recall_m.

    K.epsilon() is added to the denominator so the result is defined when
    precision and recall are both zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    half_f1 = (p * r) / (p + r + K.epsilon())
    return 2 * half_f1

In [None]:
# Compile the model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    # Order matters: evaluate() reports the loss first and then these metrics
    # in exactly this sequence (accuracy, f1, precision, recall).
    metrics=[
        'accuracy',
        f1_m,
        precision_m,
        recall_m,
    ],
)

In [None]:
# Train the model
history = model.fit(
    train_generator,
    # Whole batches only: floor division leaves out a trailing partial batch.
    steps_per_epoch=train_generator.samples // BATCH_SIZE,
    # Read the epoch count from the shared constant instead of repeating the literal.
    epochs=EPOCHS,
    validation_data=valid_generator,
    validation_steps=valid_generator.samples // BATCH_SIZE
)

In [None]:
# Evaluate the model
# evaluate() returns the loss followed by the compiled metrics in the order
# they were passed to compile(): accuracy, f1_m, precision_m, recall_m.
# The unpacking must follow that order, otherwise F1, precision and recall
# end up stored under each other's names.
test_loss, test_acc, test_f1, test_precision, test_recall = model.evaluate(
    test_generator,
    steps=test_generator.samples // BATCH_SIZE
)
print('Test accuracy:', test_acc)

In [None]:
# Extract metrics from history object
# history.history maps each logged metric name to its per-epoch values;
# validation series use the same name with a 'val_' prefix.
epoch_logs = history.history
train_acc, val_acc = epoch_logs['accuracy'], epoch_logs['val_accuracy']
train_loss, val_loss = epoch_logs['loss'], epoch_logs['val_loss']
train_precision, val_precision = epoch_logs['precision_m'], epoch_logs['val_precision_m']
train_recall, val_recall = epoch_logs['recall_m'], epoch_logs['val_recall_m']
train_f1, val_f1 = epoch_logs['f1_m'], epoch_logs['val_f1_m']

In [None]:
# Plot accuracy graph
plt.plot(history.epoch, train_acc, label='Train Accuracy')
plt.plot(history.epoch, val_acc, label='Validation Accuracy')
# The test score is measured after training has finished, so its marker is
# anchored at the last trained epoch instead of a fixed x position.
plt.plot([history.epoch[-1]], [test_acc], label='Test Accuracy', marker='o')  # Test accuracy as a single point
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Accuracy per epoch')
plt.legend()
plt.show()

In [None]:
# Plot precision graph
plt.plot(history.epoch, train_precision, label='Train Precision')
plt.plot(history.epoch, val_precision, label='Validation Precision')
# The test score is measured after training has finished, so its marker is
# anchored at the last trained epoch instead of a fixed x position.
plt.plot([history.epoch[-1]], [test_precision], label='Test Precision', marker='o')
plt.xlabel('Epochs')
plt.ylabel('Precision')
plt.title('Precision per epoch')
plt.legend()
plt.show()

In [None]:
# Plot recall graph
plt.plot(history.epoch, train_recall, label='Train Recall')
plt.plot(history.epoch, val_recall, label='Validation Recall')
# The test score is measured after training has finished, so its marker is
# anchored at the last trained epoch instead of a fixed x position.
plt.plot([history.epoch[-1]], [test_recall], label='Test Recall', marker='o')
plt.xlabel('Epochs')
plt.ylabel('Recall')
plt.title('Recall per epoch')
plt.legend()
plt.show()

In [None]:
# Plot f1-score graph
plt.plot(history.epoch, train_f1, label='Train F1-score')
plt.plot(history.epoch, val_f1, label='Validation F1-score')
# The test score is measured after training has finished, so its marker is
# anchored at the last trained epoch instead of a fixed x position.
plt.plot([history.epoch[-1]], [test_f1], label='Test F1-score', marker='o')
plt.xlabel('Epochs')
plt.ylabel('F1-score')
plt.title('F1-score per epoch')
plt.legend()
plt.show()

In [None]:
# Get model predictions
# Predictions are compared element-by-element with each iterator's `.classes`
# array, which lists labels in directory order. The iterators used here are
# therefore built with shuffle=False and with rescaling only (no random
# augmentation), so row i of the prediction array belongs to label i.
eval_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)


def _predict_in_order(directory):
    """Return (true_labels, class_probabilities) for the images under `directory`.

    Uses a non-shuffled iterator so the two returned arrays are aligned.
    """
    ordered_generator = eval_datagen.flow_from_directory(
        directory,
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        shuffle=False
    )
    probabilities = model.predict(ordered_generator)
    return ordered_generator.classes, probabilities


# Get true class labels together with the raw softmax outputs
y_train_true, y_train_pred = _predict_in_order(train_dir)
y_val_true, y_val_pred = _predict_in_order(valid_dir)
y_test_true, y_test_pred = _predict_in_order(test_dir)

# Convert predictions to class labels: each row holds one probability per
# class, so the predicted label is the index of the largest entry.
# (Thresholding a whole row at 0.5 and calling int() on it raises an error.)
y_train_pred_classes = y_train_pred.argmax(axis=1)
y_val_pred_classes = y_val_pred.argmax(axis=1)
y_test_pred_classes = y_test_pred.argmax(axis=1)

# Create confusion matrices
cm_train = confusion_matrix(y_train_true, y_train_pred_classes)
cm_val = confusion_matrix(y_val_true, y_val_pred_classes)
cm_test = confusion_matrix(y_test_true, y_test_pred_classes)

# Print confusion matrices
print("Train Confusion Matrix:\n", cm_train)
print("Validation Confusion Matrix:\n", cm_val)
print("Test Confusion Matrix:\n", cm_test)