In [1]:
import io
import os

from PIL import Image, ImageChops, ImageEnhance

def generate_ela_image(image_path, save_path, quality=90):
    """Write an Error Level Analysis (ELA) image for ``image_path``.

    The source image is converted to RGB and re-encoded as JPEG at
    ``quality``. The per-pixel absolute difference between the RGB original
    and the re-encoded copy is then brightened so that the largest channel
    difference is scaled towards 255, and the result is saved.

    Args:
        image_path: Path of the image to analyse.
        save_path: Destination path of the ELA image. No explicit format is
            passed to ``save``, so PIL picks it from this path.
        quality: JPEG quality used for the re-compression step.
    """
    # Release the source file handle as soon as the pixels are converted.
    with Image.open(image_path) as source:
        original = source.convert('RGB')

    # Re-compress in memory. Going through a fixed "compressed.jpg" in the
    # working directory left a stray file behind and let concurrent runs
    # overwrite each other's intermediate image.
    buffer = io.BytesIO()
    original.save(buffer, "JPEG", quality=quality)
    buffer.seek(0)

    # The difference must be computed while the decoded JPEG is still open.
    with Image.open(buffer) as compressed:
        diff = ImageChops.difference(original, compressed)

    # getextrema() yields one (min, max) pair per band of the RGB difference.
    max_diff = max(band_max for _, band_max in diff.getextrema())
    # An all-zero difference (no compression error) is left unscaled.
    scale = 255 / max_diff if max_diff != 0 else 1
    diff = ImageEnhance.Brightness(diff).enhance(scale)

    diff.save(save_path)

In [2]:
import shutil

# Root folder of the dataset; every other path in this cell is derived from it.
casia_root = "/Users/logpoint/Documents/SoftwareProjects/Thesis/CASIA22"

au_input_dir = os.path.join(casia_root, "Au")
tp_input_dir = os.path.join(casia_root, "Tp")

ela_imag_dir = os.path.join(casia_root, "ela_images")
au_train_dir = os.path.join(ela_imag_dir, "au", "train")
tp_train_dir = os.path.join(ela_imag_dir, "tp", "train")

# Source files considered for ELA conversion (compared case-insensitively).
ela_source_extensions = (".jpg", ".jpeg", ".png", ".tif", ".tiff")


def write_ela_images(input_dir, output_dir):
    """Generate an ``ela_<filename>`` image in ``output_dir`` for every
    supported image file found directly inside ``input_dir``.

    Args:
        input_dir: Folder holding the source images.
        output_dir: Folder that receives the ELA images; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)
    for filename in os.listdir(input_dir):
        # splitext + lower() accepts "IMG.JPG" as well as "img.jpg" and does
        # not mistake an extensionless file literally named "jpg" for an image.
        extension = os.path.splitext(filename)[1].lower()
        if extension not in ela_source_extensions:
            continue
        input_path = os.path.join(input_dir, filename)
        output_path = os.path.join(output_dir, f"ela_{filename}")
        generate_ela_image(input_path, output_path)


# Start from an empty output tree so files from a previous run never mix in.
if os.path.exists(ela_imag_dir):
    shutil.rmtree(ela_imag_dir)

write_ela_images(au_input_dir, au_train_dir)
write_ela_images(tp_input_dir, tp_train_dir)

In [3]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split


# Function to load images and assign labels
def load_images_and_labels(authentic_dir, tampered_dir, target_size=(224, 224)):
    """Load every file of two folders as an image array with a class label.

    Files directly inside ``authentic_dir`` are labelled 0 and files directly
    inside ``tampered_dir`` are labelled 1. Sub-directories are skipped.
    Authentic samples come first, and within each folder the files are taken
    in sorted name order.

    Args:
        authentic_dir: Folder with the authentic-class images.
        tampered_dir: Folder with the tampered-class images.
        target_size: Size passed to ``load_img`` for every image.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays with one entry per
        loaded file.
    """
    images = []
    labels = []

    for directory, label in ((authentic_dir, 0), (tampered_dir, 1)):
        # os.listdir() returns names in arbitrary order; sorting makes the
        # sample order (and any seeded split of it) reproducible.
        for filename in sorted(os.listdir(directory)):
            filepath = os.path.join(directory, filename)
            if not os.path.isfile(filepath):
                continue
            img = load_img(filepath, target_size=target_size)
            images.append(img_to_array(img))
            labels.append(label)

    return np.array(images), np.array(labels)

# Build the full dataset from the two ELA output folders
X, y = load_images_and_labels(authentic_dir=au_train_dir, tampered_dir=tp_train_dir)

# Divide every pixel value by 255 (scale values to [0, 1])
X = np.true_divide(X, 255.0)

# Hold out 20% of the samples as the test set; the seed keeps the split fixed
split_arrays = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_arrays

print(f"Training data size: {X_train.shape[0]}")
print(f"Test data size: {X_test.shape[0]}")



Training data size: 10048
Test data size: 2512


In [4]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam

# VGG-16 backbone with ImageNet weights, created without its top layers
vgg_base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Mark every backbone layer as non-trainable so only the new head is fitted
for backbone_layer in vgg_base.layers:
    backbone_layer.trainable = False

# Stack the new binary-classification head on top of the backbone
model = Sequential()
model.add(vgg_base)
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))  # regularization between the two dense layers
model.add(Dense(1, activation='sigmoid'))  # single sigmoid output unit

# Compile with Adam and binary cross-entropy, tracking accuracy
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [5]:
# Fit for 10 epochs in batches of 16; the held-out split is passed as
# validation data so its loss and accuracy are reported after every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
[1m  4/628[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m36:18[0m 3s/step - accuracy: 0.5469 - loss: 0.7831

KeyboardInterrupt: 

In [None]:
# Score the model on the held-out split; evaluate() yields the loss followed
# by the compiled accuracy metric
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

In [None]:
# Persist the model to an .h5 file in the current working directory
model.save(filepath="vgg16_ela_forgery_detection.h5")

In [None]:
import matplotlib.pyplot as plt

# One figure per metric: (title, y-axis label, train key, train legend,
# validation key, validation legend). Accuracy is drawn first, then loss.
curve_specs = (
    ('Model Accuracy', 'Accuracy',
     'accuracy', 'Train Accuracy',
     'val_accuracy', 'Validation Accuracy'),
    ('Model Loss', 'Loss',
     'loss', 'Train Loss',
     'val_loss', 'Validation Loss'),
)

for title, y_label, train_key, train_legend, val_key, val_legend in curve_specs:
    plt.plot(history.history[train_key], label=train_legend)
    plt.plot(history.history[val_key], label=val_legend)
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend(loc='best')
    plt.show()
