In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
#to generate same sequence of random numbers in whole file
np.random.seed(2)
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Dropout
from tensorflow.keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
#It is used during the training of a neural network to monitor a 
# specified metric (typically a validation metric) and stop training
#  early if certain criteria are met. The purpose of using EarlyStopping
#  is to prevent overfitting and to save time and resources when further
#  training is unlikely to improve the model's performance.
from keras.callbacks import EarlyStopping


In [2]:
from PIL import Image
import os
from pylab import *
import re
from PIL import Image, ImageChops, ImageEnhance

In [3]:
def convert_to_ela_image(path, quality):
    # creating a temporary filename for an intermediate image
    temp_filename = 'temp_file_name.jpg'
    # filename for ela image that will be generated
    ela_filename = 'temp_ela.png'
    
    # open image and convert to RGB
    image = Image.open(path).convert('RGB')

    # save image as jpg and keep quality as before
    image.save(temp_filename, 'JPEG', quality = quality)
    temp_image = Image.open(temp_filename)
    
    # calculate pixel difference between original image and RGB (new image) 
    # which will represents areas of image that have been altered.
    ela_image = ImageChops.difference(image, temp_image)
    
    # calculating minimum and maximum pixel values in the images
    extrema = ela_image.getextrema()

    # finds the maximum difference value among the extrema. This value is used to scale the ELA image.
    max_diff = max([ex[1] for ex in extrema])

    # ensuring max_diff is not zero to avoid division by zero.
    if max_diff == 0:
        max_diff = 1

    # calculates a scaling factor based on the maximum difference value. This factor
    # is used to stretch the ELA image's pixel values across the full 0-255 range.
    scale = 255.0 / max_diff
    
    # enhances the brightness of the ELA image by applying the previously calculated 
    # scaling factor for making the manipulated regions stand out more distinctly.
    ela_image = ImageEnhance.Brightness(ela_image).enhance(scale)
    
    return ela_image

In [4]:
image_size = (128, 128)

In [5]:
def prepare_image(image_path):
    return np.array(convert_to_ela_image(image_path, 90).resize(image_size)).flatten() / 255.0

In [6]:
X = []
Y = []

In [7]:
import random
path= "D:/major_project/dataset/real"
for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        # if filename.endswith('jpg') or filename.endswith('png') or filename.endswith('tif'):
            full_path = os.path.join(dirname, filename)
            X.append(prepare_image(full_path))
            Y.append(1)
            if len(Y) % 500 == 0:
                print(f'Processing {len(Y)} images')

random.shuffle(X)
# X = X[:2100]
# Y = Y[:2100]
print(len(X), len(Y))

Processing 500 images
Processing 1000 images
Processing 1500 images
Processing 2000 images
Processing 2500 images
Processing 3000 images
Processing 3500 images
Processing 4000 images
Processing 4500 images
Processing 5000 images
5123 5123


In [8]:
path = "D:/major_project/dataset/fake"
#path = '/content/drive/MyDrive/Colab Notebooks/Image_Detector/CASIA2/Fake'
for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        # if filename.endswith('jpg') or filename.endswith('png'):
            full_path = os.path.join(dirname, filename)
            X.append(prepare_image(full_path))
            Y.append(0)
            if len(Y) % 500 == 0:
                print(f'Processing {len(Y)} images')

print(len(X), len(Y))

Processing 5500 images
Processing 6000 images
Processing 6500 images
Processing 7000 images
Processing 7500 images
Processing 8000 images
Processing 8500 images
Processing 9000 images
Processing 9500 images
Processing 10000 images
10246 10246


In [9]:
# converting X into array 
X = np.array(X)
#used when you have a target variable (labels or classes) that is represented as integers and you want to convert it into a binary matrix format suitable for training machine learning models, especially neural networks.
# In Y there are two classes 1 for real and 0 for fake so to_categorical() converts this array into 2D array of labelled classes.
# this Y will look like [[1,0],[0,1]] for 0 and 1 label.
Y = to_categorical(Y, 2)
# # Reshape image data for a convolutional neural network
#  # Batch size, height, width, channels
X = X.reshape(-1, 128, 128, 3)

In [10]:
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size = 0.2, random_state=5)
# again converting 2D array to 1D array
X = X.reshape(-1,1,1,1)
print(len(X_train), len(Y_train))
print(len(X_val), len(Y_val))

8196 8196
2050 2050


In [11]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import numpy as np
from PIL import Image, ImageChops, ImageEnhance
import os
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, cohen_kappa_score


In [13]:
from tensorflow.keras.models import load_model
import numpy as np

In [14]:
# Load trained models
vgg16_model = load_model('model_vgg16_1_fake_detection.h5')

In [15]:
xception_model = load_model('model_Xception_fake_detection.h5')

In [16]:
# Make predictions on validation set
vgg16_predictions = vgg16_model.predict(X_val)
xception_predictions = xception_model.predict(X_val)



In [17]:
# Combine predictions using averaging
ensemble_predictions = (vgg16_predictions + xception_predictions) / 2.0

In [18]:
# Convert predictions to classes
ensemble_classes = np.argmax(ensemble_predictions, axis=1)

In [21]:
# Convert one-hot encoded labels back to single-column array
Y_val_classes = np.argmax(Y_val, axis=1)

# Evaluate ensemble performance
ensemble_accuracy = accuracy_score(Y_val_classes, ensemble_classes)
print(f'Ensemble Accuracy on Validation Set: {ensemble_accuracy * 100:.6f}%')

Ensemble Accuracy on Validation Set: 92.097561%


In [25]:
print("Shape of Y_val:", Y_val.shape)
print("Shape of ensemble_classes:", ensemble_classes.shape)

Shape of Y_val: (2050, 2)
Shape of ensemble_classes: (2050,)


In [28]:
Y_val_classes = np.argmax(Y_val, axis=1)
class_names = ['fake', 'real']

In [29]:
ensemble_report = classification_report(Y_val_classes, ensemble_classes, target_names=class_names)
print('Classification Report for Ensemble Model:')
print(ensemble_report)

Classification Report for Ensemble Model:
              precision    recall  f1-score   support

        fake       0.95      0.89      0.92      1009
        real       0.90      0.95      0.92      1041

    accuracy                           0.92      2050
   macro avg       0.92      0.92      0.92      2050
weighted avg       0.92      0.92      0.92      2050



In [37]:
from keras.utils import to_categorical

# Convert ensemble_classes to one-hot encoded format
ensemble_classes_onehot = to_categorical(ensemble_classes, num_classes=2)

In [38]:
# Calculate confusion matrix for ensemble model
ensemble_conf_matrix = confusion_matrix(Y_val, ensemble_classes)
print('Confusion Matrix for Ensemble Model:')
print(ensemble_conf_matrix)

ValueError: Classification metrics can't handle a mix of multilabel-indicator and binary targets