# Fake Image Detection 

## Importing Libraries

In [23]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
#to generate same sequence of random numbers in whole file
np.random.seed(2)
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Dropout
from tensorflow.keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
#It is used during the training of a neural network to monitor a 
# specified metric (typically a validation metric) and stop training
#  early if certain criteria are met. The purpose of using EarlyStopping
#  is to prevent overfitting and to save time and resources when further
#  training is unlikely to improve the model's performance.
from keras.callbacks import EarlyStopping


PIL provides extensive capabilities for opening, manipulating, and saving many different image file formats.

to open image

ImageEnhance =>  can manipulate pixel colors in images, apply color mapping, and adjust brightness, contrast, and other color-related properties.

ImageChops => It provides various arithmetic and logical operations that you can perform on images.

In [24]:
from PIL import Image
import os
from pylab import *
import re
from PIL import Image, ImageChops, ImageEnhance

## Making Dataset

## Convert to Error Level Analysis

In summary, this code takes an input image, compresses it, calculates the difference between the original and compressed versions, scales the difference image, and enhances its brightness. The resulting ELA image highlights areas where digital manipulation may have occurred by making the manipulated regions appear as brighter or darker areas compared to the rest of the image.

It generates ela_Image by enhancing brightness on the scale of pixel difference(difference between original and converted RGB image)

In [25]:
def convert_to_ela_image(path, quality):
    # creating a temporary filename for an intermediate image
    temp_filename = 'temp_file_name.jpg'
    # filename for ela image that will be generated
    ela_filename = 'temp_ela.png'
    
    # open image and convert to RGB
    image = Image.open(path).convert('RGB')

    # save image as jpg and keep quality as before
    image.save(temp_filename, 'JPEG', quality = quality)
    temp_image = Image.open(temp_filename)
    
    # calculate pixel difference between original image and RGB (new image) 
    # which will represents areas of image that have been altered.
    ela_image = ImageChops.difference(image, temp_image)
    
    # calculating minimum and maximum pixel values in the images
    extrema = ela_image.getextrema()

    # finds the maximum difference value among the extrema. This value is used to scale the ELA image.
    max_diff = max([ex[1] for ex in extrema])

    # ensuring max_diff is not zero to avoid division by zero.
    if max_diff == 0:
        max_diff = 1

    # calculates a scaling factor based on the maximum difference value. This factor
    # is used to stretch the ELA image's pixel values across the full 0-255 range.
    scale = 255.0 / max_diff
    
    # enhances the brightness of the ELA image by applying the previously calculated 
    # scaling factor for making the manipulated regions stand out more distinctly.
    ela_image = ImageEnhance.Brightness(ela_image).enhance(scale)
    
    return ela_image


## Data preparation

### Read dataset and conversion to ELA

In [26]:
image_size = (128, 128)
# image_size = (224, 224)
# image_size = (139, 139)


In [27]:
from tensorflow.keras.applications import InceptionV3

this function converts the image into ela_image then resize it and flattens it to store in the 1D array as CNN requires data to be in 1D array 

In [28]:
def prepare_image(image_path):
    return np.array(convert_to_ela_image(image_path, 90).resize(image_size)).flatten() / 255.0
    

In [29]:
'''def prepare_image(image_path, quality):
    ela_image = convert_to_ela_image(image_path, quality)
    ela_image = np.array(ela_image.resize(image_size)).flatten() / 255.0
    return ela_image
'''


'def prepare_image(image_path, quality):\n    ela_image = convert_to_ela_image(image_path, quality)\n    ela_image = np.array(ela_image.resize(image_size)).flatten() / 255.0\n    return ela_image\n'

In [30]:
X = []
Y = []

this function processes all the files present in Au and convert them:

first into ela_image and after resizing it, it flattens the layers of image and converts in 1D array

X stores all the flattened images and Y store the count of images

In [31]:
import random
#path = 'C:/Users/swapn/Downloads/mesonet/dataset/CASIA2/Au'
path= 'D:/major_project/dataset/real'
for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        # if filename.endswith('jpg') or filename.endswith('png') or filename.endswith('tif'):
            full_path = os.path.join(dirname, filename)
            X.append(prepare_image(full_path))
            Y.append(1)
            if len(Y) % 500 == 0:
                print(f'Processing {len(Y)} images')

random.shuffle(X)
# X = X[:2100]
# Y = Y[:2100]
print(len(X), len(Y))

Processing 500 images
Processing 1000 images
Processing 1500 images
Processing 2000 images
Processing 2500 images
Processing 3000 images
Processing 3500 images
Processing 4000 images
Processing 4500 images
Processing 5000 images
5123 5123


this function processes all the files present in Tp and convert them:

first into ela_image and after resizing it, it flattens the layers of image and converts in 1D array

X stores all the flattened images and Y store the count of images

now X stores all the images(Au + Tp)

In [32]:
path = 'D:/major_project/dataset/fake'
#path = '/content/drive/MyDrive/Colab Notebooks/Image_Detector/CASIA2/Fake'
for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        # if filename.endswith('jpg') or filename.endswith('png'):
            full_path = os.path.join(dirname, filename)
            X.append(prepare_image(full_path))
            Y.append(0)
            if len(Y) % 500 == 0:
                print(f'Processing {len(Y)} images')

print(len(X), len(Y))

Processing 5500 images
Processing 6000 images
Processing 6500 images
Processing 7000 images
Processing 7500 images
Processing 8000 images
Processing 8500 images
Processing 9000 images
Processing 9500 images
Processing 10000 images
10246 10246


In [33]:
# converting X into array 
X = np.array(X)
#used when you have a target variable (labels or classes) that is represented as integers and you want to convert it into a binary matrix format suitable for training machine learning models, especially neural networks.
# In Y there are two classes 1 for real and 0 for fake so to_categorical() converts this array into 2D array of labelled classes.
# this Y will look like [[1,0],[0,1]] for 0 and 1 label.
Y = to_categorical(Y, 2)
# # Reshape image data for a convolutional neural network
#  # Batch size, height, width, channels
X = X.reshape(-1, 128, 128, 3)
# X = X.reshape(-1, 139, 139, 3)

# InceptionV3

In [34]:
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications.inception_v3 import preprocess_input

In [35]:
from tensorflow.keras.applications import Xception

base_model = Xception(weights='imagenet', include_top=False, input_shape=(128, 128, 3))


In [36]:

# Create a sequential model and add the VGG16 base model
model = Sequential()
model.add(base_model)

# Flatten the output from the base model
model.add(Flatten())

# Add a dense layer with ReLU activation
model.add(Dense(256, activation='relu'))

# Add a dropout layer to prevent overfitting
model.add(Dropout(0.5))

# Add the output layer with softmax activation (2 classes: fake and real)
model.add(Dense(2, activation='softmax'))


# Splitting

Splitting the dataset into features (X) and target labels (Y)

Splitting the data into a training set (80%) and a testing set (20%)

In [37]:
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size = 0.3, random_state=5)
# again converting 2D array to 1D array
X = X.reshape(-1,1,1,1)


print(len(X_train), len(Y_train))
print(len(X_val), len(Y_val))

7172 7172
3074 3074


In [38]:
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])



In [39]:

batch_size = 32
epochs = 10

In [40]:
early_stopping = EarlyStopping(monitor = 'val_accuracy',
                              min_delta = 0,
                              patience = 2,
                              verbose = 0,
                              mode = 'auto')

In [41]:

history = model.fit(X_train, Y_train, batch_size=batch_size, epochs=10, validation_data=(X_val, Y_val), callbacks=[early_stopping])


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


In [42]:

history = model.fit(X_train, Y_train, batch_size=batch_size, epochs=10, validation_data=(X_val, Y_val), callbacks=[early_stopping])


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


In [43]:

model.save('model_Xception_fake_detection.h5')

  saving_api.save_model(


In [44]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, cohen_kappa_score
class_names = ['fake', 'real']
# After training the model, you can use X_val and Y_val for evaluation
Y_pred_val = model.predict(X_val)

# Convert predictions classes to one hot vectors
Y_pred_classes_val = np.argmax(Y_pred_val, axis=1)
# Convert validation observations to one hot vectors
Y_true_val = np.argmax(Y_val, axis=1)

# Calculate overall accuracy on validation set
overall_accuracy_val = accuracy_score(Y_true_val, Y_pred_classes_val)
print(f'Overall Accuracy on Validation Set: {overall_accuracy_val * 100:.6f}%')

# Calculate other metrics using classification_report
report_val = classification_report(Y_true_val, Y_pred_classes_val, target_names=class_names)
print('Classification Report on Validation Set:')
print(report_val)

# Calculate confusion matrix on validation set
conf_matrix_val = confusion_matrix(Y_true_val, Y_pred_classes_val)
print('Confusion Matrix on Validation Set:')
print(conf_matrix_val)

true_positive = conf_matrix_val[1, 1]
false_positive = conf_matrix_val[0, 1]
false_negative = conf_matrix_val[1, 0]
true_negative = conf_matrix_val[0, 0]

# Calculate False Positive Rate (FPR) and False Negative Rate (FNR)
fpr = false_positive / (false_positive + true_negative)
fnr = false_negative / (false_negative + true_positive)

print(f'False Positive Rate (FPR): {fpr * 100:.6f}%')
print(f'False Negative Rate (FNR): {fnr * 100:.6f}%')

# Calculate Cohen's Kappa on validation set
cohen_kappa_val = cohen_kappa_score(Y_true_val, Y_pred_classes_val)
print(f'Cohen\'s Kappa on Validation Set: {cohen_kappa_val:.6f}')

Overall Accuracy on Validation Set: 88.484060%
Classification Report on Validation Set:
              precision    recall  f1-score   support

        fake       0.90      0.87      0.88      1543
        real       0.88      0.90      0.89      1531

    accuracy                           0.88      3074
   macro avg       0.89      0.88      0.88      3074
weighted avg       0.89      0.88      0.88      3074

Confusion Matrix on Validation Set:
[[1347  196]
 [ 158 1373]]
False Positive Rate (FPR): 12.702528%
False Negative Rate (FNR): 10.320052%
Cohen's Kappa on Validation Set: 0.769700
