In [None]:
# Move into the project folder so that the relative './data/...' paths resolve
import os

project_dir = '/content/drive/MyDrive/Colab Notebooks/Median-Filtering-Forensics-Based-on-Convlutional-Neural-Network'
os.chdir(project_dir)

In [None]:
import cv2
import os
import numpy as np
import random
import shutil
import glob
from PIL import Image
from sklearn.utils import shuffle
from utils import *
import tensorflow as tf
import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model

# Folder holding the UCID images; the '.tif' originals are read from here
dataset_path = './data/UCID/ucid.v2'
# Folder holding the JPEG images; every file listed here becomes a negative sample
jpeg_dataset_path = './data/UCID/JPEG70'

In [None]:
# Testing model on whole JPEG70 and MF5-JPEG70 data
#
# Builds the evaluation set under ./data/temp, one sub-folder per class. Every
# saved image is a median filtering residual (MFR): the 5x5 median-blurred copy
# of the cropped image minus the cropped image itself.
# NOTE(review): the arrays are presumably uint8 (cv2.imread with flag 0), so the
# subtraction wraps modulo 256 instead of going negative. It is left unchanged
# because the saved model was presumably trained on residuals computed the same
# way - confirm against the training code.
if not os.path.exists('./data/temp'):
    # Creating directories
    os.makedirs('./data/temp/JPEG70')
    os.makedirs('./data/temp/MF5-JPEG70')

    for img in os.listdir(jpeg_dataset_path):
        ## Creating negative data
        # Reading the image (flag 0 -> grayscale)
        jpeg_file = os.path.join(jpeg_dataset_path, img)
        I = cv2.imread(jpeg_file, 0)
        if I is None:
            # cv2.imread signals an unreadable file by returning None
            raise FileNotFoundError(f'Could not read image: {jpeg_file}')
        I_crop = center_crop(I)

        # MFR-filter layer
        I_MFR = cv2.medianBlur(I_crop, 5) - I_crop

        # Saving the image
        cv2.imwrite(f'./data/temp/JPEG70/{img}', I_MFR)

        ## Creating positive data
        # splitext strips only the extension, so names containing dots survive
        stem = os.path.splitext(img)[0]
        img_name = stem + '.tif'
        # Reading the image (flag 0 -> grayscale)
        tif_file = os.path.join(dataset_path, img_name)
        I = cv2.imread(tif_file, 0)
        if I is None:
            raise FileNotFoundError(f'Could not read image: {tif_file}')
        I_crop = center_crop(I)

        # Creating the median blurred image
        I_blurred = cv2.medianBlur(I_crop, 5)

        # MFR-filter layer
        I_MFR = cv2.medianBlur(I_blurred, 5) - I_blurred

        # Saving the residual with JPEG compression (quality 70). The array is
        # encoded directly; an intermediate lossless PNG file would carry the
        # same pixels, so no temporary file is needed.
        Image.fromarray(I_MFR).save(f"./data/temp/MF5-JPEG70/{stem + '.jpeg'}", 'JPEG', quality=70)

# Checked on every run, not only right after generation, so that a folder left
# behind by an interrupted run is detected instead of being evaluated silently.
assert len(os.listdir('./data/temp/JPEG70')) == 1338, 'incomplete JPEG70 evaluation set'
assert len(os.listdir('./data/temp/MF5-JPEG70')) == 1338, 'incomplete MF5-JPEG70 evaluation set'

In [None]:
data_path = './data/temp'

# Pixel values are rescaled by 1/255; images are loaded as 64x64 grayscale in
# batches of 256, and the class order is fixed explicitly by `classes`.
rescaling_generator = ImageDataGenerator(rescale=1./255)
data_batches = rescaling_generator.flow_from_directory(
    directory=data_path,
    target_size=(64, 64),
    color_mode='grayscale',
    batch_size=256,
    classes=['JPEG70', 'MF5-JPEG70'],
    shuffle=True,
)

Found 2676 images belonging to 2 classes.


In [None]:
# Load the saved classifier; compile=True compiles it after loading
model_file = './data/saved_models/combined_2_model.h5'
combined_2_model = load_model(model_file, compile=True)

In [None]:
# Evaluate over exactly one pass of the data. len(data_batches) is the number
# of batches per pass (the last one may be partial), whereas
# `samples // batch_size + 1` requests one batch too many whenever the sample
# count is an exact multiple of the batch size.
score, acc = combined_2_model.evaluate(data_batches, steps=len(data_batches), verbose=0)
print(f'Score is: {score}')
print(f'Accuracy is: {acc}')

Score is: 0.024361394345760345
Accuracy is: 0.994020938873291


In [None]:
# Removing temp files (guarded so that re-running the cell, or running it when
# the folder was never created, does nothing instead of raising)
if os.path.isdir('./data/temp/'):
    shutil.rmtree('./data/temp/')

# Median Filtering Forensics Based on Convolutional Neural Network
This GitHub project presents a CNN-based model for detecting median filtering in images based on the [study](https://ieeexplore.ieee.org/document/7113799) done by Jiansheng Chen, Xiangui Kang, Ye Liu, Z. Jane Wang. I used the [UCID Dataset](https://www.researchgate.net/publication/220979862_UCID_An_uncompressed_color_image_database) for training the model.
The study aims to detect tampering of images in which forgers have applied median filtering.

# Overview
This project follows the following steps
- Images from the UCID Dataset are used as a negative dataset ($8028$ images) and median filtered images of the same dataset are used as a positive dataset ($5352$ images).
- JPEG-compressed images of the same dataset are also included in the negative dataset ($8028$ images).
- JPEG-compressed versions of the median filtered images are included in the positive dataset ($5352$ images).
- Median filtered versions of the JPEG-compressed images are included in the positive dataset ($5352$ images).
- A total of $16056$ images per class is used to train the model.
- The MFR layer is applied to each image as the requirement for the model.
- Define and train the model for classification.
- Results are computed for the whole UCID Dataset and compared with the [classical approach](https://github.com/nagar-mayank/Forensic-Detection-of-Median-Filtering-in-Digital-Images-Cao_2010.git).
- CNN based approach is found to be $99.00\%$ accurate on the test dataset.

# Data Preparing
The UCID Dataset is processed in the following manner
- Images are converted to grayscale, cropped to size $64 \times 64$,
 and converted to lossless *PNG* format.
- From one image, six images of size $64\times64$ are extracted along the principal diagonal.
- The UCID dataset is considered as negative images (original uncompressed images in PNG format and jpeg compressed image) for the model.
- Positive images are created by applying $5\times5$ median filtering on the same dataset images.
- Thus, the dataset consisting of $32112$ images is prepared with 2 classes.
- MFR layer is applied on each image, where the image is subtracted from a $5\times5$ median filtered image of itself.

# Model Architecture
Since conventional CNN models with the raw image pixels as inputs
didn’t yield good performance, one additional layer, *the filter
layer*, is added to the conventional model.

Through this filter layer, ***the median filtering residual (MFR)*** of an image
is obtained. Then the output MFR is fed into the conventional
network.

The MFR is defined as follows: applying a $w\times w$ median
filtering window to an image $x(i,j)$ yields the output
image $y(i,j)$.

The MFR is:
$$d(i,j)=\operatorname{med}_w(x(i,j))-x(i,j)=y(i,j)-x(i,j)$$

# Results
For classification between original and median filtered images, the model achieves an accuracy of $99.43\%$ on the training dataset, $99.00\%$ on the validation dataset and $99.00\%$ on the test dataset.

For classification between JPEG and median filtered JPEG images, the model achieves an accuracy of $99.84\%$ on the training dataset, $99.55\%$ on the validation dataset and $99.35\%$ on the test dataset.

For classification between JPEG images and JPEG-compressed versions of $5\times 5$ median filtered images, the model achieves an accuracy of $99.40\%$ on the whole UCID dataset. In comparison, the [classical approach](https://github.com/nagar-mayank/Forensic-Detection-of-Median-Filtering-in-Digital-Images-Cao_2010.git) yields $72.24\%$.

# Conclusion
The CNN-based model outperforms the [classical approach](https://github.com/nagar-mayank/Forensic-Detection-of-Median-Filtering-in-Digital-Images-Cao_2010.git): it can detect median filtering in small and JPEG-compressed image blocks and is able to identify cut-and-paste forgeries well.