Model trained on following classification of *image forgery*(**CASIA 2.0 dataset**)


1.   Splicing
2.   Copy move
3.   Removal






# **Packages**



In [1]:
!pip install torch torchvision joblib scikit-image opencv-python cycler graphviz imbalanced-learn imblearn kiwisolver matplotlib numpy pandas pyparsing python-dateutil pytz scikit-learn scipy seaborn six

Collecting imblearn
  Downloading imblearn-0.0-py2.py3-none-any.whl.metadata (355 bytes)
Downloading imblearn-0.0-py2.py3-none-any.whl (1.9 kB)
Installing collected packages: imblearn
Successfully installed imblearn-0.0


In [2]:
!pip install opencv-python-headless opencv-contrib-python



In [3]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Looking in indexes: https://download.pytorch.org/whl/cu118


In [4]:
!apt-get update
!apt-get install -y libgl1-mesa-glx

0% [Working]            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [999 kB]
Hit:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Ign:9 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Hit:11 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Get:12 https://r2u.stat.illinois.edu/ubuntu jammy Release [5,713 B]
Get:13 http://security.ubuntu.com/ubuntu jammy-security

In [5]:
from typing import Dict
from torch import Tensor, stack
import torch
import math
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch.optim as optim
import torch.utils.data
from torch.optim.lr_scheduler import StepLR
from torch.autograd import Variable
import time
import pandas as pd
import torchvision.transforms as transforms
from torchvision import datasets
import glob
import cv2
from skimage.util import view_as_windows
import os
from skimage import io

from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from matplotlib.colors import ListedColormap
import seaborn as sn

# **Total Model**

**Architecture**

In [6]:
import torch.nn.functional as f
import torch.nn as nn



class CNN(nn.Module):
    """
    The convolutional neural network (CNN) class
    """
    def __init__(self):
        """
        Initialization of all the layers in the network.
        """
        super(CNN, self).__init__()

        self.conv0 = nn.Conv2d(3, 3, kernel_size=5, stride=1, padding=0)
        nn.init.xavier_uniform_(self.conv0.weight)

        self.conv1 = nn.Conv2d(3, 30, kernel_size=5, stride=2, padding=0)
        nn.init.xavier_uniform_(self.conv1.weight)

        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.conv2 = nn.Conv2d(30, 16, kernel_size=3, stride=1, padding=0)
        nn.init.xavier_uniform_(self.conv2.weight)

        self.conv3 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0)
        nn.init.xavier_uniform_(self.conv3.weight)

        self.conv4 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0)
        nn.init.xavier_uniform_(self.conv4.weight)

        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.conv5 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0)
        nn.init.xavier_uniform_(self.conv5.weight)

        self.conv6 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0)
        nn.init.xavier_uniform_(self.conv6.weight)

        self.conv7 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0)
        nn.init.xavier_uniform_(self.conv7.weight)

        self.conv8 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0)
        nn.init.xavier_uniform_(self.conv8.weight)

        self.fc = nn.Linear(16 * 5 * 5, 2)

        self.drop1 = nn.Dropout(p=0.5)  # used only for the NC dataset

    def forward(self, x):
        """
        The forward step of the network that consumes an image patch and either uses a fully connected layer in the
        training phase with a softmax or just returns the feature map after the final convolutional layer.
        :returns: Either the output of the softmax during training or the 400-D feature representation at testing
        """
        x = f.relu(self.conv0(x))
        x = f.relu(self.conv1(x))
        lrn = nn.LocalResponseNorm(3)
        x = lrn(x)
        x = self.pool1(x)
        x = f.relu(self.conv2(x))
        x = f.relu(self.conv3(x))
        x = f.relu(self.conv4(x))
        x = f.relu(self.conv5(x))
        x = lrn(x)
        x = self.pool2(x)
        x = f.relu(self.conv6(x))
        x = f.relu(self.conv7(x))
        x = f.relu(self.conv8(x))
        x = x.view(-1, 16 * 5 * 5)

        # In the training phase we also need the fully connected layer with softmax
        if self.training:
            # x = self.drop1(x) # used only for the NC dataset
            x = f.relu(self.fc(x))
            x = f.softmax(x, dim=1)

        return x

Function to extract patch of given image

In [7]:
import glob
import cv2
def get_patches(image_mat, stride):
    """
    Extract patches rom an image
    :param image_mat: The image as a matrix
    :param stride: The stride of the patch extraction process
    :returns: The patches
    """
    window_shape = (128, 128, 3)
    windows = view_as_windows(image_mat, window_shape, step=stride)
    patches = []
    for m in range(windows.shape[0]):
        for n in range(windows.shape[1]):
            patches += [windows[m][n][0]]
    print(patches)
    return patches


def get_images_and_labels(tampered_path, authentic_path):
    """
    Get the images and their corresponding labels
    :param tampered_path: The path containing the tampered images
    :param authentic_path: The path containing the authentic images
    :returns: Dictionary with images and labels
    """
    tampered_dir = tampered_path
    authentic_dir = authentic_path
    images = {}
    for im in glob.glob(authentic_dir):
        images[im] = {}
        images[im]['mat'] = cv2.imread(im)
        images[im]['label'] = 0
    for im in glob.glob(tampered_dir):
        images[im] = {}
        images[im]['mat'] = cv2.imread(im)
        images[im]['label'] = 1
    return images

In [8]:
def get_yi(model, patch):
    """
    Returns the patch's feature representation
    :param model: The pre-trained CNN object
    :param patch: The patch
    :returns: The 400-D feature representation of the patch
    """
    with torch.no_grad():
        model.eval()
        return model(patch)


class WrongOperationOption(Exception):
    pass


def get_y_hat(y: np.ndarray, operation: str):
    """
    Fuses the image's patches feature representation
    :param y: The network object
    :param operation: Either max or mean for the pooling operation
    :returns: The final 400-D feature representation of the entire image
    """
    if operation == "max":
        return np.array(y).max(axis=0, initial=-math.inf)
    elif operation == "mean":
        return np.array(y).mean(axis=0)
    else:
        raise WrongOperationOption("The operation can be either mean or max")

In [9]:
def create_feature_vectors(model, tampered_path, authentic_path, output_name):
    """
    Writes the feature vectors of the CASIA2 dataset.
    :param model: The pre-trained CNN object
    :param tampered_path: The path of the tampered images of the CASIA2 dataset
    :param authentic_path: The path of the authentic images of the CASIA2 dataset
    :param output_name: The name of the output CSV that contains the feature vectors
    """
    df = pd.DataFrame()
    images = get_images_and_labels(tampered_path, authentic_path)
    c = 1
    for image_name in images.keys():  # images
        print("Image: ", c)

        image = images[image_name]['mat']
        label = images[image_name]['label']

        df = pd.concat([df, pd.concat([pd.DataFrame([image_name.split(os.sep)[-1], str(label)]),
                                       pd.DataFrame(get_patch_yi(model, image))])], axis=1, sort=False)
        c += 1

    final_df = df.T
    final_df.columns = get_df_column_names()
    final_df.to_csv(output_name, index=False)
    # save the feature vector to csv
    # csv type [im_name][label][f1,f2,...,fK]


def get_patch_yi(model, image):
    """
    Calculates the feature representation of an image.
    :param model: The pre-trained CNN object
    :param image: The image
    :returns: The image's feature representation
    """
    transform = transforms.Compose([transforms.ToTensor()])

    y = []  # init Y

    patches = get_patches(image, stride=1024)

    for patch in patches:  # for every patch
        img_tensor = transform(patch)
        img_tensor.unsqueeze_(0)
        img_variable = Variable(img_tensor.double())
        yi = get_yi(model=model, patch=img_variable)
        y.append(yi)  # append Yi to Y

    y = np.vstack(tuple(y))

    y_hat = get_y_hat(y=y, operation="mean")  # create Y_hat with mean or max

    return y_hat


def get_df_column_names():
    """
    Rename the feature csv column names as [im_names][labels][f1,f2,...,fK].
    :returns: The column names
    """
    names = ["image_names", "labels"]
    for i in range(400):
        names.append("f" + str(i + 1))
    return names

In [11]:
import joblib
import torch
from cv2 import imread
import numpy as np
import warnings

# Suppress a specific warning by its type
warnings.filterwarnings('ignore', category=UserWarning)

# Example that raises a UserWarning



def get_feature_vector(image_path: str, model):
    feature_vector = np.empty((1, 400))
    feature_vector[0, :] = get_patch_yi(model, imread(image_path))
    return feature_vector


# Load the pretrained CNN with the CASIA2 dataset
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    with torch.no_grad():
      our_cnn = CNN()
      our_cnn=torch.load('drive/MyDrive/summer/Final/model/Cnn_50.pt',map_location=lambda storage, loc: storage)
      our_cnn.eval()
      our_cnn = our_cnn.double()

# Load the pretrained svm model
svm_model = joblib.load('drive/MyDrive/summer/Final/model/svm_model.joblib')

print("Labels are 0 for non-tampered and 1 for tampered")

# Probe the SVM model with a non-tampered image
non_tampered_image_path = 'Au_ani_30380.jpg'
non_tampered_image_feature_vector = get_feature_vector(non_tampered_image_path, our_cnn)
print("Non tampered prediction:", svm_model.predict(non_tampered_image_feature_vector))

# Probe the SVM model with a tampered image
tampered_image_path = 'Tp_D_NNN_M_N_art00099_cha00050_11760.jpg'
tampered_image_feature_vector = get_feature_vector(tampered_image_path, our_cnn)
print("Tampered prediction:", svm_model.predict(tampered_image_feature_vector))

Labels are 0 for non-tampered and 1 for tampered
[array([[[ 54, 215, 200],
        [ 56, 217, 202],
        [ 56, 218, 206],
        ...,
        [ 43,  99,  78],
        [ 44, 101,  80],
        [ 43, 100,  79]],

       [[ 55, 216, 201],
        [ 54, 217, 202],
        [ 56, 218, 206],
        ...,
        [ 45, 102,  81],
        [ 48, 105,  84],
        [ 50, 107,  86]],

       [[ 53, 215, 203],
        [ 52, 217, 204],
        [ 54, 218, 207],
        ...,
        [ 46, 103,  82],
        [ 51, 108,  87],
        [ 52, 111,  90]],

       ...,

       [[ 43, 101,  96],
        [ 44, 102,  97],
        [ 47, 105, 100],
        ...,
        [219,  93,  58],
        [231, 104,  65],
        [233, 102,  63]],

       [[ 45, 104, 100],
        [ 46, 106, 100],
        [ 47, 107, 101],
        ...,
        [230,  98,  61],
        [234, 103,  60],
        [239, 102,  63]],

       [[ 47, 105, 104],
        [ 48, 106, 105],
        [ 49, 108, 104],
        ...,
        [241, 104,  66],