<a href="https://colab.research.google.com/github/skj092/Real-vs-Fake-image/blob/main/Fake_Image_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Downloading the Dataset

In [1]:
# %%bash
!mkdir ~/.kaggle

!chmod 600 /root/.kaggle/kaggle.json

!cp kaggle.json ~/.kaggle

!kaggle datasets download -d sophatvathana/casia-dataset

chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory
Downloading casia-dataset.zip to /content
100% 5.21G/5.22G [03:50<00:00, 24.2MB/s]
100% 5.22G/5.22G [03:50<00:00, 24.3MB/s]


In [2]:
!unzip -q /content/casia-dataset.zip

# Data Preprocessing 

In [16]:
import tensorflow as tf
import cv2, os
import numpy as np
from tensorflow.keras.applications import xception

def process(path, quality=95, scale=15):
    """Convert an image path into an error-level-analysis (ELA) input and a label.

    The image is resized to 224x224, re-compressed as JPEG at ``quality``, and
    the absolute per-pixel difference between the resized image and its
    re-compressed copy is amplified by ``scale``. The amplified difference is
    then passed through ``xception.preprocess_input``.

    Args:
        path: Scalar string tensor holding the image path (an eager tensor, as
            supplied by ``tf.py_function``). The name of the parent directory
            selects the label: 'Au' -> 0.0, 'Tp' -> 1.0. Any other directory
            name matches neither entry and therefore also yields 0.0.
        quality: JPEG quality used for the re-compression step.
        scale: Multiplier applied to the absolute difference.

    Returns:
        Tuple ``(img, label)``: ``img`` is the preprocessed 224x224x3 float
        array, ``label`` is a float32 scalar tensor.

    Raises:
        ValueError: If OpenCV cannot read the file or cannot JPEG-encode it.
    """
    labels = np.array(['Au', 'Tp'])
    parts = tf.strings.split(path, os.path.sep)
    one_hot = parts[-2] == labels
    # label encoding: index of the matching class name
    label = tf.argmax(one_hot)
    label = tf.cast(label, tf.float32)

    # read the image; cv2.imread signals failure by returning None
    filename = path.numpy().decode('utf-8')
    img = cv2.imread(filename)
    if img is None:
        raise ValueError(f'could not read image: {filename}')

    # resize to 224x224
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)
    # convert to RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # re-compress in memory as JPEG and decode it again
    ok, buffer = cv2.imencode('.jpg', img, [int(cv2.IMWRITE_JPEG_QUALITY), quality])
    if not ok:
        raise ValueError(f'could not JPEG-encode image: {filename}')
    compressed = cv2.imdecode(buffer, cv2.IMREAD_COLOR)

    # compute the absolute difference. absdiff returns uint8, so the scaling
    # is done in float32 and clipped: multiplying in uint8 would wrap around
    # (e.g. 20 * 15 = 300 -> 44) and scramble the strongest differences.
    diff = cv2.absdiff(img, compressed).astype(np.float32) * scale
    diff = np.clip(diff, 0, 255)

    img = xception.preprocess_input(diff)
    return img, label

# test
# path = "CASIA2/Au/Au_ani_00001.jpg"
# path = "/content/CASIA2/Tp/Tp_D_CND_M_N_ani00018_sec00096_00138.tif"
# img, label = process(path)
# print(img.shape, label)


In [17]:
import tensorflow as tf
# from utils import process

# path of the dataset
data_dir = '/content/CASIA2/'

# Collect the file paths in a deterministic order. list_files shuffles by
# default and reshuffles on every iteration, which would make take()/skip()
# below produce a different train/validation split on each epoch and leak
# validation images into training.
jpg_files = tf.data.Dataset.list_files(str(data_dir + '**/*.jpg'), shuffle=False)
tif_files = tf.data.Dataset.list_files(str(data_dir + '**/*.tif'), shuffle=False)

# Creating the dataset of paths
dataset = jpg_files.concatenate(tif_files)
n_data = int(dataset.cardinality().numpy())
print('number of images: ', n_data)

# Mix jpg and tif paths exactly once, with a fixed seed, so that the split
# below is both representative and identical on every pass over the data.
dataset = dataset.shuffle(max(n_data, 1), seed=42, reshuffle_each_iteration=False)


# processing the data
def preprocess(x):
    """Run `process` on one path and restore the static shapes py_function drops."""
    img, label = tf.py_function(process, [x], [tf.float32, tf.float32])
    img.set_shape((224, 224, 3))
    label.set_shape(())
    return img, label


# creating the dataset for training and validation
train_size = int(0.8 * n_data)
val_size = n_data - train_size

autotune = tf.data.experimental.AUTOTUNE

# Training: reshuffle the (cheap) path strings every epoch before decoding,
# instead of buffering 1000 decoded images after the map.
train_dataset = (
    dataset.take(train_size)
    .shuffle(max(train_size, 1))
    .map(preprocess, num_parallel_calls=autotune)
    .batch(32)
    .prefetch(autotune)
)

# Validation: order does not affect the metrics, so no shuffling is needed.
val_dataset = (
    dataset.skip(train_size)
    .map(preprocess, num_parallel_calls=autotune)
    .batch(32)
    .prefetch(autotune)
)

number of images:  12477


In [18]:
# Peek at a single training batch to confirm the image and label shapes.
for xb, yb in train_dataset.take(1):
  print(xb.shape, yb.shape)

(32, 224, 224, 3) (32,)


In [19]:
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Flatten
from tensorflow.keras import Sequential,Model

def create_model():
    """Build the Xception-based binary classifier.

    The network is an Xception backbone loaded with ImageNet weights (its
    classification head removed, 224x224x3 input), followed by global average
    pooling, a 1024-unit ReLU layer and a single sigmoid output unit.

    Returns:
        An uncompiled Keras ``Model`` mapping the backbone inputs to the
        sigmoid output.
    """
    backbone = Xception(
        include_top=False,
        weights='imagenet',
        input_shape=(224, 224, 3),
    )

    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(1024, activation='relu')(pooled)
    prediction = Dense(1, activation='sigmoid')(hidden)

    return Model(backbone.inputs, prediction)
  
# Build the classifier once; the following cells compile and train this instance.
model = create_model()
# model.summary()

In [20]:
# The model ends in a single sigmoid unit, so predictions must be thresholded
# (BinaryAccuracy, default threshold 0.5). CategoricalAccuracy compares argmax
# over the last axis, which is always 0 for a one-unit output and therefore
# reports a meaningless score.
accuracy = tf.keras.metrics.BinaryAccuracy(name='accuracy')

In [21]:
# compiling the model
# `metrics` is documented as a list; a bare metric object is rejected by
# newer Keras releases.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[accuracy])

In [22]:
# train_dataset and val_dataset are already batched (32 per batch), and
# batch_size must not be passed together with a tf.data.Dataset input.
history = model.fit(train_dataset, epochs=6, validation_data=val_dataset)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6

KeyboardInterrupt: ignored