*This is a Google Collab notebook*

# **Downloading** data

In [1]:
import gdown
file_url = "https://drive.google.com/uc?id=1LPu1TAYQqCTyugZcmuxU6JH8LI5gezw6"
output_path = 'downloaded_file.zip'
gdown.download(file_url, output_path, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1LPu1TAYQqCTyugZcmuxU6JH8LI5gezw6
To: /content/downloaded_file.zip
100%|██████████| 20.1G/20.1G [03:57<00:00, 84.6MB/s]


'downloaded_file.zip'

In [2]:
file_url = "https://drive.google.com/uc?id=1MWo9i1TCYMYxd3DX7RK2ttVnmwuJPDET"
output_path = 'dms.zip'
gdown.download(file_url, output_path, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1MWo9i1TCYMYxd3DX7RK2ttVnmwuJPDET
To: /content/dms.zip
100%|██████████| 918M/918M [00:10<00:00, 84.9MB/s]


'dms.zip'

# **Unzipping** the downloaded data

In [3]:
import zipfile
zip_file_path = '/content/dms.zip'
extracted_dir = '/content/diffusion'
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extracted_dir)


In [4]:
import zipfile
zip_file_path = '/content/downloaded_file.zip'
extracted_dir = '/content/gan'
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extracted_dir)

In [5]:
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
print("GPU Available:", tf.test.is_gpu_available())

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


TensorFlow version: 2.15.0
GPU Available: True


# Importing libraries

In [6]:
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
from PIL import Image
import cv2
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping

# **Level-1** of the architecture

Gathering data

In [8]:
fake_images=[]
path="/content/diffusion/diffusion_datasets/dalle/1_fake"
l=os.listdir(path)
for i in l:
    img=np.array(Image.open(os.path.join(path,i)))
    fake_images.append(img)

In [9]:
real_images=[]
path="/content/diffusion/diffusion_datasets/laion/0_real"
l=os.listdir(path)
for i in l:
    img=np.array(Image.open(os.path.join(path,i)))
    if(np.shape(img)==(256,256,3)):
        real_images.append(img)

In [10]:
data=[]
labels=[]  # 0-Real 1-Fake
data.extend(real_images)
data.extend(fake_images[:988])
l1=[0]*988
l2=[1]*988
labels.extend(l1)
labels.extend(l2)

In [11]:
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.2, random_state=42)

# Inserting data into training and validation folders

In [12]:
# Function to add data in folder according to its class
def copy_images_to_directory(images, labels, dest):
    i=0
    for image, label in zip(images, labels):
        i=i+1
        class_directory = os.path.join(dest, str(label))
        os.makedirs(class_directory,exist_ok=True)
        os.chdir(class_directory)
        output_file=str(i)+".jpg"
        cv2.imwrite(output_file,image)

In [13]:
train_directory = "/content/train_l1"
val_directory =  "/content/validation_l1"
os.makedirs(train_directory)
os.makedirs(val_directory)

copy_images_to_directory(X_train, y_train, train_directory)
copy_images_to_directory(X_val, y_val, val_directory)


# **Creating Structure of the model**

**1) Preprocessing of Data:**

- Gaussian Blur.
- Rescaling, rotation, flipping.
- Preparing Training and Validation Generator.

**2) Building Model:**

- Importing weights from ResNet50 model (Pre-Trained Model).
- **Fine Tuning:**
  - Freezing first 150 layers (Neural network won't train back first 150 layers).
  - Unfreezing all other layers (Weights get updated for all layers after the 150th layer).
- Used Early Stopping to stop the training process when no significant improvement in loss is observed and retaining the best weights in each step.
- Loss function: Binary Cross Entropy.

**3) Model Structure:**

- `x = base_model(inputs, training=False)`: Base_model is a pre-trained convolutional neural network (CNN) ResNet-50, used as a feature extractor.
- `x = GlobalAveragePooling2D()(x)`: After extracting features from the base model, a global average pooling layer is applied. Global Average Pooling 2D computes the average value of each feature map across the entire spatial dimensions. This reduces the spatial dimensions to 1x1, effectively summarizing the information in each feature map.
- `x = Dense(1024, activation='relu')(x)`: This is a fully connected (dense) layer with 1024 units and ReLU (Rectified Linear Unit) activation function. The output of the global average pooling is connected to this dense layer, introducing non-linearity and allowing the network to learn complex patterns.
- `x = tf.keras.layers.Dropout(0.2)(x)`: Dropout is a regularization technique that helps prevent overfitting. It randomly sets a fraction of input units to zero at each update during training, which helps prevent the network from relying too much on any specific set of neurons. In this case, 20% of the units are dropped out (set to zero).
- `outputs = Dense(1, activation='sigmoid')(x)`: The final layer is a dense layer with a single unit and a sigmoid activation function. The output is a probability between 0 and 1.
  - 0: Real Image
  - 1: Fake Image


**Hyperparameters:**

- Learning Rate: 0.00001
- Epochs: 40
- Momentum: 0.9
- Dropout Layers: 0.2
- Optimizer: SGD (Stochastic Gradient Descent)


In [46]:
def model_l1(train_dir,validation_dir):
    def apply_gaussian_blur(image):
        sigma = 1.0
        return cv2.GaussianBlur(image, (0, 0), sigma)
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=0.2,
        horizontal_flip=True,
        shear_range=0.2,
        fill_mode='nearest',
        preprocessing_function=apply_gaussian_blur
    )

    validation_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(256, 256),
        batch_size=30,
        class_mode='binary'
    )

    validation_generator = validation_datagen.flow_from_directory(
        validation_dir,
        target_size=(256, 256),
        batch_size=30,
        class_mode='binary'
    )


    inputs = tf.keras.Input(shape=(256, 256, 3))
    base_model = ResNet50(weights='imagenet', include_top=False, input_tensor=inputs)

    print("Number of layers in the base model: ", len(base_model.layers))

    fine_tune_at = 150

    for layer in base_model.layers[:fine_tune_at]:
        layer.trainable = False


    for layer in base_model.layers[fine_tune_at:]:
        layer.trainable = True


    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    early_stopping_callback = EarlyStopping(monitor = 'val_loss', patience = 15, mode = 'min', restore_best_weights = True)
    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.00001, momentum=0.9),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    history=model.fit(train_generator, epochs=40,
              validation_data=validation_generator,
              callbacks=[early_stopping_callback])
    return model,history

# **Training Phase**

In [48]:
t="/content/train_l1"
v="/content/validation_l1"
mod_l1,hist_l1=model_l1(t,v)

Found 1580 images belonging to 2 classes.
Found 396 images belonging to 2 classes.
Number of layers in the base model:  175
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


Saving the model

In [60]:
mod_l1.save("/content/Level1.h5")

  saving_api.save_model(


# Testing level-1

In [40]:
test_images=[]
test_label=[]
path="/content/diffusion/diffusion_datasets/glide_100_10/1_fake"
l=os.listdir(path)
for i in l:
    img=np.array(Image.open(os.path.join(path,i)))
    if len(np.shape(img))==3:
        test_images.append(img)
        test_label.append(1)

In [41]:
path="/content/diffusion/diffusion_datasets/glide_100_27/1_fake"
l=os.listdir(path)
for i in l:
    img=np.array(Image.open(os.path.join(path,i)))
    if len(np.shape(img))==3:
        test_images.append(img)
        test_label.append(1)

In [42]:
path="/content/diffusion/diffusion_datasets/glide_50_27/1_fake"
l=os.listdir(path)
for i in l:
    img=np.array(Image.open(os.path.join(path,i)))
    if len(np.shape(img))==3:
        test_images.append(img)
        test_label.append(1)

In [43]:
test_directory = "/content/test3"
os.makedirs(test_directory,exist_ok=True)
copy_images_to_directory(test_images,test_label, test_directory)

In [58]:
test_dir = "/content/test3"
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(256, 256),
    batch_size=30,
    class_mode='binary',
    shuffle=False
)

Found 3000 images belonging to 1 classes.


In [59]:
results = mod_l1.evaluate(test_generator)
print("Test Loss:", results[0])
print("Test Accuracy:", results[1])

Test Loss: 0.608487606048584
Test Accuracy: 0.8679999709129333


# **Level-2** of the architecture

Preparing data

In [61]:
gan_images=[]
# path="/content/gan/biggan/1_fake"
path="/content/gan/whichfaceisreal/1_fake"
l=os.listdir(path)
for i in l:
    img=np.array(Image.open(os.path.join(path,i)))
    if len(np.shape(img))==3:
      new_size = (224, 224)
      img = cv2.resize(img, new_size)
      gan_images.append(img)

In [62]:
np.shape(gan_images)

(1000, 224, 224, 3)

In [63]:
dm_images=[]
# path="/content/diffusion/diffusion_datasets/dalle/1_fake"
path="/content/diffusion/diffusion_datasets/glide_100_10/1_fake"
l=os.listdir(path)
for i in l:
    img=np.array(Image.open(os.path.join(path,i)))
    if len(np.shape(img))==3:
      dm_images.append(img)

In [64]:
np.shape(dm_images)

(1000, 256, 256, 3)

In [65]:
data=[]
labels=[]  # 0-Dm 1-Gan
data.extend(dm_images)
data.extend(gan_images)
l1=[0]*len(dm_images)
l2=[1]*len(gan_images)
labels.extend(l1)
labels.extend(l2)

In [66]:
len(data)

2000

In [67]:
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.2, random_state=42)

In [68]:
train_d = "/content/train_l2"
val_d =  "/content/validation_l2"
os.makedirs(train_d)
os.makedirs(val_d)

copy_images_to_directory(X_train, y_train, train_d)
copy_images_to_directory(X_val, y_val, val_d)


# **Creating Structure of Model**

**1) Preprocessing of Data:**

- Gaussian Blur.
- Rescaling, rotation, flipping.
- Preparing Training and Validation Generator.

**2) Building Model:**

- Importing weights from ResNet50 model (Pre-Trained Model).
- **Fine Tuning:**
  - Freezing first 100 layers (Neural network won't train back first 100 layers).
  - Unfreezing all other layers (Weights get updated for all layers after the 100th layer).
- Used Early Stopping to stop the training process when no significant improvement in loss is observed and retaining the best weights in each step.
- Loss function: Binary Cross Entropy.

**3) Model Structure:**

- `x = base_model(inputs, training=False)`: Base_model is a pre-trained convolutional neural network (CNN) ResNet-50, used as a feature extractor.
- `x = GlobalAveragePooling2D()(x)`: After extracting features from the base model, a global average pooling layer is applied. Global Average Pooling 2D computes the average value of each feature map across the entire spatial dimensions. This reduces the spatial dimensions to 1x1, effectively summarizing the information in each feature map.
- `x = Dense(1024, activation='relu')(x)`: This is a fully connected (dense) layer with 1024 units and ReLU (Rectified Linear Unit) activation function. The output of the global average pooling is connected to this dense layer, introducing non-linearity and allowing the network to learn complex patterns.
- `x = tf.keras.layers.Dropout(0.2)(x)`: Dropout is a regularization technique that helps prevent overfitting. It randomly sets a fraction of input units to zero at each update during training, which helps prevent the network from relying too much on any specific set of neurons. In this case, 20% of the units are dropped out (set to zero).
- `outputs = Dense(1, activation='sigmoid')(x)`: The final layer is a dense layer with a single unit and a sigmoid activation function. The output is a probability between 0 and 1.
  - 0: Real Image
  - 1: Fake Image


**Learning Rate Schedule: Exponential Decay**

- **Initial Learning Rate:** \(1 \times 10^{-4}\) (0.0001)
  - The starting learning rate at the beginning of training.

- **Decay Steps:** 10,000
  - After every 10,000 steps, the learning rate will be updated.

- **Decay Rate:** 0.9
  - The rate at which the learning rate will decay, multiplied at each decay step.

By employing this exponentially decaying learning rate schedule, the model takes larger steps initially and gradually decreases the learning rate as training progresses, aiding in faster convergence and fine-tuning

**Hyperparameters:**

- Epochs: 40
- Momentum: 0.9
- Dropout Layers: 0.2
- Optimizer: SGD (Stochastic Gradient Descent)


In [69]:
def model_l2(train_dir,validation_dir,tune,epoc):
    def apply_gaussian_blur(image):
        sigma = 1.0
        return cv2.GaussianBlur(image, (0, 0), sigma)

    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=0.2,
        horizontal_flip=True,
        shear_range=0.2,
        fill_mode='nearest',
        preprocessing_function=apply_gaussian_blur
    )

    validation_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(256, 256),
        batch_size=30,
        class_mode='binary'
    )

    validation_generator = validation_datagen.flow_from_directory(
        validation_dir,
        target_size=(256, 256),
        batch_size=30,
        class_mode='binary'
    )

    inputs = tf.keras.Input(shape=(256, 256, 3))
    base_model = ResNet50(weights='imagenet', include_top=False, input_tensor=inputs)

    print("Number of layers in the base model: ", len(base_model.layers))

    fine_tune_at = tune

    for layer in base_model.layers[:fine_tune_at]:
        layer.trainable = False


    for layer in base_model.layers[fine_tune_at:]:
        layer.trainable = True

    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-4,
    decay_steps=10000,
    decay_rate=0.9
    )
    early_stopping_callback = EarlyStopping(monitor = 'val_loss', patience = 15, mode = 'min', restore_best_weights = True)

    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9),
              loss='binary_crossentropy',
              metrics=['accuracy'])

    history=model.fit(train_generator, epochs=epoc,
              validation_data=validation_generator,
              callbacks = [early_stopping_callback])

    return model,history

# **Training** phase

In [70]:
t="/content/train_l2"
v="/content/validation_l2"
mod_l2,hist_l2=model_l2(t,v,100,40)

Found 1600 images belonging to 2 classes.
Found 400 images belonging to 2 classes.
Number of layers in the base model:  175
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


Saving the model

In [71]:
mod_l2.save("/content/Level2_updated.h5")

  saving_api.save_model(


# Testing Level-2

In [72]:
test_images=[]
test_label=[]
path="/content/diffusion/diffusion_datasets/guided/1_fake"
l=os.listdir(path)
for i in l:
    img=np.array(Image.open(os.path.join(path,i)))
    if len(np.shape(img))==3:
        test_images.append(img)
        test_label.append(0)

In [73]:
path="/content/gan/stargan/1_fake"
l=os.listdir(path)
for i in l:
    img=np.array(Image.open(os.path.join(path,i)))
    if len(np.shape(img))==3:
        test_images.append(img)
        test_label.append(1)

In [74]:
test_directory = "/content/test5"
os.makedirs(test_directory,exist_ok=True)
copy_images_to_directory(test_images,test_label, test_directory)

In [75]:
test_dir = "/content/test5"
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(256, 256),
    batch_size=30,
    class_mode='binary',
    shuffle=False
)

Found 2999 images belonging to 2 classes.


In [76]:
results = mod_l2.evaluate(test_generator)
print("Test Loss:", results[0])
print("Test Accuracy:", results[1])

Test Loss: 0.2682065963745117
Test Accuracy: 0.9299766421318054
