In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


 <div class="markdown-google-sans">

## Checking if all images are 256X256 with 3 RGB channels
</div>




In [None]:
import cv2
import os

folder_path = '/content/drive/MyDrive/gan_datasets/biggan/0_real'
images_list = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]

print(len(images_list))
# for file_name in images_list:
#     file_path = os.path.join(folder_path, file_name)

#     if file_name.lower().endswith(('.png', '.jpg', '.jpeg')):

#         image = cv2.imread(file_path)
#         height, width, channels = image.shape
#         if(height!=256 or width!=256 or channels!=3):
#            print("Dimensions/channels mismatch for ",file_name)
#     else:
#         print(file_name," is not an image.")



2000


 <div class="markdown-google-sans">

## <h1>**Level 1 of the Multilevel hierarchichal architecture**</h1>
</div>





 <div class="markdown-google-sans">

## Creating separate training and validation directories with real(imagenet) and fake(dalle) subfolders under each
</div>





    lvl1_datasets
    ├── training
    │   ├── 0_real
    │   └── 1_fake
    └── validation
        ├── 0_real
        └── 1_fake


In [2]:
import os
from sklearn.model_selection import train_test_split
import shutil


real_dir="/content/drive/MyDrive/diffusion_datasets/laion/0_real"
fake_dir="/content/drive/MyDrive/gan_datasets/biggan/1_fake"

os.makedirs("/content/drive/MyDrive/lvl1_biggan_laion_datasets", exist_ok=True)


training_dir = '/content/drive/MyDrive/lvl1_biggan_laion_datasets/training_dataset'
validation_dir = '/content/drive/MyDrive/lvl1_biggan_laion_datasets/validation_dataset'
os.makedirs(training_dir, exist_ok=True)
os.makedirs(validation_dir, exist_ok=True)


real_images = [os.path.join(real_dir, filename) for filename in os.listdir(real_dir)][:1000]
fake_images = [os.path.join(fake_dir, filename) for filename in os.listdir(fake_dir)][:1000]


all_images = real_images + fake_images
train_images, val_images = train_test_split(all_images, test_size=0.2, random_state=42)


for image_path in train_images:
    class_name = os.path.basename(os.path.dirname(image_path))
    destination_dir = os.path.join(training_dir, class_name)
    os.makedirs(destination_dir, exist_ok=True)
    shutil.copy(image_path, destination_dir)

for image_path in val_images:
    class_name = os.path.basename(os.path.dirname(image_path))
    destination_dir = os.path.join(validation_dir, class_name)
    os.makedirs(destination_dir, exist_ok=True)
    shutil.copy(image_path, destination_dir)

 <div class="markdown-google-sans">

## Creating real and fake subfolders for testing
</div>




    testing_datasets
      ├── 0_real
      └── 1_fake
     

In [3]:
import os
from sklearn.model_selection import train_test_split
import shutil


real_dir="/content/drive/MyDrive/gan_datasets/crn_subset/0_real"
fake_dir="/content/drive/MyDrive/gan_datasets/crn_subset/1_fake"
test_dir = '/content/drive/MyDrive/testing_lvl1_crn'
os.makedirs(test_dir, exist_ok=True)

real_images = [os.path.join(real_dir, filename) for filename in os.listdir(real_dir)]
fake_images = [os.path.join(fake_dir, filename) for filename in os.listdir(fake_dir)]


for image_path in real_images:
    class_name = os.path.basename(os.path.dirname(image_path))
    destination_dir = os.path.join(test_dir, class_name)
    os.makedirs(destination_dir, exist_ok=True)
    shutil.copy(image_path, destination_dir)

for image_path in fake_images:
    class_name = os.path.basename(os.path.dirname(image_path))
    destination_dir = os.path.join(test_dir, class_name)
    os.makedirs(destination_dir, exist_ok=True)
    shutil.copy(image_path, destination_dir)


 <div class="markdown-google-sans">

## Loading both train and validation data

</div>

In [2]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
import cv2
import random

train_dir = '/content/drive/MyDrive/lvl1_biggan_laion_datasets/training_dataset'
validation_dir = '/content/drive/MyDrive/lvl1_biggan_laion_datasets/validation_dataset'

# def apply_gaussian_blur(image):
#     image = cv2.GaussianBlur(image, (5, 5), 0)
#     return image

train_datagen = ImageDataGenerator(
    rescale=1./255,
    # preprocessing_function=apply_gaussian_blur
    )
validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(256, 256),
    batch_size=30,
    class_mode='binary'
)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(256, 256),
    batch_size=30,
    class_mode='binary'
)




Found 1600 images belonging to 2 classes.
Found 400 images belonging to 2 classes.


<h2><strong> Overall structure</strong></h2>

1. **Datasets**:
Training dataset: Laion and Biggan generated 800 images each <br>
Validation dataset: Laion and Biggan generated 200 images each

2. **Data Preprocessing**:
Rescaled pixel values to [0, 1]<br>
Used ImageDataGenerator to create image generators which one hot <br>encodes the traiing labels with batch size 30 and binary class mode

3. **Base Model**:
ResNet50 pre-trained on ImageNet<br>
Top layers excluded<br>
Input shape set to (256, 256, 3)

4. **Model Architecture**:
Sequential model with:<br>
Base ResNet50 model<br>
GlobalAveragePooling2D layer<br>
Dense layer with 1 neuron and sigmoid activation

5. **Model Compilation**:
Optimizer: SGD with learning rate 0.001 and momentum 0.9<br>
Loss function: binary_crossentropy<br>
Metrics: accuracy

6. **Training Specifications**:
Initial epochs: 30<br>
Batch size: 30<br>
Fine-tuning : 5 epochs<br>
Unfreezed layers: 130-175

7. **Hyperparameters**:
Learning rate: 0.001<br>
Momentum: 0.9<br>
Batch size: 30<br>
Total epochs : 35


 <div class="markdown-google-sans">

## Building model using pretrained weights from ResNet50 avaliable and fitting it initially without fine tuning it
</div>

In [3]:
base_model_1 = ResNet50(weights='imagenet', include_top=False, input_shape=(256, 256, 3))

model = Sequential([
    base_model_1,
    GlobalAveragePooling2D(),
    Dense(1, activation='sigmoid')
])

for layer in base_model_1.layers:
    layer.trainable = False

model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9),
              loss='binary_crossentropy',
              metrics=['accuracy'])

initial_epochs = 30
fine_tune_epochs = 5
total_epochs = initial_epochs + fine_tune_epochs


history = model.fit(train_generator, epochs=initial_epochs,
                    validation_data=validation_generator)



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30



 <div class="markdown-google-sans">

## Fine-tuning the model for 5 epochs
</div>

In [4]:
fine_tune_layers=130

for layer in model.layers[fine_tune_layers:]:
    layer.trainable = True

model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9),
              loss='binary_crossentropy',
              metrics=['accuracy'])

history_fine = model.fit(train_generator, epochs=total_epochs,
                         initial_epoch=initial_epochs,
                         validation_data=validation_generator)


Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35



 <div class="markdown-google-sans">

## Saving the model
</div>

In [4]:
import os
os.makedirs('/content/drive/MyDrive/saved_models_deepfake',exist_ok=True)
model.save('/content/drive/MyDrive/saved_models_deepfake/lvl1_laion_biggan.h5')

  saving_api.save_model(



 <div class="markdown-google-sans">

## Testing the model for level 1 classification(real vs fake)
</div>

In [5]:
import tensorflow as tf

loaded_model = tf.keras.models.load_model('/content/drive/MyDrive/saved_models_deepfake/lvl1_laion_biggan.h5')

test_dir = '/content/drive/MyDrive/testing_lvl1_crn'
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(256, 256),
    batch_size=30,
    class_mode='binary'
)

_, test_accuracy = loaded_model.evaluate(test_generator)

print(f'Test accuracy: {test_accuracy}')

Found 1996 images belonging to 2 classes.
Test accuracy: 0.5420841574668884


 <div class="markdown-google-sans">

## <h1>**Level 2 of the Multilevel hierarchichal architecture**</h1>
</div>






 <div class="markdown-google-sans">

## Making directories for level 2 datasets similiarly
</div>

In [10]:
import os
from sklearn.model_selection import train_test_split
import shutil


dms_dir="/content/drive/MyDrive/diffusion_datasets/dalle/0_dm"
gans_dir="/content/drive/MyDrive/gan_datasets/biggan/1_gan"

os.makedirs("/content/drive/MyDrive/lvl2_biggan_dalle_datasets",exist_ok=True)

training_dir = '/content/drive/MyDrive/lvl2_biggan_dalle_datasets/training_dataset'
validation_dir = '/content/drive/MyDrive/lvl2_biggan_dalle_datasets/validation_dataset'
os.makedirs(training_dir, exist_ok=True)
os.makedirs(validation_dir, exist_ok=True)


dms_images = [os.path.join(dms_dir, filename) for filename in os.listdir(dms_dir)][:1000]
gans_images = [os.path.join(gans_dir, filename) for filename in os.listdir(gans_dir)][:1000]

all_images = dms_images + gans_images
train_images, val_images = train_test_split(all_images, test_size=0.2, random_state=42)

for image_path in train_images:
    class_name = os.path.basename(os.path.dirname(image_path))
    destination_dir = os.path.join(training_dir, class_name)
    os.makedirs(destination_dir, exist_ok=True)
    shutil.copy(image_path, destination_dir)

for image_path in val_images:
    class_name = os.path.basename(os.path.dirname(image_path))
    destination_dir = os.path.join(validation_dir, class_name)
    os.makedirs(destination_dir, exist_ok=True)
    shutil.copy(image_path, destination_dir)


 <div class="markdown-google-sans">

## ResNet implementation with linear layer at end for level 2 datasets similiarly
</div>

In [8]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
import cv2

train_dir = '/content/drive/MyDrive/lvl2_biggan_dalle_datasets/training_dataset'
validation_dir = '/content/drive/MyDrive/lvl2_biggan_dalle_datasets/validation_dataset'

# def apply_gaussian_blur(image):
#     image = cv2.GaussianBlur(image, (5, 5), 0)
#     return image

train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(256, 256),
    batch_size=30,
    class_mode='binary'
)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(256, 256),
    batch_size=30,
    class_mode='binary'
)

base_model_2 = ResNet50(weights='imagenet', include_top=False, input_shape=(256, 256, 3))

model = Sequential([
    base_model_2,
    GlobalAveragePooling2D(),
    Dense(1, activation='sigmoid')
])

for layer in base_model_2.layers:
    layer.trainable = False

model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9),
              loss='binary_crossentropy',
              metrics=['accuracy'])

initial_epochs = 10
fine_tune_epochs = 15
total_epochs = initial_epochs + fine_tune_epochs


history = model.fit(train_generator, epochs=initial_epochs,
                    validation_data=validation_generator)



fine_tune_layers=140

for layer in model.layers[fine_tune_layers:]:
    layer.trainable = True

model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9),
              loss='binary_crossentropy',
              metrics=['accuracy'])

history_fine = model.fit(train_generator, epochs=total_epochs,
                         initial_epoch=initial_epochs,
                         validation_data=validation_generator)

model.save('/content/drive/MyDrive/saved_models_deepfake/lvl2_biggan_dalle.h5')

Found 1600 images belonging to 2 classes.
Found 400 images belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


  saving_api.save_model(



 <div class="markdown-google-sans">

## Creating testing directory with similiar structure

</div>

In [None]:
import os
from sklearn.model_selection import train_test_split
import shutil


gan_dir="/content/drive/MyDrive/gan_datasets/crn_subset/1_gan"
dm_dir="/content/drive/MyDrive/diffusion_datasets/ldm_100/0_dm"
test_dir = '/content/drive/MyDrive/testing_dir_lvl2_crn_ldm_100'
os.makedirs(test_dir, exist_ok=True)

gan_images = [os.path.join(gan_dir, filename) for filename in os.listdir(gan_dir)][:750]
dm_images = [os.path.join(dm_dir, filename) for filename in os.listdir(dm_dir)][:750]


for image_path in gan_images:
    class_name = os.path.basename(os.path.dirname(image_path))
    destination_dir = os.path.join(test_dir, class_name)
    os.makedirs(destination_dir, exist_ok=True)
    shutil.copy(image_path, destination_dir)

for image_path in dm_images:
    class_name = os.path.basename(os.path.dirname(image_path))
    destination_dir = os.path.join(test_dir, class_name)
    os.makedirs(destination_dir, exist_ok=True)
    shutil.copy(image_path, destination_dir)


 <div class="markdown-google-sans">

## Testing the model for level 2 classification(GANs vs DMs)
</div>

In [None]:
test_dir = '/content/drive/MyDrive/testing_dir_lvl2_crn_ldm_100'
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(256, 256),
    batch_size=30,
    class_mode='binary'
)

_, test_accuracy = model.evaluate(test_generator)

print(f'Test accuracy: {test_accuracy}')

Found 1500 images belonging to 2 classes.
Test accuracy: 0.6340000033378601


 <div class="markdown-google-sans">

## <h1>**Level 3 of the Multilevel hierarchichal architecture**</h1>
</div>






 <div class="markdown-google-sans">

## Making directories for Level 3 of Architecture
</div>

In [2]:
import os
from sklearn.model_selection import train_test_split
import shutil


dalle_dir="/content/drive/MyDrive/diffusion_datasets/dalle/0_dalle"
glide_dir="/content/drive/MyDrive/diffusion_datasets/glide_100_27/1_glide"
guided_dir="/content/drive/MyDrive/diffusion_datasets/guided/2_guided"
ldm_dir="/content/drive/MyDrive/diffusion_datasets/ldm_200/3_ldm"

os.makedirs("/content/drive/MyDrive/lvl3_dm_datasets", exist_ok=True)


training_dir = '/content/drive/MyDrive/lvl3_dm_datasets/training_dataset'
validation_dir = '/content/drive/MyDrive/lvl3_dm_datasets/validation_dataset'
os.makedirs(training_dir, exist_ok=True)
os.makedirs(validation_dir, exist_ok=True)


dalle_images = [os.path.join(dalle_dir, filename) for filename in os.listdir(dalle_dir)][:500]
glide_images = [os.path.join(glide_dir, filename) for filename in os.listdir(glide_dir)][:500]
guided_images = [os.path.join(guided_dir, filename) for filename in os.listdir(guided_dir)][:500]
ldm_images = [os.path.join(ldm_dir, filename) for filename in os.listdir(ldm_dir)][:500]

all_images = dalle_images + glide_images + guided_images + ldm_images
train_images, val_images = train_test_split(all_images, test_size=0.2, random_state=42)


for image_path in train_images:
    class_name = os.path.basename(os.path.dirname(image_path))
    destination_dir = os.path.join(training_dir, class_name)
    os.makedirs(destination_dir, exist_ok=True)
    shutil.copy(image_path, destination_dir)

for image_path in val_images:
    class_name = os.path.basename(os.path.dirname(image_path))
    destination_dir = os.path.join(validation_dir, class_name)
    os.makedirs(destination_dir, exist_ok=True)
    shutil.copy(image_path, destination_dir)


 <div class="markdown-google-sans">

## Implementating multiclass classification for all the diffusion models using ResNet50's pretrained weights and fine tuning them
</div>

In [2]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
import cv2

train_dir = '/content/drive/MyDrive/lvl3_dm_datasets/training_dataset'
validation_dir = '/content/drive/MyDrive/lvl3_dm_datasets/validation_dataset'

# def apply_gaussian_blur(image):
#     image = cv2.GaussianBlur(image, (5, 5), 0)
#     return image

train_datagen = ImageDataGenerator(rescale=1./255,
                                  #  preprocessing_function=apply_gaussian_blur
                                   )
validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(256, 256),
    batch_size=30,
    class_mode='categorical'
)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(256, 256),
    batch_size=30,
    class_mode='categorical'
)

base_model_3 = ResNet50(weights='imagenet', include_top=False, input_shape=(256, 256, 3))

model = Sequential([
    base_model_3,
    GlobalAveragePooling2D(),
    # Dense(64,activation='relu'),
    Dense(4, activation='softmax')
])

for layer in base_model_3.layers:
    layer.trainable = False

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=1000,
    decay_rate=0.9
)

model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

initial_epochs = 30
fine_tune_epochs = 5
total_epochs = initial_epochs + fine_tune_epochs


history = model.fit(train_generator, epochs=initial_epochs,
                    validation_data=validation_generator)



fine_tune_layers=140

for layer in model.layers[fine_tune_layers:]:
    layer.trainable = True

model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history_fine = model.fit(train_generator, epochs=total_epochs,
                         initial_epoch=initial_epochs,
                         validation_data=validation_generator)

model.save('/content/drive/MyDrive/saved_models_deepfake/lvl3_dalle.h5')

Found 1600 images belonging to 4 classes.
Found 400 images belonging to 4 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35


  saving_api.save_model(



 <div class="markdown-google-sans">

## Creating Testing directories for Level 3 dms classification
</div>

In [2]:
import os
from sklearn.model_selection import train_test_split
import shutil


dalle_dir="/content/drive/MyDrive/diffusion_datasets/dalle/0_dalle"
glide_dir="/content/drive/MyDrive/diffusion_datasets/glide_100_27/1_glide"
guided_dir="/content/drive/MyDrive/diffusion_datasets/guided/2_guided"
ldm_dir="/content/drive/MyDrive/diffusion_datasets/ldm_200/3_ldm"

os.makedirs("/content/drive/MyDrive/testing_dataset_lvl3_dms", exist_ok=True)


testing_dir = '/content/drive/MyDrive/testing_dataset_lvl3_dms'


dalle_images = [os.path.join(dalle_dir, filename) for filename in os.listdir(dalle_dir)][500:1000]
glide_images = [os.path.join(glide_dir, filename) for filename in os.listdir(glide_dir)][500:1000]
guided_images = [os.path.join(guided_dir, filename) for filename in os.listdir(guided_dir)][500:1000]
ldm_images = [os.path.join(ldm_dir, filename) for filename in os.listdir(ldm_dir)][500:1000]

all_images = dalle_images + glide_images + guided_images + ldm_images

for image_path in all_images:
    class_name = os.path.basename(os.path.dirname(image_path))
    destination_dir = os.path.join(testing_dir, class_name)
    os.makedirs(destination_dir, exist_ok=True)
    shutil.copy(image_path, destination_dir)



 <div class="markdown-google-sans">

## Testing the model for level 3 DMs classification
</div>

In [3]:
import tensorflow as tf

loaded_model = tf.keras.models.load_model('/content/drive/MyDrive/saved_models_deepfake/lvl3_dalle.h5')

test_dir = '/content/drive/MyDrive/testing_dataset_lvl3_dms'
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(256, 256),
    batch_size=30,
    class_mode='categorical'
)

_, test_accuracy = loaded_model.evaluate(test_generator)

print(f'Test accuracy: {test_accuracy}')

Found 2000 images belonging to 4 classes.
Test accuracy: 0.484499990940094
