In this notebook, we will fine tune the transfer learning approach on VGG16. 

So far, our accuracy is capped at 89-90%. We will follow the following steps to re-run the model on the entire dataset.

- We will use __F1 score__ as the metric for evaluation instead of accuracy since the dataset is unbalanced. We will define a custom function for the F1 score
- We will also have a separate test set in addition to the validation set.


In [2]:
# import necessary libraries and functions
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
from os import listdir
import random
from PIL import Image

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from tensorflow.keras.layers import Dense, InputLayer, Flatten, Dropout, BatchNormalization, Conv2D, MaxPool2D, Conv2DTranspose, Input
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.metrics import Precision, Recall
from tensorflow.keras.applications import VGG16
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras import regularizers

import warnings
warnings.filterwarnings('ignore')

In [3]:
class0_folder= 'dataset/fakeV2/fake-v2'
class1_folder= 'dataset/real'

print("No of AI generated images: ",len(os.listdir(class0_folder)))
print("No of Real images: ",len(os.listdir(class1_folder)))

No of AI generated images:  17856
No of Real images:  3781


In [4]:
# Define a function to filter out excessively large images
def filter_large_images(image_dir, max_pixels):
    filtered_images = []
    for filename in os.listdir(image_dir):
        filepath = os.path.join(image_dir, filename)
        try:
            with Image.open(filepath) as img:
                if img.size[0] * img.size[1] <= max_pixels:
                    continue
                    #filtered_images.append(filename)
        except Exception as e:
            print(f"Error processing {filename}: {e}")
            filtered_images.append(filename)
    return filtered_images

# Define your image directory and maximum allowable pixels

max_pixels = 178956970  #The model was giving error for pixel sizes above this value; so testing with this as threshold

# Filter out excessively large images from fake dataset
filtered_images_fake = filter_large_images(class0_folder, max_pixels)

# Filter out excessively large images from real dataset
filtered_images_real = filter_large_images(class1_folder, max_pixels)

Error processing .DS_Store: cannot identify image file 'dataset/fakeV2/fake-v2/.DS_Store'
Error processing 12479.jpg: Image size (232748750 pixels) exceeds limit of 178956970 pixels, could be decompression bomb DOS attack.
Error processing r-art.txt: cannot identify image file 'dataset/real/r-art.txt'


In [5]:
# Function to load filenames and labels
def load_filenames_labels(folder, label, large_img, sampled_imgs=None):
    if sampled_imgs is None:
        sampled_imgs= os.listdir(folder)
    filenames = []
    labels = []
    for filename in os.listdir(folder):
        if (filename not in large_img) and (filename in sampled_imgs) :
            filenames.append(os.path.join(folder, filename))
            labels.append(label)
    return filenames, labels

# Load filenames and labels for Class 0
class0_filenames, class0_labels = load_filenames_labels(class0_folder, '0', filtered_images_fake)

# Load filenames and labels for Class 1
class1_filenames, class1_labels = load_filenames_labels(class1_folder, '1', filtered_images_real )

In [6]:
# Concatenate filenames and labels from both classes
all_filenames = class0_filenames + class1_filenames
all_labels = class0_labels + class1_labels
len(all_filenames), len(all_labels)

(21634, 21634)

In [7]:
# Split the data into train and test sets while maintaining class ratio; we will keep the test side aside
full_train_filenames, test_filenames, full_train_labels, test_labels = train_test_split(
    all_filenames, all_labels, test_size=0.1, stratify=all_labels, random_state=42)

train_filenames, validation_filenames, train_labels, validation_labels = train_test_split(
    full_train_filenames, full_train_labels, test_size=0.2, stratify=full_train_labels, random_state=42)

len(train_filenames), len(train_labels)

(15576, 15576)

In [8]:
# Load the pre-trained VGG16 model with ImageNet weights
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Create ImageDataGenerator instances for train and validation sets along with the preprocessing for VGG16
train_datagen = ImageDataGenerator(rescale=1./255, preprocessing_function=preprocess_input)
validation_datagen = ImageDataGenerator(rescale=1./255, preprocessing_function=preprocess_input)

2024-04-07 23:30:50.420003: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2024-04-07 23:30:50.420060: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-04-07 23:30:50.420067: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-04-07 23:30:50.420137: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-04-07 23:30:50.420168: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [9]:
#create the generators for train and validation data
target_size= (224,224)
batch_size= 64

# Create the generator for training data
train_generator = train_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'filename': train_filenames, 'class': train_labels}),
    x_col='filename',
    y_col='class',
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
    seed=42
)

# Create the generator for validation data
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'filename': validation_filenames, 'class': validation_labels}),
    x_col='filename',
    y_col='class',
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
    seed=42
)

Found 15576 validated image filenames belonging to 2 classes.
Found 3894 validated image filenames belonging to 2 classes.


In [10]:
# creating model with pre trained imagenet weights
base_model = VGG16(weights='imagenet')
# creating our own model
name= 'VGG16_partially_trained_full_data'
x1 = Dense(256, activation='relu', name='fc1', kernel_regularizer=regularizers.l2(0.01))(base_model.layers[-4].output)
x2= Dropout(0.5,name= 'dropout')(x1)
y = Dense(2, activation='softmax', name='prediction')(x2)
model = Model(base_model.input, y)
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

We will define a custom tf function for precision and recall and use this as the criteria/metric while compiling our model.

In [11]:
# Create Precision and Recall metrics
precision_metric = Precision()
recall_metric = Recall()

# Define F1-score metric function
@tf.function
def f1_score(y_true, y_pred):
    precision = precision_metric
    recall = recall_metric
    precision.update_state(y_true, y_pred)
    recall.update_state(y_true, y_pred)
    precision_result = precision.result()
    recall_result = recall.result()
    return 2 * ((precision_result * recall_result) / (precision_result + recall_result + 1e-10))
    
# to set the first 15 layers to non-trainable (weights will not be updated)
for layer in model.layers[:15]:
    layer.trainable = False 
# Compile the model
model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=[f1_score])
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [12]:
filepath= os.path.join("models",f"{name}.hdf5")
checkpoint= ModelCheckpoint(filepath=filepath, monitor="f1_score",verbose=1, save_best_only=True, mode='max')

In [11]:
# Fit the model to the training data and validate on the validation data
history = model.fit(train_generator,
                    steps_per_epoch=len(train_generator),
                    epochs=5,
                    validation_data=validation_generator,
                    validation_steps=len(validation_generator),
                    callbacks=[checkpoint])

Epoch 1/5


2024-04-07 22:30:54.064730: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Epoch 1: f1_score improved from -inf to 0.84540, saving model to models/VGG16_partially_trained_full_data.hdf5
Epoch 2/5
Epoch 2: f1_score improved from 0.84540 to 0.90098, saving model to models/VGG16_partially_trained_full_data.hdf5
Epoch 3/5
Epoch 3: f1_score improved from 0.90098 to 0.91877, saving model to models/VGG16_partially_trained_full_data.hdf5
Epoch 4/5
Epoch 4: f1_score improved from 0.91877 to 0.92977, saving model to models/VGG16_partially_trained_full_data.hdf5
Epoch 5/5
Epoch 5: f1_score improved from 0.92977 to 0.93868, saving model to models/VGG16_partially_trained_full_data.hdf5


In [13]:
model.load_weights(filepath=filepath)
predictions= model.predict(validation_generator)

predicted_labels =  np.argmax(predictions, axis=1).astype('str')
true_labels= validation_labels

# Print the classification report
print(classification_report(true_labels, predicted_labels))

2024-04-07 23:31:17.189855: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


              precision    recall  f1-score   support

           0       0.96      0.95      0.95      3214
           1       0.78      0.79      0.78       680

    accuracy                           0.92      3894
   macro avg       0.87      0.87      0.87      3894
weighted avg       0.92      0.92      0.92      3894



### Testing on test data

In [14]:
test_datagen = ImageDataGenerator(rescale=1./255, preprocessing_function=preprocess_input)
# Create the generator for training data
test_generator = train_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'filename': test_filenames, 'class': test_labels}),
    x_col='filename',
    y_col='class',
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
    seed=42
)

Found 2164 validated image filenames belonging to 2 classes.


In [15]:
predictions= model.predict(test_generator)

predicted_labels =  np.argmax(predictions, axis=1).astype('str')
true_labels= test_labels

# Print the classification report
print(classification_report(true_labels, predicted_labels))

              precision    recall  f1-score   support

           0       0.95      0.96      0.95      1786
           1       0.79      0.77      0.78       378

    accuracy                           0.92      2164
   macro avg       0.87      0.86      0.87      2164
weighted avg       0.92      0.92      0.92      2164



We have a macro-averaged f1-score of __87%__ and accuracy of __92%__. There seem to still be certain challenges to identify the real images. Since stable diffusion creates images from a noise signal, the noisy real art images maybe creating issues for the model in identifying them. An alternative approach to further improve the model would be to use a de-noising autoencoder in the pipeline prior to sending to VGG16 for classification.