<a href="https://colab.research.google.com/github/sanjaymahajan21/ImageSplicingDetection/blob/main/MTech_Final_Project_VGG16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive so the dataset folders and model checkpoint stored under
# /content/drive/MyDrive are reachable from this Colab runtime.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
#using pretrained network

import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import SGD
import json


#prepare dataset-1 "Real and Fake Face Detection" "https://www.kaggle.com/datasets/ciplab/real-and-fake-face-detection?resource=download"
train_path = '/content/drive/MyDrive/KaggleRealandFakeFaceDetection/train'
valid_path = '/content/drive/MyDrive/KaggleRealandFakeFaceDetection/valid'
test_path = '/content/drive/MyDrive/KaggleRealandFakeFaceDetection/test'

# Use VGG16's own preprocess_input because the network trained in this
# notebook is VGG16 (the original code borrowed the ResNet50 helper).
image_generator = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input)

# class_mode='binary' yields one 0/1 label per image. The model in this notebook
# ends in a single sigmoid unit trained with binary_crossentropy; the default
# class_mode ('categorical') would yield one-hot (batch, 2) labels that do not
# match that output and pin the loss at ln(2) ~= 0.693.
flow_kwargs = dict(target_size=(224,224), batch_size=10, class_mode='binary')

train_batches = image_generator.flow_from_directory(train_path, **flow_kwargs)
# Validation and test batches are not shuffled: the evaluation code compares
# predictions against `test_batches.classes`, which is in directory order, so
# the generator must yield samples in that same order.
valid_batches = image_generator.flow_from_directory(valid_path, shuffle=False, **flow_kwargs)
test_batches = image_generator.flow_from_directory(test_path, shuffle=False, **flow_kwargs)


Found 1633 images belonging to 2 classes.
Found 306 images belonging to 2 classes.
Found 102 images belonging to 2 classes.


In [None]:
#Training Module using VGG16 pretrained architecture

# 224x224 RGB input, matching the target_size used by the data generators.
input_tensor = tf.keras.layers.Input(shape=(224,224,3))

# ImageNet-pretrained VGG16 without its fully connected classifier (include_top=False).
base_model = VGG16(input_tensor=input_tensor, weights='imagenet', include_top=False)

# Freeze every pretrained layer so that only the new dense head below is trained.
for layer in base_model.layers:
    layer.trainable = False

# New classification head on top of the frozen convolutional features.
x = base_model.output
x = Flatten()(x)

x = Dense(1024, activation='relu')(x)
x = Dense(1024, activation='relu')(x)
x = Dense(512, activation='relu')(x)
# Single sigmoid unit: one probability per image.
# NOTE: this requires the generators to yield one 0/1 label per image
# (flow_from_directory class_mode='binary'); one-hot labels do not match
# this output shape under binary_crossentropy.
predictions = Dense(1, activation='sigmoid')(x)

model1 = Model(inputs=base_model.input, outputs=predictions)

# Stochastic Gradient Descent optimizer.
# learning_rate=1e-4: small step size, i.e. cautious and slower parameter updates.
#   (`lr` is a deprecated alias that newer Keras releases reject, so the
#   explicit `learning_rate` keyword is used.)
# momentum=0.9: 90% of the previously accumulated gradient update is carried
#   into the current update, which speeds up descent in a consistent direction
#   and dampens oscillations.
opt = SGD(learning_rate=1e-4, momentum=0.9)

# EarlyStopping: stop when val_loss has not improved for `patience` epochs.
#   NOTE(review): patience=100 can never trigger with epochs=5 below.
# ModelCheckpoint: write the model to Drive only when val_loss improves.
callback_list=[
    EarlyStopping(monitor="val_loss",patience=100),
    ModelCheckpoint(filepath="/content/drive/MyDrive/VGG16TEST-OUTPUT.h5",monitor="val_loss",save_best_only=True,verbose=1),
]

model1.compile(loss="binary_crossentropy", optimizer=opt,metrics=["accuracy"])

model1.summary()

# Model.fit accepts generators directly; Model.fit_generator is deprecated.
history = model1.fit(train_batches,validation_data=valid_batches,epochs=5,verbose=1,callbacks=callback_list)
print("Number of CNN layers:", len(base_model.layers))
#STORING HISTORY OF TRAINING FOR LATER USE
with open("history.json", "w") as f:
    json.dump(history.history, f)



Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

  history = model1.fit_generator(train_batches,validation_data=valid_batches,epochs=5,verbose=1,callbacks=callback_list)


Epoch 1/5
Epoch 1: val_loss did not improve from inf
Epoch 2/5
Epoch 2: val_loss did not improve from inf
Epoch 3/5
Epoch 3: val_loss did not improve from inf
Epoch 4/5
Epoch 4: val_loss did not improve from inf
Epoch 5/5
Epoch 5: val_loss did not improve from inf
Number of CNN layers: 19


In [None]:

import numpy as np
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score

def load_model_evaluate(model_path, test_data_generator):
    """Load a saved Keras model and report binary-classification metrics.

    Labels and predictions are collected batch by batch from the generator, so
    every prediction is paired with the label of the same image even when the
    generator shuffles. (Comparing `generator.classes` with
    `model.predict(generator)` is only valid for an unshuffled generator.)

    Args:
        model_path: Path of the saved model, passed to
            `tf.keras.models.load_model`.
        test_data_generator: Indexable batch generator (supports `len()` and
            `generator[i] -> (images, labels)`), e.g. the result of
            `ImageDataGenerator.flow_from_directory`. Labels may be 0/1 values
            or one-hot rows; one-hot rows are reduced with argmax.

    Returns:
        Tuple `(accuracy, confusion_matrix, precision, recall, f1)`.

    Raises:
        ValueError: If the generator yields no batches.
    """
    # Load the model
    model = tf.keras.models.load_model(model_path)

    # Generate predictions, keeping each batch's labels next to its predictions
    test_data = test_data_generator
    true_parts = []
    prob_parts = []
    for batch_index in range(len(test_data)):
        images, labels = test_data[batch_index]
        labels = np.asarray(labels)
        if labels.ndim > 1 and labels.shape[-1] > 1:
            # One-hot labels (class_mode='categorical'): recover the class index.
            labels = labels.argmax(axis=-1)
        true_parts.append(labels.reshape(-1))
        prob_parts.append(np.asarray(model.predict_on_batch(images)).reshape(-1))

    if not true_parts:
        raise ValueError("test_data_generator produced no batches")

    y_true = np.concatenate(true_parts).astype(int)
    y_pred_prob = np.concatenate(prob_parts)
    # Same decision rule as np.round on a sigmoid output: class 1 only above 0.5.
    y_pred = (y_pred_prob > 0.5).astype(int)

    # Calculate evaluation metrics
    accuracy = accuracy_score(y_true, y_pred)
    print("accuracy : ",accuracy)
    # labels=[0, 1] keeps the matrix 2x2 even if one class never occurs.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    print("confusion matrix : ", cm)
    precision = precision_score(y_true, y_pred)
    print("precision : ",precision)
    recall = recall_score(y_true, y_pred)
    print("recall : ",recall)

    f1 = f1_score(y_true, y_pred)
    print("f1 score : ",f1)

    return accuracy, cm, precision, recall, f1



# Score the checkpoint file written by ModelCheckpoint against the test batches.
checkpoint_path = "/content/drive/MyDrive/VGG16TEST-OUTPUT.h5"
load_model_evaluate(checkpoint_path, test_batches)

accuracy :  0.5294117647058824
confusion matrix :  [[ 0 48]
 [ 0 54]]
precision :  0.5294117647058824
recall :  1.0
f1 score :  0.6923076923076924


(0.5294117647058824,
 array([[ 0, 48],
        [ 0, 54]]),
 0.5294117647058824,
 1.0,
 0.6923076923076924)

In [None]:
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import SGD
import json


#prepare dataset-2 "COVID-19 Digital X-rays Forgery Dataset" "https://www.kaggle.com/datasets/nourmahmoud/covid19-digital-xrays-forgery-dataset?resource=download"
# NOTE: these names are also used for dataset-1 earlier in the notebook;
# running this cell rebinds them to dataset-2.
train_path ='/content/drive/MyDrive/KaggleRealandFakeCovid19XRAYDetection/train'
valid_path ='/content/drive/MyDrive/KaggleRealandFakeCovid19XRAYDetection/valid'
test_path ='/content/drive/MyDrive/KaggleRealandFakeCovid19XRAYDetection/test'

# Use VGG16's own preprocess_input because the network trained in this
# notebook is VGG16 (the original code borrowed the ResNet50 helper).
image_generator = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input)

# class_mode='binary' yields one 0/1 label per image. The model in this notebook
# ends in a single sigmoid unit trained with binary_crossentropy; the default
# class_mode ('categorical') would yield one-hot (batch, 2) labels that do not
# match that output and pin the loss at ln(2) ~= 0.693.
flow_kwargs = dict(target_size=(224,224), batch_size=10, class_mode='binary')

train_batches = image_generator.flow_from_directory(train_path, **flow_kwargs)
# Validation and test batches are not shuffled: the evaluation code compares
# predictions against `test_batches.classes`, which is in directory order, so
# the generator must yield samples in that same order.
valid_batches = image_generator.flow_from_directory(valid_path, shuffle=False, **flow_kwargs)
test_batches = image_generator.flow_from_directory(test_path, shuffle=False, **flow_kwargs)


Found 3200 images belonging to 2 classes.
Found 600 images belonging to 2 classes.
Found 200 images belonging to 2 classes.


In [None]:
#Training Module using VGG16 pretrained architecture

# 224x224 RGB input, matching the target_size used by the data generators.
input_tensor = tf.keras.layers.Input(shape=(224,224,3))

# ImageNet-pretrained VGG16 without its fully connected classifier (include_top=False).
base_model = VGG16(input_tensor=input_tensor, weights='imagenet', include_top=False)

# Freeze every pretrained layer so that only the new dense head below is trained.
for layer in base_model.layers:
    layer.trainable = False

# New classification head on top of the frozen convolutional features.
x = base_model.output
x = Flatten()(x)

x = Dense(1024, activation='relu')(x)
x = Dense(1024, activation='relu')(x)
x = Dense(512, activation='relu')(x)
# Single sigmoid unit: one probability per image.
# NOTE: this requires the generators to yield one 0/1 label per image
# (flow_from_directory class_mode='binary'); one-hot labels do not match
# this output shape under binary_crossentropy.
predictions = Dense(1, activation='sigmoid')(x)

model1 = Model(inputs=base_model.input, outputs=predictions)

# Stochastic Gradient Descent optimizer.
# learning_rate=1e-4: small step size, i.e. cautious and slower parameter updates.
#   (`lr` is a deprecated alias that newer Keras releases reject, so the
#   explicit `learning_rate` keyword is used.)
# momentum=0.9: 90% of the previously accumulated gradient update is carried
#   into the current update, which speeds up descent in a consistent direction
#   and dampens oscillations.
opt = SGD(learning_rate=1e-4, momentum=0.9)

# EarlyStopping: stop when val_loss has not improved for `patience` epochs.
#   NOTE(review): patience=100 can never trigger with epochs=5 below.
# ModelCheckpoint: write the model to Drive only when val_loss improves.
#   NOTE(review): this is the same checkpoint path used by the earlier
#   training cell, so a checkpoint saved here replaces that file.
callback_list=[
    EarlyStopping(monitor="val_loss",patience=100),
    ModelCheckpoint(filepath="/content/drive/MyDrive/VGG16TEST-OUTPUT.h5",monitor="val_loss",save_best_only=True,verbose=1),
]

model1.compile(loss="binary_crossentropy", optimizer=opt,metrics=["accuracy"])

model1.summary()

# Model.fit accepts generators directly; Model.fit_generator is deprecated.
history = model1.fit(train_batches,validation_data=valid_batches,epochs=5,verbose=1,callbacks=callback_list)
print("Number of CNN layers:", len(base_model.layers))
#STORING HISTORY OF TRAINING FOR LATER USE
# NOTE(review): "history.json" is also written by the earlier training cell,
# so this run overwrites that file.
with open("history.json", "w") as f:
    json.dump(history.history, f)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5




Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

  history = model1.fit_generator(train_batches,validation_data=valid_batches,epochs=5,verbose=1,callbacks=callback_list)


Epoch 1/5
Epoch 1: val_loss improved from inf to 0.69315, saving model to /content/drive/MyDrive/VGG16TEST-OUTPUT.h5
Epoch 2/5
Epoch 2: val_loss did not improve from 0.69315
Epoch 3/5
Epoch 3: val_loss improved from 0.69315 to 0.69315, saving model to /content/drive/MyDrive/VGG16TEST-OUTPUT.h5
Epoch 4/5
Epoch 4: val_loss improved from 0.69315 to 0.69315, saving model to /content/drive/MyDrive/VGG16TEST-OUTPUT.h5
Epoch 5/5
Epoch 5: val_loss did not improve from 0.69315
Number of CNN layers: 19


In [None]:

import numpy as np
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score

# NOTE: an earlier cell in this notebook also defines load_model_evaluate;
# running this cell shadows that definition.
def load_model_evaluate(model_path, test_data_generator):
    """Load a saved Keras model and report binary-classification metrics.

    Labels and predictions are collected batch by batch from the generator, so
    every prediction is paired with the label of the same image even when the
    generator shuffles. (Comparing `generator.classes` with
    `model.predict(generator)` is only valid for an unshuffled generator.)

    Args:
        model_path: Path of the saved model, passed to
            `tf.keras.models.load_model`.
        test_data_generator: Indexable batch generator (supports `len()` and
            `generator[i] -> (images, labels)`), e.g. the result of
            `ImageDataGenerator.flow_from_directory`. Labels may be 0/1 values
            or one-hot rows; one-hot rows are reduced with argmax.

    Returns:
        Tuple `(accuracy, confusion_matrix, precision, recall, f1)`.

    Raises:
        ValueError: If the generator yields no batches.
    """
    # Load the model
    model = tf.keras.models.load_model(model_path)

    # Generate predictions, keeping each batch's labels next to its predictions
    test_data = test_data_generator
    true_parts = []
    prob_parts = []
    for batch_index in range(len(test_data)):
        images, labels = test_data[batch_index]
        labels = np.asarray(labels)
        if labels.ndim > 1 and labels.shape[-1] > 1:
            # One-hot labels (class_mode='categorical'): recover the class index.
            labels = labels.argmax(axis=-1)
        true_parts.append(labels.reshape(-1))
        prob_parts.append(np.asarray(model.predict_on_batch(images)).reshape(-1))

    if not true_parts:
        raise ValueError("test_data_generator produced no batches")

    y_true = np.concatenate(true_parts).astype(int)
    y_pred_prob = np.concatenate(prob_parts)
    # Same decision rule as np.round on a sigmoid output: class 1 only above 0.5.
    y_pred = (y_pred_prob > 0.5).astype(int)

    # Calculate evaluation metrics
    accuracy = accuracy_score(y_true, y_pred)
    print("accuracy : ",accuracy)
    # labels=[0, 1] keeps the matrix 2x2 even if one class never occurs.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    print("confusion matrix : ", cm)
    precision = precision_score(y_true, y_pred)
    print("precision : ",precision)
    recall = recall_score(y_true, y_pred)
    print("recall : ",recall)

    f1 = f1_score(y_true, y_pred)
    print("f1 score : ",f1)

    return accuracy, cm, precision, recall, f1



# Score the checkpoint file written by ModelCheckpoint against the test batches.
checkpoint_path = "/content/drive/MyDrive/VGG16TEST-OUTPUT.h5"
load_model_evaluate(checkpoint_path, test_batches)

accuracy :  0.505
confusion matrix :  [[  1  99]
 [  0 100]]
precision :  0.5025125628140703
recall :  1.0
f1 score :  0.6688963210702341


(0.505,
 array([[  1,  99],
        [  0, 100]]),
 0.5025125628140703,
 1.0,
 0.6688963210702341)