In [1]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf

# Set the path to your dataset directory
dataset_dir = "archive_2/train_test"

# Preprocesses an image before it is given to the model to ensure consistency
# across all images that the model is learning off of.
def preprocess_image(image_path):
    # Load the image and convert it to RGB colour space; 
    # we want all images to be in same colour space for consistency.
    with Image.open(image_path) as image:
        image = image.convert("RGB")
    # Resize the image to a standard size of 256 x 256.
    image = image.resize((256, 256))
    # Convert the image to an array. Normalize its pixel values to be between 0 and 1.
    # Since most images in the training/test dataset are jpegs and in RGB, assume that
    # we can divide each pixel by 255 to normalize them to be between 0 and 1.
    preprocessed_image = np.array(image) / 255.0
    # Return the preprocessed image.
    return preprocessed_image

# Determine whether an image is authentic or not based on its filename.
def is_authentic(filename):
    # Extract the first two letters of the image's filename (the prefix).
    prefix = filename[:2]
    # If the prefix is "au" or "Au", the image is authentic (return true).
    # Otherwise, it is manipulated (return false).
    return (prefix == "au" or prefix == "Au")

# Create empty lists to store the images and labels.
images = []
labels = []

# Loop over the images in the dataset directory
for filename in os.listdir(dataset_dir):
    # Ignore .DS_Store - was getting errors that this file was being read
    # when I was tried to run the model.
    if filename.startswith(".DS_Store"):
        continue
    # Get the path to the image file.
    file_path = os.path.join(dataset_dir, filename)
    # Preprocess the image and add the preprocessed_image to the images list.
    preprocessed_image = preprocess_image(file_path)
    images.append(preprocessed_image)
    # Determine whether the image is authentic or not, and add this label to the labels list.
    label = is_authentic(filename)
    labels.append(label)

# Convert the images and labels lists to NumPy arrays.
images = np.array(images)
labels = np.array(labels)

# Define an ImageDataGenerator, which we will use to perform data augmentation on the set of training images.
# Using data augmentation, we can increase the size of our training set by generating modified versions
# of the original training images (this helps to avoid overfitting).
# All values have been randomly set to 25 or True.
data_augmentation = ImageDataGenerator(
    rotation_range = 25,
    height_shift_range = 0.25,
    width_shift_range = 0.25,
    vertical_flip = True,
    horizontal_flip = True,
    zoom_range = 0.25
)

# Split the dataset into separate training and testing sets.
train_images, test_images, train_labels, test_labels = train_test_split(images, labels, test_size=0.2, random_state=42)

# Defining the convolutional neural network.
# Use InceptionV3 as a starting point we will built our CNN off of; it acts as feature extractor.
# We are using InceptionV3 to help us learn how to tell if a picture has been manipulated or if it is authentic, and then 
# feeding this information to the CNN (note that we do not include the top, final layers of the InceptionV3 model, since 
# we will build our own later that suit our specific classification task).
base = tf.keras.applications.InceptionV3(include_top = False, input_shape = train_images.shape[1:]) #3 channels for Red, Green and Blue
# Simplify the information outputted by InceptionV3 using pooling (it is most likely too extensive to use directly).
# Pooling will summarize the information into a selection of key features.
x = tf.keras.layers.GlobalAveragePooling2D()(base.output)
# Use the Dense() method to make a prediction (whether the image is authentic or not) based on the summarized formation 
# we get from Pooling - we are creating 128 features and using the ReLu activation function.
x = tf.keras.layers.Dense(128, activation = 'relu')(x)
# Create the final layer of the neural network; take the features from the previous Dense layer and process them to
# create a single output (that tells us whether the image is authentic or not).
# We only want output in this final layer; we use the sigmoid function since it is used for binary classification (we only have), 
# 2 classes), and this is a binary classification task.
output = tf.keras.layers.Dense(1, activation = 'sigmoid')(x)
# Create an instance of the model; note that we input the inputs to the InceptionV3 model and output the output from
# the final layer.
model = tf.keras.Model(inputs = base.input, outputs = output)

# Now, compile the model with binary crossentropy loss and Adam optimizer.
# We are using binary crossentropy loss function, since this is a binary classification task.
# Adam is a popular optimization algorithm.
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# Train the model for 20 epochs with a batch size of 32.
results = model.fit(data_augmentation.flow(train_images, train_labels, batch_size = 32), epochs = 20,
                    steps_per_epoch = len(train_images) / 32,
                    validation_data = (test_images, test_labels))

# Save the trained model as a tflite file.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with open('manipulation_detector_config_5.tflite', 'wb') as f:
    f.write(tflite_model)

# Evaluate the model on the testing set
test_loss, test_acc = model.evaluate(test_images, test_labels)
print("Test accuracy:", test_acc)

Metal device set to: Apple M1 Max
Epoch 1/20


2023-04-24 11:50:04.711770: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20




INFO:tensorflow:Assets written to: /var/folders/x9/lnqw3ctd65g1t8sjc2w_k5hm0000gn/T/tmpo9svwhmt/assets


INFO:tensorflow:Assets written to: /var/folders/x9/lnqw3ctd65g1t8sjc2w_k5hm0000gn/T/tmpo9svwhmt/assets
2023-04-24 12:07:07.022302: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2023-04-24 12:07:07.022319: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.


Test accuracy: 0.8256579041481018
