# Detect AI vs Human generated images
In this notebook, I aim to build a convolutional neural network (convnet) that distinguishes real, human-made images from AI-generated ones.

In [1]:
# Libraries used in this notebook
import kagglehub
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input, decode_predictions
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

In [9]:
# Fetch the latest version of the dataset; the returned value is used below as the dataset root directory
data_path = kagglehub.dataset_download(
    handle="alessandrasala79/ai-vs-human-generated-dataset",
)

In [10]:
# Load the CSV manifests that reference the image files.
# For train.csv the first column is used as the index; test.csv keeps the default index.
train_data = pd.read_csv(f"{data_path}/train.csv", index_col=0)
test_data = pd.read_csv(f"{data_path}/test.csv")

In [11]:
# Peek at the first five rows of the training manifest
train_data.head(n=5)

Unnamed: 0,file_name,label
0,train_data/a6dcb93f596a43249135678dfcfc17ea.jpg,1
1,train_data/041be3153810433ab146bc97d5af505c.jpg,0
2,train_data/615df26ce9494e5db2f70e57ce7a3a4f.jpg,1
3,train_data/8542fe161d9147be8e835e50c0de39cd.jpg,0
4,train_data/5d81fa12bc3b4cea8c94a6700a477cf2.jpg,1


In [12]:
# Turn the relative file names listed in the CSVs into paths under the dataset root
def _to_full_path(relative_path):
    """Prefix a CSV-relative image path with the downloaded dataset directory."""
    return data_path + "/" + relative_path


train_full_path = train_data["file_name"].map(_to_full_path).values
test_full_path = test_data["id"].map(_to_full_path).values

# Training labels as an array, in the same row order as train_full_path
train_labels = train_data["label"].values

In [13]:
# Function to load and preprocess an image
def load_and_preprocess_image(path, label=None, image_size=(224, 224)):
    """Read a JPEG file and convert it into a preprocessed image tensor.

    Args:
        path: Path of the image file to read.
        label: Optional label that is passed through untouched, so the same
            function can be mapped over (path, label) datasets and over
            path-only datasets.
        image_size: Target (height, width) for the resize step. Defaults to
            (224, 224), the value that used to be hard-coded here.

    Returns:
        A tuple ``(image, label)``. ``label`` is ``None`` when the caller did
        not supply one (e.g. when mapping over the path-only test dataset).
    """
    raw_bytes = tf.io.read_file(path)
    image = tf.image.decode_jpeg(raw_bytes, channels=3)  # Ensure RGB format
    image = tf.image.resize(image, image_size)  # Default matches the ResNet50 input size
    image = preprocess_input(image)  # Apply ResNet50 preprocessing
    return image, label

# Create TensorFlow datasets from the image paths (paired with labels for training)
train_dataset = tf.data.Dataset.from_tensor_slices((train_full_path, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices(test_full_path)

# Shuffle the training (path, label) pairs before any image is decoded.
# A buffer as large as the dataset gives a full shuffle; the explicit seed makes
# the sequence of shuffles reproducible from one run of the notebook to the next.
SHUFFLE_SEED = 42
buffer_size = len(train_dataset)
train_dataset = train_dataset.shuffle(
    buffer_size,
    seed=SHUFFLE_SEED,
    reshuffle_each_iteration=True,
)

# Map the datasets to load and preprocess images in parallel
train_dataset = train_dataset.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
test_dataset = test_dataset.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

# Group the images into batches and prefetch upcoming batches while the current one is consumed
batch_size = 32
train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)