# Detect AI vs Human generated images
In this notebook, I build a convolutional neural network (convnet) that distinguishes real, human-made images from AI-generated ones.

In [1]:
# Libraries used in this notebook
import kagglehub
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras

In [2]:
# Fetch the dataset through kagglehub; the returned value is the local
# directory prefix used for every file path built later in the notebook
data_path = kagglehub.dataset_download(
    "alessandrasala79/ai-vs-human-generated-dataset"
)

In [3]:
# Read the CSV manifests that reference the image files.
# train.csv: first column is used as the index; test.csv: read as-is.
train_data = pd.read_csv(f"{data_path}/train.csv", index_col=0)
test_data = pd.read_csv(f"{data_path}/test.csv")

In [4]:
# Preview the first five rows of the training manifest
train_data.head(n=5)

Unnamed: 0,file_name,label
0,train_data/a6dcb93f596a43249135678dfcfc17ea.jpg,1
1,train_data/041be3153810433ab146bc97d5af505c.jpg,0
2,train_data/615df26ce9494e5db2f70e57ce7a3a4f.jpg,1
3,train_data/8542fe161d9147be8e835e50c0de39cd.jpg,0
4,train_data/5d81fa12bc3b4cea8c94a6700a477cf2.jpg,1


In [5]:
# Build the full path to every image by prefixing the dataset directory.
# Vectorised string concatenation replaces the per-row lambda, and
# to_numpy() always yields a NumPy array, whatever pandas dtype the
# column has (``.values`` may return an extension array instead).
train_full_path = (data_path + "/" + train_data["file_name"]).to_numpy()
test_full_path = (data_path + "/" + test_data["id"]).to_numpy()

In [6]:
# Keep the first ten training image paths as a small sample for quick checks
# (the slice takes them in file order; "top" does not imply any ranking)
top_10_images = train_full_path[:10]
top_10_images

array(['/kaggle/input/ai-vs-human-generated-dataset/train_data/a6dcb93f596a43249135678dfcfc17ea.jpg',
       '/kaggle/input/ai-vs-human-generated-dataset/train_data/041be3153810433ab146bc97d5af505c.jpg',
       '/kaggle/input/ai-vs-human-generated-dataset/train_data/615df26ce9494e5db2f70e57ce7a3a4f.jpg',
       '/kaggle/input/ai-vs-human-generated-dataset/train_data/8542fe161d9147be8e835e50c0de39cd.jpg',
       '/kaggle/input/ai-vs-human-generated-dataset/train_data/5d81fa12bc3b4cea8c94a6700a477cf2.jpg',
       '/kaggle/input/ai-vs-human-generated-dataset/train_data/25ea852f30594bc5915eb929682af429.jpg',
       '/kaggle/input/ai-vs-human-generated-dataset/train_data/e67085fb6d814cbabe08f978c738f3f7.jpg',
       '/kaggle/input/ai-vs-human-generated-dataset/train_data/041c36d9269146cdb88e7526e3b91651.jpg',
       '/kaggle/input/ai-vs-human-generated-dataset/train_data/4aea3b876247467c8d3713d4920148ab.jpg',
       '/kaggle/input/ai-vs-human-generated-dataset/train_data/09708379751e44d0bc9

In [37]:
def load_images(path, image_size=(224, 224)):
    """Load a single JPEG file and prepare it as ResNet50 input.

    Despite the plural name, each call handles exactly one file path.

    Args:
        path: Path of the JPEG file to read.
        image_size: Target ``(height, width)`` handed to ``tf.image.resize``.
            Defaults to ``(224, 224)``, the size that used to be hard-coded.

    Returns:
        A float32 tensor of shape ``(height, width, 3)`` that has been run
        through ``keras.applications.resnet50.preprocess_input``.
    """
    raw_bytes = tf.io.read_file(path)
    # channels=3 requests an RGB image from the decoder.
    image = tf.image.decode_jpeg(raw_bytes, channels=3)
    # tf.image.resize (default bilinear method) already returns float32,
    # so no explicit cast is needed afterwards.
    image = tf.image.resize(image, image_size)
    # Pixel values are deliberately left in the 0-255 range here:
    # preprocess_input applies the ResNet50-specific normalisation itself.
    return keras.applications.resnet50.preprocess_input(image)

In [38]:
# Try the loader on the first sampled path; the bare expression on the
# last line displays the resulting tensor
sample_image = load_images(top_10_images[0])
sample_image

<tf.Tensor: shape=(224, 224, 3), dtype=float32, numpy=
array([[[-30.22982   ,  12.50161   ,  -6.8381653 ],
        [-35.24003   ,   0.57814026, -20.123878  ],
        [-54.010433  , -24.018799  , -45.06266   ],
        ...,
        [135.82632   , 127.64954   , 119.91692   ],
        [132.30579   , 123.924995  , 116.222984  ],
        [136.80118   , 128.75204   , 121.06533   ]],

       [[-24.454308  ,  17.705696  ,  -3.9657135 ],
        [-32.15329   ,   7.3179398 , -16.174896  ],
        [-49.444107  ,  -8.569817  , -30.460617  ],
        ...,
        [143.8569    , 132.0169    , 123.1159    ],
        [148.48447   , 136.64447   , 127.74348   ],
        [149.63242   , 137.14957   , 128.89142   ]],

       [[-27.939003  ,  37.986305  ,  12.4169235 ],
        [-39.275734  ,  30.797523  ,   5.891426  ],
        [-53.40839   ,  24.664879  ,   4.447548  ],
        ...,
        [133.89267   , 125.66488   , 113.76388   ],
        [134.92325   , 126.4302    , 114.5292    ],
        [136.54056

In [28]:
# NOTE(review): same statements as the preceding cell — looks like a
# leftover re-run; confirm whether this cell can be dropped
sample_image = load_images(top_10_images[0])
sample_image

<tf.Tensor: shape=(224, 224, 3), dtype=float32, numpy=
array([[[-30.22982   ,  12.50161   ,  -6.8381653 ],
        [-35.24003   ,   0.57814026, -20.123878  ],
        [-54.010433  , -24.018799  , -45.06266   ],
        ...,
        [135.82632   , 127.64954   , 119.91692   ],
        [132.30579   , 123.924995  , 116.222984  ],
        [136.80118   , 128.75204   , 121.06533   ]],

       [[-24.454308  ,  17.705696  ,  -3.9657135 ],
        [-32.15329   ,   7.3179398 , -16.174896  ],
        [-49.444107  ,  -8.569817  , -30.460617  ],
        ...,
        [143.8569    , 132.0169    , 123.1159    ],
        [148.48447   , 136.64447   , 127.74348   ],
        [149.63242   , 137.14957   , 128.89142   ]],

       [[-27.939003  ,  37.986305  ,  12.4169235 ],
        [-39.275734  ,  30.797523  ,   5.891426  ],
        [-53.40839   ,  24.664879  ,   4.447548  ],
        ...,
        [133.89267   , 125.66488   , 113.76388   ],
        [134.92325   , 126.4302    , 114.5292    ],
        [136.54056

In [40]:
# Smallest value in the preprocessed tensor, computed on its NumPy copy
sample_image.numpy().min()

-123.68