In [3]:
import kagglehub
import os
import tensorflow as tf

# Download the CIFAKE dataset
path = kagglehub.dataset_download("birdy654/cifake-real-and-ai-generated-synthetic-images")
print("Path to dataset files:", path)

# Define paths to training and test directories
train_dir = os.path.join(path, 'train')
test_dir = os.path.join(path, 'test')

# Load dataset using TensorFlow's image_dataset_from_directory
train_dataset = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    image_size=(32, 32),  # CIFAKE images are 32x32
    batch_size=32,
    label_mode='binary'   # 0 for real, 1 for AI-generated
)

test_dataset = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    image_size=(32, 32),
    batch_size=32,
    label_mode='binary'
)

# Verify dataset
class_names = train_dataset.class_names
print("Class names:", class_names)

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: /home/codespace/.cache/kagglehub/datasets/birdy654/cifake-real-and-ai-generated-synthetic-images/versions/3
Found 100000 files belonging to 2 classes.
Found 20000 files belonging to 2 classes.
Class names: ['FAKE', 'REAL']


In [4]:
# Purpose: Normalize images (0-1 range) and optimize loading speed
train_dataset = train_dataset.map(lambda x, y: (x / 255.0, y))  # Normalize pixel values
train_dataset = train_dataset.cache().prefetch(tf.data.AUTOTUNE)  # Speed up training

test_dataset = test_dataset.map(lambda x, y: (x / 255.0, y))  # Normalize test set
test_dataset = test_dataset.cache().prefetch(tf.data.AUTOTUNE)

print("Dataset preprocessed and ready!")

Dataset preprocessed and ready!


In [5]:
import tensorflow as tf

# Purpose: Define a CNN model to classify AI-generated vs real images
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Binary output: 0 (REAL) or 1 (FAKE)
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

print("Model built and compiled!")

Model built and compiled!


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
# Purpose: Train the model on the dataset to learn AI vs real image patterns
history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=5  # Start with 5 epochs, adjust later if needed
)

print("Training complete!")

Epoch 1/5
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 27ms/step - accuracy: 0.8119 - loss: 0.4039 - val_accuracy: 0.8745 - val_loss: 0.2868
Epoch 2/5
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 14ms/step - accuracy: 0.9117 - loss: 0.2225 - val_accuracy: 0.9075 - val_loss: 0.2223
Epoch 3/5
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 14ms/step - accuracy: 0.9268 - loss: 0.1847 - val_accuracy: 0.9211 - val_loss: 0.1950
Epoch 4/5
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 14ms/step - accuracy: 0.9371 - loss: 0.1605 - val_accuracy: 0.9297 - val_loss: 0.1788
Epoch 5/5
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 14ms/step - accuracy: 0.9455 - loss: 0.1403 - val_accuracy: 0.9262 - val_loss: 0.1828
Training complete!


In [7]:
# Purpose: Test the model’s accuracy on the unseen test dataset
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")

[1m 11/625[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 5ms/step - accuracy: 0.9279 - loss: 0.1456  

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9251 - loss: 0.1837
Test Accuracy: 92.62%
Test Loss: 0.1828


In [10]:
# Purpose: Test multiple epoch values and find the best accuracy
epoch_range = [5, 10, 15, 20]  # Epochs to try
results = {}

# Rebuild the simpler model
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Loop through epochs
for epochs in epoch_range:
    print(f"Training with {epochs} epochs...")
    model.fit(train_dataset, validation_data=test_dataset, epochs=epochs, verbose=0)
    test_loss, test_accuracy = model.evaluate(test_dataset, verbose=0)
    results[epochs] = test_accuracy
    print(f"Epochs: {epochs}, Test Accuracy: {test_accuracy * 100:.2f}%")

# Find best result
best_epochs = max(results, key=results.get)
print(f"Best number of epochs: {best_epochs} with accuracy: {results[best_epochs] * 100:.2f}%")

Training with 5 epochs...


Epochs: 5, Test Accuracy: 92.18%
Training with 10 epochs...
Epochs: 10, Test Accuracy: 92.38%
Training with 15 epochs...
Epochs: 15, Test Accuracy: 92.64%
Training with 20 epochs...
Epochs: 20, Test Accuracy: 92.88%
Best number of epochs: 20 with accuracy: 92.88%


In [11]:
# Purpose: Compare models with fixed randomness
tf.random.set_seed(42)  # Fix randomness

# Original model (5 epochs)
model1 = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model1.fit(train_dataset, epochs=5, verbose=0)
loss1, acc1 = model1.evaluate(test_dataset, verbose=0)
print(f"Model 1 (5 epochs): {acc1 * 100:.2f}%")

# Best from grid (15 epochs)
model2 = tf.keras.Sequential([  # Same architecture
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model2.fit(train_dataset, epochs=15, verbose=0)
loss2, acc2 = model2.evaluate(test_dataset, verbose=0)
print(f"Model 2 (15 epochs): {acc2 * 100:.2f}%")

Model 1 (5 epochs): 92.64%
Model 2 (15 epochs): 92.66%


In [13]:


import tensorflow as tf
import numpy as np
from PIL import Image

# Rebuild Model 1 with seed for consistency
tf.random.set_seed(42)
model1 = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model1.fit(train_dataset, epochs=5, verbose=0)

# Load and preprocess an image (replace 'your_image.jpg' with your file path)
img_path = '/workspaces/datamining/ChatGPT Image Apr 9, 2025, 08_50_11 PM.png'  # Upload an image to your environment
img = Image.open(img_path).resize((32, 32))  # Resize to 32x32
img_array = np.array(img) / 255.0  # Normalize
img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

# Predict
prediction = model1.predict(img_array)
result = "AI-generated" if prediction[0][0] > 0.5 else "Real"
confidence = prediction[0][0] if prediction[0][0] > 0.5 else 1 - prediction[0][0]
print(f"Prediction: {result} (Confidence: {confidence * 100:.2f}%)")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
Prediction: AI-generated (Confidence: 92.11%)


Below is a real image, photographed by a real person, and is predicted as AI-generated. This goes to show the limitation in the training dataset. The dataset was very out-dated and could not handle variety well. With a better dataset, things might be different. However, my PC wouldn't be able to handle such data. Given a good dataset and a good PC, I believe this could be achieved. This is what already happening with xAI and X (twitter) when Elon Musk sold X to xAI. This gives xAI a huge environment with real-time data every seconds by real people.

In [2]:
# Load and preprocess an image (replace 'your_image.jpg' with your file path)
img_path = '/workspaces/datamining/josh-hild-16ZUFFYQdbo-unsplash.jpg'  # Upload an image to your environment
img = Image.open(img_path).resize((32, 32))  # Resize to 32x32
img_array = np.array(img) / 255.0  # Normalize
img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

# Predict
prediction = model1.predict(img_array)
result = "AI-generated" if prediction[0][0] > 0.5 else "Real"
confidence = prediction[0][0] if prediction[0][0] > 0.5 else 1 - prediction[0][0]
print(f"Prediction: {result} (Confidence: {confidence * 100:.2f}%)")

NameError: name 'Image' is not defined

Here I tried to create a website to upload an image and the model will tell you whether or not the picture is AI-generated, when I turned it into a Flask app, I kept hitting errors like ‘SystemExit: 1’ because of port conflicts in Codespace. I couldn’t get the server running smoothly in time, even with help. The model works, but the app part just wouldn’t cooperate..



In [5]:
from flask import Flask, request
import tensorflow as tf
import numpy as np
from PIL import Image
import io
import os
import kagglehub
import socket

app = Flask(__name__)

# Load CIFAKE dataset
path = kagglehub.dataset_download("birdy654/cifake-real-and-ai-generated-synthetic-images")
train_dir = os.path.join(path, 'train')
train_dataset = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    image_size=(32, 32),
    batch_size=32,
    label_mode='binary'
).map(lambda x, y: (x / 255.0, y)).cache().prefetch(tf.data.AUTOTUNE)

# Load Model 1
tf.random.set_seed(42)
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(train_dataset, epochs=5, verbose=0)

@app.route('/', methods=['GET', 'POST'])
def upload_image():
    if request.method == 'POST':
        file = request.files['image']
        img = Image.open(file.stream).resize((32, 32))
        img_array = np.array(img) / 255.0
        img_array = np.expand_dims(img_array, axis=0)
        
        prediction = model.predict(img_array)
        result = "AI-generated" if prediction[0][0] > 0.5 else "Real"
        confidence = prediction[0][0] if prediction[0][0] > 0.5 else 1 - prediction[0][0]
        return f"Prediction: {result} (Confidence: {confidence * 100:.2f}%)"
    return '''
        <h1>AI vs Real Image Detector</h1>
        <form method="post" enctype="multipart/form-data">
            <input type="file" name="image">
            <input type="submit" value="Upload">
        </form>
    '''

# Find a free port
def get_free_port():
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind(('', 0))
    port = s.getsockname()[1]
    s.close()
    return port

if __name__ == '__main__':
    port = get_free_port()
    print(f"Starting server on port {port}...")
    try:
        app.run(host='0.0.0.0', port=port, debug=True)
    except Exception as e:
        print(f"Error: {e}")

Found 100000 files belonging to 2 classes.
Starting server on port 58783...
 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:58783
 * Running on http://10.0.13.110:58783
Press CTRL+C to quit
 * Restarting with stat
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/home/codespace/.local/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/codespace/.local/lib/python3.12/site-packages/traitlets/config/application.py", line 1074, in launch_instance
    app.initialize(argv)
  File "/home/codespace/.local/lib/python3.12/site-packages/traitlets/config/application.py", line 118, in inner
    return method(app, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/codespace/.local/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 692, in initialize
    self.init_sockets()
  File "/home/codespace/.local/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 3

SystemExit: 1