In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import zipfile
import os

zip_path = '/content/drive/My Drive/DATASET.zip'  # Replace with your file's path
extract_path = '/content/dataset'

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

os.listdir(extract_path)  # Check the extracted files


['DATASET', '__MACOSX']

In [3]:
import os
import cv2
import numpy as np
import json
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam


In [4]:
import os
import json
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Paths to dataset folders
base_path = '/content/dataset/DATASET'
fake_images_path = os.path.join(base_path, 'fake_cifake_images')
real_images_path = os.path.join(base_path, 'real_cifake_images')

# Load the JSON files for real and fake images
fake_json_path = os.path.join(base_path, 'fake_cifake_preds.json')
real_json_path = os.path.join(base_path, 'real_cifake_preds.json')

# Load fake image JSON data
with open(fake_json_path, 'r') as f:
    fake_data = json.load(f)

# Load real image JSON data
with open(real_json_path, 'r') as f:
    real_data = json.load(f)

# Function to load images from directory and preprocess
def load_and_preprocess_images(image_folder, json_data, image_size=(224, 224)):
    images = []
    labels = []

    for entry in json_data:
        # Image filename corresponds to the index in the JSON
        image_filename = f"{entry['index']}.png"
        image_path = os.path.join(image_folder, image_filename)

        # Read and preprocess the image (resize and normalize)
        img = cv2.imread(image_path)
        if img is None:
            continue  # Skip if image is missing or unreadable
        img = cv2.resize(img, image_size)  # Resize to a fixed size (e.g., 224x224)
        img = img.astype('float32') / 255.0  # Normalize to [0, 1]

        images.append(img)
        labels.append(0 if entry['prediction'] == 'fake' else 1)  # 0 for fake, 1 for real

    return np.array(images), np.array(labels)

# Load and preprocess fake and real images
fake_images, fake_labels = load_and_preprocess_images(fake_images_path, fake_data)
real_images, real_labels = load_and_preprocess_images(real_images_path, real_data)

# Combine the datasets (fake and real images)
X = np.concatenate([fake_images, real_images], axis=0)
y = np.concatenate([fake_labels, real_labels], axis=0)

# Shuffle and split the dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")
print(f"Training labels shape: {y_train.shape}")
print(f"Testing labels shape: {y_test.shape}")


Training data shape: (1600, 224, 224, 3)
Testing data shape: (400, 224, 224, 3)
Training labels shape: (1600,)
Testing labels shape: (400,)


In [5]:
# Load and preprocess test images
def load_test_images(test_image_folder, image_size=(224, 224)):
    test_images = []

    for image_name in os.listdir(test_image_folder):
        image_path = os.path.join(test_image_folder, image_name)

        # Read and preprocess the image (resize and normalize)
        img = cv2.imread(image_path)
        img = cv2.resize(img, image_size)  # Resize to a fixed size (e.g., 224x224)
        img = img.astype('float32') / 255.0  # Normalize to [0, 1]

        test_images.append(img)

    return np.array(test_images)

# Load and preprocess test images
test_images = load_test_images('/content/dataset/DATASET/test')
print(f"Test data shape: {test_images.shape}")


Test data shape: (500, 224, 224, 3)


In [6]:
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Initialize InceptionResNetV2 model with pre-trained weights (no top classification layer)
base_model = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the base model layers
base_model.trainable = False

# Build the model
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(1024, activation='relu'),
    Dense(1, activation='sigmoid')  # Output: 1 neuron (fake or real)
])

# Compile the model
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

# Model summary
model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m219055592/219055592[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [9]:
from sklearn.model_selection import train_test_split

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Train the model with manual validation data
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=32),
    validation_data=(X_val, y_val),
    epochs=10
)


Epoch 1/10
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 2s/step - accuracy: 0.5638 - loss: 1.7768 - val_accuracy: 0.6537 - val_loss: 0.5829
Epoch 2/10
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 447ms/step - accuracy: 0.7258 - loss: 0.5706 - val_accuracy: 0.7317 - val_loss: 0.5256
Epoch 3/10
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 434ms/step - accuracy: 0.7481 - loss: 0.5204 - val_accuracy: 0.7463 - val_loss: 0.5057
Epoch 4/10
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 424ms/step - accuracy: 0.7803 - loss: 0.4425 - val_accuracy: 0.7707 - val_loss: 0.4903
Epoch 5/10
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 402ms/step - accuracy: 0.8180 - loss: 0.4097 - val_accuracy: 0.7951 - val_loss: 0.4232
Epoch 6/10
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 453ms/step - accuracy: 0.8187 - loss: 0.4020 - val_accuracy: 0.8195 - val_loss: 0.4313
Epoch 7/10
[1m26/26[0m 

In [11]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Initialize ImageDataGenerator for data augmentation
datagen = ImageDataGenerator(
    rotation_range=20,       # Randomly rotate images in the range (degrees)
    width_shift_range=0.2,   # Randomly shift images horizontally
    height_shift_range=0.2,  # Randomly shift images vertically
    shear_range=0.2,         # Random shear transformation
    zoom_range=0.2,          # Random zoom
    horizontal_flip=True,    # Randomly flip images horizontally
    fill_mode='nearest'      # Fill mode for empty pixels
)

# Since we already split X_train and y_train previously,
# further split the training set into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

# Train the model with manual validation
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=32),  # Apply augmentation to training set
    validation_data=(X_val, y_val),  # No augmentation for validation set
    epochs=20,  # Specify the number of epochs
    batch_size=32
)

# Evaluate model performance on the test set (which was untouched during training)
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Final Test Accuracy: {test_acc:.4f}")

# Confirm test set shape
print(f"Final Test Data Shape: {X_test.shape}")


  self._warn_if_super_not_called()


Epoch 1/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 2s/step - accuracy: 0.8651 - loss: 0.3264 - val_accuracy: 0.8855 - val_loss: 0.2742
Epoch 2/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 484ms/step - accuracy: 0.9162 - loss: 0.2186 - val_accuracy: 0.9389 - val_loss: 0.2277
Epoch 3/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 381ms/step - accuracy: 0.9189 - loss: 0.2101 - val_accuracy: 0.9008 - val_loss: 0.2343
Epoch 4/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 374ms/step - accuracy: 0.8756 - loss: 0.2775 - val_accuracy: 0.9237 - val_loss: 0.2254
Epoch 5/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 522ms/step - accuracy: 0.8718 - loss: 0.2999 - val_accuracy: 0.9008 - val_loss: 0.2329
Epoch 6/20
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 424ms/step - accuracy: 0.8803 - loss: 0.2747 - val_accuracy: 0.9237 - val_loss: 0.2626
Epoch 7/20
[1m17/17[0m [32

In [12]:
# Predict the test images
test_predictions = model.predict(test_images)
test_predictions = (test_predictions > 0.5).astype(int)  # Convert to 0 or 1

# Create the output JSON in the required format
output_data = []
for i, pred in enumerate(test_predictions):
    output_data.append({
        "index": i + 1,  # Ensure the index starts from 1
        "prediction": "real" if pred == 1 else "fake"
    })

# Save the predictions to a JSON file
output_json_path = 'Unpredictable.json'
with open(output_json_path, 'w') as f:
    json.dump(output_data, f, indent=4)

print(f"Predictions saved to {output_json_path}")


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 1s/step
Predictions saved to Unpredictable.json


In [None]:
from tensorflow.keras.preprocessing import image
import numpy as np
import os

# Path to your test images folder
test_folder = '/content/dataset/DATASET/test'

# List all the image files in the test folder
test_image_files = [f for f in os.listdir(test_folder) if f.endswith('.jpg') or f.endswith('.png')]

# Prepare the test images and labels (you may have labels for the test set, or it could be unlabeled)
test_images = []
for img_file in test_image_files:
    img_path = os.path.join(test_folder, img_file)
    img = image.load_img(img_path, target_size=(224, 224))  # Resizing to 224x224
    img_array = image.img_to_array(img)  # Convert to NumPy array
    test_images.append(img_array)

# Convert the list to a NumPy array
test_images = np.array(test_images)

# Normalize the images
test_images = test_images / 255.0  # Ensure the images are scaled between 0 and 1


In [None]:
predictions = model.predict(test_images)

# Convert predictions to binary labels (if necessary, e.g., 0 for fake, 1 for real)
predicted_labels = (predictions > 0.5).astype("int32")


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 141ms/step


In [13]:
# Make predictions on the test set (500 test images)
predictions = model.predict(test_images)

# Since the model output is likely a probability (e.g., for binary classification),
# you can convert these to binary labels (0 or 1) by setting a threshold (usually 0.5).
predicted_labels = (predictions > 0.5).astype(int)

# Check the shape and a few predictions
print(f"Predicted labels shape: {predicted_labels.shape}")
print(f"First 10 predicted labels: {predicted_labels[:10]}")


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 143ms/step
Predicted labels shape: (500, 1)
First 10 predicted labels: [[1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [0]]


In [14]:
# Evaluate the model on the training and validation data (if available)
train_loss, train_accuracy = model.evaluate(X_train, y_train)
val_loss, val_accuracy = model.evaluate(X_val, y_val)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 505ms/step - accuracy: 0.9610 - loss: 0.1331
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 122ms/step - accuracy: 0.8730 - loss: 0.2819
Training Accuracy: 95.80%
Validation Accuracy: 86.26%
