In [5]:
!pip install keras-preprocessing

Collecting keras-preprocessing
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl.metadata (1.9 kB)
Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: keras-preprocessing
Successfully installed keras-preprocessing-1.1.2


In [6]:
from keras.utils import to_categorical
from keras_preprocessing.image import load_img
from keras.models import Sequential
from keras.applications import MobileNetV2, ResNet152, VGG16, EfficientNetB0, InceptionV3
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D
import os
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

In [7]:
def createdataframe(dir):
    image_paths = []
    labels = []
    for label in os.listdir(dir):
        for imagename in os.listdir(os.path.join(dir, label)):
            image_paths.append(os.path.join(dir, label, imagename))
            labels.append(label)
        print(label, "completed")
    return image_paths, labels

def extract_features(images):
    features = []
    for image in tqdm(images):
        try:
            img = load_img(image, target_size=(236, 236))
            img = np.array(img)
            features.append(img)
        except Exception as e:
            print(f"Error loading image {image}: {e}")
    features = np.array(features)
    features = features.reshape(features.shape[0], 236, 236, 3)  # Reshape all images in one go
    return features

TRAIN_DIR = "/kaggle/input/realvsai-dataset/New_Data/New_Data"

train = pd.DataFrame()
train['image'], train['label'] = createdataframe(TRAIN_DIR)

train_features = extract_features(train['image'])

x_train = train_features / 255.0

le = LabelEncoder()
le.fit(train['label'])
y_train = le.transform(train['label'])
y_train = to_categorical(y_train, num_classes=2)

AI completed
Real completed


  0%|          | 0/592 [00:00<?, ?it/s]

In [9]:
from sklearn.model_selection import train_test_split


train_shuffled = train.sample(frac=1, random_state=42).reset_index(drop=True)


train_df, val_df = train_test_split(train_shuffled, test_size=0.15, random_state=42)

In [10]:
train_features = extract_features(train_df['image'])

x_train = train_features / 255.0

le = LabelEncoder()
le.fit(train_df['label'])
y_train = le.transform(train_df['label'])
y_train = to_categorical(y_train, num_classes=2)

  0%|          | 0/503 [00:00<?, ?it/s]

In [12]:
val_features = extract_features(val_df['image'])

x_val = val_features / 255.0

le = LabelEncoder()
le.fit(train_df['label'])
y_val = le.transform(val_df['label'])
y_val = to_categorical(y_val, num_classes=2)

  0%|          | 0/89 [00:00<?, ?it/s]

In [41]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
datagen.fit(x_train)


In [43]:
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

l1_reg = 1e-4  
l2_reg = 1e-4  

# Build the model
model = Sequential()

# Convolutional layers with L1 and L2 regularization
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(236, 236, 3),
                 kernel_regularizer=l1(l1_reg)))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', kernel_regularizer=l2(l2_reg)))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(128, kernel_size=(3, 3), activation='relu', kernel_regularizer=l1(l1_reg)))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(128, kernel_size=(3, 3), activation='relu', kernel_regularizer=l1(l1_reg)))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(256, kernel_size=(3, 3), activation='relu', kernel_regularizer=l2(l2_reg)))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(512, kernel_size=(3, 3), activation='relu', kernel_regularizer=l2(l2_reg)))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Fully connected layers with Dropout and L2 regularization
model.add(Flatten())
model.add(Dense(1024, activation='relu', kernel_regularizer=l2(l2_reg)))
model.add(Dropout(0.3))
model.add(Dense(1024, activation='relu', kernel_regularizer=l2(l2_reg)))
model.add(Dropout(0.3))
model.add(Dense(2048, activation='relu', kernel_regularizer=l2(l2_reg)))
model.add(Dropout(0.3))

# Output layer
model.add(Dense(2, activation='softmax', kernel_regularizer=l2(l2_reg)))

# Compile the model
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Train the model with validation data
history = model.fit(x=x_train, y=y_train, batch_size=32, epochs=200, validation_data=(x_val, y_val))


Epoch 1/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 900ms/step - accuracy: 0.4707 - loss: 1.5463 - val_accuracy: 0.5393 - val_loss: 1.2416
Epoch 2/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - accuracy: 0.5198 - loss: 1.1662 - val_accuracy: 0.5393 - val_loss: 0.9694
Epoch 3/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.4692 - loss: 0.9270 - val_accuracy: 0.4607 - val_loss: 0.8280
Epoch 4/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.4866 - loss: 0.8110 - val_accuracy: 0.4607 - val_loss: 0.7726
Epoch 5/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.4896 - loss: 0.7646 - val_accuracy: 0.4607 - val_loss: 0.7461
Epoch 6/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.4732 - loss: 0.7418 - val_accuracy: 0.4607 - val_loss: 0.7313
Epoch 7/200
[1m16/16[0m 

In [15]:
def createdataframetest(dir):
    image_paths = []
    for imagename in os.listdir(dir):
        image_path = os.path.join(dir, imagename)
        
        if os.path.isfile(image_path) and imagename.lower().endswith(('png', 'jpg', 'jpeg')):
            image_paths.append(image_path)
    return image_paths


TEST_DIR = "/kaggle/input/testdataset-realvsai/Test_Images"  
test = pd.DataFrame()
test['image'] = createdataframetest(TEST_DIR)

test_features = extract_features(test['image'])

x_test = test_features / 255.0  

  0%|          | 0/200 [00:00<?, ?it/s]

In [44]:
prediction = model.predict(x_test)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step


In [45]:
imagename = []

# Loop through the files in the specified directory
for img in os.listdir("/kaggle/input/testdataset-realvsai/Test_Images"):
    # Remove the file extension (e.g., '.jpg') and append the base name to the list
    base_name = os.path.splitext(img)[0]
    imagename.append(base_name)
    
predicted_labels = ['AI' if pred[0] > pred[1] else 'Real' for pred in prediction]

In [47]:
submission_df = pd.DataFrame({
    'Id': imagename,
    'Label': predicted_labels
})

# Save the DataFrame to a CSV file for Kaggle submission
submission_df.to_csv('submission_9.csv', index=False)

In [48]:

from sklearn.metrics import classification_report, confusion_matrix

y_pred = model.predict(x_val)
y_pred_classes = np.round(y_pred).astype(int)  # Convert probabilities to class labels


print(classification_report(y_val, y_pred_classes))
print(confusion_matrix(y_val, y_pred_classes))

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        48
           1       0.46      1.00      0.63        41

   micro avg       0.46      0.46      0.46        89
   macro avg       0.23      0.50      0.32        89
weighted avg       0.21      0.46      0.29        89
 samples avg       0.46      0.46      0.46        89



  _warn_prf(average, modifier, msg_start, len(result))


ValueError: multilabel-indicator is not supported