In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory lazily and print the full path of every file found.
input_file_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# Hyperparameters for this experiment
IMG_SIZE = 624  # Images are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 32  # Moderate batch size
LEARNING_RATE = 0.0001  # Initial Adam learning rate
DROPOUT_RATE = 0  # Dropout is disabled: with a rate of 0 the Dropout layer below is a no-op
EARLY_STOPPING_PATIENCE = 15  # Epochs without val_accuracy improvement before training stops
# Must stay below EARLY_STOPPING_PATIENCE. With equal values the learning-rate
# drop fires on (essentially) the same epoch at which early stopping ends
# training, so the reduced rate is never actually trained with.
REDUCE_LR_PATIENCE = 5

# Directory containing one sub-folder per class
TRAIN_DIR = '/kaggle/input/wec-intelligence-sig-2024-recruitment-task-cv/train_images/train_images'

# Data augmentation for training set
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=30,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    validation_split=0.2  # Reserve 20% of the data for validation
)

# Data generator for validation set (no augmentation, just rescaling)
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Load dataset with directory structure and create train/validation splits
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training'
)

validation_generator = val_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'
)
# Labels follow the class folder names; the recorded run of this cell printed
# {'fake_train': 0, 'real_train': 1}, i.e. the sigmoid output is P(real).
print("Class Indices:", train_generator.class_indices)


# Define CNN model with Batch Normalization.
# The input shape is declared with an explicit Input object instead of the
# `input_shape=` layer argument, which Keras warns about for Sequential models.
model = Sequential([
    tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),

    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(256, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(DROPOUT_RATE),  # No-op while DROPOUT_RATE is 0
    Dense(1, activation='sigmoid')  # Binary classification
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=LEARNING_RATE),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Callbacks for early stopping, model saving, and learning rate reduction
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy', patience=EARLY_STOPPING_PATIENCE, restore_best_weights=True)
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'best_model.keras', save_best_only=True, monitor='val_accuracy')
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.5, patience=REDUCE_LR_PATIENCE, min_lr=1e-9)

# Train the model (early stopping normally ends the run before the epoch limit)
history = model.fit(
    train_generator,
    epochs=100,
    validation_data=validation_generator,
    callbacks=[early_stopping, model_checkpoint, reduce_lr]
)

# Save the final model
model.save('final_model.keras')

# Evaluate the model on the validation set
val_loss, val_accuracy = model.evaluate(validation_generator)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

# Plot learning curves

# Plot training & validation accuracy values
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

Found 1280 images belonging to 2 classes.
Found 320 images belonging to 2 classes.
Class Indices: {'fake_train': 0, 'real_train': 1}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100


  self._warn_if_super_not_called()
I0000 00:00:1728477845.129088      94 service.cc:145] XLA service 0x784e24007bd0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1728477845.129158      94 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1728477845.129164      94 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
2024-10-09 12:44:23.404476: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng0{} for conv (f32[32,32,311,311]{3,2,1,0}, u8[0]{0}) custom-call(f32[32,64,309,309]{3,2,1,0}, f32[64,32,3,3]{3,2,1,0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBackwardInput", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"conv_result_scale":1,"activation_mode":"kNone","side_input_scale":0,"leakyrelu_alpha":0}} is taking a while...
2024-10-09 12:44:23.599700: 

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m152s[0m 2s/step - accuracy: 0.5716 - loss: 2.8011 - val_accuracy: 0.5000 - val_loss: 21.1589 - learning_rate: 1.0000e-04
Epoch 2/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 2s/step - accuracy: 0.7123 - loss: 0.5741 - val_accuracy: 0.5000 - val_loss: 43.6741 - learning_rate: 1.0000e-04
Epoch 3/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 2s/step - accuracy: 0.7382 - loss: 0.5370 - val_accuracy: 0.5000 - val_loss: 63.0798 - learning_rate: 1.0000e-04
Epoch 4/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 2s/step - accuracy: 0.7621 - loss: 0.5105 - val_accuracy: 0.5000 - val_loss: 79.5977 - learning_rate: 1.0000e-04
Epoch 5/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 2s/step - accuracy: 0.7924 - loss: 0.4353 - val_accuracy: 0.5000 - val_loss: 87.1505 - learning_rate: 1.0000e-04
Epoch 6/100
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

NameError: name 'ves' is not defined

In [3]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import re  # Import regex module

# Resolution every test image is resized to before prediction
IMG_SIZE = 624

# Restore the model written by the training cell
model = tf.keras.models.load_model('final_model.keras')

# Folder that holds the test image files
test_images_dir = '/kaggle/input/wec-intelligence-sig-2024-recruitment-task-cv/test_images/test_images'
image_files = os.listdir(test_images_dir)

# The first run of digits in a file name is used as the image ID
id_pattern = re.compile(r'(\d+)')

images = []
ids = []
for filename in image_files:
    match = id_pattern.search(filename)
    if not match:
        # File names without any digits are ignored
        continue
    ids.append(int(match.group(1)))

    # Load, convert to an array and scale pixel values to [0, 1]
    img = load_img(os.path.join(test_images_dir, filename), target_size=(IMG_SIZE, IMG_SIZE))
    images.append(img_to_array(img) / 255.0)

# Stack all images into a single array and run the model on it
images_array = np.array(images)
predictions = model.predict(images_array)

# Threshold the sigmoid output at 0.5 (training labels: 0 = fake, 1 = real)
predicted_classes = (predictions.flatten() > 0.5).astype(int)

# Training used 'fake_train' = 0 and 'real_train' = 1; the submission uses the
# opposite encoding, so flip the labels (fake -> 1, real -> 0)
mapped_classes = 1 - predicted_classes

# Assemble the submission table and write it out
submission_df = pd.DataFrame({'ID': ids}).assign(TARGET=mapped_classes)
submission_df.to_csv('submission.csv', index=False)

print("Submission file created successfully!")


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 120ms/step
Submission file created successfully!


In [6]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Hyperparameters
IMG_SIZE = 224  # Images are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 32
LEARNING_RATE = 0.0001
DROPOUT_RATE = 0.4

# Arguments shared by the training and validation directory iterators
flow_options = dict(
    directory='/kaggle/input/wec-intelligence-sig-2024-recruitment-task-cv/train_images/train_images',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
)

# Validation images are only rescaled; training images are also augmented.
# Both generators reserve the same 20% of the data for validation.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    horizontal_flip=True,
    rotation_range=15,
    zoom_range=0.15,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

train_generator = train_datagen.flow_from_directory(subset='training', **flow_options)
validation_generator = val_datagen.flow_from_directory(subset='validation', **flow_options)

# Build the CNN layer by layer: three conv/pool stages followed by a dense head
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
for n_filters in (85, 128):
    model.add(Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(DROPOUT_RATE))
model.add(Dense(1, activation='sigmoid'))  # Single sigmoid unit for binary classification

model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# All three callbacks watch validation accuracy:
#   - stop after 5 epochs without improvement and restore the best weights
#   - keep the best model seen so far on disk
#   - halve the learning rate after 3 epochs without improvement
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy', patience=5, restore_best_weights=True)
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'best_model.keras', save_best_only=True, monitor='val_accuracy')
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.5, patience=3, min_lr=1e-9)
training_callbacks = [early_stopping, model_checkpoint, reduce_lr]

# Fit for at most 50 epochs
history = model.fit(
    train_generator,
    epochs=50,
    validation_data=validation_generator,
    callbacks=training_callbacks,
)

# Persist the trained model, then report accuracy on the validation split
model.save('final_model.keras')

val_loss, val_accuracy = model.evaluate(validation_generator)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")


Found 1280 images belonging to 2 classes.
Found 320 images belonging to 2 classes.
Epoch 1/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 406ms/step - accuracy: 0.5084 - loss: 0.7167 - val_accuracy: 0.4969 - val_loss: 0.6925 - learning_rate: 1.0000e-04
Epoch 2/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 404ms/step - accuracy: 0.5298 - loss: 0.6922 - val_accuracy: 0.5531 - val_loss: 0.6904 - learning_rate: 1.0000e-04
Epoch 3/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 402ms/step - accuracy: 0.5150 - loss: 0.6893 - val_accuracy: 0.6094 - val_loss: 0.6769 - learning_rate: 1.0000e-04
Epoch 4/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 385ms/step - accuracy: 0.5226 - loss: 0.6895 - val_accuracy: 0.5906 - val_loss: 0.6616 - learning_rate: 1.0000e-04
Epoch 5/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 417ms/step - accuracy: 0.6214 - loss: 0.6701 - val_accuracy: 0.6313 - val_los

In [10]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# ---- Configuration ----
IMG_SIZE = 224  # Side length the images are resized to
BATCH_SIZE = 32
LEARNING_RATE = 0.0001
DROPOUT_RATE = 0.5  # Applied after each hidden dense layer

train_images_path = '/kaggle/input/wec-intelligence-sig-2024-recruitment-task-cv/train_images/train_images'
image_shape = (IMG_SIZE, IMG_SIZE)

# ---- Data generators ----
# Training images are rescaled and randomly augmented; 20% is held out for validation.
augmentation_options = {
    'horizontal_flip': True,
    'rotation_range': 15,
    'zoom_range': 0.15,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
}
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2, **augmentation_options)

# Validation images are only rescaled
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_generator = train_datagen.flow_from_directory(
    train_images_path,
    subset='training',
    class_mode='binary',
    batch_size=BATCH_SIZE,
    target_size=image_shape,
)
validation_generator = val_datagen.flow_from_directory(
    train_images_path,
    subset='validation',
    class_mode='binary',
    batch_size=BATCH_SIZE,
    target_size=image_shape,
)

# Show which label each class folder received
# (the recorded run printed {'fake_train': 0, 'real_train': 1})
print("Class Indices:", train_generator.class_indices)

# ---- Model ----
# Four conv/pool stages with a growing number of filters; only the first
# convolution carries the input shape.
layer_stack = []
for stage, n_filters in enumerate((64, 128, 256, 512)):
    conv_options = {'activation': 'relu'}
    if stage == 0:
        conv_options['input_shape'] = (IMG_SIZE, IMG_SIZE, 3)
    layer_stack.append(Conv2D(n_filters, (3, 3), **conv_options))
    layer_stack.append(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: two dense layers with dropout, then a sigmoid output unit
layer_stack.append(Flatten())
for n_units in (256, 128):
    layer_stack.append(Dense(n_units, activation='relu'))
    layer_stack.append(Dropout(DROPOUT_RATE))
layer_stack.append(Dense(1, activation='sigmoid'))

model = Sequential(layer_stack)

model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=LEARNING_RATE),
    metrics=['accuracy'],
)

# ---- Training ----
# Every callback monitors validation accuracy.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'best_model.keras',
    monitor='val_accuracy',
    save_best_only=True,
)
# Halve the learning rate after 3 epochs without improvement
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=3,
    min_lr=1e-9,
)

history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=50,
    callbacks=[early_stopping, model_checkpoint, reduce_lr],
)

# ---- Save and evaluate ----
model.save('final_model.keras')

val_loss, val_accuracy = model.evaluate(validation_generator)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")


Found 1280 images belonging to 2 classes.
Found 320 images belonging to 2 classes.
Class Indices: {'fake_train': 0, 'real_train': 1}
Epoch 1/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 438ms/step - accuracy: 0.4943 - loss: 0.6993 - val_accuracy: 0.5250 - val_loss: 0.6927 - learning_rate: 1.0000e-04
Epoch 2/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 402ms/step - accuracy: 0.5179 - loss: 0.6927 - val_accuracy: 0.5000 - val_loss: 0.6921 - learning_rate: 1.0000e-04
Epoch 3/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 406ms/step - accuracy: 0.4878 - loss: 0.6936 - val_accuracy: 0.5063 - val_loss: 0.6845 - learning_rate: 1.0000e-04
Epoch 4/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 405ms/step - accuracy: 0.5403 - loss: 0.6911 - val_accuracy: 0.5000 - val_loss: 0.6751 - learning_rate: 1.0000e-04
Epoch 5/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 404ms/step - accuracy: 0.51

In [18]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import re  # Import regex module

# Side length every test image is resized to
IMG_SIZE = 224

# Model produced by the training cell
model = tf.keras.models.load_model('final_model.keras')

# Location of the test images and the file names found there
test_images_dir = '/kaggle/input/wec-intelligence-sig-2024-recruitment-task-cv/test_images/test_images'
image_files = os.listdir(test_images_dir)

# Pair each file name with the first run of digits it contains (the image ID);
# names without digits are dropped.
id_searches = [(name, re.search(r'(\d+)', name)) for name in image_files]
numbered_files = [(name, int(found.group(1))) for name, found in id_searches if found]

ids = [image_id for _, image_id in numbered_files]

# Load each remaining image, convert it to an array and rescale it to [0, 1]
images = [
    img_to_array(
        load_img(os.path.join(test_images_dir, name), target_size=(IMG_SIZE, IMG_SIZE))
    ) / 255.0
    for name, _ in numbered_files
]

# One array holding the whole test set
images_array = np.array(images)

# Sigmoid probabilities for every test image
predictions = model.predict(images_array)

# Probabilities above 0.5 become class 1 ('real_train'), the rest class 0 ('fake_train')
predicted_classes = np.ravel(predictions > 0.5).astype(int)

# The submission encodes fake as 1 and real as 0, the reverse of the training labels
mapped_classes = 1 - predicted_classes

# Build the submission table
submission_columns = {'ID': ids, 'TARGET': mapped_classes}
submission_df = pd.DataFrame(submission_columns)

# Write it to disk without the index column
submission_df.to_csv('submission.csv', index=False)

print("Submission file created successfully!")


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 236ms/step
Submission file created successfully!


In [7]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Experiment settings
IMG_SIZE = 224
BATCH_SIZE = 32
LEARNING_RATE = 0.0001
DROPOUT_RATE = 0.4

# Root folder with one sub-folder per class
dataset_dir = '/kaggle/input/wec-intelligence-sig-2024-recruitment-task-cv/train_images/train_images'

# Training generator: rescaling plus light random augmentation, 20% held out
train_datagen = ImageDataGenerator(
    validation_split=0.2,
    rescale=1./255,
    height_shift_range=0.1,
    width_shift_range=0.1,
    zoom_range=0.15,
    rotation_range=15,
    horizontal_flip=True
)

# Validation generator: rescaling only, same 20% split
val_datagen = ImageDataGenerator(validation_split=0.2, rescale=1./255)


def _flow(datagen, subset):
    """Create a binary-label directory iterator over `subset` of the dataset."""
    return datagen.flow_from_directory(
        dataset_dir,
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='binary',
        subset=subset
    )


train_generator = _flow(train_datagen, 'training')
validation_generator = _flow(val_datagen, 'validation')

# Print the label given to each class folder
# (the recorded run printed {'fake_train': 0, 'real_train': 1})
print("Class Indices:", train_generator.class_indices)

# Convolutional feature extractor: three conv/pool stages
feature_layers = [
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)),
    MaxPooling2D(pool_size=(2, 2)),
]
for n_filters in (85, 128):
    feature_layers.extend([
        Conv2D(n_filters, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
    ])

# Dense classifier ending in a single sigmoid unit
classifier_layers = [
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(DROPOUT_RATE),
    Dense(1, activation='sigmoid'),
]

model = Sequential(feature_layers + classifier_layers)

optimizer = Adam(learning_rate=LEARNING_RATE)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks, all driven by validation accuracy
monitored_metric = 'val_accuracy'
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor=monitored_metric, patience=5, restore_best_weights=True)
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'best_model.keras', save_best_only=True, monitor=monitored_metric)
# Halve the learning rate once validation accuracy has stalled for 3 epochs
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor=monitored_metric, factor=0.5, patience=3, min_lr=1e-9)

# Train for up to 50 epochs
history = model.fit(
    train_generator,
    epochs=50,
    validation_data=validation_generator,
    callbacks=[early_stopping, model_checkpoint, reduce_lr]
)

# Store the trained model
model.save('final_model.keras')

# Report accuracy on the validation split
val_loss, val_accuracy = model.evaluate(validation_generator)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")


Found 1280 images belonging to 2 classes.
Found 320 images belonging to 2 classes.
Class Indices: {'fake_train': 0, 'real_train': 1}
Epoch 1/50
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 438ms/step - accuracy: 0.4785 - loss: 0.7090 - val_accuracy: 0.5063 - val_loss: 0.6889 - learning_rate: 1.0000e-04
Epoch 2/50
[1m 3/40[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m16s[0m 435ms/step - accuracy: 0.4479 - loss: 0.6981

KeyboardInterrupt: 

In [None]:
import os
import re

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array

# Load your trained model
model = tf.keras.models.load_model('final_model.keras')

# Set image size and batch size for the test set
IMG_SIZE = 224
BATCH_SIZE = 32

# Path to the test images
test_dir = '/kaggle/input/wec-intelligence-sig-2024-recruitment-task-cv/test_images/test_images/'

# flow_from_directory() only yields images located in sub-directories of the
# directory it is given. The other inference cells in this notebook read the
# image files directly from test_dir, so pointing the generator at test_dir
# itself would find 0 images. Point it at the parent directory instead and
# select the image folder through `classes`.
test_root, test_subdir = os.path.split(os.path.normpath(test_dir))

# Prepare the test data generator (no augmentation, only rescaling)
test_datagen = ImageDataGenerator(rescale=1./255)

# Load test images and create a generator; shuffle=False keeps the predictions
# aligned with test_generator.filenames
test_generator = test_datagen.flow_from_directory(
    test_root,
    classes=[test_subdir],
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False
)
if test_generator.samples == 0:
    raise FileNotFoundError(f"No test images found in {test_dir}")

# Make predictions
predictions = model.predict(test_generator)

# Convert the sigmoid probabilities to binary class labels (0 or 1)
predicted_classes = (predictions > 0.5).astype(int).flatten()

# The training cells report class indices {'fake_train': 0, 'real_train': 1},
# while the other submission cells in this notebook flip the labels
# (fake -> 1, real -> 0). Apply the same mapping here.
# NOTE(review): confirm the required TARGET encoding against the competition rules.
mapped_classes = 1 - predicted_classes

# Get the numeric image ID (first run of digits) from each file name, matching
# the other submission cells; files without digits are left out
image_ids = test_generator.filenames
id_matches = [re.search(r'(\d+)', os.path.basename(img)) for img in image_ids]
has_id = np.array([match is not None for match in id_matches], dtype=bool)

# Create a DataFrame for submission
submission_df = pd.DataFrame({
    'ID': [int(match.group(1)) for match in id_matches if match is not None],
    'TARGET': mapped_classes[has_id]
})

# Save to submission.csv
submission_df.to_csv('submission.csv', index=False)

print(submission_df.head())
