In [1]:
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf


In [2]:
train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

In [3]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow import keras


train = pd.read_csv("data/train.csv") 
test  = pd.read_csv("data/test.csv")  

train['file_name'] = train['file_name'].apply(lambda x: os.path.join('data', x))
if 'file_name' in test.columns:
    test['file_name'] = test['file_name'].apply(lambda x: os.path.join('data', x))
else:
    # test CSV has column 'id' instead
    test['file_name'] = test['id'].apply(lambda x: os.path.join('data', 'test_data_v2', x))


train_df, val_df = train_test_split(
    train, 
    test_size=0.2, 
    random_state=42, 
    stratify=train['label']
)

def create_generators(train_df, val_df, test_df):
    train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=10,
    width_shift_range=0.05,
    height_shift_range=0.05,
    zoom_range=0.1,
    horizontal_flip=True
    )
    test_val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        x_col='file_name',
        y_col='label',
        target_size=(224, 224),
        class_mode='raw',
        batch_size=32,
        shuffle=True
    )
    val_generator = test_val_datagen.flow_from_dataframe(
        dataframe=val_df,
        x_col='file_name',
        y_col='label',
        target_size=(224, 224),
        class_mode='raw',
        batch_size=32,
        shuffle=False
    )
    test_generator = test_val_datagen.flow_from_dataframe(
        dataframe=test_df,
        x_col='file_name',
        y_col=None,
        target_size=(224, 224),
        class_mode=None,
        batch_size=32,
        shuffle=False
    )

    return train_generator, val_generator, test_generator

train_gen, val_gen, test_gen = create_generators(train_df, val_df, test)

Found 63960 validated image filenames.
Found 15990 validated image filenames.
Found 0 validated image filenames.




In [4]:
from tensorflow.keras import regularizers
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3)
)
base_model.trainable = True 

for layer in base_model.layers[:-60]:  
    layer.trainable = False

model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(1e-4)),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-5),
    loss='binary_crossentropy',
    metrics=['accuracy']
)


history = model.fit(
    train_gen,
    epochs=10,
    validation_data=val_gen,
    verbose=1
)

2025-12-04 17:19:07.438767: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4
2025-12-04 17:19:07.438950: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-12-04 17:19:07.438961: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-12-04 17:19:07.439150: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-12-04 17:19:07.439166: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/10


2025-12-04 17:19:10.278904: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m1999/1999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m489s[0m 241ms/step - accuracy: 0.8477 - loss: 0.3759 - val_accuracy: 0.8996 - val_loss: 0.2803
Epoch 2/10
[1m1999/1999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m477s[0m 238ms/step - accuracy: 0.9366 - loss: 0.1849 - val_accuracy: 0.9615 - val_loss: 0.1219
Epoch 3/10
[1m1999/1999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m471s[0m 235ms/step - accuracy: 0.9550 - loss: 0.1395 - val_accuracy: 0.9675 - val_loss: 0.1095
Epoch 4/10
[1m1999/1999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m885s[0m 442ms/step - accuracy: 0.9644 - loss: 0.1158 - val_accuracy: 0.9684 - val_loss: 0.1075
Epoch 5/10
[1m1999/1999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m465s[0m 233ms/step - accuracy: 0.9703 - loss: 0.1003 - val_accuracy: 0.9664 - val_loss: 0.1155
Epoch 6/10
[1m1999/1999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m466s[0m 233ms/step - accuracy: 0.9742 - loss: 0.0899 - val_accuracy: 0.9795 - val_loss: 0.0792
Epo

In [None]:
val_loss, val_acc = model.evaluate(val_gen, verbose=0)
print('Initial validation loss', val_loss)
print('Initial validation accuracy', val_acc)



Initial validation loss 0.10255365073680878
Initial validation accuracy 0.9709818363189697
