In [63]:
# System & ML imports
import os
import sys
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Ensure src/ folder is in Python path
sys.path.append(os.path.abspath("../src"))

# Import model builder
from model import build_model

# Dataset path
DATASET_PATH = "../data/raw/Indian/"

# Image settings
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

In [64]:
# Collect all file paths and labels
file_paths = []
labels = []

for label in os.listdir(DATASET_PATH):
    class_path = os.path.join(DATASET_PATH, label)
    if os.path.isdir(class_path):
        for img in os.listdir(class_path):
            file_paths.append(os.path.join(class_path, img))
            labels.append(label)

# Create DataFrame
df = pd.DataFrame({
    "file_path": file_paths,
    "label": labels
})

print("Total images:", len(df))
print("Total classes:", df["label"].nunique())
df.head()


Total images: 42745
Total classes: 35


Unnamed: 0,file_path,label
0,../data/raw/Indian/1\0.jpg,1
1,../data/raw/Indian/1\1.jpg,1
2,../data/raw/Indian/1\10.jpg,1
3,../data/raw/Indian/1\100.jpg,1
4,../data/raw/Indian/1\1000.jpg,1


In [65]:
# Split dataset
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df["label"],
    random_state=42
)

print("Train samples:", len(train_df))
print("Validation samples:", len(val_df))


Train samples: 34196
Validation samples: 8549


In [66]:
# Normalization & augmentation for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1
)

val_datagen = ImageDataGenerator(rescale=1./255)

# Flow from dataframe
train_gen = train_datagen.flow_from_dataframe(
    train_df,
    x_col="file_path",
    y_col="label",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=True
)

val_gen = val_datagen.flow_from_dataframe(
    val_df,
    x_col="file_path",
    y_col="label",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False
)

print("Train batches:", len(train_gen))
print("Validation batches:", len(val_gen))
print("Classes:", len(train_gen.class_indices))


Found 34196 validated image filenames belonging to 35 classes.
Found 8549 validated image filenames belonging to 35 classes.
Train batches: 1069
Validation batches: 268
Classes: 35


In [67]:
# Build MobileNetV2 model
NUM_CLASSES = len(train_gen.class_indices)
model = build_model(NUM_CLASSES)

# Show summary
model.summary()


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 inception_resnet_v2 (Funct  (None, 8, 8, 1536)        54336736  
 ional)                                                          
                                                                 
 global_average_pooling2d_3  (None, 1536)              0         
  (GlobalAveragePooling2D)                                       
                                                                 
 dense_6 (Dense)             (None, 128)               196736    
                                                                 
 batch_normalization_815 (B  (None, 128)               512       
 atchNormalization)                                              
                                                                 
 dropout_3 (Dropout)         (None, 128)               0         
                                                      

In [68]:
checkpoint = ModelCheckpoint(
    "../models/classify_model.h5",
    monitor="val_loss",
    save_best_only=True
)

early_stop = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True
)


In [69]:
# Train the model
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=1,   # Only first epoch
    callbacks=[checkpoint, early_stop]
)


RuntimeError: You must compile your model before training/testing. Use `model.compile(optimizer, loss)`.