In [1]:
# System & ML imports
import os
import sys
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Ensure src/ folder is in Python path.
# Guarded so that re-running this cell does not keep appending the same
# entry to sys.path.
SRC_DIR = os.path.abspath("../src")
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

# Import model builder (project-local module resolved through SRC_DIR)
from model import build_model

# Dataset path (scanned below as one sub-folder per class label)
DATASET_PATH = "../data/raw/Indian/"

# Image settings
# NOTE(review): the model summary recorded further down reports an
# (8, 8, 1536) feature map for the backbone, which looks like a 299x299
# input rather than 224x224 -- confirm against build_model() before training.
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

In [2]:
# Collect all file paths and labels.
# Layout expected on disk: DATASET_PATH/<label>/<image file>.
#
# Directory listings are sorted because os.listdir() returns entries in
# arbitrary, filesystem-dependent order; without sorting, the row order of
# the DataFrame (and therefore the seeded train/validation split built from
# it) would differ between machines.
#
# Extensions treated as images.
# NOTE(review): chosen to mirror the formats Keras' image iterators accept;
# confirm against the installed Keras version if other formats are needed.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".ppm", ".tif", ".tiff")

file_paths = []
labels = []

for label in sorted(os.listdir(DATASET_PATH)):
    class_path = os.path.join(DATASET_PATH, label)
    if not os.path.isdir(class_path):
        # Stray files next to the class folders are not classes.
        continue
    for img in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img)
        # Skip nested folders and non-image files (e.g. OS thumbnail caches)
        # so they do not distort the counts or the stratified split.
        if os.path.isfile(img_path) and img.lower().endswith(IMAGE_EXTENSIONS):
            file_paths.append(img_path)
            labels.append(label)

if not file_paths:
    # Fail here with a clear message instead of deep inside the split step.
    raise FileNotFoundError(f"No image files found under {DATASET_PATH!r}")

# Create DataFrame: one row per image, with its path and class label
df = pd.DataFrame({
    "file_path": file_paths,
    "label": labels
})

print("Total images:", len(df))
print("Total classes:", df["label"].nunique())
df.head()


Total images: 42745
Total classes: 35


Unnamed: 0,file_path,label
0,../data/raw/Indian/1\0.jpg,1
1,../data/raw/Indian/1\1.jpg,1
2,../data/raw/Indian/1\10.jpg,1
3,../data/raw/Indian/1\100.jpg,1
4,../data/raw/Indian/1\1000.jpg,1


In [3]:
# Split dataset: hold out 20% of the rows for validation.
# The split is stratified on the label column and uses a fixed random_state
# so repeated runs over the same DataFrame give the same partition.
split_options = {
    "test_size": 0.2,
    "stratify": df["label"],
    "random_state": 42,
}
train_df, val_df = train_test_split(df, **split_options)

print("Train samples:", len(train_df))
print("Validation samples:", len(val_df))


Train samples: 34196
Validation samples: 8549


In [4]:
# Normalization & augmentation for training:
# pixel values are scaled by 1/255 and each image is randomly rotated,
# shifted and zoomed within the ranges below.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1
)

# Validation images are only rescaled -- no augmentation.
val_datagen = ImageDataGenerator(rescale=1./255)

# Pin the label -> index mapping once and hand it to BOTH generators.
# Left to itself, each generator infers the mapping from the labels present
# in its own DataFrame, so a class missing from one split would silently
# shift the indices and make validation metrics meaningless.
# Sorting keeps the same alphanumeric order Keras would infer by default.
CLASS_NAMES = sorted(train_df["label"].unique())

# Arguments shared by the training and validation flows.
flow_kwargs = dict(
    x_col="file_path",
    y_col="label",
    classes=CLASS_NAMES,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)

# Flow from dataframe: shuffle the training data, keep validation order fixed
train_gen = train_datagen.flow_from_dataframe(train_df, shuffle=True, **flow_kwargs)
val_gen = val_datagen.flow_from_dataframe(val_df, shuffle=False, **flow_kwargs)

print("Train batches:", len(train_gen))
print("Validation batches:", len(val_gen))
print("Classes:", len(train_gen.class_indices))


Found 34196 validated image filenames belonging to 35 classes.
Found 8549 validated image filenames belonging to 35 classes.
Train batches: 1069
Validation batches: 268
Classes: 35


In [5]:
# Build the classifier through the project's build_model() helper.
# The recorded summary for this cell lists an `inception_resnet_v2` backbone
# (not MobileNetV2).
NUM_CLASSES = len(train_gen.class_indices)
model = build_model(NUM_CLASSES)

# Non-fatal sanity check: the generators resize every image to IMG_SIZE, so a
# model built for a different fixed input size would fail inside fit() with a
# shape error. Surface the mismatch here instead.
# Assumes a single-input, channels-last model, i.e. input_shape is
# (batch, height, width, channels) -- anything else skips the check.
model_input_shape = getattr(model, "input_shape", None)
if isinstance(model_input_shape, tuple) and len(model_input_shape) == 4:
    model_hw = tuple(model_input_shape[1:3])
    if None not in model_hw and model_hw != tuple(IMG_SIZE):
        print(
            f"WARNING: model expects {model_hw} images but the generators "
            f"produce {tuple(IMG_SIZE)}; align IMG_SIZE with build_model()."
        )

# Show summary
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 inception_resnet_v2 (Funct  (None, 8, 8, 1536)        54336736  
 ional)                                                          
                                                                 
 global_average_pooling2d (  (None, 1536)              0         
 GlobalAveragePooling2D)                                         
                                                                 
 dense (Dense)               (None, 128)               196736    
                                                                 
 batch_normalization_203 (B  (None, 128)               512       
 atchNormalization)                                              
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                        

In [10]:
# Early-stopping callback: watches validation loss, tolerates 5 epochs
# without improvement, and asks Keras to restore the best weights seen.
# EarlyStopping is already imported in the notebook's first (imports) cell,
# so it is not re-imported here.
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True
)


In [11]:
# Train the model (Epoch 1)
# A single pass over the training generator, evaluated against the
# validation generator, with the early-stopping callback attached.
fit_options = {
    "validation_data": val_gen,
    "epochs": 1,
    "callbacks": [early_stop],
}
history = model.fit(train_gen, **fit_options)




In [12]:
# Save in modern Keras format (SAFE for InceptionResNetV2)
MODEL_PATH = "../models/classify_model.keras"

# Create the target folder first: on a fresh checkout ../models may not exist
# yet, in which case the save below would fail.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
model.save(MODEL_PATH)


NameError: name 'cv2' is not defined

In [13]:
from tensorflow.keras.models import load_model

# Round-trip check: read the saved .keras file back from disk and print the
# architecture of the reloaded model.
saved_model_path = "../models/classify_model.keras"
test_model = load_model(saved_model_path)
test_model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 inception_resnet_v2 (Funct  (None, 8, 8, 1536)        54336736  
 ional)                                                          
                                                                 
 global_average_pooling2d (  (None, 1536)              0         
 GlobalAveragePooling2D)                                         
                                                                 
 dense (Dense)               (None, 128)               196736    
                                                                 
 batch_normalization_203 (B  (None, 128)               512       
 atchNormalization)                                              
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                        

In [15]:
import json
import os

# Persist the label -> index mapping used by the training generator as JSON
# in the models folder.
os.makedirs("../models", exist_ok=True)

class_indices = train_gen.class_indices

with open("../models/class_indices.json", "w") as out_file:
    json.dump(obj=class_indices, fp=out_file)

print("Saved class indices:", class_indices)


Saved class indices: {'1': 0, '2': 1, '3': 2, '4': 3, '5': 4, '6': 5, '7': 6, '8': 7, '9': 8, 'A': 9, 'B': 10, 'C': 11, 'D': 12, 'E': 13, 'F': 14, 'G': 15, 'H': 16, 'I': 17, 'J': 18, 'K': 19, 'L': 20, 'M': 21, 'N': 22, 'O': 23, 'P': 24, 'Q': 25, 'R': 26, 'S': 27, 'T': 28, 'U': 29, 'V': 30, 'W': 31, 'X': 32, 'Y': 33, 'Z': 34}
