# In [1]:
# rf_hindi_model.py

import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
import re

# Configuration: input resolution and dataset locations
img_height = 28  # height passed to load_img's target_size
img_width = 28  # width passed to load_img's target_size
train_dir = "Hindi/train"  # root folder holding one sub-folder per training class
test_dir = "Hindi/test"  # root folder holding one sub-folder per test class

def extract_numeric_label(folder_name):
    """Return the first run of digits found in *folder_name* as an int.

    Folder names that contain no digit at all yield the sentinel ``-1``.
    """
    digits = re.search(r'\d+', folder_name)
    if digits is None:
        return -1
    return int(digits.group())

# File extensions accepted as images; other entries in a class folder are skipped.
_IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".bmp", ".ppm", ".tif", ".tiff", ".gif")


def load_images_from_folder(folder):
    """Load every image below *folder* into flat feature vectors with labels.

    *folder* is expected to contain one sub-directory per class. The label of
    each class is whatever ``extract_numeric_label`` returns for the
    sub-directory name. Entries of *folder* that are not directories are
    ignored, as are hidden files and files without a known image extension
    inside a class directory (e.g. ``Thumbs.db``, ``.DS_Store``).

    Each image is read as grayscale, resized to ``(img_height, img_width)``,
    flattened to 1-D and scaled by ``1 / 255``.

    Args:
        folder: Path of the dataset root directory.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays with one entry per loaded
        image. Both arrays are empty when no image is found.

    Raises:
        OSError: If *folder* cannot be listed (e.g. it does not exist).
    """
    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded model trained on it) reproducible.
    for label_folder in sorted(os.listdir(folder)):
        class_dir = os.path.join(folder, label_folder)
        if not os.path.isdir(class_dir):
            continue
        image_files = sorted(
            name
            for name in os.listdir(class_dir)
            if not name.startswith(".")
            and name.lower().endswith(_IMAGE_EXTENSIONS)
        )
        if not image_files:
            # Nothing loadable in this class folder.
            continue
        label = extract_numeric_label(label_folder)
        for file_name in image_files:
            img_path = os.path.join(class_dir, file_name)
            img = load_img(
                img_path,
                color_mode="grayscale",
                target_size=(img_height, img_width),
            )
            img_array = img_to_array(img).reshape(-1) / 255.0
            images.append(img_array)
            labels.append(label)
    return np.array(images), np.array(labels)

# Load data
x_train, y_train = load_images_from_folder(train_dir)
x_test, y_test = load_images_from_folder(test_dir)

# Fail early with a message that names the directory: an empty dataset would
# otherwise surface as an opaque array-shape error inside scikit-learn.
if len(x_train) == 0:
    raise ValueError(f"No training images found under {train_dir!r}")
if len(x_test) == 0:
    raise ValueError(f"No test images found under {test_dir!r}")

# Train Random Forest (fixed seed so repeated runs build the same forest)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(x_train, y_train)

# Evaluate on the held-out test folder
y_pred = rf_model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Hindi Random Forest Accuracy: {accuracy:.4f}")

# Save model; the output directory is created on first use
os.makedirs("models", exist_ok=True)
joblib.dump(rf_model, "models/hindi_rf_model.pkl")


# Recorded output of the cell above:
# Hindi Random Forest Accuracy: 0.9803
#
# ['models/hindi_rf_model.pkl']