In [1]:
# rf_roman_model.py

import os
import numpy as np
import re
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

# Configuration
# Every image is resized to this height/width (in pixels) when it is loaded.
img_height = 28
img_width = 28

# Directories holding the train / validation / test splits, relative to the
# current working directory.
train_dir = "Roman Numbers/train"
val_dir = "Roman Numbers/val"
test_dir = "Roman Numbers/test"

def extract_numeric_label(folder_name):
    """Convert a class-folder name into a zero-based integer label.

    The first run of decimal digits found in ``folder_name`` is parsed as
    the one-based class number and shifted down by one (so ``"1"`` -> 0 and
    ``"10"`` -> 9).

    Args:
        folder_name: Name of the class directory, e.g. ``"6"``.

    Returns:
        The zero-based integer label.

    Raises:
        ValueError: If ``folder_name`` contains no digits.
    """
    match = re.search(r'\d+', folder_name)
    if match is None:
        # Fail with a message that names the folder instead of an opaque
        # AttributeError on ``None.group()``.
        raise ValueError(
            f"No numeric class label found in folder name: {folder_name!r}"
        )
    return int(match.group()) - 1  # one-based class number -> zero-based label

def load_images_from_folder(folder):
    """Load all readable images below ``folder`` as flattened feature rows.

    ``folder`` is expected to contain one sub-directory per class. Each
    sub-directory name is turned into a label by ``extract_numeric_label``.
    Every file inside is loaded as a grayscale image resized to
    ``(img_height, img_width)``, flattened to 1-D and divided by 255.
    Entries of ``folder`` that are not directories are ignored. Files that
    fail to load are reported on stdout and skipped.

    Args:
        folder: Path of the split directory (e.g. the train directory).

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays: one row per image that
        loaded, and the matching label for each row. When no image could be
        loaded, ``images`` has shape ``(0, img_height * img_width)`` so it
        can still be concatenated with other splits.
    """
    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and anything downstream that depends on it) reproducible.
    for label_folder in sorted(os.listdir(folder)):
        class_dir = os.path.join(folder, label_folder)
        if not os.path.isdir(class_dir):
            continue
        label = extract_numeric_label(label_folder)
        for file_name in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, file_name)
            try:
                img = load_img(img_path, color_mode="grayscale", target_size=(img_height, img_width))
                img_array = img_to_array(img).reshape(-1) / 255.0  # Flattened & normalized
            except Exception as e:
                # Deliberate best-effort: stray non-image files (such as
                # desktop.ini) must not abort the whole load.
                print(f"⚠️ Skipping file {img_path}: {e}")
                continue
            images.append(img_array)
            labels.append(label)

    if not images:
        # np.array([]) would have shape (0,), which cannot be concatenated
        # with 2-D feature matrices from other splits.
        # NOTE(review): float32 assumes the Keras default float type - confirm.
        return (
            np.empty((0, img_height * img_width), dtype=np.float32),
            np.empty((0,), dtype=int),
        )
    return np.array(images), np.array(labels)

# Read the three dataset splits (train, validation, test) in that order.
(x_train, y_train), (x_val, y_val), (x_test, y_test) = (
    load_images_from_folder(split_dir)
    for split_dir in (train_dir, val_dir, test_dir)
)

# Merge the validation split into the training data: no hyper-parameters are
# tuned here, so the forest is fitted on both.
x_train_full = np.concatenate((x_train, x_val))
y_train_full = np.concatenate((y_train, y_val))

# Fit the Random Forest (fit() returns the fitted estimator itself).
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(x_train_full, y_train_full)

# Score on the held-out test split.
y_pred = rf_model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Roman Random Forest Accuracy: {accuracy:.4f}")

# Persist the fitted model, creating the output directory if needed.
os.makedirs("models", exist_ok=True)
joblib.dump(rf_model, "models/roman_rf_model.pkl")


⚠️ Skipping file Roman Numbers/train\6\desktop.ini: cannot identify image file <_io.BytesIO object at 0x000002D71A5C9670>
Roman Random Forest Accuracy: 0.9675


['models/roman_rf_model.pkl']