In [None]:
import os
import math
import warnings
from datetime import datetime
from collections import Counter, defaultdict
import cv2
import numpy as np
import mediapipe as mp
import joblib
import arabic_reshaper
from bidi.algorithm import get_display
import imgaug.augmenters as iaa
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PyQt5.QtWidgets import QApplication, QLabel, QVBoxLayout, QWidget
from PyQt5.QtGui import QImage, QPainter, QFont, QColor, QFontMetrics
from PyQt5.QtCore import Qt
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import StratifiedKFold, cross_val_score, GridSearchCV, train_test_split
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# constants
# Lower-case file suffixes accepted as images; kept as a tuple so it can be
# passed straight to str.endswith().
VALID_EXTENSIONS = ('.jpg', '.jpeg', '.png')

# Placeholders: replace with the dataset root folders before running.
TRAIN_DATASET_PATH = "add path here"
TEST_DATASET_PATH = "add path here"

# Tags that get prefixed to saved model file names (see get_model_paths).
DATASET_TAGS = [
    "original_images",
    "complete_UAlpha40",
    "original_plus_custom_aug",
]

# mediapipe initialization
# Module-level detector configured for still images and at most one hand;
# extract_features() runs every dataset image through this instance.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Column ranges of each feature family inside the vector returned by
# compute_features(); used by the feature-group ablation study.
# Each entry is (group name, first column, one-past-last column); the ranges
# are contiguous and together cover columns 0..275.
FEATURE_GROUPS = {
    group: list(range(first, stop))
    for group, first, stop in (
        ('pairwise distances', 0, 210),
        ('joint angles', 210, 225),
        ('hand position size', 225, 231),
        ('hand orientation', 231, 236),
        ('finger spread', 236, 246),
        ('finger curvature', 246, 251),
        ('length ratios', 251, 261),
        ('palm relative', 261, 276),
    )
}

# Values of k (number of selected features) swept by the feature vector size
# ablation study: every multiple of 10 up to 270, then the full 276.
FEATURE_COUNTS = [*range(10, 271, 10), 276]

# augmentations
# Image-level augmentation pipeline, applied by extract_features() when
# augment=True. Each stage is wrapped in iaa.Sometimes(p, ...), so it is
# applied with probability p rather than on every image; (low, high) tuples
# are value ranges handed to imgaug (presumably sampled per image -- see the
# imgaug documentation for the exact sampling rules).
augmentations = iaa.Sequential([
    # geometric jitter: rotation, scale, shear and translation
    iaa.Sometimes(0.8,iaa.Affine(
        rotate=(-25,25),
        scale=(0.9,1.1),
        shear=(-10,10),
        translate_percent={"x":(-0.1,0.1), "y":(-0.1,0.1)}
    )),
    # mild perspective distortion
    iaa.Sometimes(0.5,iaa.PerspectiveTransform(scale=(0.01,0.05))),
    # brightness change (multiplicative and additive)
    iaa.Sometimes(0.6,iaa.MultiplyAndAddToBrightness(mul=(0.8, 1.2), add=(-20,20))),
    # light blur
    iaa.Sometimes(0.4,iaa.GaussianBlur(sigma=(0,0.5))),
])

# for creating dynamic paths for model saving and loading
def get_model_paths(model_type, n_features, dataset_tag=None, base_dir="models"):
    """Return the file paths used to save/load one trained model's artifacts.

    The artifacts live in ``<base_dir>/<model_type>/`` (created if missing) and
    are named ``<artifact>_[<dataset_tag>_]<model_type>_k<n_features>.pkl``.

    Args:
        model_type: Model name; used as sub-directory and in the file names.
        n_features: Number of selected features; encoded as ``k<n>``.
        dataset_tag: Optional prefix identifying the training dataset.
        base_dir: Root directory for all saved models.

    Returns:
        dict with the keys "model", "encoder", "scaler" and "selector".
    """
    target_dir = os.path.join(base_dir, model_type)
    os.makedirs(target_dir, exist_ok=True)

    # A missing/empty tag contributes nothing to the file stem.
    tag_part = dataset_tag + "_" if dataset_tag else ""
    stem = f"{tag_part}{model_type}_k{n_features}"

    return {
        artifact: os.path.join(target_dir, f"{artifact}_{stem}.pkl")
        for artifact in ("model", "encoder", "scaler", "selector")
    }

# simple helper function used in compute_features
def calculate_angle(a, b, c):
    """Angle at vertex ``b`` between the rays b->a and b->c, in degrees.

    Only the first two coordinates (x, y) of each point are used. The angle is
    the atan2 heading of b->c minus the heading of b->a, wrapped into [0, 360).
    """
    heading_to_c = math.atan2(c[1] - b[1], c[0] - b[0])
    heading_to_a = math.atan2(a[1] - b[1], a[0] - b[0])
    sweep = math.degrees(heading_to_c - heading_to_a)
    if sweep < 0:
        return sweep + 360
    return sweep

# extract_features and compute_features are separate functions 
# extract_features uses this helper function on the loaded dataset
# live_prediction uses it on live user input
def compute_features(landmarks):
    """Convert 21 hand landmarks into a 276-value geometric feature vector.

    Args:
        landmarks: sequence of 21 ``(x, y, z)`` tuples (indices 0..20 are
            accessed; index 0 is used as the wrist / origin).

    Returns:
        1-D ``np.ndarray`` laid out as:
        210 pairwise distances, 15 joint angles, 6 bounding-box values,
        5 orientation values (unit normal + pitch + roll), 10 fingertip
        spreads, 5 finger curvatures, 10 finger-length ratios and
        15 fingertip offsets from the palm centre (matches FEATURE_GROUPS).
    """
    def _gap(p, q):
        # Euclidean distance between two 3D points.
        return math.sqrt(
            (p[0] - q[0])**2 +
            (p[1] - q[1])**2 +
            (p[2] - q[2])**2
        )

    def _reach(p):
        # Distance of a point from the origin (the wrist after shifting).
        return math.sqrt(p[0]**2 + p[1]**2 + p[2]**2)

    # Translate so the wrist is the origin, then divide by the wrist->landmark 9
    # distance (floored at 1e-6) so features do not depend on hand size.
    origin = landmarks[0]
    shifted = [(x - origin[0], y - origin[1], z - origin[2]) for x, y, z in landmarks]
    scale = max(_reach(shifted[9]), 1e-6)
    pts = [(x/scale, y/scale, z/scale) for x, y, z in shifted]

    tips = [4, 8, 12, 16, 20]

    # 1) distances between every unordered pair of landmarks
    vec = [_gap(pts[i], pts[j]) for i in range(21) for j in range(i + 1, 21)]

    # 2) three joint angles per finger: wrist-base-next, then the two
    #    following joints along the finger (thumb, index, middle, ring, pinky)
    for base in (1, 5, 9, 13, 17):
        for a, b, c in ((0, base, base + 1),
                        (base, base + 1, base + 2),
                        (base + 1, base + 2, base + 3)):
            vec.append(calculate_angle(pts[a], pts[b], pts[c]))

    # 3) bounding box of the RAW landmarks: centre per axis, then extent per axis
    lows = [min(p[axis] for p in landmarks) for axis in range(3)]
    highs = [max(p[axis] for p in landmarks) for axis in range(3)]
    vec.extend((lo + hi) / 2 for lo, hi in zip(lows, highs))
    vec.extend(hi - lo for lo, hi in zip(lows, highs))

    # 4) orientation: unit normal of the plane through landmarks 5, 9 and 17
    #    (zero vector when the cross product is degenerate), plus pitch and roll
    anchor = np.array(pts[17])
    normal = np.cross(np.array(pts[5]) - anchor, np.array(pts[9]) - anchor)
    magnitude = np.linalg.norm(normal)
    if magnitude > 1e-6:
        normal = normal / magnitude
    else:
        normal = np.array([0, 0, 0])
    vec.extend(normal)
    pitch = np.arctan2(normal[1], np.sqrt(normal[0]**2 + normal[2]**2))
    roll = np.arctan2(normal[0], normal[2])
    vec.extend([pitch, roll])

    # 5) distances between every pair of fingertips
    for pos, first in enumerate(tips):
        for second in tips[pos + 1:]:
            vec.append(_gap(pts[first], pts[second]))

    # 6) curvature: angle wrist - (tip - 2) - tip for each finger
    for tip in tips:
        vec.append(calculate_angle(pts[0], pts[tip - 2], pts[tip]))

    # 7) ratios of wrist->fingertip distances for every finger pair
    reaches = [_reach(pts[tip]) for tip in tips]
    for pos, numerator in enumerate(reaches):
        for denominator in reaches[pos + 1:]:
            vec.append(numerator / (denominator + 1e-6))

    # 8) fingertip offsets from the mean of landmarks 0, 5, 9, 13, 17
    palm = np.mean([pts[k] for k in (0, 5, 9, 13, 17)], axis=0)
    for tip in tips:
        vec.extend(pts[tip][axis] - palm[axis] for axis in range(3))

    return np.array(vec)


def extract_features(image_path, img_size=(224, 224), augment=False):
    """Load one image, detect a hand and return its feature vector.

    Args:
        image_path: Path of the image file.
        img_size: (width, height) passed to cv2.resize before detection.
        augment: When True, run the image through the module-level
            ``augmentations`` pipeline before resizing.

    Returns:
        The array from compute_features() for the first detected hand, or
        None when the image cannot be decoded or no hand is found.
    """
    # Read the bytes with numpy and decode in memory instead of cv2.imread
    # (presumably so that paths with non-ASCII characters load correctly).
    raw_bytes = np.fromfile(str(image_path), dtype=np.uint8)
    frame = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)
    if frame is None:
        return None

    if augment:
        frame = augmentations.augment_image(frame)

    rgb = cv2.cvtColor(cv2.resize(frame, img_size), cv2.COLOR_BGR2RGB)
    detected = hands.process(rgb).multi_hand_landmarks
    if not detected:
        return None

    # Only the first detected hand is used.
    first_hand = detected[0]
    return compute_features([(pt.x, pt.y, pt.z) for pt in first_hand.landmark])

# can specify limits on samples per class
# debug gives you details on how many images failed etc.
def load_dataset(dataset_path, limit_per_class=None, debug=False, augment=False,
                 seed=None, n_augmentations=4):
    """Build feature/label arrays from a folder-per-class image dataset.

    Every sub-directory of ``dataset_path`` is a class label; every file in it
    whose lower-cased name ends with one of VALID_EXTENSIONS is a sample.

    Args:
        dataset_path: Root directory containing one folder per class.
        limit_per_class: If set and a class has more images than this, a
            random subset of that size is used.
        debug: Print per-class and total load statistics.
        augment: Additionally extract features from ``n_augmentations``
            augmented copies of every image.
        seed: Optional seed for the per-class subsampling. When None the
            global NumPy random state is used, as before.
        n_augmentations: Number of augmented copies per image (default 4).

    Returns:
        ``(X, y)`` as NumPy arrays: feature vectors and their string labels.
        Images in which no hand is detected are skipped.
    """
    X_data, y_data = [], []
    class_stats = {}
    rng = np.random.RandomState(seed) if seed is not None else None

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore seeded CV splits) stable across runs and machines.
    for label in sorted(os.listdir(dataset_path)):
        label_path = os.path.join(dataset_path, label)
        if not os.path.isdir(label_path):
            continue

        count_loaded, count_failed = 0, 0
        image_paths = sorted(
            os.path.join(label_path, img)
            for img in os.listdir(label_path)
            if img.lower().endswith(VALID_EXTENSIONS)
        )

        if limit_per_class and len(image_paths) > limit_per_class:
            if rng is not None:
                rng.shuffle(image_paths)
            else:
                np.random.shuffle(image_paths)
            image_paths = image_paths[:limit_per_class]

        for img_path in image_paths:
            features = extract_features(img_path, augment=False)
            if features is not None:
                X_data.append(features)
                y_data.append(label)
                count_loaded += 1
            else:
                count_failed += 1

            # Augmented copies are attempted even when the original failed;
            # failed augmented copies are not included in count_failed.
            if augment:
                for _ in range(n_augmentations):
                    aug_features = extract_features(img_path, augment=True)
                    if aug_features is not None:
                        X_data.append(aug_features)
                        y_data.append(label)
                        count_loaded += 1

        class_stats[label] = (count_loaded, count_failed)
        if debug:
            print(f"{label}: Loaded {count_loaded}, Failed {count_failed}")

    if debug:
        total = sum(loaded for loaded, _ in class_stats.values())
        print(f"\nTotal samples: {total}")
        print(f"Classes: {len(class_stats)}")

    return np.array(X_data), np.array(y_data)


def train_model(X, y, model_type="svm", n_features=100, save_dir="models", dataset_tag=None):
    """Fit preprocessing plus a grid-searched classifier and save all artifacts.

    Steps: label-encode ``y``, standardise ``X``, keep the ``n_features`` best
    columns by ANOVA F-score, grid-search the chosen model with 5-fold
    stratified CV, report a 10-fold CV score of the winner, then dump model,
    encoder, scaler and selector to the paths from get_model_paths().

    Args:
        X: 2-D feature array.
        y: Class labels, one per row of ``X``.
        model_type: One of "svm", "svm_poly", "random_forest",
            "logistic_regression", "knn".
        n_features: ``k`` for SelectKBest.
        save_dir: Root directory for saved artifacts.
        dataset_tag: Optional prefix for the artifact file names.

    Returns:
        ``(best_estimator, label_encoder, scaler, selector)``.

    Raises:
        KeyError: if ``model_type`` is not one of the configured models.
    """
    # model config
    model_configs = {
        "svm": {
            'param_grid': {
                'C': [1, 10, 20, 50, 100, 200, 500],
                'gamma': [0.001, 0.005, 0.008, 0.015, 0.02, 0.01, 0.05, 0.1],
                'kernel': ['rbf']
            },
            'base_model': SVC(probability=True, random_state=42, cache_size=1000)
        },
        "svm_poly": {
            'param_grid': {
                'C': [10, 50, 100, 200],
                'gamma': [0.001, 0.005, 0.01, 0.05],
                'kernel': ['poly'],
                'degree': [2, 3],
                'coef0': [0, 1]
            },
            'base_model': SVC(probability=True, random_state=42, cache_size=1000)
        },
        "random_forest": {
            'param_grid': {
                'n_estimators': [100, 200],
                'max_depth': [5, 10, 15],
                'min_samples_split': [10, 20],
                'min_samples_leaf': [5, 10],
                'max_features': ['sqrt', 'log2']
            },
            'base_model': RandomForestClassifier(random_state=42, n_jobs=1)
        },
        "logistic_regression": {
            'param_grid': {
                'C': [0.1, 1, 5, 10, 50, 100],
                'penalty': ['l2'],
                'solver': ['lbfgs']
            },
            'base_model': LogisticRegression(max_iter=1000, random_state=42)
        },
        "knn": {
            'param_grid': {
                'n_neighbors': [3, 5, 7, 9],
                'weights': ['uniform', 'distance'],
                'p': [1, 2]
            },
            'base_model': KNeighborsClassifier(n_jobs=1)
        }
    }

    # Look the model up first so an unknown model_type fails immediately
    # (still a KeyError) instead of after the preprocessing work.
    config = model_configs[model_type]

    # NOTE(review): the scaler and selector below are fitted on ALL of X before
    # cross-validation, so the CV folds are not independent of preprocessing and
    # the reported CV scores are optimistic. Left as is because the separately
    # saved scaler/selector artifacts are part of this function's contract.
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    selector = SelectKBest(score_func=f_classif, k=n_features)
    X = selector.fit_transform(X, y)

    grid_search = GridSearchCV(
        config['base_model'],
        config['param_grid'],
        cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
        scoring='accuracy',
        verbose=1,
        n_jobs=-1
    )

    grid_search.fit(X, y)
    print(f"best parameters: {grid_search.best_params_}")
    print(f"best CV score: {grid_search.best_score_:.4f}")

    # GridSearchCV uses refit=True by default, so best_estimator_ is already
    # trained on the full (X, y); the extra .fit() the original performed here
    # only repeated that work and has been dropped.
    best_model = grid_search.best_estimator_

    # cross_val_score clones the estimator, so best_model stays fitted.
    cv_scores = cross_val_score(
        best_model,
        X,
        y,
        cv=StratifiedKFold(n_splits=10, shuffle=True, random_state=42),
        scoring='accuracy'
    )
    print(f"mean CV accuracy: {cv_scores.mean():.4f} (+/- {cv_scores.std():.4f})")

    paths = get_model_paths(model_type, n_features, dataset_tag=dataset_tag, base_dir=save_dir)
    joblib.dump(best_model, paths["model"])
    joblib.dump(label_encoder, paths["encoder"])
    joblib.dump(scaler, paths["scaler"])
    joblib.dump(selector, paths["selector"])
    print(f"model saved to {paths['model']}")

    return best_model, label_encoder, scaler, selector

def render_urdu_text(text, font_name="Noto Nastaliq Urdu", font_size=48):
    """Render Urdu text as green glyphs on a black 300x300 BGR image.

    The text is reshaped and reordered for right-to-left display, drawn
    centred with Qt, and returned as a NumPy array that can be pasted into an
    OpenCV frame. Drawing text with Qt presumably requires a QApplication to
    exist already (live_prediction creates one before calling this).

    Args:
        text: The string to draw.
        font_name: Qt font family.
        font_size: Point size of the font.

    Returns:
        ``np.ndarray`` of shape (300, 300, 3) in BGR channel order.
    """
    shaped = get_display(arabic_reshaper.reshape(text))
    font = QFont(font_name, font_size)

    # Format_RGB32 carries no usable alpha channel, so a "transparent" fill is
    # simply black; say so explicitly.
    image = QImage(300, 300, QImage.Format_RGB32)
    image.fill(Qt.black)

    painter = QPainter(image)
    painter.setRenderHint(QPainter.TextAntialiasing)
    painter.setFont(font)
    painter.setPen(Qt.green)

    fm = QFontMetrics(font)
    x = (image.width() - fm.horizontalAdvance(shaped)) // 2
    y = (image.height() + (fm.ascent() - fm.descent())) // 2
    painter.drawText(x, y, shaped)
    painter.end()

    ptr = image.bits()
    ptr.setsize(image.byteCount())
    # np.array() copies the pixels, so the result outlives the QImage.
    arr = np.array(ptr).reshape(image.height(), image.width(), 4)

    # Format_RGB32 stores each pixel as 0xffRRGGBB, i.e. bytes B, G, R, A on
    # little-endian machines. The original converted with COLOR_RGBA2BGR, which
    # swaps red and blue (unnoticed only because the pen is pure green).
    return cv2.cvtColor(arr, cv2.COLOR_BGRA2BGR)


class FingerSpellingSystem:
    """Turns a stream of per-frame letter predictions into a spelled sentence.

    A letter is appended once it has dominated a sliding window of
    ``stability_frames`` confident predictions (>= 70% of the window) for
    ``stability_frames`` consecutive updates. The same letter is not appended
    twice in a row unless the hand disappears in between. After
    ``no_hand_frames`` consecutive frames without a hand a single space is
    appended to close the word.
    """

    def __init__(self, conf_threshold=0.7, stability_frames=15, no_hand_frames=30):
        self.conf_threshold = conf_threshold
        self.stability_frames = stability_frames
        self.no_hand_frames = no_hand_frames
        self.reset_sentence()

    def _clear_tracking(self):
        # Forget the sliding window and the current stable candidate.
        self.current_predictions = []
        self.stable_prediction = None
        self.stable_count = 0

    def reset_sentence(self):
        """Clear the sentence and every piece of tracking state."""
        self.sentence = ""
        self._clear_tracking()
        self.no_hand_count = 0
        self.last_added_letter = None
        self.word_complete = False

    def process_prediction(self, prediction, conf):
        """Feed one frame's predicted letter and its confidence."""
        self.no_hand_count = 0
        self.word_complete = False

        # A low-confidence frame discards everything accumulated so far.
        if not conf >= self.conf_threshold:
            self._clear_tracking()
            return

        window = self.current_predictions
        window.append(prediction)
        if len(window) > self.stability_frames:
            del window[0]
        if len(window) < self.stability_frames:
            return  # window not full yet

        common_pred, common_count = Counter(window).most_common(1)[0]
        if common_count / len(window) < 0.7:
            return  # no clear majority in the window

        if self.stable_prediction == common_pred:
            self.stable_count += 1
        else:
            self.stable_prediction = common_pred
            self.stable_count = 1
        self.stable_count = min(self.stable_count, self.stability_frames)

        held_long_enough = self.stable_count >= self.stability_frames
        if held_long_enough and self.stable_prediction != self.last_added_letter:
            self.sentence += self.stable_prediction
            self.last_added_letter = self.stable_prediction
            self.stable_count = 0

    def process_no_hand(self):
        """Feed one frame in which no hand was detected."""
        self.no_hand_count += 1
        self._clear_tracking()
        # Allows the same letter to be spelled again after the hand returns.
        self.last_added_letter = None

        if self.no_hand_count < self.no_hand_frames or self.word_complete:
            return
        if self.sentence and not self.sentence.endswith(" "):
            self.sentence += " "
            self.word_complete = True

    def get_sentence(self):
        """Return the text spelled so far."""
        return self.sentence

    def get_status(self):
        """Return a snapshot of the tracker state for on-screen display."""
        stability = f"{self.stable_count}/{self.stability_frames}"
        return {
            'sentence': self.sentence,
            'current_letter': self.stable_prediction or "None",
            'stability': stability,
            'confidence_needed': self.conf_threshold,
            'predictions_buffer': len(self.current_predictions)
        }


def live_prediction(model, label_encoder, scaler, selector, conf_threshold=0.5):
    """Run the real-time webcam recognition loop until 'q' is pressed.

    Each frame is mirrored, resized to 1280x720 and passed to MediaPipe. For a
    detected hand the features are scaled, reduced with ``selector`` and
    classified; the majority label of the last 5 frames is fed to a
    FingerSpellingSystem, and the spelled sentence is shown in a Qt window
    while the annotated camera frame is shown with OpenCV.

    Keys (in the OpenCV window): 'q' quits, 'r' resets the sentence.

    Args:
        model: Fitted classifier; ``predict_proba`` is used for the confidence
            when available, otherwise a constant 0.5 is assumed.
        label_encoder: Fitted LabelEncoder mapping class ids to letters.
        scaler: Fitted scaler applied to the raw feature vector.
        selector: Fitted feature selector applied after scaling.
        conf_threshold: Minimum confidence for a prediction to be displayed
            and counted by the spelling system.
    """
    # Creating a second QApplication in the same process (e.g. re-running the
    # cell) is not supported by Qt, so reuse the existing one if there is one.
    app = QApplication.instance()
    owns_app = app is None
    if owns_app:
        app = QApplication([])
        # Keep a reference beyond this call so the QApplication is not
        # destroyed while widgets still exist. NOTE(review): this is a guess at
        # the cause of the reported kernel crash on 'q' -- confirm on the
        # target machine.
        live_prediction._qt_app = app

    cap = cv2.VideoCapture(0)
    hands_tracking = mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=1,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    )

    # Drawing styles do not change between frames; build them once.
    drawing = mp.solutions.drawing_utils
    landmark_style = drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4)
    connection_style = drawing.DrawingSpec(color=(0, 0, 255), thickness=2)

    recent_predictions = []
    spelling_system = FingerSpellingSystem(conf_threshold=conf_threshold,stability_frames=15,no_hand_frames=60)

    window = QWidget()
    window.setWindowTitle('Urdu Sign Language Recognition')
    layout = QVBoxLayout()
    label = QLabel()
    label.setFont(QFont("Noto Nastaliq Urdu", 24))
    layout.addWidget(label)
    window.setLayout(layout)
    window.show()

    print(" press 'r' to reset and 'q' to quit ")

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.flip(frame, 1)
            frame = cv2.resize(frame, (1280, 720))
            result = hands_tracking.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            if result.multi_hand_landmarks:
                for hand_landmarks in result.multi_hand_landmarks:
                    drawing.draw_landmarks(
                        frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                        landmark_style,
                        connection_style
                    )

                    landmarks = [(p.x, p.y, p.z) for p in hand_landmarks.landmark]
                    features = compute_features(landmarks)
                    features = selector.transform(scaler.transform(features.reshape(1, -1)))

                    prediction = model.predict(features)[0]

                    if hasattr(model, 'predict_proba'):
                        conf = (np.max(model.predict_proba(features)[0]))
                    else:
                        conf = 0.5

                    # Majority vote over the last 5 frames smooths flicker.
                    recent_predictions.append(prediction)
                    if len(recent_predictions) > 5:
                        recent_predictions.pop(0)

                    label_text = label_encoder.inverse_transform([Counter(recent_predictions).most_common(1)[0][0]])[0]

                    spelling_system.process_prediction(label_text, conf)

                    if conf >= conf_threshold:
                        urdu_img = render_urdu_text(label_text)
                        h, w = urdu_img.shape[:2]
                        # Only paste the glyph when it fits inside the frame.
                        if 20 + h < frame.shape[0] and 30 + w < frame.shape[1]:
                            frame[20:20+h, 30:30+w] = urdu_img

                        status = spelling_system.get_status()
                        cv2.rectangle(frame, (350, 25), (1120, 60), (0, 0, 0), -1)
                        cv2.putText(
                            frame,
                            f"Confidence: {conf:.2f} | Stable: {status['stability']}",
                            (350, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2
                        )
                    else:
                        cv2.rectangle(frame, (30, 0), (400, 80), (0, 0, 0), -1)
                        cv2.putText(
                            frame, "Low confidence",
                            (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2
                        )
            else:
                spelling_system.process_no_hand()
                cv2.rectangle(frame, (30, 0), (450, 80), (0, 0, 0), -1)
                cv2.putText(
                    frame, "No hand detected",
                    (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 255), 2
                )

            label.setText(spelling_system.get_sentence() or "[Start spelling...]")
            # No Qt event loop is running (app.exec_() is never called), so
            # pump pending events by hand to let the sentence window repaint.
            app.processEvents()
            cv2.imshow("Urdu Sign Language Recognition", frame)

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            elif key == ord('r'):
                spelling_system.reset_sentence()
                label.setText("[Sentence reset]")
    finally:
        # Always release the camera, detector and windows, even when a frame
        # raises, so the webcam is not left locked by the kernel.
        cap.release()
        hands_tracking.close()
        cv2.destroyAllWindows()
        window.close()
        app.processEvents()
        if owns_app:
            # Only quit an application this call created; a pre-existing one
            # may belong to someone else.
            app.quit()


def model_evaluation(model_path, encoder_path, scaler_path, selector_path, test_dataset_path):
    """Evaluate a saved model on a test dataset and plot per-class accuracy.

    Loads the four artifacts with joblib, extracts features from
    ``test_dataset_path``, drops samples whose label the encoder has never
    seen, and reports accuracy, a classification report and per-class
    accuracy. The bar chart is saved to ``per_class_accuracy.png``.

    Args:
        model_path, encoder_path, scaler_path, selector_path: Artifact files
            written by train_model().
        test_dataset_path: Folder-per-class test dataset root.

    Returns:
        ``(accuracy, per_class_acc)`` where ``per_class_acc`` has one entry per
        class known to the encoder, in ``label_encoder.classes_`` order; a
        class with no test samples is reported as NaN.

    Raises:
        ValueError: if no test sample carries a label known to the encoder.
    """
    # NOTE: joblib.load unpickles arbitrary objects; only load trusted files.
    model = joblib.load(model_path)
    label_encoder = joblib.load(encoder_path)
    scaler = joblib.load(scaler_path)
    selector = joblib.load(selector_path)

    X_test, y_test = load_dataset(test_dataset_path, augment=False, debug=True)

    # filter for just in case there are some classes present in test set but not in training set
    known = np.isin(y_test, label_encoder.classes_)
    if not known.any():
        raise ValueError("no test samples with a label known to the encoder")
    X_test = X_test[known]
    y_test = y_test[known]

    X_test = scaler.transform(X_test)
    X_test = selector.transform(X_test)
    y_test = label_encoder.transform(y_test)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Pin the label set to every trained class. Without it, a class missing
    # from the test data shrinks the confusion matrix (misaligning it with
    # classes_) and makes classification_report reject target_names.
    class_ids = np.arange(len(label_encoder.classes_))

    print(f"Test Accuracy: {accuracy:.4f}")
    print("\nClassification Report:")
    print(classification_report(
        y_test, y_pred,
        labels=class_ids,
        target_names=label_encoder.classes_,
        zero_division=0
    ))

    # per-class accuracy, some labels might be broken due to compatibility issues with Urdu
    cm = confusion_matrix(y_test, y_pred, labels=class_ids)
    support = cm.sum(axis=1)
    # Classes without test samples stay NaN instead of triggering a 0/0 warning.
    per_class_acc = np.divide(
        cm.diagonal(), support,
        out=np.full(len(support), np.nan),
        where=support > 0
    )

    print("\nPer-Class Accuracy:")
    for cls, acc in zip(label_encoder.classes_, per_class_acc):
        print(f"{cls}: {acc:.3f}")

    fig, ax = plt.subplots(figsize=(14, 5))
    ax.bar(label_encoder.classes_, per_class_acc, color='steelblue')
    ax.set_ylim(0, 1)
    ax.set_xlabel("Class")
    ax.set_ylabel("Accuracy")
    ax.set_title("Per-Class Accuracy on Test Dataset")
    # The original also called plt.legend() with no labelled artists, which
    # only produced a warning, and right-aligned unrotated tick labels.
    fig.tight_layout()
    fig.savefig('per_class_accuracy.png', dpi=300, bbox_inches='tight')
    plt.show()
    print("plot saved to per_class_accuracy.png")

    return accuracy, per_class_acc

def feature_vector_ablation_study(model_types, feature_counts, dataset_tag):
    """Train every (model type, feature count) pair and compare test accuracy.

    The train and test datasets are loaded once from TRAIN_DATASET_PATH and
    TEST_DATASET_PATH. For each combination a model is trained with
    train_model() (which also saves it) and scored on the test set. Results
    are written to ``ablation_results.csv`` and plotted to
    ``ablation_results.png``.

    Args:
        model_types: Iterable of model names accepted by train_model().
        feature_counts: Iterable of ``n_features`` values to try.
        dataset_tag: Tag forwarded to train_model() for artifact naming.

    Returns:
        DataFrame with columns ``model_type``, ``n_features``, ``test_accuracy``.
    """
    res = []

    X_train, y_train = load_dataset(TRAIN_DATASET_PATH, augment=False, debug=True)
    X_test, y_test = load_dataset(TEST_DATASET_PATH, augment=False, debug=True)

    total = len(model_types) * len(feature_counts)
    experiment = 0

    for model_type in model_types:
        for n_features in feature_counts:
            experiment += 1
            print(f"\n[{experiment}/{total}] {model_type.upper()} | Features: {n_features}")

            model, encoder, scaler, selector = train_model(
                X_train, y_train,
                model_type=model_type,
                n_features=n_features,
                dataset_tag=dataset_tag
            )

            # Work on per-iteration copies. The original overwrote X_test and
            # y_test here, so from the second configuration on the scaler got
            # already-reduced features and the label filter compared encoded
            # integers against string class names.
            known = np.isin(y_test, encoder.classes_)
            X_eval = selector.transform(scaler.transform(X_test[known]))
            y_eval = encoder.transform(y_test[known])

            y_pred = model.predict(X_eval)
            test_acc = accuracy_score(y_eval, y_pred)

            res.append({
                'model_type': model_type,
                'n_features': n_features,
                'test_accuracy': test_acc
            })

    res_df = pd.DataFrame(res)
    res_df.to_csv('ablation_results.csv', index=False)

    plt.figure(figsize=(14, 6))
    for model_name in model_types:
        model_data = res_df[res_df['model_type'] == model_name]
        plt.plot(model_data['n_features'], model_data['test_accuracy'],
                marker='o', linewidth=2, label=model_name.upper())

    plt.xlabel('Number of Features')
    plt.ylabel('Test Accuracy')
    plt.title('Model Performance vs Feature Count')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.savefig('ablation_results.png', dpi=300, bbox_inches='tight')
    plt.show()

    return res_df

def individual_feature_groups(X_train, y_train, X_test, y_test, feature_groups, model_type="logistic_regression"):
    """Score a logistic regression trained on each feature group in isolation.

    Args:
        X_train, y_train: Training features (2-D array) and labels.
        X_test, y_test: Test features and labels.
        feature_groups: Mapping of group name -> list of column indices.
        model_type: Accepted for interface compatibility; the body always
            uses LogisticRegression and never reads this argument.

    Returns:
        dict mapping group name -> dict with "n_features", "train_score",
        "test_score" and "predictions" (predicted test labels).
    """
    clf = LogisticRegression(max_iter=1000, random_state=42, C=10)
    outcome = {}

    for name, cols in feature_groups.items():
        # Always slice from the untouched input arrays; the scaler is fitted
        # on the training columns only and then applied to the test columns.
        standardizer = StandardScaler()
        train_block = standardizer.fit_transform(X_train[:, cols])
        test_block = standardizer.transform(X_test[:, cols])

        clf.fit(train_block, y_train)

        train_score = clf.score(train_block, y_train)
        test_score = clf.score(test_block, y_test)
        outcome[name] = {
            "n_features": len(cols),
            "train_score": train_score,
            "test_score": test_score,
            "predictions": clf.predict(test_block)
        }

    return outcome

def cumulative_feature_groups(X_train, y_train, X_test, y_test, feature_groups, group_order):
    """Score a logistic regression while adding feature groups one at a time.

    Groups are added in ``group_order``; names missing from ``feature_groups``
    are skipped. After each addition a fresh scaler is fitted on the training
    columns selected so far and the model is refitted.

    Returns:
        DataFrame with one row per added group and the columns "group_added",
        "n_features", "train_score", "test_score" and "generalization_gap"
        (train score minus test score).
    """
    clf = LogisticRegression(max_iter=1000, random_state=42, C=10)
    rows = []
    active_cols = []

    for name in group_order:
        if name not in feature_groups:
            continue

        active_cols += feature_groups[name]

        # Slice the original arrays with every column accumulated so far.
        standardizer = StandardScaler()
        train_block = standardizer.fit_transform(X_train[:, active_cols])
        test_block = standardizer.transform(X_test[:, active_cols])

        clf.fit(train_block, y_train)
        train_score = clf.score(train_block, y_train)
        test_score = clf.score(test_block, y_test)

        rows.append({
            "group_added": name,
            "n_features": len(active_cols),
            "train_score": train_score,
            "test_score": test_score,
            "generalization_gap": train_score - test_score
        })

    return pd.DataFrame(rows)


def feature_ablation_study():
    """Run the feature-group ablation study and save its plots and tables.

    Loads the train/test datasets from TRAIN_DATASET_PATH / TEST_DATASET_PATH,
    scores every group of FEATURE_GROUPS on its own and cumulatively, and
    writes:
      * group_performance.png        - train vs test accuracy per group
      * marginal_gain.png            - change in test accuracy per added group
      * feature_group_results.csv    - per-group scores
      * feature_cumulative_results.csv

    Returns:
        ``(group_results, cumulative_results, FEATURE_GROUPS)``.
    """
    def _wrap(name):
        # Break group names onto several lines. The group names use spaces,
        # so replacing only "_" (as the original did) never wrapped anything.
        return name.replace("_", "\n").replace(" ", "\n")

    def _label_bars(ax, bars, fmt, pad):
        # Write each bar's height next to its tip (below it for negative bars).
        for bar in bars:
            h = bar.get_height()
            below = h < 0
            ax.text(
                bar.get_x() + bar.get_width() / 2,
                h - pad if below else h + pad,
                format(h, fmt),
                ha="center", va="top" if below else "bottom", fontsize=8
            )

    X_train, y_train = load_dataset(TRAIN_DATASET_PATH, debug=True)
    X_test, y_test = load_dataset(TEST_DATASET_PATH, debug=True)

    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(y_train)

    # Drop test samples whose class was never seen during training.
    known = np.isin(y_test, label_encoder.classes_)
    X_test = X_test[known]
    y_test = label_encoder.transform(y_test[known])

    print(f"\nTrain: {X_train.shape}, Test: {X_test.shape}")
    print(f"Total features: {X_train.shape[1]}")
    for group, indices in FEATURE_GROUPS.items():
        print(f"{group}: {len(indices)} features")

    group_results = individual_feature_groups(X_train, y_train,X_test, y_test,FEATURE_GROUPS)

    # Add groups in the order they are laid out in the feature vector. Derived
    # from FEATURE_GROUPS so a renamed group cannot be silently skipped.
    group_order = list(FEATURE_GROUPS)

    cumulative_results = cumulative_feature_groups(X_train, y_train,X_test, y_test,FEATURE_GROUPS,group_order)

    # individual feature groups graph
    groups = list(group_results.keys())
    train = [group_results[g]["train_score"] for g in groups]
    test = [group_results[g]["test_score"] for g in groups]
    gaps = [t - v for t, v in zip(train, test)]

    x = np.arange(len(groups))
    w = 0.35

    fig, ax = plt.subplots(figsize=(12, 6))
    b1 = ax.bar(x - w/2, train, w, label="Train", alpha=0.8)
    b2 = ax.bar(x + w/2, test,  w, label="Test (External)", alpha=0.8)

    ax.set_xticks(x)
    ax.set_xticklabels([_wrap(g) for g in groups], rotation=45, ha="right")
    ax.set_ylim(0, 1.05)
    ax.set_ylabel("Accuracy")
    ax.legend()
    ax.grid(axis="y", alpha=0.3)

    _label_bars(ax, b1, ".2f", 0.01)
    _label_bars(ax, b2, ".2f", 0.01)

    # train-minus-test gap above each pair of bars
    for i, g in enumerate(gaps):
        top = max(train[i], test[i])
        ax.text(x[i], top + 0.03, f"Δ={g:.2f}", ha="center", va="bottom", fontsize=9, color="red")

    plt.tight_layout()
    plt.savefig("group_performance.png", dpi=150, bbox_inches="tight")
    plt.show()
    print("figure saved to group_performance.png")

    # cumulative feature groups graph
    x_labels = list(cumulative_results["group_added"])
    # Gain of each step over the previous one; the first bar is the absolute
    # test accuracy of the first group.
    gains = cumulative_results["test_score"].diff().fillna(cumulative_results["test_score"].iloc[0])

    fig, ax = plt.subplots(figsize=(12, 6))
    positions = np.arange(len(x_labels))
    bars = ax.bar(positions, gains.to_numpy(), color="skyblue")
    ax.axhline(0, linewidth=1, color="black", linestyle="--")

    # Fix the tick positions before labelling them; set_xticklabels alone
    # relies on whatever locator the axis happens to have.
    ax.set_xticks(positions)
    ax.set_xticklabels([_wrap(g) for g in x_labels], rotation=45, ha="right")
    ax.set_ylabel("Δ Test Accuracy")
    ax.grid(axis="y", alpha=0.3)

    _label_bars(ax, bars, "+.3f", 0.005)

    plt.tight_layout()
    plt.savefig("marginal_gain.png", dpi=150, bbox_inches="tight")
    plt.show()
    print("figure saved to marginal_gain.png")

    # The per-sample prediction arrays stay in the returned dict but are left
    # out of the CSV, where they would only appear as truncated array reprs.
    group_df = pd.DataFrame([
        {'group': g, **{k: v for k, v in r.items() if k != 'predictions'}}
        for g, r in group_results.items()
    ])
    group_df.to_csv('feature_group_results.csv', index=False)
    cumulative_results.to_csv('feature_cumulative_results.csv', index=False)

    return group_results, cumulative_results, FEATURE_GROUPS

In [None]:
# Training cell: extract features from TRAIN_DATASET_PATH and train one model.
# The dataset tag becomes part of the saved file names, so the same tag must
# be used later when loading the model.
X, y = load_dataset(TRAIN_DATASET_PATH, augment=False, debug=True)
model, encoder, scaler, selector = train_model(
    X, y,
    model_type="logistic_regression",
    n_features=100,
    dataset_tag=DATASET_TAGS[0]
)

In [None]:
# Evaluation cell: rebuild the artifact paths for the model trained above
# (same model type, feature count and dataset tag) and score it on
# TEST_DATASET_PATH.
paths = get_model_paths("logistic_regression", 100, DATASET_TAGS[0])
model_evaluation(
    paths["model"], paths["encoder"],
    paths["scaler"], paths["selector"],
    TEST_DATASET_PATH
)

In [None]:
# Feature-group ablation study. It loads both datasets again on its own, so
# this cell can be run independently of the training/evaluation cells.
group_results, cumulative_results, feature_groups = feature_ablation_study()

In [None]:
# Feature vector size ablation study. Like the cell above, it loads both
# datasets itself; it trains (and saves) one model per model type and
# feature count, so expect a long run.
ablation_results = feature_vector_ablation_study(
    model_types=["svm","svm_poly","knn","logistic_regression","random_forest"],
    feature_counts=FEATURE_COUNTS,
    dataset_tag=DATASET_TAGS[0]
)

In [None]:
# Load a previously trained model and its preprocessing artifacts from disk;
# the model type, feature count and dataset tag must match the training run.
paths = get_model_paths(model_type="logistic_regression", n_features=100, dataset_tag=DATASET_TAGS[0])
model = joblib.load(paths["model"])
encoder = joblib.load(paths["encoder"])
scaler = joblib.load(paths["scaler"])
selector = joblib.load(paths["selector"])

# Start the real-time webcam pipeline ('q' quits, 'r' resets the sentence).
# Known issue reported by the original author: the kernel may crash when 'q'
# is pressed.
live_prediction(model, encoder, scaler, selector, conf_threshold=0.7)