# Advanced Eye State Classifier Training
Training pipeline với SVM và Random Forest để phân loại mắt mở/nhắm

In [2]:
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.pipeline import Pipeline
import pickle
import matplotlib.pyplot as plt
from tqdm import tqdm
from utils.feature_extractor import extract_eye_features, preprocess_eye_image

## 1. Initialize Advanced Eye State Classifier

In [3]:
class AdvancedEyeStateClassifier:
    """Container for the candidate pipelines and the best one found so far.

    ``pipelines`` maps a model name ('svm', 'random_forest') to an unfitted
    scikit-learn ``Pipeline`` made of a ``StandardScaler`` followed by the
    classifier. ``best_pipeline``, ``best_accuracy`` and
    ``best_pipeline_name`` start out empty (None / 0 / "").
    """

    def __init__(self):
        # Each candidate is a scaler followed by its estimator.
        svm_steps = [
            ('scaler', StandardScaler()),
            ('classifier', SVC(probability=True, random_state=42)),
        ]
        forest_steps = [
            ('scaler', StandardScaler()),
            ('classifier', RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)),
        ]

        # Key order is kept as: svm, then random_forest.
        self.pipelines = dict(
            svm=Pipeline(svm_steps),
            random_forest=Pipeline(forest_steps),
        )

        # No winner has been recorded yet.
        self.best_pipeline_name = ""
        self.best_accuracy = 0
        self.best_pipeline = None

# Initialize the shared classifier instance; the training, plotting and
# saving code in this notebook reads and updates it as a global.
classifier = AdvancedEyeStateClassifier()
print("üéØ Advanced Eye State Classifier initialized")

üéØ Advanced Eye State Classifier initialized


## 2. Load Dataset with Preprocessing

In [5]:
def _load_eye_class(class_dir, label, class_name):
    """Return (features, labels) lists for the usable images in one class folder.

    Args:
        class_dir: Directory holding the images of a single class.
        label: Integer label attached to every sample from this directory.
        class_name: Human-readable class name used in the progress-bar text.

    Returns:
        Two parallel lists: the extracted feature vectors and their labels.
        Both are empty when ``class_dir`` is not a directory.
    """
    features_list, labels = [], []

    # isdir (not exists): a regular file with this name must not reach listdir.
    if not os.path.isdir(class_dir):
        return features_list, labels

    image_exts = ('.jpg', '.jpeg', '.png', '.bmp')
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split made from it) stable across runs.
    files = sorted(f for f in os.listdir(class_dir) if f.lower().endswith(image_exts))

    for img_name in tqdm(files, desc=f"Processing {len(files)} {class_name} eye images"):
        img = cv2.imread(os.path.join(class_dir, img_name), cv2.IMREAD_GRAYSCALE)
        # cv2.imread returns None for unreadable files; skip those and empty images.
        if img is None or img.size == 0:
            continue

        # PREPROCESSING
        img = preprocess_eye_image(img)

        # FEATURE EXTRACTION
        features = extract_eye_features(img)
        # Drop samples whose feature vector contains NaN.
        if np.any(np.isnan(features)):
            continue

        features_list.append(features)
        labels.append(label)

    return features_list, labels


def load_dataset(data_path='data/eyes'):
    """Load, preprocess and extract features from the eye-image dataset.

    Reads ``<data_path>/open`` (label 1) and then ``<data_path>/closed``
    (label 0). Missing class folders are skipped.

    Args:
        data_path: Root folder that contains the ``open`` and ``closed``
            sub-folders.

    Returns:
        Tuple ``(X, y)`` of numpy arrays: feature vectors and integer labels.
        Both are empty arrays when no valid image was found.
    """
    X, y = [], []

    print("üìÇ Loading dataset with preprocessing + feature extraction...")

    # Open eyes first, then closed eyes.
    for class_name, label in (('open', 1), ('closed', 0)):
        class_features, class_labels = _load_eye_class(
            os.path.join(data_path, class_name), label, class_name
        )
        X.extend(class_features)
        y.extend(class_labels)

    return np.array(X), np.array(y)

# Build the feature matrix X and the label vector y from the default folder
X, y = load_dataset()

if len(X) > 0:
    # Summarise what was loaded: sample count, feature width, class balance
    print(f"‚úÖ Dataset loaded: {len(X)} samples")
    print(f"   Features per sample: {X.shape[1] if X.ndim > 1 else 'Unknown'}")
    print(f"   Open eyes: {(y == 1).sum()}")
    print(f"   Closed eyes: {(y == 0).sum()}")
else:
    print("‚ùå No valid data found! Please check data/eyes/ folders")

üìÇ Loading dataset with preprocessing + feature extraction...


Processing 42952 open eye images: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 42952/42952 [00:35<00:00, 1225.64it/s]
Processing 41946 closed eye images: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 41946/41946 [00:35<00:00, 1196.40it/s]


‚úÖ Dataset loaded: 84898 samples
   Features per sample: 25
   Open eyes: 42952
   Closed eyes: 41946


## 3. Train Models with Cross-Validation

In [6]:
def train_models_with_cv(X, y):
    """Screen all pipelines, tune the top two, and record the best on ``classifier``.

    Steps:
      1. Hold out 20% of the data as a stratified test set (seed 42).
      2. Score every pipeline in ``classifier.pipelines`` with 3-fold CV on
         the training split.
      3. Run a 5-fold ``GridSearchCV`` on the two best-scoring pipelines and
         evaluate each tuned pipeline once on the held-out test set.
      4. Pick the winner by its grid-search CV score and store it on the
         global ``classifier`` (``best_pipeline``, ``best_pipeline_name``,
         ``best_accuracy``). ``best_accuracy`` is the winner's test accuracy.

    The winner is chosen by CV score rather than test accuracy so the test
    set is used only for reporting, not for model selection. The ``best_*``
    fields are always overwritten, so re-running never leaves a pipeline
    from an earlier run registered.

    Args:
        X: Feature matrix, one row per sample.
        y: Label vector aligned with ``X``.

    Returns:
        Dict keyed by pipeline name. Each value holds 'pipeline' (tuned
        estimator), 'accuracy' (test accuracy), 'cv_mean' (best grid-search
        CV score), 'best_params', 'y_test' and 'y_pred'.
    """
    print("\nüöÄ Pipeline Training...")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Search space per pipeline name. A name without an entry gets an empty
    # grid, i.e. a single candidate using the pipeline's current parameters,
    # instead of hitting an unbound or stale `param_grid`.
    param_grids = {
        'random_forest': {
            'classifier__n_estimators': [100, 200],
            'classifier__max_depth': [10, 20, None],
            'classifier__min_samples_split': [2, 5]
        },
        'svm': {
            'classifier__C': [1, 10],
            'classifier__gamma': ['scale', 'auto']
        }
    }

    results = {}

    # Strategy 1: Quick screening of all models
    print("üîç Quick model screening...")
    for name, pipeline in tqdm(classifier.pipelines.items(), desc="Screening models"):
        cv_scores = cross_val_score(pipeline, X_train, y_train, cv=3, scoring='accuracy')
        results[name] = {
            'cv_mean': cv_scores.mean(),
            'cv_std': cv_scores.std()
        }
        print(f"   {name}: {cv_scores.mean():.4f} (¬±{cv_scores.std():.4f})")

    # Strategy 2: Focus on top 2 performers with hyperparameter tuning
    sorted_models = sorted(results.items(), key=lambda x: x[1]['cv_mean'], reverse=True)
    top_models = [model[0] for model in sorted_models[:2]]

    print(f"\nüéØ Optimizing top models: {', '.join(top_models)}")

    final_results = {}

    for name in top_models:
        print(f"\nü§ñ Optimizing {name}...")

        # GridSearchCV with limited scope for speed
        grid_search = GridSearchCV(
            classifier.pipelines[name],
            param_grids.get(name, {}),
            cv=5,
            scoring='accuracy',
            n_jobs=-1
        )

        grid_search.fit(X_train, y_train)
        best_pipeline = grid_search.best_estimator_

        # Evaluate on test set (reporting only; not used to pick the winner)
        y_pred = best_pipeline.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        final_results[name] = {
            'pipeline': best_pipeline,
            'accuracy': accuracy,
            'cv_mean': grid_search.best_score_,
            'best_params': grid_search.best_params_,
            'y_test': y_test,
            'y_pred': y_pred
        }

        print(f"   Best params: {grid_search.best_params_}")
        print(f"   Accuracy: {accuracy:.4f}")
        print(f"   CV Score: {grid_search.best_score_:.4f}")

    if final_results:
        # max() keeps the first name on ties, i.e. the better-screened model.
        best_name = max(final_results, key=lambda n: final_results[n]['cv_mean'])
        classifier.best_pipeline_name = best_name
        classifier.best_pipeline = final_results[best_name]['pipeline']
        classifier.best_accuracy = final_results[best_name]['accuracy']
    else:
        # Nothing was trained: clear any winner left over from a previous run.
        classifier.best_pipeline_name = ""
        classifier.best_pipeline = None
        classifier.best_accuracy = 0

    return final_results

# Train models. Skipped when no samples were loaded; in that case `results`
# is never defined, which the later cells check for before using it.
if len(X) > 0:
    results = train_models_with_cv(X, y)


üöÄ Pipeline Training...
üîç Quick model screening...


Screening models:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 1/2 [20:56<20:56, 1256.24s/it]

   svm: 0.9352 (¬±0.0004)


Screening models: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [21:10<00:00, 635.14s/it] 

   random_forest: 0.9529 (¬±0.0005)

üéØ Optimizing top models: random_forest, svm

ü§ñ Optimizing random_forest...





   Best params: {'classifier__max_depth': None, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 200}
   Accuracy: 0.9584
   CV Score: 0.9554

ü§ñ Optimizing svm...
   Best params: {'classifier__C': 10, 'classifier__gamma': 'scale'}
   Accuracy: 0.9524
   CV Score: 0.9487


## 4. Visualize Results

In [None]:
def plot_results(results):
    """Plot per-pipeline test accuracy and the best pipeline's confusion matrix.

    Left panel: a bar chart of ``results[name]['accuracy']`` for every
    pipeline. Right panel: the confusion matrix of the pipeline named by the
    global ``classifier.best_pipeline_name``. The figure is saved to
    ``models/training_results.png`` and then shown.

    Plotting is best-effort: any exception is reported on stdout rather than
    raised.

    Args:
        results: Dict keyed by pipeline name; each value must provide
            'accuracy', 'y_test' and 'y_pred'.
    """
    try:
        models = list(results.keys())
        accuracies = [results[model]['accuracy'] for model in models]

        plt.figure(figsize=(10, 4))

        plt.subplot(1, 2, 1)
        bars = plt.bar(models, accuracies, color=['skyblue', 'lightgreen', 'salmon', 'gold'])
        plt.title('Pipeline Accuracy')
        plt.ylabel('Accuracy')
        plt.xticks(rotation=45)

        # Keep the zoomed 0.9-1.0 view when every bar fits in it; otherwise
        # lower the floor so a weaker model's bar is not clipped away.
        lowest = min(accuracies, default=1.0)
        y_floor = 0.9 if lowest > 0.9 else max(0.0, lowest - 0.05)
        plt.ylim(y_floor, 1.0)

        for bar, acc in zip(bars, accuracies):
            plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.002,
                     f'{acc:.3f}', ha='center', va='bottom')

        plt.subplot(1, 2, 2)
        best_result = results[classifier.best_pipeline_name]
        # Pin the label order so the matrix is always 2x2 (0 = Closed,
        # 1 = Open), matching the tick labels and the annotation loop below.
        cm = confusion_matrix(best_result['y_test'], best_result['y_pred'], labels=[0, 1])

        plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
        plt.title(f'Best: {classifier.best_pipeline_name}')
        plt.colorbar()

        tick_marks = np.arange(2)
        plt.xticks(tick_marks, ['Closed', 'Open'])
        plt.yticks(tick_marks, ['Closed', 'Open'])

        for i in range(2):
            for j in range(2):
                plt.text(j, i, cm[i, j], ha='center', va='center')

        plt.tight_layout()
        # The output folder may not exist yet when this runs before the model
        # is saved; without it savefig fails and the figure is never written.
        os.makedirs('models', exist_ok=True)
        plt.savefig('models/training_results.png', dpi=300, bbox_inches='tight')
        plt.show()

    except Exception as e:
        print(f"Visualization error: {e}")

# Plot results — only when data was loaded and `results` has been defined
# by the training cell
if len(X) > 0 and 'results' in locals():
    plot_results(results)

## 5. Display Best Model Results

In [None]:
# Display results for the best pipeline; skipped when no data was loaded or
# `results` has not been defined by the training cell
if len(X) > 0 and 'results' in locals():
    print(f"\nüèÜ Best Model: {classifier.best_pipeline_name}")
    print(f"   Accuracy: {classifier.best_accuracy:.4f}")

    # Generate detailed report for best model from its stored test labels and
    # predictions; label 0 is reported as 'Closed' and label 1 as 'Open'
    best_result = results[classifier.best_pipeline_name]
    print(f"\nüìä Detailed Report for {classifier.best_pipeline_name}:")
    print(classification_report(
        best_result['y_test'],
        best_result['y_pred'],
        target_names=['Closed', 'Open']
    ))

## 6. Save Model

In [None]:
def save_model(model_path='models/eye_classifier.pkl'):
    """Pickle the best pipeline and its metadata to ``model_path``.

    The saved dict has the keys 'pipeline', 'pipeline_name', 'accuracy' and
    'feature_count', all read from the global ``classifier``.
    'feature_count' comes from the fitted pipeline's ``n_features_in_`` when
    available, falling back to 25 otherwise.

    Args:
        model_path: Destination file. Its parent folder is created if needed.

    Raises:
        ValueError: If no pipeline has been trained yet, so that an unusable
            file holding ``None`` is never written.
    """
    if classifier.best_pipeline is None:
        raise ValueError("No trained pipeline to save; run training first")

    # dirname is '' for a bare filename, and os.makedirs('') would fail.
    out_dir = os.path.dirname(model_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    pipeline_data = {
        'pipeline': classifier.best_pipeline,
        'pipeline_name': classifier.best_pipeline_name,
        'accuracy': classifier.best_accuracy,
        # Read the width the pipeline was fitted on instead of hard-coding it.
        'feature_count': getattr(classifier.best_pipeline, 'n_features_in_', 25)
    }

    with open(model_path, 'wb') as f:
        pickle.dump(pipeline_data, f)

    print(f"üíæ Pipeline saved: {model_path}")

# Save the best model
if len(X) > 0 and classifier.best_pipeline is not None:
    save_model()

    print("\n‚úÖ Training completed successfully!")
    print("üìÅ Files saved:")
    print("   - models/eye_classifier.pkl (trained model)")
    # The figure is written by the plotting step, which reports failures
    # instead of raising; only list the file when it is actually on disk.
    if os.path.exists('models/training_results.png'):
        print("   - models/training_results.png (visualization)")