# In [None]:
# notebooks/03_classical_ml.ipynb
# ==============================================================================
# Intelligent Document Classification System
# Classical Machine Learning Models
# ==============================================================================

import os
import time
import warnings

import joblib
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
from sklearn.decomposition import TruncatedSVD, PCA
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    classification_report, confusion_matrix, accuracy_score,
    precision_recall_fscore_support, roc_auc_score, roc_curve
)
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from tqdm import tqdm

warnings.filterwarnings('ignore')

# Plot appearance shared by every figure in this script
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# ==============================================================================
# 1. Load Processed Data
# ==============================================================================

print("üìä Loading processed data...")

# One pre-processed CSV per split
train_df = pd.read_csv('../data/processed/train_processed.csv')
val_df = pd.read_csv('../data/processed/val_processed.csv')
test_df = pd.read_csv('../data/processed/test_processed.csv')

print(f"Training samples: {len(train_df):,}")
print(f"Validation samples: {len(val_df):,}")
print(f"Test samples: {len(test_df):,}")


def _split_text_and_labels(frame):
    """Return (text, labels) for one split.

    Missing texts are replaced by the empty string and every entry is cast
    to ``str`` so the vectorizers never receive NaN/float values.
    """
    texts = frame['processed_text'].fillna('').astype(str)
    return texts, frame['category']


# Extract features and labels
X_train, y_train = _split_text_and_labels(train_df)
X_val, y_val = _split_text_and_labels(val_df)
X_test, y_test = _split_text_and_labels(test_df)

# Encode labels: the encoder is fitted on the training labels only and then
# re-used unchanged for the validation and test labels.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)
y_test_encoded = label_encoder.transform(y_test)

class_names = label_encoder.classes_
print(f"\nüéØ Number of classes: {len(class_names)}")
print("Classes:", class_names)

# ==============================================================================
# 2. Feature Extraction
# ==============================================================================

print("\n‚öôÔ∏è Extracting features...")

# Define feature extraction methods
def extract_features_tfidf(X_train, X_val, X_test, max_features=5000, ngram_range=(1,2)):
    """Vectorize the three text splits with a single TF-IDF vectorizer.

    The vectorizer is fitted on ``X_train`` only; ``X_val`` and ``X_test``
    are transformed with the vocabulary learned from the training split.

    Args:
        X_train, X_val, X_test: iterables of raw text documents.
        max_features: vocabulary size cap passed to ``TfidfVectorizer``.
        ngram_range: ``(min_n, max_n)`` passed to ``TfidfVectorizer``.

    Returns:
        ``(train_matrix, val_matrix, test_matrix, vectorizer)``.
    """
    print(f"Extracting TF-IDF features (max_features={max_features})...")

    tfidf_options = {
        'max_features': max_features,
        'ngram_range': ngram_range,
        'min_df': 2,
        'max_df': 0.9,
        'sublinear_tf': True,
        'stop_words': 'english',
    }
    vectorizer = TfidfVectorizer(**tfidf_options)

    # Fit on train only, then project the held-out splits
    train_matrix = vectorizer.fit_transform(X_train)
    val_matrix = vectorizer.transform(X_val)
    test_matrix = vectorizer.transform(X_test)

    print(f"TF-IDF shape - Train: {train_matrix.shape}")

    return train_matrix, val_matrix, test_matrix, vectorizer

def extract_features_count(X_train, X_val, X_test, max_features=5000, ngram_range=(1,2)):
    """Vectorize the three text splits with a single bag-of-words vectorizer.

    The ``CountVectorizer`` is fitted on ``X_train`` only; ``X_val`` and
    ``X_test`` are transformed with the vocabulary learned from training.

    Args:
        X_train, X_val, X_test: iterables of raw text documents.
        max_features: vocabulary size cap passed to ``CountVectorizer``.
        ngram_range: ``(min_n, max_n)`` passed to ``CountVectorizer``.

    Returns:
        ``(train_matrix, val_matrix, test_matrix, vectorizer)``.
    """
    print(f"Extracting Count features (max_features={max_features})...")

    count_options = {
        'max_features': max_features,
        'ngram_range': ngram_range,
        'min_df': 2,
        'max_df': 0.9,
        'stop_words': 'english',
    }
    vectorizer = CountVectorizer(**count_options)

    # Fit on train only, then project the held-out splits
    train_matrix = vectorizer.fit_transform(X_train)
    val_matrix = vectorizer.transform(X_val)
    test_matrix = vectorizer.transform(X_test)

    print(f"Count shape - Train: {train_matrix.shape}")

    return train_matrix, val_matrix, test_matrix, vectorizer

def extract_features_ngrams(X_train, X_val, X_test, ngram_ranges=((1, 1), (2, 2), (3, 3)), max_features=2000):
    """Extract one TF-IDF feature set per n-gram range.

    A separate ``TfidfVectorizer`` is fitted on ``X_train`` for every entry
    of ``ngram_ranges`` and then applied to ``X_val`` and ``X_test``.

    Args:
        X_train, X_val, X_test: iterables of raw text documents.
        ngram_ranges: iterable of ``(min_n, max_n)`` tuples. The default is
            an immutable tuple (rather than a list) so it can never be
            mutated across calls.
        max_features: vocabulary size cap for each individual vectorizer.

    Returns:
        ``(features, vectorizers)`` where ``features`` is a list of dicts
        with keys ``'train'``, ``'val'``, ``'test'`` and ``'ngram_range'``,
        and ``vectorizers`` is the list of fitted vectorizers in the same
        order. Both lists are empty when ``ngram_ranges`` is empty.
    """
    print("Extracting multiple n-gram features...")

    vectorizers = []
    features = []

    for ngram_range in ngram_ranges:
        vectorizer = TfidfVectorizer(
            max_features=max_features,
            ngram_range=ngram_range,
            min_df=2,
            max_df=0.9,
            stop_words='english'
        )

        # Vocabulary is learned from the training split only
        X_train_ngram = vectorizer.fit_transform(X_train)
        X_val_ngram = vectorizer.transform(X_val)
        X_test_ngram = vectorizer.transform(X_test)

        vectorizers.append(vectorizer)
        features.append({
            'train': X_train_ngram,
            'val': X_val_ngram,
            'test': X_test_ngram,
            'ngram_range': ngram_range
        })

        print(f"  {ngram_range}-grams: {X_train_ngram.shape}")

    return features, vectorizers

# Build every feature representation used later in the script
print("\nüîß Creating feature sets...")

# The same three raw-text splits feed every extractor
raw_splits = (X_train, X_val, X_test)

# TF-IDF features
tfidf_outputs = extract_features_tfidf(*raw_splits, max_features=5000)
X_train_tfidf, X_val_tfidf, X_test_tfidf, tfidf_vectorizer = tfidf_outputs

# Count features
count_outputs = extract_features_count(*raw_splits, max_features=5000)
X_train_count, X_val_count, X_test_count, count_vectorizer = count_outputs

# Multiple n-gram features (default n-gram ranges and vocabulary size)
ngram_features, ngram_vectorizers = extract_features_ngrams(*raw_splits)

# ==============================================================================
# 3. Model Definitions
# ==============================================================================

print("\nü§ñ Defining models...")

class ClassicalMLModels:
    """Collection of classical ML models for document classification.

    Attributes:
        random_state: seed forwarded to every estimator that accepts one.
        models: ``{name: {'model': estimator, 'params': grid}}`` registry,
            filled by :meth:`define_models`.
        results: ``{name: {...}}`` training results, filled by
            :meth:`train_model` and extended by :meth:`evaluate_model`.
        best_model: dict describing the model with the highest validation
            accuracy, or ``None`` until :meth:`find_best_model` succeeds.

    NOTE: :meth:`evaluate_model` reads the module-level ``label_encoder`` and
    :meth:`plot_results_comparison` reads the module-level
    ``tfidf_vectorizer`` / ``count_vectorizer``; those globals must exist
    before these methods are called.
    """

    def __init__(self, random_state=42):
        self.random_state = random_state
        self.models = {}
        self.results = {}
        self.best_model = None

    def define_models(self):
        """Populate ``self.models`` with estimators and their search grids."""

        self.models = {
            'logistic_regression': {
                'model': LogisticRegression(
                    random_state=self.random_state,
                    max_iter=1000,
                    n_jobs=-1
                ),
                'params': {
                    'C': [0.1, 1, 10, 100],
                    'penalty': ['l2'],
                    'solver': ['lbfgs', 'sag']
                }
            },

            # LinearSVC has no predict_proba; create_ensemble accounts for it.
            'svm_linear': {
                'model': LinearSVC(
                    random_state=self.random_state,
                    max_iter=10000
                ),
                'params': {
                    'C': [0.1, 1, 10],
                    'penalty': ['l2'],
                    'loss': ['hinge', 'squared_hinge']
                }
            },

            'random_forest': {
                'model': RandomForestClassifier(
                    random_state=self.random_state,
                    n_jobs=-1
                ),
                'params': {
                    'n_estimators': [100, 200, 300],
                    'max_depth': [10, 20, 30, None],
                    'min_samples_split': [2, 5, 10],
                    'min_samples_leaf': [1, 2, 4]
                }
            },

            'naive_bayes': {
                'model': MultinomialNB(),
                'params': {
                    'alpha': [0.1, 0.5, 1.0, 2.0]
                }
            },

            'xgboost': {
                'model': xgb.XGBClassifier(
                    random_state=self.random_state,
                    n_jobs=-1,
                    verbosity=0
                ),
                'params': {
                    'n_estimators': [100, 200],
                    'max_depth': [3, 6, 9],
                    'learning_rate': [0.01, 0.1, 0.3],
                    'subsample': [0.8, 1.0]
                }
            },

            'lightgbm': {
                'model': lgb.LGBMClassifier(
                    random_state=self.random_state,
                    n_jobs=-1,
                    verbose=-1
                ),
                'params': {
                    'n_estimators': [100, 200],
                    'num_leaves': [31, 50, 100],
                    'learning_rate': [0.01, 0.1, 0.3],
                    'subsample': [0.8, 1.0]
                }
            },

            'gradient_boosting': {
                'model': GradientBoostingClassifier(
                    random_state=self.random_state
                ),
                'params': {
                    'n_estimators': [100, 200],
                    'learning_rate': [0.01, 0.1, 0.3],
                    'max_depth': [3, 6, 9],
                    'subsample': [0.8, 1.0]
                }
            }
        }

        print(f"Defined {len(self.models)} models:")
        for name in self.models.keys():
            print(f"  ‚Ä¢ {name}")

    def train_model(self, model_name, X_train, y_train, X_val, y_val,
                   feature_type='tfidf', use_grid_search=True):
        """Train one registered model and record its validation metrics.

        Args:
            model_name: key into ``self.models``.
            X_train, y_train: training features and labels.
            X_val, y_val: validation features and labels.
            feature_type: free-form tag stored with the results
                (``plot_results_comparison`` treats ``'tfidf'`` specially).
            use_grid_search: when true (and the model has a ``'params'``
                grid) run a 3-fold ``GridSearchCV`` scored on accuracy;
                otherwise fit the estimator as configured and compute a
                3-fold cross-validation score separately.

        Returns:
            The fitted estimator (also stored in ``self.results``).

        Raises:
            ValueError: if ``model_name`` is not in ``self.models``.
        """

        print(f"\nüèãÔ∏è Training {model_name} with {feature_type} features...")

        if model_name not in self.models:
            raise ValueError(f"Model {model_name} not defined")

        start_time = time.time()

        model_config = self.models[model_name]
        model = model_config['model']

        if use_grid_search and 'params' in model_config:
            # Perform grid search
            print(f"  Performing grid search...")
            grid_search = GridSearchCV(
                model,
                model_config['params'],
                cv=3,
                scoring='accuracy',
                n_jobs=-1,
                verbose=0
            )

            grid_search.fit(X_train, y_train)
            best_model = grid_search.best_estimator_
            best_params = grid_search.best_params_
            best_score = grid_search.best_score_

            print(f"  Best params: {best_params}")
            print(f"  Best CV score: {best_score:.4f}")

        else:
            # Train without grid search
            best_model = model
            best_model.fit(X_train, y_train)
            best_params = None
            best_score = cross_val_score(model, X_train, y_train, cv=3, n_jobs=-1).mean()

        # Evaluate on validation set
        y_val_pred = best_model.predict(X_val)
        val_accuracy = accuracy_score(y_val, y_val_pred)

        # Get detailed metrics
        val_report = classification_report(y_val, y_val_pred, output_dict=True)

        # Elapsed time covers fitting, CV scoring and validation prediction
        training_time = time.time() - start_time

        # Store results
        self.results[model_name] = {
            'model': best_model,
            'best_params': best_params,
            'cv_score': best_score,
            'val_accuracy': val_accuracy,
            'val_report': val_report,
            'training_time': training_time,
            'feature_type': feature_type
        }

        print(f"  Validation accuracy: {val_accuracy:.4f}")
        print(f"  Training time: {training_time:.2f} seconds")

        return best_model

    def train_all_models(self, X_train, y_train, X_val, y_val,
                        feature_type='tfidf', use_grid_search=True):
        """Train every registered model, then select the best one.

        A failure in one model is reported and does not stop the remaining
        models from training (best-effort by design).
        """

        print(f"\nüöÄ Training all models with {feature_type} features...")

        for model_name in self.models.keys():
            try:
                self.train_model(model_name, X_train, y_train, X_val, y_val,
                               feature_type, use_grid_search)
            except Exception as e:
                print(f"  Error training {model_name}: {str(e)}")

        # Find best model
        self.find_best_model()

    def find_best_model(self):
        """Set ``self.best_model`` to the result with the top validation accuracy.

        Prints a notice and leaves ``self.best_model`` untouched when no
        model has been trained yet.
        """

        if not self.results:
            print("No models trained yet!")
            return

        best_model_name = max(self.results.keys(),
                            key=lambda x: self.results[x]['val_accuracy'])

        self.best_model = {
            'name': best_model_name,
            'model': self.results[best_model_name]['model'],
            'accuracy': self.results[best_model_name]['val_accuracy'],
            'config': self.results[best_model_name]
        }

        print(f"\nüèÜ Best model: {best_model_name}")
        print(f"   Validation accuracy: {self.best_model['accuracy']:.4f}")

    def evaluate_model(self, model_name, X_test, y_test):
        """Evaluate a trained model on the test set and store the metrics.

        Precision, recall and F1 are weighted averages. Class names for the
        text report come from the module-level ``label_encoder``.

        Returns:
            ``(accuracy, classification_report_text, confusion_matrix)``.

        Raises:
            ValueError: if ``model_name`` has no entry in ``self.results``.
        """

        if model_name not in self.results:
            raise ValueError(f"Model {model_name} not trained")

        model = self.results[model_name]['model']

        print(f"\nüß™ Evaluating {model_name} on test set...")

        # Make predictions (probabilities only when the model supports them)
        y_pred = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test) if hasattr(model, 'predict_proba') else None

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision, recall, f1, _ = precision_recall_fscore_support(
            y_test, y_pred, average='weighted'
        )

        # Confusion matrix
        cm = confusion_matrix(y_test, y_pred)

        # Classification report
        report = classification_report(y_test, y_pred,
                                     target_names=label_encoder.classes_)

        # Store test results
        self.results[model_name]['test_results'] = {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
            'confusion_matrix': cm,
            'classification_report': report,
            'predictions': y_pred,
            'probabilities': y_pred_proba
        }

        print(f"  Test accuracy: {accuracy:.4f}")
        print(f"  F1-score: {f1:.4f}")
        print(f"  Precision: {precision:.4f}")
        print(f"  Recall: {recall:.4f}")

        return accuracy, report, cm

    @staticmethod
    def _select_voting(estimators):
        """Return ``'soft'`` if every estimator has ``predict_proba``, else ``'hard'``.

        Soft voting averages class probabilities, so a single estimator
        without ``predict_proba`` (e.g. ``LinearSVC``) would make the
        ensemble fail at prediction time.
        """
        if all(hasattr(estimator, 'predict_proba') for _, estimator in estimators):
            return 'soft'
        return 'hard'

    def create_ensemble(self, top_n=3):
        """Build an (unfitted) voting ensemble from the top ``top_n`` models.

        Models are ranked by validation accuracy. Soft voting is used when
        all selected models expose ``predict_proba``; otherwise the ensemble
        falls back to hard (majority) voting.

        Returns:
            A ``VotingClassifier`` that still has to be fitted by the
            caller, or ``None`` when no model has been trained yet.
        """

        if not self.results:
            print("No models trained yet!")
            return

        # Get top n models
        sorted_models = sorted(self.results.items(),
                             key=lambda x: x[1]['val_accuracy'],
                             reverse=True)[:top_n]

        print(f"\nü§ù Creating ensemble of top {top_n} models:")

        estimators = []
        for model_name, result in sorted_models:
            print(f"  ‚Ä¢ {model_name} (val_acc: {result['val_accuracy']:.4f})")
            estimators.append((model_name, result['model']))

        voting = self._select_voting(estimators)
        if voting == 'hard':
            print("  Using hard voting: at least one selected model has no predict_proba")

        # Create voting classifier
        ensemble = VotingClassifier(
            estimators=estimators,
            voting=voting,
            n_jobs=-1
        )

        return ensemble

    def plot_results_comparison(self):
        """Plot validation accuracy and training time for all trained models.

        Afterwards prints the 20 most important features of every trained
        tree-based model that exposes ``feature_importances_``. Feature
        names are taken from the module-level ``tfidf_vectorizer`` when the
        model was trained with ``feature_type == 'tfidf'`` and from
        ``count_vectorizer`` otherwise.
        """

        if not self.results:
            print("No results to plot!")
            return

        # Prepare data for plotting
        models = []
        val_accuracies = []
        train_times = []

        for model_name, result in self.results.items():
            models.append(model_name)
            val_accuracies.append(result['val_accuracy'])
            train_times.append(result['training_time'])

        # Create subplots
        fig, axes = plt.subplots(1, 2, figsize=(14, 6))

        # Accuracy comparison
        colors = plt.cm.viridis(np.linspace(0, 1, len(models)))
        bars1 = axes[0].bar(models, val_accuracies, color=colors)
        axes[0].set_title('Model Validation Accuracy Comparison', fontsize=14)
        axes[0].set_xlabel('Model', fontsize=12)
        axes[0].set_ylabel('Accuracy', fontsize=12)
        axes[0].set_ylim([0, 1])
        axes[0].tick_params(axis='x', rotation=45)

        # Add accuracy values on bars
        for bar, acc in zip(bars1, val_accuracies):
            axes[0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
                       f'{acc:.3f}', ha='center', va='bottom', fontsize=10)

        # Training time comparison
        bars2 = axes[1].bar(models, train_times, color=colors)
        axes[1].set_title('Model Training Time Comparison', fontsize=14)
        axes[1].set_xlabel('Model', fontsize=12)
        axes[1].set_ylabel('Time (seconds)', fontsize=12)
        axes[1].tick_params(axis='x', rotation=45)

        # Add time values on bars
        for bar, time_val in zip(bars2, train_times):
            axes[1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
                       f'{time_val:.1f}s', ha='center', va='bottom', fontsize=10)

        plt.tight_layout()
        plt.show()

        # Feature importance for tree-based models
        tree_models = ['random_forest', 'xgboost', 'lightgbm', 'gradient_boosting']
        for model_name in tree_models:
            if model_name in self.results:
                model = self.results[model_name]['model']
                if hasattr(model, 'feature_importances_'):
                    print(f"\nüìä Feature importance for {model_name}:")

                    # Get feature names
                    if self.results[model_name]['feature_type'] == 'tfidf':
                        feature_names = tfidf_vectorizer.get_feature_names_out()
                    else:
                        feature_names = count_vectorizer.get_feature_names_out()

                    # Get top features (indices sorted by descending importance)
                    importances = model.feature_importances_
                    indices = np.argsort(importances)[::-1][:20]

                    print("Top 20 important features:")
                    for idx in indices:
                        print(f"  {feature_names[idx]}: {importances[idx]:.4f}")
# Build the model registry used for the TF-IDF experiments
ml_models = ClassicalMLModels(random_state=42)
ml_models.define_models()

# ==============================================================================
# 4. Train Models with TF-IDF Features
# ==============================================================================

section_rule = "=" * 60
print("\n" + section_rule)
print("üöÇ Training with TF-IDF Features")
print(section_rule)

# Fit every registered model on the TF-IDF matrices; validation data is used
# to rank the models afterwards.
ml_models.train_all_models(
    X_train=X_train_tfidf,
    y_train=y_train_encoded,
    X_val=X_val_tfidf,
    y_val=y_val_encoded,
    feature_type='tfidf',
    use_grid_search=True,  # Set to False for faster training
)

# Accuracy / training-time bar charts plus tree-model feature importances
ml_models.plot_results_comparison()

# ==============================================================================
# 5. Train Models with Count Features
# ==============================================================================

count_section_rule = "=" * 60
print("\n" + count_section_rule)
print("üöÇ Training with Count Features")
print(count_section_rule)

# A separate registry keeps the count-feature results apart from the
# TF-IDF results stored in ``ml_models``.
ml_models_count = ClassicalMLModels(random_state=42)
ml_models_count.define_models()

# Fit every model on the bag-of-words matrices without hyper-parameter search
ml_models_count.train_all_models(
    X_train=X_train_count,
    y_train=y_train_encoded,
    X_val=X_val_count,
    y_val=y_val_encoded,
    feature_type='count',
    use_grid_search=False,  # Faster training
)

# Accuracy / training-time bar charts plus tree-model feature importances
ml_models_count.plot_results_comparison()

# ==============================================================================
# 6. Feature Importance Analysis
# ==============================================================================

print("\nüîç Analyzing feature importance...")

# ``best_model`` stays None when every model failed to train; fail with a
# clear message instead of an opaque "'NoneType' is not subscriptable".
if ml_models.best_model is None:
    raise RuntimeError("No model was trained successfully; cannot select a best model.")

# Get best model from TF-IDF training
best_model_name = ml_models.best_model['name']
best_model = ml_models.results[best_model_name]['model']

print(f"Analyzing feature importance for best model: {best_model_name}")

# Only estimators exposing ``feature_importances_`` (the tree-based ones) can
# be analysed here; other model types skip the plot and the CSV export.
if hasattr(best_model, 'feature_importances_'):
    # Get feature names
    feature_names = tfidf_vectorizer.get_feature_names_out()
    importances = best_model.feature_importances_

    # Create feature importance DataFrame (top 30, most important first)
    feature_importance_df = pd.DataFrame({
        'feature': feature_names,
        'importance': importances
    }).sort_values('importance', ascending=False).head(30)

    # Plot top features; reversed so the most important bar is drawn on top
    plt.figure(figsize=(12, 8))
    plt.barh(feature_importance_df['feature'][::-1],
             feature_importance_df['importance'][::-1])
    plt.xlabel('Importance', fontsize=12)
    plt.title(f'Top 30 Feature Importance - {best_model_name}', fontsize=14)
    plt.tight_layout()
    plt.show()

    # Save feature importance; the reports directory is not created anywhere
    # else before this point, so make sure it exists.
    os.makedirs('../reports', exist_ok=True)
    feature_importance_df.to_csv('../reports/feature_importance.csv', index=False)
    print("‚úì Feature importance saved to '../reports/feature_importance.csv'")
else:
    print(f"{best_model_name} does not expose feature_importances_; skipping feature importance export.")

# ==============================================================================
# 7. Evaluate Best Model on Test Set
# ==============================================================================

eval_section_rule = "=" * 60
print("\n" + eval_section_rule)
print("üß™ Final Evaluation on Test Set")
print(eval_section_rule)

# Score the best TF-IDF model on the held-out test split
test_accuracy, test_report, test_cm = ml_models.evaluate_model(
    best_model_name, X_test_tfidf, y_test_encoded
)

# Confusion-matrix heatmap, labelled with the original class names
class_tick_labels = label_encoder.classes_
plt.figure(figsize=(10, 8))
sns.heatmap(
    test_cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_tick_labels,
    yticklabels=class_tick_labels,
)
plt.title(f'Confusion Matrix - {best_model_name}', fontsize=14)
plt.xlabel('Predicted', fontsize=12)
plt.ylabel('Actual', fontsize=12)
plt.tight_layout()
plt.show()

# Per-class precision / recall / F1 as returned by evaluate_model
print("\nüìã Classification Report:")
print(test_report)

# ==============================================================================
# 8. Create and Evaluate Ensemble Model
# ==============================================================================

ensemble_section_rule = "=" * 60
print("\n" + ensemble_section_rule)
print("ü§ù Creating Ensemble Model")
print(ensemble_section_rule)

# Voting ensemble built from the three best TF-IDF models (still unfitted)
ensemble = ml_models.create_ensemble(top_n=3)

# Fit the ensemble on the training split and time it
print("\nTraining ensemble model...")
start_time = time.time()
ensemble.fit(X_train_tfidf, y_train_encoded)
ensemble_time = time.time() - start_time

# Validation performance
y_val_pred_ensemble = ensemble.predict(X_val_tfidf)
ensemble_val_accuracy = accuracy_score(y_val_encoded, y_val_pred_ensemble)
print(f"Ensemble validation accuracy: {ensemble_val_accuracy:.4f}")
print(f"Ensemble training time: {ensemble_time:.2f} seconds")

# Test performance
y_test_pred_ensemble = ensemble.predict(X_test_tfidf)
ensemble_test_accuracy = accuracy_score(y_test_encoded, y_test_pred_ensemble)
print(f"Ensemble test accuracy: {ensemble_test_accuracy:.4f}")

# Relative change of the ensemble versus the best single model, in percent
relative_gain_pct = (ensemble_test_accuracy - test_accuracy) / test_accuracy * 100
print(f"\nComparison with best single model ({best_model_name}):")
print(f"  Single model test accuracy: {test_accuracy:.4f}")
print(f"  Ensemble test accuracy: {ensemble_test_accuracy:.4f}")
print(f"  Improvement: {relative_gain_pct:.2f}%")

# ==============================================================================
# 9. Model Comparison Report
# ==============================================================================

print("\n" + "="*60)
print("üìä Model Performance Summary")
print("="*60)

# Create summary DataFrame with one row per trained model. Only models passed
# to evaluate_model carry 'test_results'; the others keep their CV/validation
# figures and show '-' for the test metrics instead of being dropped.
summary_data = []
for model_name, result in ml_models.results.items():
    test_results = result.get('test_results')
    summary_data.append({
        'Model': model_name,
        'Feature_Type': result['feature_type'],
        'CV_Score': f"{result['cv_score']:.4f}",
        'Val_Accuracy': f"{result['val_accuracy']:.4f}",
        'Test_Accuracy': f"{test_results['accuracy']:.4f}" if test_results else '-',
        'F1_Score': f"{test_results['f1_score']:.4f}" if test_results else '-',
        'Training_Time(s)': f"{result['training_time']:.2f}"
    })

summary_df = pd.DataFrame(summary_data)

# Add ensemble to summary (guarded so this section still runs when the
# ensemble section was skipped, e.g. in an interactive session)
if 'ensemble_test_accuracy' in locals():
    # Index 2 of precision_recall_fscore_support is the F-score
    ensemble_f1 = precision_recall_fscore_support(
        y_test_encoded, y_test_pred_ensemble, average='weighted'
    )[2]
    summary_df = pd.concat([summary_df, pd.DataFrame([{
        'Model': 'Ensemble (top 3)',
        'Feature_Type': 'tfidf',
        'CV_Score': '-',
        'Val_Accuracy': f"{ensemble_val_accuracy:.4f}",
        'Test_Accuracy': f"{ensemble_test_accuracy:.4f}",
        'F1_Score': f"{ensemble_f1:.4f}",
        'Training_Time(s)': f"{ensemble_time:.2f}"
    }])], ignore_index=True)

# Printed after the ensemble row is appended so the console output matches
# the CSV written below.
print("\nModel Performance Summary:")
print(summary_df.to_string(index=False))

# Save summary; make sure the reports directory exists first
os.makedirs('../reports', exist_ok=True)
summary_df.to_csv('../reports/classical_ml_summary.csv', index=False)
print("\n‚úì Model summary saved to '../reports/classical_ml_summary.csv'")

# ==============================================================================
# 10. Save Best Model and Artifacts
# ==============================================================================

print("\nüíæ Saving models and artifacts...")

# Output directory for every pickled artifact
import os
os.makedirs('../models/classical_ml', exist_ok=True)

# Destination paths, declared together so the layout is visible at a glance
best_model_path = f'../models/classical_ml/{best_model_name}_best_model.pkl'
ensemble_path = '../models/classical_ml/ensemble_model.pkl'
vectorizer_path = '../models/classical_ml/tfidf_vectorizer.pkl'
label_encoder_path = '../models/classical_ml/label_encoder.pkl'
results_path = '../models/classical_ml/all_models_results.pkl'

# Best single model
joblib.dump(best_model, best_model_path)
print(f"‚úì Best model saved to {best_model_path}")

# Fitted voting ensemble
joblib.dump(ensemble, ensemble_path)
print(f"‚úì Ensemble model saved to {ensemble_path}")

# TF-IDF vectorizer needed to transform new documents
joblib.dump(tfidf_vectorizer, vectorizer_path)
print(f"‚úì TF-IDF vectorizer saved to {vectorizer_path}")

# Label encoder needed to map predictions back to category names
joblib.dump(label_encoder, label_encoder_path)
print(f"‚úì Label encoder saved to {label_encoder_path}")

# Full results dictionary of the TF-IDF experiments
joblib.dump(ml_models.results, results_path)
print(f"‚úì All model results saved to {results_path}")

# ==============================================================================
# 11. Generate Final Report
# ==============================================================================

print("\nüìã Generating final report...")

# DataFrame.to_markdown depends on the optional `tabulate` package; fall back
# to a plain-text table instead of aborting the whole report when it is
# not installed.
try:
    summary_table = summary_df.to_markdown(index=False)
except ImportError:
    summary_table = summary_df.to_string(index=False)

# Relative change of the ensemble versus the best single model, in percent
ensemble_gain_pct = (ensemble_test_accuracy - test_accuracy) / test_accuracy * 100

# Training times are stored as formatted strings, hence the float cast
fastest_model = summary_df.loc[
    summary_df['Training_Time(s)'].astype(float).idxmin(), 'Model'
]

# Create comprehensive report
final_report = f"""
# Classical ML Models Report
## Document Classification System

### Summary
- **Best Model**: {best_model_name}
- **Best Test Accuracy**: {test_accuracy:.4f}
- **Ensemble Test Accuracy**: {ensemble_test_accuracy:.4f}
- **Number of Models Evaluated**: {len(ml_models.results)}
- **Feature Type**: TF-IDF with 5,000 features

### Model Performance Ranking
{summary_table}

### Key Findings
1. **Best Performing Algorithm**: {best_model_name} achieved the highest accuracy
2. **Feature Importance**: Top features were primarily domain-specific terms
3. **Ensemble Benefit**: Ensemble improved accuracy by {ensemble_gain_pct:.2f}%
4. **Training Efficiency**: {fastest_model} was the fastest to train

### Recommendations
1. **Production Deployment**: Use {best_model_name} or the ensemble model
2. **Feature Engineering**: Consider adding domain-specific features
3. **Model Optimization**: Further hyperparameter tuning could improve performance
4. **Monitoring**: Track model drift with validation accuracy over time

### Next Steps
1. Proceed to transformer-based models for comparison
2. Implement model serving API
3. Create monitoring dashboard
4. Set up retraining pipeline
"""

# Make sure the reports directory exists and write with an explicit encoding
# so the output does not depend on the platform default.
os.makedirs('../reports', exist_ok=True)
with open('../reports/classical_ml_final_report.md', 'w', encoding='utf-8') as f:
    f.write(final_report)

print("‚úì Final report saved to '../reports/classical_ml_final_report.md'")

print("\n" + "="*60)
print("‚úÖ Classical ML Training Complete!")
print(f"üèÜ Best Model: {best_model_name} with {test_accuracy:.4f} accuracy")
print("="*60)