# **Pelatihan dan Pengujian Model**

In [None]:
# Install library yang diperlukan
!pip install -U scikit-learn==1.6.1
!pip install joblib
!pip install scipy

# Import library
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV, StratifiedKFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix, f1_score,
    precision_score, recall_score, roc_auc_score, roc_curve, auc,
    precision_recall_curve, average_precision_score, matthews_corrcoef,
    cohen_kappa_score, hamming_loss, jaccard_score, log_loss,
    balanced_accuracy_score, top_k_accuracy_score
)
from sklearn.preprocessing import LabelBinarizer
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
from scipy.stats import loguniform, uniform
from joblib import Memory
import time
import psutil
import json
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# On-disk joblib cache; the Pipeline built further down reuses it for
# fitted transformers.
cachedir = './cache_dir'
memory = Memory(cachedir, verbose=0)

# Load the labelled tweets.
df = pd.read_csv('tweets-labeled.csv')

# Initial EDA: relative frequency of every label.
print("Distribusi Kelas:")
class_dist = df['label'].value_counts(normalize=True)
print(class_dist)

# Bar chart of the class proportions, saved to disk and shown inline.
fig_dist, ax_dist = plt.subplots(figsize=(8, 4))
sns.barplot(x=class_dist.index, y=class_dist.values, ax=ax_dist)
ax_dist.set_title('Distribusi Kelas Label')
ax_dist.set_ylabel('Proporsi')
ax_dist.set_xlabel('Kelas')
fig_dist.savefig('class_distribution.png', bbox_inches='tight')
plt.show()

# Stratified hold-out split: 20% of the rows go to the test set and the
# label proportions are preserved in both parts.
X = df['cleaned_text']
y = df['label']
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

n_train_rows = X_train.shape[0]
n_test_rows = X_test.shape[0]
print(f"\nUkuran Dataset:\nTrain: {n_train_rows} sampel\nTest: {n_test_rows} sampel")

# One-vs-Rest pipeline: TF-IDF features feeding one class-weighted
# LinearSVC per label.
tfidf_stage = TfidfVectorizer(sublinear_tf=True)
base_svm = LinearSVC(
    class_weight='balanced',
    dual=False,
    random_state=42,
    max_iter=2000,
)
ovr_stage = OneVsRestClassifier(base_svm, n_jobs=-1)
pipeline = Pipeline(
    steps=[('tfidf', tfidf_stage), ('ovr', ovr_stage)],
    memory=memory,
)

# Search space sampled by RandomizedSearchCV. Keys follow the
# "<step>__<param>" convention; the SVM sits behind "ovr__estimator__".
param_dist = {
    'tfidf__ngram_range': [(1, 1), (1, 2), (2, 2)],
    'tfidf__max_df': uniform(0.65, 0.3),
    'tfidf__min_df': [0.0005, 0.001, 0.002, 0.005],
    'tfidf__max_features': [8000, 12000, 15000, 20000],
    'tfidf__use_idf': [True],
    'ovr__estimator__C': loguniform(1e-2, 1e3),
    'ovr__estimator__tol': [1e-4, 1e-3, 1e-2],
    'ovr__estimator__loss': ['squared_hinge'],
}

# Randomized search: 100 sampled candidates, each scored with shuffled
# 5-fold stratified CV on weighted F1. error_score='raise' makes any
# failing candidate abort the whole search instead of being scored NaN.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
search_options = {
    'n_iter': 100,
    'cv': skf,
    'scoring': 'f1_weighted',
    'n_jobs': -1,
    'random_state': 42,
    'verbose': 2,
    'refit': True,
    'error_score': 'raise',
    'return_train_score': True,
}
search = RandomizedSearchCV(pipeline, param_dist, **search_options)

# Resource snapshot before training.
# NOTE(review): psutil.virtual_memory() reports memory for the whole
# machine, so the delta printed below also reflects other processes and
# can be negative - confirm this is the intended measurement.
bytes_per_gib = 1024 ** 3
start_time = time.time()
mem_before = psutil.virtual_memory().used / bytes_per_gib

# Run the randomized hyperparameter search on the training split.
print("\nMemulai proses tuning hyperparameter dengan One-vs-Rest...")
search.fit(X_train, y_train)

# Resource snapshot after training.
training_time = time.time() - start_time
mem_after = psutil.virtual_memory().used / bytes_per_gib
elapsed_minutes = training_time / 60
memory_delta_gib = mem_after - mem_before
print(f"\nWaktu training: {elapsed_minutes:.2f} menit")
print(f"Penggunaan memori: {memory_delta_gib:.2f} GB")

# Report the winning configuration and its mean CV score.
print("\n=== Hasil Tuning (One-vs-Rest) ===")
for caption, value in (
    ("Parameter Terbaik:", search.best_params_),
    ("Skor F1 Terbaik (Validasi):", search.best_score_),
):
    print(caption, value)

# Evaluate the refit best pipeline on the held-out test set.
best_model = search.best_estimator_
y_pred = best_model.predict(X_test)
# Despite the name, these are decision_function outputs, not probabilities.
y_pred_proba = best_model.decision_function(X_test)

# ==================== COMPREHENSIVE EVALUATION METRICS ====================

section_rule = "=" * 60
print("\n" + section_rule)
print("COMPREHENSIVE EVALUATION METRICS")
print(section_rule)

# Basic classification metrics: each of precision / recall / F1 is
# computed under macro, micro and weighted averaging (in that order).
print("\n=== 1. BASIC CLASSIFICATION METRICS ===")
averaging_modes = ('macro', 'micro', 'weighted')
accuracy = accuracy_score(y_test, y_pred)
balanced_acc = balanced_accuracy_score(y_test, y_pred)
precision_macro, precision_micro, precision_weighted = [
    precision_score(y_test, y_pred, average=mode) for mode in averaging_modes
]
recall_macro, recall_micro, recall_weighted = [
    recall_score(y_test, y_pred, average=mode) for mode in averaging_modes
]
f1_macro, f1_micro, f1_weighted = [
    f1_score(y_test, y_pred, average=mode) for mode in averaging_modes
]

# Print every score with four decimals, one per line.
basic_metric_rows = (
    ("Accuracy", accuracy),
    ("Balanced Accuracy", balanced_acc),
    ("Precision (Macro)", precision_macro),
    ("Precision (Micro)", precision_micro),
    ("Precision (Weighted)", precision_weighted),
    ("Recall (Macro)", recall_macro),
    ("Recall (Micro)", recall_micro),
    ("Recall (Weighted)", recall_weighted),
    ("F1-Score (Macro)", f1_macro),
    ("F1-Score (Micro)", f1_micro),
    ("F1-Score (Weighted)", f1_weighted),
)
for caption, value in basic_metric_rows:
    print(f"{caption}: {value:.4f}")



# Top-k accuracy for k = 2 and k = 3. A value of k is only evaluated when
# it is strictly smaller than the number of classes, which also means
# nothing is printed for problems with two classes or fewer.
n_model_classes = len(best_model.classes_)
for k in (2, 3):
    if k >= n_model_classes:
        continue
    top_k_acc = top_k_accuracy_score(
        y_test, y_pred_proba, k=k, labels=best_model.classes_
    )
    print(f"Top-{k} Accuracy: {top_k_acc:.4f}")


# Detailed per-class report with four decimals.
print("\n=== 6. DETAILED CLASSIFICATION REPORT ===")
report_text = classification_report(y_test, y_pred, digits=4)
print(report_text)

# Per-class metrics summary.
# The label order is passed explicitly to every scorer. Without `labels=`
# scikit-learn orders the per-class arrays by the labels that happen to
# occur in y_test / y_pred, which would misalign them with the 'Class'
# column (or break the DataFrame constructor) if a class known to the
# model is absent from the test split.
print("\n=== 7. PER-CLASS METRICS SUMMARY ===")
class_labels = best_model.classes_
per_class_precision = precision_score(y_test, y_pred, labels=class_labels, average=None)
per_class_recall = recall_score(y_test, y_pred, labels=class_labels, average=None)
per_class_f1 = f1_score(y_test, y_pred, labels=class_labels, average=None)

metrics_df = pd.DataFrame({
    'Class': class_labels,
    'Precision': per_class_precision,
    'Recall': per_class_recall,
    'F1-Score': per_class_f1,
    # Number of test rows whose true label is this class (vectorised count).
    'Support': [int((y_test == cls).sum()) for cls in class_labels]
})
print(metrics_df.round(4))

# ==================== ADVANCED VISUALIZATIONS ====================

print("\n" + "="*60)
print("ADVANCED VISUALIZATIONS")
print("="*60)

# Fix the label order once so matrix rows/columns and tick labels always
# refer to the same classes, even if a class is missing from the test
# split or from the predictions.
class_labels = best_model.classes_

# 2x2 dashboard: two confusion matrices, per-class bars, F1 vs. support.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Row-normalised confusion matrix (each true-class row sums to 100%).
cm_norm = confusion_matrix(y_test, y_pred, labels=class_labels, normalize='true')
sns.heatmap(cm_norm, annot=True, fmt=".2%", cmap='Blues',
           xticklabels=class_labels,
           yticklabels=class_labels, ax=axes[0,0])
axes[0,0].set_title('Confusion Matrix (Normalized)')
axes[0,0].set_xlabel('Predicted')
axes[0,0].set_ylabel('True')

# Confusion matrix with raw counts.
cm_abs = confusion_matrix(y_test, y_pred, labels=class_labels)
sns.heatmap(cm_abs, annot=True, fmt="d", cmap='Blues',
           xticklabels=class_labels,
           yticklabels=class_labels, ax=axes[0,1])
axes[0,1].set_title('Confusion Matrix (Absolute)')
axes[0,1].set_xlabel('Predicted')
axes[0,1].set_ylabel('True')

# Grouped bars of precision / recall / F1 per class.
metrics_plot = metrics_df.set_index('Class')[['Precision', 'Recall', 'F1-Score']]
metrics_plot.plot(kind='bar', ax=axes[1,0])
axes[1,0].set_title('Per-Class Metrics Comparison')
axes[1,0].set_ylabel('Score')
axes[1,0].legend()
axes[1,0].tick_params(axis='x', rotation=45)

# Test-set support per class, aligned to class_labels. reindex() yields 0
# for a class that never occurs in y_test instead of raising KeyError.
class_counts = pd.Series(y_test).value_counts().reindex(class_labels, fill_value=0)
performance_data = pd.DataFrame({
    'Class': class_labels,
    'F1-Score': per_class_f1,
    'Sample_Count': class_counts.to_numpy()
})

scatter = axes[1,1].scatter(performance_data['Sample_Count'],
                          performance_data['F1-Score'],
                          s=100, alpha=0.7, c=range(len(class_labels)), cmap='viridis')
axes[1,1].set_xlabel('Number of Test Samples')
axes[1,1].set_ylabel('F1-Score')
axes[1,1].set_title('F1-Score vs Sample Count')
# Label every point with its class name, slightly offset from the marker.
for label, count, score in zip(performance_data['Class'],
                               performance_data['Sample_Count'],
                               performance_data['F1-Score']):
    axes[1,1].annotate(label, (count, score),
                      xytext=(5, 5), textcoords='offset points', fontsize=8)

plt.tight_layout()
plt.savefig('comprehensive_evaluation_plots.png', bbox_inches='tight', dpi=300)
plt.show()



# Decision score distribution: one histogram panel per class, comparing
# the scores of samples that truly belong to the class against all others.
n_classes = len(best_model.classes_)

# For a two-class problem decision_function returns a 1-D array holding
# the score of classes_[1]. Expand it to one column per class, using the
# negated score as the mirrored view for classes_[0], so the per-class
# loop below works for any number of classes.
score_matrix = np.asarray(y_pred_proba)
if score_matrix.ndim == 1:
    score_matrix = np.column_stack([-score_matrix, score_matrix])

# squeeze=False always returns a 2-D Axes array, so ravel() gives a flat
# sequence of Axes whatever the grid shape. (The previous `[axes]` wrapped
# the whole array in a list and failed for <= 2 classes; a leftover
# plt.figure() call also produced an empty extra figure.)
fig, axes = plt.subplots(2, (n_classes + 1) // 2, figsize=(15, 8), squeeze=False)
axes = axes.ravel()

true_labels = np.asarray(y_test)
for ax, class_name, class_scores in zip(axes, best_model.classes_, score_matrix.T):
    # Split this class's scores by whether the sample truly belongs to it.
    belongs_to_class = true_labels == class_name
    true_class_scores = class_scores[belongs_to_class]
    other_class_scores = class_scores[~belongs_to_class]

    ax.hist(other_class_scores, bins=30, alpha=0.7, label='Other Classes', color='red')
    ax.hist(true_class_scores, bins=30, alpha=0.7, label=f'True {class_name}', color='blue')
    ax.set_title(f'Decision Scores: {class_name}')
    ax.set_xlabel('Decision Score')
    ax.set_ylabel('Frequency')
    ax.legend()
    ax.grid(True, alpha=0.3)

# Drop the unused panel left over when the class count is odd.
for unused_ax in axes[n_classes:]:
    fig.delaxes(unused_ax)

plt.tight_layout()
plt.savefig('decision_scores_distribution.png', bbox_inches='tight', dpi=300)
plt.show()

# ==================== MODEL ANALYSIS ====================

analysis_rule = "=" * 60
print("\n" + analysis_rule)
print("MODEL ANALYSIS")
print(analysis_rule)

# Feature importance: for every per-class linear model collect the ten
# largest and the ten smallest coefficients, then plot them per class.
if hasattr(best_model.named_steps['ovr'], 'estimators_'):
    feature_names = best_model.named_steps['tfidf'].get_feature_names_out()

    print("\n=== FEATURE IMPORTANCE ANALYSIS ===")

    fitted_ovr = best_model.named_steps['ovr']
    feature_importance_data = []

    for class_label, estimator in zip(best_model.classes_, fitted_ovr.estimators_):
        if not hasattr(estimator, 'coef_'):
            continue
        coef = estimator.coef_[0]

        # A single ascending argsort yields both tails of the weights.
        ascending_order = np.argsort(coef)
        top_positive_indices = ascending_order[-10:][::-1]
        top_negative_indices = ascending_order[:10]

        # Positive rows are appended before negative rows for each class.
        for weight_type, selected in (('Positive', top_positive_indices),
                                      ('Negative', top_negative_indices)):
            feature_importance_data.extend(
                {
                    'Class': class_label,
                    'Feature': feature_names[idx],
                    'Weight': coef[idx],
                    'Type': weight_type
                }
                for idx in selected
            )

    # Long-format table: one row per (class, feature).
    feature_df = pd.DataFrame(feature_importance_data)

    # One horizontal bar panel per class, bars ordered by |weight|.
    n_panels = len(best_model.classes_)
    fig, axes = plt.subplots(n_panels, 1, figsize=(12, 4*n_panels))
    if n_panels == 1:
        axes = [axes]

    for ax, class_name in zip(axes, best_model.classes_):
        class_features = feature_df[feature_df['Class'] == class_name]
        magnitude_order = class_features['Weight'].abs().sort_values(ascending=True).index
        class_features_sorted = class_features.reindex(magnitude_order)

        bar_positions = range(len(class_features_sorted))
        colors = ['red' if weight < 0 else 'blue' for weight in class_features_sorted['Weight']]
        ax.barh(bar_positions, class_features_sorted['Weight'], color=colors, alpha=0.7)
        ax.set_yticks(bar_positions)
        ax.set_yticklabels(class_features_sorted['Feature'], fontsize=8)
        ax.set_title(f'Feature Importance: {class_name}')
        ax.set_xlabel('Weight')
        ax.grid(True, alpha=0.3)
        ax.axvline(x=0, color='black', linestyle='-', alpha=0.5)

    plt.tight_layout()
    plt.savefig('feature_importance_analysis.png', bbox_inches='tight', dpi=300)
    plt.show()

# Cross-validation analysis: list the best candidates and plot how the
# sampled configurations behave.
print("\n=== CROSS-VALIDATION ANALYSIS ===")
cv_results = pd.DataFrame(search.cv_results_)
# Sort once; the top-10 listing and the top-20 plot are both sliced from
# this frame. (Previously the "top 20" was taken from the already
# truncated top-10 frame, so it never contained more than ten points.)
cv_ranked = cv_results.sort_values(by='rank_test_score')
cv_results_sorted = cv_ranked.head(10)

print("Top 10 Parameter Combinations:")
for position, (_, row) in enumerate(cv_results_sorted.iterrows(), start=1):
    print(f"\nRank {position}:")
    # The spread is reported as two standard deviations across folds.
    print(f"  Mean CV Score: {row['mean_test_score']:.4f} (+/- {row['std_test_score']*2:.4f})")
    print(f"  Parameters: {row['params']}")

# Sampled C values as plain floats so matplotlib can map them to colours
# and positions regardless of how pandas stored the column.
c_values = cv_results['param_ovr__estimator__C'].astype(float)

# CV scores visualization (2x2 grid).
plt.figure(figsize=(12, 8))

# Panel 1: train vs. validation score, coloured by C. Points far below
# the dashed diagonal indicate over-fitting.
plt.subplot(2, 2, 1)
plt.scatter(cv_results['mean_train_score'], cv_results['mean_test_score'],
           alpha=0.6, c=c_values, cmap='viridis')
plt.colorbar(label='C parameter')
plt.xlabel('Mean Train Score')
plt.ylabel('Mean CV Score')
plt.title('Train vs CV Score')
plt.plot([0, 1], [0, 1], 'k--', alpha=0.5)

# Panel 2: mean score against its across-fold standard deviation.
plt.subplot(2, 2, 2)
plt.scatter(cv_results['mean_test_score'], cv_results['std_test_score'], alpha=0.6)
plt.xlabel('Mean CV Score')
plt.ylabel('Std CV Score')
plt.title('CV Score vs Stability')

# Panel 3: histogram of mean CV scores over all candidates.
plt.subplot(2, 2, 3)
plt.hist(cv_results['mean_test_score'], bins=20, alpha=0.7)
plt.xlabel('Mean CV Score')
plt.ylabel('Frequency')
plt.title('Distribution of CV Scores')

# Panel 4: C against score for the 20 best-ranked candidates.
plt.subplot(2, 2, 4)
top_params = cv_ranked.head(20)
plt.scatter(top_params['param_ovr__estimator__C'].astype(float),
            top_params['mean_test_score'], alpha=0.7)
plt.xscale('log')
plt.xlabel('C Parameter (log scale)')
plt.ylabel('Mean CV Score')
plt.title('C Parameter vs Performance')

plt.tight_layout()
plt.savefig('cross_validation_analysis.png', bbox_inches='tight', dpi=300)
plt.show()

# ==================== COMPREHENSIVE SUMMARY ====================

print("\n" + "="*60)
print("COMPREHENSIVE MODEL SUMMARY")
print("="*60)

# Standard deviation across folds of the best-ranked candidate.
best_rank_rows = cv_results['rank_test_score'] == 1
best_std_score = cv_results.loc[best_rank_rows, 'std_test_score'].iloc[0]

# Summary dictionary, printed and written to JSON below.
summary = {
    'Model_Type': 'One-vs-Rest Linear SVM',
    'Dataset_Size': {
        'Train': len(X_train),
        'Test': len(X_test),
        # feature_names only exists if the feature-importance section ran.
        'Features': len(feature_names) if 'feature_names' in locals() else 'N/A'
    },
    'Classes': {
        'Count': len(best_model.classes_),
        # ndarray.tolist() converts numpy scalars to native Python values;
        # list(...) would keep e.g. numpy.int64 labels, which the json
        # module cannot serialise.
        'Names': best_model.classes_.tolist()
    },
    'Performance_Metrics': {
        'Accuracy': round(float(accuracy), 4),
        'Balanced_Accuracy': round(float(balanced_acc), 4),
        'F1_Macro': round(float(f1_macro), 4),
        'F1_Micro': round(float(f1_micro), 4),
        'F1_Weighted': round(float(f1_weighted), 4),
        'Precision_Macro': round(float(precision_macro), 4),
        'Recall_Macro': round(float(recall_macro), 4),
    },
    'Best_Parameters': search.best_params_,
    'Cross_Validation': {
        'Best_Score': round(float(search.best_score_), 4),
        'Std_Score': round(float(best_std_score), 4)
    },
    'Training_Info': {
        'Training_Time_Minutes': round(training_time/60, 2),
        'Memory_Usage_GB': round(mem_after - mem_before, 2)
    }
}

# Print summary
print(json.dumps(summary, indent=2))

# Save summary to file
with open('model_evaluation_summary.json', 'w') as f:
    json.dump(summary, f, indent=2)

# Collect the aggregate metrics as (name, value) pairs and turn them into
# a two-column table for the CSV export.
metric_pairs = [
    ('Accuracy', accuracy),
    ('Balanced_Accuracy', balanced_acc),
    ('Precision_Macro', precision_macro),
    ('Precision_Micro', precision_micro),
    ('Precision_Weighted', precision_weighted),
    ('Recall_Macro', recall_macro),
    ('Recall_Micro', recall_micro),
    ('Recall_Weighted', recall_weighted),
    ('F1_Macro', f1_macro),
    ('F1_Micro', f1_micro),
    ('F1_Weighted', f1_weighted),
]
detailed_metrics = pd.DataFrame({
    'Metric': [metric_name for metric_name, _ in metric_pairs],
    'Score': [metric_value for _, metric_value in metric_pairs],
})

# Only runs when an average-precision variable exists in this scope; the
# appended rows carry metric names but no scores.
if 'avg_precision_macro' in locals():
    extra_metric_rows = pd.DataFrame({
        'Metric': ['Avg_Precision_Macro', 'Avg_Precision_Micro', 'Avg_Precision_Weighted'],
    })
    detailed_metrics = pd.concat([detailed_metrics, extra_metric_rows], ignore_index=True)

detailed_metrics.to_csv('detailed_evaluation_metrics.csv', index=False)

# Per-class table computed earlier in the script.
metrics_df.to_csv('per_class_metrics.csv', index=False)

# Persist the fitted best pipeline.
joblib.dump(best_model, 'optimized_svm_ovr_model.pkl')

saved_file_lines = (
    "\n=== FILES SAVED ===",
    "1. optimized_svm_ovr_model.pkl - Trained model",
    "2. model_evaluation_summary.json - Comprehensive summary",
    "3. detailed_evaluation_metrics.csv - All metrics",
    "4. per_class_metrics.csv - Per-class performance",
    "5. Multiple visualization PNG files",
)
for line in saved_file_lines:
    print(line)

# Empty the joblib cache directory.
memory.clear(warn=False)

# ==================== ADDITIONAL ADVANCED ANALYSIS ====================

extra_rule = "=" * 60
print("\n" + extra_rule)
print("ADDITIONAL ADVANCED ANALYSIS")
print(extra_rule)

# 1. Learning curve: weighted F1 of the tuned pipeline at ten training-set
# fractions between 10% and 100%, using 3-fold CV.
print("\n=== LEARNING CURVE ANALYSIS ===")
from sklearn.model_selection import learning_curve

try:
    sample_sizes = np.linspace(0.1, 1.0, 10)
    train_sizes, train_scores, val_scores = learning_curve(
        best_model, X_train, y_train,
        train_sizes=sample_sizes,
        cv=3, scoring='f1_weighted', n_jobs=-1, random_state=42
    )

    # Aggregate across folds (axis 1) for every training-set size.
    train_mean, train_std = train_scores.mean(axis=1), train_scores.std(axis=1)
    val_mean, val_std = val_scores.mean(axis=1), val_scores.std(axis=1)

    plt.figure(figsize=(10, 6))
    # Mean line plus a +/- one standard deviation band for each curve.
    for curve_mean, curve_std, curve_color, curve_label in (
        (train_mean, train_std, 'blue', 'Training Score'),
        (val_mean, val_std, 'red', 'Validation Score'),
    ):
        plt.plot(train_sizes, curve_mean, 'o-', color=curve_color, label=curve_label)
        plt.fill_between(train_sizes, curve_mean - curve_std, curve_mean + curve_std,
                         alpha=0.1, color=curve_color)
    plt.xlabel('Training Set Size')
    plt.ylabel('F1 Score (Weighted)')
    plt.title('Learning Curve')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.savefig('learning_curve.png', bbox_inches='tight', dpi=300)
    plt.show()

    print(f"Final training score: {train_mean[-1]:.4f} (+/- {train_std[-1]:.4f})")
    print(f"Final validation score: {val_mean[-1]:.4f} (+/- {val_std[-1]:.4f})")

except Exception as e:
    # Best-effort section: report the failure and continue with the rest.
    print(f"Learning curve analysis failed: {e}")

# Validation curve for the SVM regularisation strength C.
print("\n=== VALIDATION CURVE ANALYSIS ===")
from sklearn.base import clone
from sklearn.model_selection import validation_curve

try:
    # Ten C values, log-spaced between 1e-2 and 1e2.
    param_range = np.logspace(-2, 2, 10)

    # Start from an unfitted clone of the tuned pipeline so that every
    # setting except C matches the selected model. Rebuilding the pipeline
    # by hand silently dropped sublinear_tf=True, max_iter=2000 and the
    # tuned tol / loss, so the curve described a different model.
    curve_estimator = clone(best_model)
    train_scores, val_scores = validation_curve(
        curve_estimator,
        X_train, y_train, param_name='ovr__estimator__C', param_range=param_range,
        cv=3, scoring='f1_weighted', n_jobs=-1
    )

    # Aggregate across folds (axis 1) for every C value.
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    val_mean = np.mean(val_scores, axis=1)
    val_std = np.std(val_scores, axis=1)

    plt.figure(figsize=(10, 6))
    plt.semilogx(param_range, train_mean, 'o-', color='blue', label='Training Score')
    plt.fill_between(param_range, train_mean - train_std, train_mean + train_std, alpha=0.1, color='blue')
    plt.semilogx(param_range, val_mean, 'o-', color='red', label='Validation Score')
    plt.fill_between(param_range, val_mean - val_std, val_mean + val_std, alpha=0.1, color='red')
    plt.xlabel('C Parameter')
    plt.ylabel('F1 Score (Weighted)')
    plt.title('Validation Curve for C Parameter')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.savefig('validation_curve_C.png', bbox_inches='tight', dpi=300)
    plt.show()

except Exception as e:
    # Best-effort section: report the failure and continue with the rest.
    print(f"Validation curve analysis failed: {e}")

# 3. Error analysis: which (true, predicted) label pairs are confused.
print("\n=== ERROR ANALYSIS ===")

# Positional indices of the test rows whose prediction is wrong.
misclassified_mask = y_test != y_pred
misclassified_indices = np.where(misclassified_mask)[0]

print(f"Total misclassified samples: {len(misclassified_indices)}")
print(f"Misclassification rate: {len(misclassified_indices)/len(y_test)*100:.2f}%")

# One row per error: true label, predicted label and the tweet text.
misclass_df = pd.DataFrame({
    'True_Label': y_test.iloc[misclassified_indices],
    'Predicted_Label': y_pred[misclassified_indices],
    'Text': X_test.iloc[misclassified_indices]
})

# Most frequent (true, predicted) pairs, largest first.
print("\nMost Common Misclassification Patterns:")
misclass_patterns = misclass_df.groupby(['True_Label', 'Predicted_Label']).size().sort_values(ascending=False)
print(misclass_patterns.head(10))

# Misclassification matrix (confusion matrix restricted to the errors).
# seaborn cannot draw a heatmap of an empty table, so the plot is skipped
# when every test sample was classified correctly.
if misclass_df.empty:
    print("No misclassified samples - skipping misclassification matrix.")
else:
    plt.figure(figsize=(10, 8))
    misclass_matrix = pd.crosstab(misclass_df['True_Label'], misclass_df['Predicted_Label'])
    sns.heatmap(misclass_matrix, annot=True, fmt='d', cmap='Reds')
    plt.title('Misclassification Matrix (Error Patterns)')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.savefig('misclassification_matrix.png', bbox_inches='tight', dpi=300)
    plt.show()

# Confidence analysis: how the winning decision score relates to
# correctness.
print("\n=== CONFIDENCE ANALYSIS ===")

# Confidence = decision score of the predicted class. For a two-class
# problem decision_function returns a 1-D signed score, so the winning
# side's score is its absolute value; otherwise take the row-wise maximum.
decision_scores = np.asarray(y_pred_proba)
if decision_scores.ndim == 1:
    confidence_scores = np.abs(decision_scores)
else:
    confidence_scores = decision_scores.max(axis=1)

# Plain boolean arrays so every mask below indexes by position.
true_labels = np.asarray(y_test)
correct_predictions = np.asarray(y_test == y_pred)

plt.figure(figsize=(12, 8))

# Panel 1: confidence histograms for correct vs. incorrect predictions.
plt.subplot(2, 2, 1)
plt.hist(confidence_scores[correct_predictions], bins=30, alpha=0.7, label='Correct', color='green')
plt.hist(confidence_scores[~correct_predictions], bins=30, alpha=0.7, label='Incorrect', color='red')
plt.xlabel('Confidence Score')
plt.ylabel('Frequency')
plt.title('Confidence Distribution: Correct vs Incorrect')
plt.legend()

# Panel 2: accuracy within nine equal-width confidence bins.
plt.subplot(2, 2, 2)
confidence_bins = np.linspace(confidence_scores.min(), confidence_scores.max(), 10)
bin_accuracy = []
bin_centers = []
last_bin = len(confidence_bins) - 2
for bin_number, (lower, upper) in enumerate(zip(confidence_bins[:-1], confidence_bins[1:])):
    # Bins are half-open [lower, upper) except the last one, which is
    # closed so the highest-confidence samples are not dropped.
    if bin_number == last_bin:
        mask = (confidence_scores >= lower) & (confidence_scores <= upper)
    else:
        mask = (confidence_scores >= lower) & (confidence_scores < upper)
    if mask.any():
        bin_accuracy.append(correct_predictions[mask].mean())
        bin_centers.append((lower + upper) / 2)

plt.plot(bin_centers, bin_accuracy, 'o-')
# NOTE(review): the scores are SVM margins, not calibrated probabilities,
# so this diagonal is only a visual reference, not a calibration target.
plt.plot([confidence_scores.min(), confidence_scores.max()],
         [confidence_scores.min(), confidence_scores.max()], 'k--', alpha=0.5)
plt.xlabel('Confidence Score')
plt.ylabel('Accuracy')
plt.title('Reliability Diagram')

# Panel 3: confidence histograms grouped by true class.
plt.subplot(2, 2, 3)
for class_name in best_model.classes_:
    class_mask = true_labels == class_name
    if class_mask.any():
        plt.hist(confidence_scores[class_mask], bins=20, alpha=0.5, label=class_name)
plt.xlabel('Confidence Score')
plt.ylabel('Frequency')
plt.title('Confidence by True Class')
plt.legend()

# Panel 4: one point per sample, correctness (0/1) against confidence.
plt.subplot(2, 2, 4)
plt.scatter(confidence_scores, correct_predictions.astype(int), alpha=0.6)
plt.xlabel('Confidence Score')
plt.ylabel('Correct (1) / Incorrect (0)')
plt.title('Confidence vs Correctness')

plt.tight_layout()
plt.savefig('confidence_analysis.png', bbox_inches='tight', dpi=300)
plt.show()

# Feature analysis deep dive: rank features by the sum of their absolute
# weights over all per-class models.
print("\n=== FEATURE ANALYSIS DEEP DIVE ===")

if hasattr(best_model.named_steps['ovr'], 'estimators_'):
    all_features = best_model.named_steps['tfidf'].get_feature_names_out()
    feature_usage = np.zeros(len(all_features))

    # Accumulate |coef| over every binary model that exposes coef_.
    for binary_model in best_model.named_steps['ovr'].estimators_:
        if not hasattr(binary_model, 'coef_'):
            continue
        feature_usage += np.abs(binary_model.coef_[0])

    # Indices of the 50 largest cumulative weights, in descending order.
    top_feature_indices = np.argsort(feature_usage)[-50:][::-1]

    print("Top 50 Most Important Features Across All Classes:")
    for rank, idx in enumerate(top_feature_indices, start=1):
        print(f"{rank:2d}. {all_features[idx]}: {feature_usage[idx]:.4f}")

    # Horizontal bar chart of the 20 strongest features, strongest on top.
    leading_indices = top_feature_indices[:20]
    top_features = all_features[leading_indices]
    top_weights = feature_usage[leading_indices]
    bar_positions = range(len(top_features))

    fig_top, ax_top = plt.subplots(figsize=(12, 10))
    ax_top.barh(bar_positions, top_weights)
    ax_top.set_yticks(bar_positions)
    ax_top.set_yticklabels(top_features)
    ax_top.set_xlabel('Cumulative Absolute Weight')
    ax_top.set_title('Top 20 Most Important Features (Cumulative)')
    ax_top.invert_yaxis()
    fig_top.tight_layout()
    fig_top.savefig('top_features_cumulative.png', bbox_inches='tight', dpi=300)
    plt.show()


# Performance stability: distribution of the per-fold scores over every
# sampled candidate.
print("\n=== PERFORMANCE STABILITY ANALYSIS ===")

# Per-fold score columns that actually exist in the results table.
fold_columns = [
    f'split{fold}_test_score'
    for fold in range(skf.n_splits)
    if f'split{fold}_test_score' in cv_results.columns
]

# Pool the scores of all folds into one flat list.
cv_scores = []
for column in fold_columns:
    cv_scores.extend(cv_results[column].values)

if cv_scores:
    plt.figure(figsize=(12, 6))

    # Left: histogram of all pooled scores with the overall mean marked.
    plt.subplot(1, 2, 1)
    plt.hist(cv_scores, bins=30, alpha=0.7)
    plt.xlabel('CV Score')
    plt.ylabel('Frequency')
    plt.title('Distribution of All CV Scores')
    plt.axvline(np.mean(cv_scores), color='red', linestyle='--', label=f'Mean: {np.mean(cv_scores):.4f}')
    plt.legend()

    # Right: one box per fold.
    plt.subplot(1, 2, 2)
    plt.boxplot([cv_results[column].values for column in fold_columns])
    plt.xlabel('CV Fold')
    plt.ylabel('Score')
    plt.title('Score Distribution by CV Fold')

    plt.tight_layout()
    plt.savefig('performance_stability.png', bbox_inches='tight', dpi=300)
    plt.show()

    print("CV Score Statistics:")
    for caption, reducer in (("Mean", np.mean), ("Std", np.std),
                             ("Min", np.min), ("Max", np.max)):
        print(f"  {caption}: {reducer(cv_scores):.4f}")

# Model complexity: share of exactly-zero coefficients over all per-class
# linear models.
print("\n=== MODEL COMPLEXITY ANALYSIS ===")

total_features = len(all_features)

# Coefficient vector of every binary model that exposes coef_.
coef_vectors = [
    binary_model.coef_[0]
    for binary_model in best_model.named_steps['ovr'].estimators_
    if hasattr(binary_model, 'coef_')
]
non_zero_features = sum(np.sum(vector != 0) for vector in coef_vectors)
total_coefficients = sum(len(vector) for vector in coef_vectors)

# Sparsity is the fraction of coefficients that are exactly zero.
sparsity = 1 - (non_zero_features / total_coefficients)
print(f"Model Sparsity: {sparsity:.4f} ({non_zero_features}/{total_coefficients} non-zero coefficients)")
print(f"Average non-zero features per classifier: {non_zero_features/len(best_model.named_steps['ovr'].estimators_):.0f}")


# ==================== CROSS-VALIDATION FOLD PERFORMANCE ====================
print("\n" + "="*60)
print("CROSS-VALIDATION FOLD PERFORMANCE (BEST MODEL)")
print("="*60)

# Row of cv_results_ that belongs to the selected configuration.
best_index = search.best_index_

# Score of the best configuration on each fold, as plain Python floats so
# they serialise cleanly to JSON.
fold_scores = [
    float(search.cv_results_[f'split{fold}_test_score'][best_index])
    for fold in range(skf.n_splits)
]
for fold_number, fold_score in enumerate(fold_scores, start=1):
    print(f"Fold {fold_number} F1 Weighted: {fold_score:.4f}")

# Summary statistics over the folds.
mean_score = float(np.mean(fold_scores))
std_score = float(np.std(fold_scores))
min_score = float(np.min(fold_scores))
max_score = float(np.max(fold_scores))

print("\nSummary:")
print(f"Rata-rata F1: {mean_score:.4f}")
print(f"Standar Deviasi: {std_score:.4f}")
print(f"Min F1: {min_score:.4f}")
print(f"Max F1: {max_score:.4f}")

# Per-fold line plot with the mean and a +/- 1 SD band.
fold_numbers = range(1, skf.n_splits+1)
plt.figure(figsize=(10, 6))
plt.plot(fold_numbers, fold_scores, 'o-', label='F1 per Fold')
plt.axhline(y=mean_score, color='r', linestyle='--', label=f'Rata-rata ({mean_score:.4f})')
plt.fill_between(fold_numbers,
                 mean_score - std_score,
                 mean_score + std_score,
                 alpha=0.1, color='g', label='±1 SD')

plt.xlabel('Fold')
plt.ylabel('F1 Score (Weighted)')
plt.title('Kinerja Model Terbaik pada Setiap Fold Cross-Validation')
plt.xticks(fold_numbers)
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig('best_model_cv_performance.png', bbox_inches='tight', dpi=300)
plt.show()

# Add the fold statistics to the summary.
summary['Cross_Validation']['Fold_Scores'] = fold_scores
summary['Cross_Validation']['Mean'] = round(mean_score, 4)
summary['Cross_Validation']['Std'] = round(std_score, 4)
summary['Cross_Validation']['Min'] = round(min_score, 4)
summary['Cross_Validation']['Max'] = round(max_score, 4)

# The summary JSON was written before these keys existed, so write it
# again; otherwise the fold statistics never reach the file.
with open('model_evaluation_summary.json', 'w') as f:
    json.dump(summary, f, indent=2)

# Final comprehensive report: assemble a human-readable text summary from
# the values computed earlier in the script, then print it.
print("\n" + "="*60)
print("FINAL COMPREHENSIVE EVALUATION REPORT")
print("="*60)

# str() every label so the join also works for non-string class labels
# (str.join raises TypeError on e.g. integer labels).
class_names_text = ', '.join(map(str, best_model.classes_))

final_report = f"""
MODEL PERFORMANCE SUMMARY
========================
✓ Model Type: One-vs-Rest Linear SVM
✓ Classes: {len(best_model.classes_)} ({class_names_text})
✓ Training Samples: {len(X_train):,}
✓ Test Samples: {len(X_test):,}
✓ Features: {total_features:,}

PERFORMANCE METRICS
==================
✓ Accuracy: {accuracy:.4f}
✓ Balanced Accuracy: {balanced_acc:.4f}
✓ F1-Score (Macro): {f1_macro:.4f}
✓ F1-Score (Weighted): {f1_weighted:.4f}
✓ Precision (Macro): {precision_macro:.4f}
✓ Recall (Macro): {recall_macro:.4f}
"""

final_report += f"""
MODEL CHARACTERISTICS
====================
✓ Model Sparsity: {sparsity:.4f}
✓ Training Time: {training_time/60:.2f} minutes
✓ Memory Usage: {mem_after - mem_before:.2f} GB
✓ Cross-Validation Score: {search.best_score_:.4f}
✓ Hyperparameter Combinations Tested: {len(cv_results)}

BEST HYPERPARAMETERS
===================
"""

# One line per tuned hyperparameter.
for param, value in search.best_params_.items():
    final_report += f"✓ {param}: {value}\n"

final_report += f"""
ERROR ANALYSIS
=============
✓ Misclassified Samples: {len(misclassified_indices)} ({len(misclassified_indices)/len(y_test)*100:.2f}%)
✓ Most Problematic Class Pairs: {misclass_patterns.head(3).to_dict()}

RECOMMENDATIONS
==============
"""

# Verdict based on test accuracy.
if accuracy > 0.8:
    final_report += "✓ Model shows GOOD performance\n"
elif accuracy > 0.7:
    final_report += "✓ Model shows ACCEPTABLE performance\n"
else:
    final_report += "⚠ Model shows POOR performance - consider feature engineering or different algorithms\n"

# Verdict based on coefficient sparsity. The final branch previously
# printed an unrelated "Weak correlation" message; it now describes the
# case it actually handles (sparsity <= 0.2, i.e. a dense model).
if sparsity > 0.8:
    final_report += "✓ Model is highly sparse - good for interpretability\n"
elif sparsity > 0.5:
    final_report += "✓ Model has moderate sparsity\n"
elif sparsity > 0.2:
    final_report += "⚠ Model uses most features - consider feature selection\n"
else:
    final_report += "⚠ Model is dense (low sparsity) - consider stronger regularization or feature selection\n"

print(final_report)

# Save the final report. It contains non-ASCII symbols (check marks and
# warning signs), so the encoding is given explicitly: with the platform
# default (e.g. cp1252 on Windows) the write raises UnicodeEncodeError.
with open('final_evaluation_report.txt', 'w', encoding='utf-8') as f:
    f.write(final_report)

print("\n=== ALL ANALYSIS COMPLETE ===")
print("Generated files:")
print("1. optimized_svm_ovr_model.pkl - Trained model")
print("2. model_evaluation_summary.json - JSON summary")
print("3. detailed_evaluation_metrics.csv - All metrics")
print("4. per_class_metrics.csv - Per-class performance")
print("5. final_evaluation_report.txt - Human-readable report")
print("6. Multiple visualization PNG files")
print("\nCache cleared successfully!")
print("="*60)