In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Experimentos - Seção 5.2: Comparação com Métodos de Aproximação\n",
    "\n",
    "Comparação sistemática entre FFA Formal, LIME, SHAP e Permutation Importance."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "import sys\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "\n",
    "# Adiciona src ao path\n",
    "sys.path.append('../src')\n",
    "\n",
    "from experiments.section_5_2 import run_section_5_2\n",
    "from utils.metrics import calculate_correlations, calculate_ranking_metrics\n",
    "from utils.visualization import plot_attribution_comparison\n",
    "\n",
    "# Configurações de visualização\n",
    "plt.style.use('default')\n",
    "sns.set_palette(\"husl\")\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Carregamento dos Resultados da Seção 5.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pickle with the Section 5.1 results; the message printed on failure points to\n",
    "# notebook 02_experimentos_5_1.ipynb as the step expected to create it.\n",
    "RESULTS_5_1_PATH = '../data/results/section_5_1_results.pkl'\n",
    "\n",
    "print(\"📂 CARREGANDO RESULTADOS DA SEÇÃO 5.1\")\n",
    "print(\"=\" * 60)\n",
    "\n",
    "try:\n",
    "    # SECURITY: pickle.load can execute arbitrary code, so this path must only\n",
    "    # ever point at a file produced by this project, never at untrusted input.\n",
    "    with open(RESULTS_5_1_PATH, 'rb') as f:\n",
    "        results_5_1 = pickle.load(f)\n",
    "except FileNotFoundError:\n",
    "    # Later cells skip their work when results_5_1 is None.\n",
    "    results_5_1 = None\n",
    "    print(\"❌ Arquivo de resultados não encontrado!\")\n",
    "    print(\"💡 Execute primeiro o notebook 02_experimentos_5_1.ipynb\")\n",
    "else:\n",
    "    print(\"✅ Resultados da Seção 5.1 carregados com sucesso!\")\n",
    "\n",
    "    # Per-dataset summary: test accuracy and number of sampled instances.\n",
    "    for dataset_name, data in results_5_1.items():\n",
    "        perf = data['performance']\n",
    "        print(f\"\\n📊 {dataset_name.upper()}:\")\n",
    "        print(f\"   • Acurácia Teste: {perf['test_accuracy']:.3f}\")\n",
    "        print(f\"   • Instâncias: {len(data['sample_indices'])}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Execução dos Experimentos da Seção 5.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Output pickle for the Section 5.2 results.\n",
    "RESULTS_5_2_PATH = '../data/results/section_5_2_results.pkl'\n",
    "\n",
    "if results_5_1 is not None:\n",
    "    print(\"\\n🚀 EXECUTANDO EXPERIMENTOS DA SEÇÃO 5.2\")\n",
    "    print(\"=\" * 60)\n",
    "\n",
    "    # Section 5.2 is computed from the Section 5.1 results loaded above.\n",
    "    results_5_2 = run_section_5_2(results_5_1)\n",
    "\n",
    "    # Make sure the target directory exists before writing the pickle.\n",
    "    os.makedirs(os.path.dirname(RESULTS_5_2_PATH), exist_ok=True)\n",
    "    with open(RESULTS_5_2_PATH, 'wb') as f:\n",
    "        pickle.dump(results_5_2, f)\n",
    "    print(f\"\\n✅ Resultados da Seção 5.2 salvos em: '{RESULTS_5_2_PATH}'\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Análise Comparativa Detalhada"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if results_5_1 is not None:\n",
    "    print(\"📊 ANÁLISE COMPARATIVA DETALHADA\")\n",
    "    print(\"=\" * 60)\n",
    "\n",
    "    # A correlation above this value counts as high agreement between two methods.\n",
    "    HIGH_AGREEMENT_THRESHOLD = 0.5\n",
    "\n",
    "    # One record per (dataset, instance); a later cell turns this list into a DataFrame.\n",
    "    all_correlations = []\n",
    "\n",
    "    for dataset_name, data in results_5_2.items():\n",
    "        print(f\"\\n🎯 DATASET: {dataset_name.upper()}\")\n",
    "        print(\"-\" * 40)\n",
    "\n",
    "        metrics_data = data['metrics']\n",
    "\n",
    "        # Per-instance values for the current dataset only (reset on every dataset).\n",
    "        correlations_ffa_lime = []\n",
    "        correlations_ffa_shap = []\n",
    "        correlations_lime_shap = []\n",
    "        top_feature_agreements = []\n",
    "\n",
    "        for instance_idx, metrics in metrics_data.items():\n",
    "            corrs = metrics['correlations']\n",
    "            ranking = metrics['ranking_metrics']\n",
    "\n",
    "            correlations_ffa_lime.append(corrs['ffa_vs_lime'])\n",
    "            correlations_ffa_shap.append(corrs['ffa_vs_shap'])\n",
    "            correlations_lime_shap.append(corrs['lime_vs_shap'])\n",
    "            top_feature_agreements.append(ranking['top_feature_agreement'])\n",
    "\n",
    "            # Keep the same values in the cross-dataset list.\n",
    "            all_correlations.append({\n",
    "                'dataset': dataset_name,\n",
    "                'instance': instance_idx,\n",
    "                'ffa_vs_lime': corrs['ffa_vs_lime'],\n",
    "                'ffa_vs_shap': corrs['ffa_vs_shap'],\n",
    "                'lime_vs_shap': corrs['lime_vs_shap']\n",
    "            })\n",
    "\n",
    "        # Mean and spread per comparison. np.std uses its default ddof=0 (population\n",
    "        # standard deviation), unlike pandas' Series.std(), which defaults to ddof=1.\n",
    "        print(\"📈 Correlações Médias (Kendall's Tau):\")\n",
    "        for label, values in (\n",
    "            (\"FFA vs LIME: \", correlations_ffa_lime),\n",
    "            (\"FFA vs SHAP: \", correlations_ffa_shap),\n",
    "            (\"LIME vs SHAP:\", correlations_lime_shap),\n",
    "        ):\n",
    "            print(f\"   • {label} {np.mean(values):.3f} (±{np.std(values):.3f})\")\n",
    "\n",
    "        print(\"\\n🎯 Concordância no Top Feature:\")\n",
    "        agreement_rate = np.mean(top_feature_agreements)\n",
    "        print(f\"   • Taxa de concordância: {agreement_rate:.1%} ({sum(top_feature_agreements)}/{len(top_feature_agreements)} instâncias)\")\n",
    "\n",
    "        # Number of instances whose FFA correlation exceeds the high-agreement threshold.\n",
    "        high_agreement_ffa_lime = sum(1 for corr in correlations_ffa_lime if corr > HIGH_AGREEMENT_THRESHOLD)\n",
    "        high_agreement_ffa_shap = sum(1 for corr in correlations_ffa_shap if corr > HIGH_AGREEMENT_THRESHOLD)\n",
    "\n",
    "        print(\"\\n🔍 Análise de Consistência:\")\n",
    "        print(f\"   • Alta concordância FFA-LIME:  {high_agreement_ffa_lime}/{len(correlations_ffa_lime)} instâncias\")\n",
    "        print(f\"   • Alta concordância FFA-SHAP:  {high_agreement_ffa_shap}/{len(correlations_ffa_shap)} instâncias\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Visualização das Comparações"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if results_5_1 is not None:\n",
    "    print(\"📈 VISUALIZAÇÃO DAS COMPARAÇÕES\")\n",
    "    print(\"=\" * 60)\n",
    "\n",
    "    # Output directory for the per-instance comparison figures.\n",
    "    PLOTS_DIR = '../data/results/plots'\n",
    "    os.makedirs(PLOTS_DIR, exist_ok=True)\n",
    "\n",
    "    for dataset_name, data in results_5_2.items():\n",
    "        sample_indices = data['sample_indices']\n",
    "\n",
    "        # The plotting payload does not depend on the instance, so build it once per dataset.\n",
    "        # NOTE(review): assumes plot_attribution_comparison does not mutate its input - confirm.\n",
    "        plot_data = {\n",
    "            'feature_names': data['feature_names'],\n",
    "            'formal_attributions': data['formal_attributions'],\n",
    "            'lime_attributions': data['lime_attributions'],\n",
    "            'shap_attributions': data['shap_attributions'],\n",
    "            'permutation_importance': data['permutation_importance']\n",
    "        }\n",
    "\n",
    "        for i, instance_idx in enumerate(sample_indices):\n",
    "            print(f\"\\n📊 Gerando gráfico: {dataset_name} - Instância {instance_idx}\")\n",
    "\n",
    "            # The plotting call receives the position `i` within sample_indices,\n",
    "            # while the file name uses the instance id itself.\n",
    "            save_path = f'{PLOTS_DIR}/{dataset_name}_instance_{instance_idx}.png'\n",
    "            plot_attribution_comparison(plot_data, dataset_name, i, save_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Análise Estatística Agregada"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if results_5_1 is not None:\n",
    "    print(\"📊 ANÁLISE ESTATÍSTICA AGREGADA\")\n",
    "    print(\"=\" * 60)\n",
    "\n",
    "    # One row per (dataset, instance) with the three pairwise correlations.\n",
    "    df_correlations = pd.DataFrame(all_correlations)\n",
    "\n",
    "    print(\"\\n📈 ESTATÍSTICAS GERAIS DAS CORRELAÇÕES:\")\n",
    "    print(\"-\" * 40)\n",
    "\n",
    "    for comparison in ['ffa_vs_lime', 'ffa_vs_shap', 'lime_vs_shap']:\n",
    "        values = df_correlations[comparison]\n",
    "        # Boolean masks: .sum() counts the matching instances, .mean() is their share.\n",
    "        above_half = values > 0.5\n",
    "        negative = values < 0.0\n",
    "\n",
    "        print(f\"\\n🔍 {comparison.upper()}:\")\n",
    "        print(f\"   • Média: {values.mean():.3f}\")\n",
    "        print(f\"   • Desvio Padrão: {values.std():.3f}\")\n",
    "        print(f\"   • Mínimo: {values.min():.3f}\")\n",
    "        print(f\"   • Máximo: {values.max():.3f}\")\n",
    "        print(f\"   • > 0.5: {above_half.sum()}/{len(values)} ({above_half.mean():.1%})\")\n",
    "        print(f\"   • < 0.0: {negative.sum()}/{len(values)} ({negative.mean():.1%})\")\n",
    "\n",
    "    print(\"\\n📊 ANÁLISE POR DATASET:\")\n",
    "    print(\"-\" * 40)\n",
    "\n",
    "    # Mean and standard deviation of each comparison, grouped by dataset.\n",
    "    dataset_stats = df_correlations.groupby('dataset').agg({\n",
    "        'ffa_vs_lime': ['mean', 'std'],\n",
    "        'ffa_vs_shap': ['mean', 'std'],\n",
    "        'lime_vs_shap': ['mean', 'std']\n",
    "    }).round(3)\n",
    "\n",
    "    # Rich notebook rendering instead of the plain-text repr.\n",
    "    display(dataset_stats)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Visualização das Correlações"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if results_5_1 is not None:\n",
    "    print(\"📈 VISUALIZAÇÃO DAS CORRELAÇÕES\")\n",
    "    print(\"=\" * 60)\n",
    "\n",
    "    comparisons = ['ffa_vs_lime', 'ffa_vs_shap', 'lime_vs_shap']\n",
    "    titles = ['FFA vs LIME', 'FFA vs SHAP', 'LIME vs SHAP']\n",
    "    # Box fill colours; reused cyclically so every box is coloured even when\n",
    "    # there are more datasets than colours.\n",
    "    colors = ['lightblue', 'lightcoral']\n",
    "\n",
    "    # Datasets in order of first appearance; the same order is used in all panels.\n",
    "    labels = list(df_correlations['dataset'].unique())\n",
    "\n",
    "    # One boxplot panel per pairwise comparison, one box per dataset.\n",
    "    fig, axes = plt.subplots(1, 3, figsize=(18, 6))\n",
    "\n",
    "    for idx, (comp, title) in enumerate(zip(comparisons, titles)):\n",
    "        data_to_plot = [\n",
    "            df_correlations.loc[df_correlations['dataset'] == dataset_label, comp]\n",
    "            for dataset_label in labels\n",
    "        ]\n",
    "\n",
    "        bp = axes[idx].boxplot(data_to_plot, patch_artist=True)\n",
    "        # Tick labels are set explicitly because boxplot(labels=...) is deprecated\n",
    "        # since Matplotlib 3.9 (renamed to tick_labels); this works on both sides.\n",
    "        axes[idx].set_xticklabels(labels)\n",
    "\n",
    "        for box_idx, patch in enumerate(bp['boxes']):\n",
    "            patch.set_facecolor(colors[box_idx % len(colors)])\n",
    "\n",
    "        axes[idx].set_title(f'{title}\\nCorrelação de Kendall', fontweight='bold')\n",
    "        axes[idx].set_ylabel('Correlação')\n",
    "        axes[idx].grid(axis='y', alpha=0.3)\n",
    "        axes[idx].set_ylim(-1, 1)\n",
    "\n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "    print(\"\\n🔥 HEATMAP DE CORRELAÇÕES MÉDIAS\")\n",
    "\n",
    "    # Mean correlation per dataset (rows) and per comparison (columns).\n",
    "    correlation_matrix = df_correlations.groupby('dataset')[comparisons].mean()\n",
    "\n",
    "    fig, ax = plt.subplots(figsize=(10, 6))\n",
    "    sns.heatmap(correlation_matrix, annot=True, cmap='RdBu_r', center=0,\n",
    "                vmin=-1, vmax=1, ax=ax, fmt='.3f')\n",
    "    ax.set_title('Correlações Médias por Dataset e Método', fontweight='bold', pad=20)\n",
    "    plt.tight_layout()\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Análise de Discordâncias"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if results_5_1 is not None:\n",
    "    print(\"🔍 ANÁLISE DE DISCORDÂNCIAS\")\n",
    "    print(\"=\" * 60)\n",
    "\n",
    "    # LIME-vs-SHAP correlation cut-offs for the high / low agreement buckets.\n",
    "    LIME_SHAP_HIGH = 0.7\n",
    "    LIME_SHAP_LOW = 0.3\n",
    "\n",
    "    total_instances = len(df_correlations)\n",
    "\n",
    "    # An instance counts as a disagreement when FFA correlates negatively\n",
    "    # with LIME or with SHAP.\n",
    "    high_disagreement = df_correlations[\n",
    "        (df_correlations['ffa_vs_lime'] < 0) |\n",
    "        (df_correlations['ffa_vs_shap'] < 0)\n",
    "    ]\n",
    "\n",
    "    print(\"\\n⚠️  CASOS DE DISCORDÂNCIA (correlação negativa):\")\n",
    "    print(f\"   • Total de instâncias com discordância: {len(high_disagreement)}/{total_instances}\")\n",
    "    print(f\"   • Taxa de discordância: {len(high_disagreement)/total_instances:.1%}\")\n",
    "\n",
    "    if len(high_disagreement) > 0:\n",
    "        print(\"\\n📋 INSTÂNCIAS COM MAIOR DISCORDÂNCIA:\")\n",
    "        # itertuples keeps each column's own dtype and avoids building a Series per row.\n",
    "        for row in high_disagreement.itertuples(index=False):\n",
    "            print(f\"   • {row.dataset} - Instância {row.instance}:\")\n",
    "            print(f\"     FFA vs LIME: {row.ffa_vs_lime:.3f}, FFA vs SHAP: {row.ffa_vs_shap:.3f}\")\n",
    "\n",
    "    # How often the two approximation methods agree with each other.\n",
    "    lime_shap_high_agreement = len(df_correlations[df_correlations['lime_vs_shap'] > LIME_SHAP_HIGH])\n",
    "    lime_shap_low_agreement = len(df_correlations[df_correlations['lime_vs_shap'] < LIME_SHAP_LOW])\n",
    "\n",
    "    print(\"\\n🔗 CONCORDÂNCIA LIME vs SHAP:\")\n",
    "    print(f\"   • Alta concordância (>{LIME_SHAP_HIGH}): {lime_shap_high_agreement}/{total_instances} ({lime_shap_high_agreement/total_instances:.1%})\")\n",
    "    print(f\"   • Baixa concordância (<{LIME_SHAP_LOW}): {lime_shap_low_agreement}/{total_instances} ({lime_shap_low_agreement/total_instances:.1%})\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Resumo Executivo e Conclusões"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if results_5_1 is not None:\n",
    "    print(\"üìã RESUMO EXECUTIVO - SE√á√ÉO 5.2\")\n",