In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Experimentos - Seção 5.1: Datasets Sintéticos\n",
    "\n",
    "Reprodução dos experimentos com datasets sintéticos lineares e não-lineares."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "import sys\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "from scipy.stats import kendalltau\n",
    "\n",
    "# Adiciona src ao path\n",
    "sys.path.append('../src')\n",
    "\n",
    "from experiments.section_5_1 import run_section_5_1\n",
    "from formal_ffa import FormalFFA, HeuristicFFA\n",
    "\n",
    "# Configura√ß√µes de visualiza√ß√£o\n",
    "plt.style.use('default')\n",
    "sns.set_palette(\"husl\")\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Execução dos Experimentos da Seção 5.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the Section 5.1 experiments. The result is kept in `results_5_1`, which the\n",
    "# following cells iterate as a mapping of dataset name -> per-dataset result dict.\n",
    "print(\"🚀 EXECUTANDO EXPERIMENTOS DA SEÇÃO 5.1\")\n",
    "print(\"=\" * 60)\n",
    "\n",
    "results_5_1 = run_section_5_1()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Análise Detalhada dos Resultados"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-dataset report: model accuracy, then for every sampled instance the top-ranked\n",
    "# feature under each FFA variant and Kendall's tau between the two attribution vectors.\n",
    "print(\"📊 ANÁLISE DETALHADA DOS RESULTADOS\")\n",
    "print(\"=\" * 60)\n",
    "\n",
    "for dataset_name, data in results_5_1.items():\n",
    "    print(f\"\\n🎯 DATASET: {dataset_name.upper()}\")\n",
    "    print(\"-\" * 40)\n",
    "\n",
    "    # Model performance\n",
    "    perf = data['performance']\n",
    "    print(\"📈 Performance do Modelo:\")\n",
    "    print(f\"   • Acurácia Treino: {perf['train_accuracy']:.3f}\")\n",
    "    print(f\"   • Acurácia Teste:  {perf['test_accuracy']:.3f}\")\n",
    "\n",
    "    # Attribution data; row i of each attribution collection belongs to sample_indices[i]\n",
    "    heuristic_attrs = data['heuristic_attributions']\n",
    "    formal_attrs = data['formal_attributions']\n",
    "    sample_indices = data['sample_indices']\n",
    "    feature_names = data['feature_names']\n",
    "\n",
    "    print(\"\\n🔍 Análise das Atribuições:\")\n",
    "    for i, idx in enumerate(sample_indices):\n",
    "        # np.asarray makes .max()/argmax work whether a row is stored as a list or an array\n",
    "        heuristic_attr = np.asarray(heuristic_attrs[i])\n",
    "        formal_attr = np.asarray(formal_attrs[i])\n",
    "\n",
    "        # Name of the highest-scoring feature under each method\n",
    "        top_heuristic = feature_names[np.argmax(heuristic_attr)]\n",
    "        top_formal = feature_names[np.argmax(formal_attr)]\n",
    "\n",
    "        # Rank correlation between the methods (kendalltau is imported in the top import cell)\n",
    "        corr, _ = kendalltau(heuristic_attr, formal_attr)\n",
    "\n",
    "        print(f\"   📍 Instância {idx}:\")\n",
    "        print(f\"      • FFA Heurístico: {top_heuristic} ({heuristic_attr.max():.3f})\")\n",
    "        print(f\"      • FFA Formal:     {top_formal} ({formal_attr.max():.3f})\")\n",
    "        print(f\"      • Correlação:     {corr:.3f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Visualização Comparativa FFA Heurístico vs Formal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every dataset and sampled instance: draw the heuristic and formal attributions as\n",
    "# side-by-side horizontal bar charts, then print their rank correlation and top features.\n",
    "print(\"📈 VISUALIZAÇÃO COMPARATIVA\")\n",
    "print(\"=\" * 60)\n",
    "\n",
    "for dataset_name, data in results_5_1.items():\n",
    "    heuristic_attrs = data['heuristic_attributions']\n",
    "    formal_attrs = data['formal_attributions']\n",
    "    sample_indices = data['sample_indices']\n",
    "    feature_names = data['feature_names']\n",
    "\n",
    "    for i, idx in enumerate(sample_indices):\n",
    "        # np.asarray makes .max()/argmax work whether a row is stored as a list or an array\n",
    "        heuristic_attr = np.asarray(heuristic_attrs[i])\n",
    "        formal_attr = np.asarray(formal_attrs[i])\n",
    "\n",
    "        fig, axes = plt.subplots(1, 2, figsize=(15, 6))\n",
    "        # (title prefix, values, bar colour) for the left and right panel respectively\n",
    "        panels = (\n",
    "            ('FFA Heurístico', heuristic_attr, 'skyblue'),\n",
    "            ('FFA Formal', formal_attr, 'lightcoral'),\n",
    "        )\n",
    "        for ax, (method, values, color) in zip(axes, panels):\n",
    "            ax.barh(feature_names, values, color=color, alpha=0.7)\n",
    "            ax.set_title(f'{method} - {dataset_name} (Instância {idx})', fontweight='bold')\n",
    "            # Same fixed x-range on both panels; assumes attributions lie in [0, 1] - TODO confirm\n",
    "            ax.set_xlim(0, 1)\n",
    "            ax.grid(axis='x', alpha=0.3)\n",
    "\n",
    "        plt.tight_layout()\n",
    "        plt.show()\n",
    "        # Explicitly release the figure so the loop never accumulates open figures\n",
    "        # (harmless if the backend already closed it on show).\n",
    "        plt.close(fig)\n",
    "\n",
    "        # Instance statistics (kendalltau is imported in the top import cell)\n",
    "        corr, _ = kendalltau(heuristic_attr, formal_attr)\n",
    "\n",
    "        print(f\"📊 Dataset {dataset_name} - Instância {idx}:\")\n",
    "        print(f\"   • Correlação FFA Heurístico vs Formal: {corr:.3f}\")\n",
    "        print(\"   • Features mais importantes:\")\n",
    "        print(f\"     - Heurístico: {feature_names[np.argmax(heuristic_attr)]} ({heuristic_attr.max():.3f})\")\n",
    "        print(f\"     - Formal:     {feature_names[np.argmax(formal_attr)]} ({formal_attr.max():.3f})\")\n",
    "        print()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Análise de Consistência entre Métodos FFA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Aggregate agreement between heuristic and formal FFA per dataset. The result is stored\n",
    "# in `consistency_analysis` (dataset name -> summary dict), which the summary cell reads.\n",
    "print(\"🔍 ANÁLISE DE CONSISTÊNCIA ENTRE MÉTODOS FFA\")\n",
    "print(\"=\" * 60)\n",
    "\n",
    "consistency_analysis = {}\n",
    "\n",
    "for dataset_name, data in results_5_1.items():\n",
    "    heuristic_attrs = data['heuristic_attributions']\n",
    "    formal_attrs = data['formal_attributions']\n",
    "    sample_indices = data['sample_indices']\n",
    "\n",
    "    correlations = []\n",
    "    top_feature_agreements = []\n",
    "\n",
    "    for i in range(len(sample_indices)):\n",
    "        heuristic_attr = heuristic_attrs[i]\n",
    "        formal_attr = formal_attrs[i]\n",
    "\n",
    "        # Rank correlation (kendalltau is imported in the top import cell)\n",
    "        corr, _ = kendalltau(heuristic_attr, formal_attr)\n",
    "        correlations.append(corr)\n",
    "\n",
    "        # Do both methods rank the same feature first?\n",
    "        top_feature_agreements.append(np.argmax(heuristic_attr) == np.argmax(formal_attr))\n",
    "\n",
    "    # kendalltau returns NaN when one of the vectors is constant; NaN-aware reductions keep a\n",
    "    # single undefined instance from turning the whole dataset average into NaN. The number\n",
    "    # of skipped values is recorded so the reduction stays auditable.\n",
    "    stats = {\n",
    "        'avg_correlation': np.nanmean(correlations),\n",
    "        'std_correlation': np.nanstd(correlations),\n",
    "        'agreement_rate': np.mean(top_feature_agreements),\n",
    "        'n_instances': len(sample_indices),\n",
    "        'n_undefined_correlations': int(np.isnan(correlations).sum()),\n",
    "    }\n",
    "    consistency_analysis[dataset_name] = stats\n",
    "\n",
    "    print(f\"\\n📈 {dataset_name.upper()}:\")\n",
    "    print(f\"   • Correlação média: {stats['avg_correlation']:.3f} (±{stats['std_correlation']:.3f})\")\n",
    "    print(f\"   • Taxa de concordância no top feature: {stats['agreement_rate']:.1%}\")\n",
    "    print(f\"   • Instâncias analisadas: {stats['n_instances']}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Salvando Resultados para a Seção 5.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist `results_5_1` as a pickle file (os and pickle come from the top import cell).\n",
    "print(\"💾 SALVANDO RESULTADOS\")\n",
    "print(\"=\" * 60)\n",
    "\n",
    "# Single definition of the output location, reused for mkdir, the write and the message.\n",
    "RESULTS_PATH = '../data/results/section_5_1_results.pkl'\n",
    "\n",
    "# Create the target directory if it does not exist yet\n",
    "os.makedirs(os.path.dirname(RESULTS_PATH), exist_ok=True)\n",
    "\n",
    "# NOTE: pickle files can execute code when loaded; only load this file from a trusted location.\n",
    "with open(RESULTS_PATH, 'wb') as f:\n",
    "    pickle.dump(results_5_1, f)\n",
    "\n",
    "print(f\"✅ Resultados da Seção 5.1 salvos em:'{RESULTS_PATH}'\")\n",
    "print(\"\\n🎯 PRÓXIMO PASSO: Executar notebook 03_experimentos_5_2.ipynb\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Resumo Executivo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Closing summary: a fixed checklist, the per-dataset figures stored in\n",
    "# `consistency_analysis` by the consistency cell, and a fixed list of next steps.\n",
    "print(\"📋 RESUMO EXECUTIVO - SEÇÃO 5.1\")\n",
    "print(\"=\" * 60)\n",
    "\n",
    "print(\"\\n🎯 OBJETIVOS ATINGIDOS:\")\n",
    "print(\"✅ Geração de datasets sintéticos lineares e não-lineares\")\n",
    "print(\"✅ Treinamento de modelos XGBoost conforme especificação do artigo\")\n",
    "print(\"✅ Implementação e comparação de FFA Heurístico vs Formal\")\n",
    "print(\"✅ Análise de consistência entre métodos de atribuição\")\n",
    "\n",
    "print(\"\\n📊 RESULTADOS OBTIDOS:\")\n",
    "for dataset_name, consistency in consistency_analysis.items():\n",
    "    print(f\"   • {dataset_name}:\")\n",
    "    print(f\"     - Correlação FFA Heurístico vs Formal: {consistency['avg_correlation']:.3f}\")\n",
    "    print(f\"     - Concordância no top feature: {consistency['agreement_rate']:.1%}\")\n",
    "\n",
    "print(\"\\n🔮 PRÓXIMOS PASSOS:\")\n",
    "print(\"   • Comparação com métodos de aproximação (LIME, SHAP) na Seção 5.2\")\n",
    "print(\"   • Análise estatística das diferenças entre métodos\")\n",
    "print(\"   • Validação contra resultados reportados no artigo\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "formal-feature-attribution",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}