In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exploração de Dados - Formal Feature Attribution\n",
    "\n",
    "Notebook para explorar os datasets sintéticos usados nos experimentos."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from sklearn.datasets import make_classification\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Plot configuration: apply matplotlib's 'seaborn-v0_8' style sheet first,\n",
    "# then set seaborn's colour palette to \"husl\".\n",
    "plt.style.use('seaborn-v0_8')\n",
    "sns.set_palette(\"husl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Synthetic dataset definitions. Each entry holds the make_classification\n",
    "# keyword arguments that differ between datasets; `datasets` is reused by\n",
    "# later cells, so its name and structure must stay stable.\n",
    "datasets = {\n",
    "    'linear_separable': {\n",
    "        'n_informative': 4, 'n_redundant': 1, 'class_sep': 1.5\n",
    "    },\n",
    "    'non_linear': {\n",
    "        'n_informative': 6, 'n_redundant': 0, 'class_sep': 0.8\n",
    "    }\n",
    "}\n",
    "\n",
    "# One panel per dataset, 7.5 in wide each (15 x 6 for the two entries above).\n",
    "# squeeze=False keeps `axes` an array even with a single dataset, so any\n",
    "# non-empty `datasets` dict works without touching the plotting code.\n",
    "n_panels = len(datasets)\n",
    "fig, axes = plt.subplots(1, n_panels, figsize=(7.5 * n_panels, 6), squeeze=False)\n",
    "axes = axes.ravel()\n",
    "\n",
    "for ax, (name, config) in zip(axes, datasets.items()):\n",
    "    X, y = make_classification(\n",
    "        n_samples=400, n_features=6,\n",
    "        n_informative=config['n_informative'],\n",
    "        n_redundant=config['n_redundant'],\n",
    "        class_sep=config['class_sep'],\n",
    "        random_state=42  # fixed seed so the figure is reproducible\n",
    "    )\n",
    "\n",
    "    # Scatter the first two feature columns, coloured by class label.\n",
    "    scatter = ax.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', alpha=0.7)\n",
    "    ax.set_title(f'Dataset: {name}\\n({config[\"n_informative\"]} features informativas)')\n",
    "    ax.set_xlabel('Feature 0')\n",
    "    ax.set_ylabel('Feature 1')\n",
    "    fig.colorbar(scatter, ax=ax)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Análise Estatística dos Datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Regenerate each dataset (same sample count, feature count and seed as the\n",
    "# plotting cell) and print its shape, class balance and feature configuration.\n",
    "for name, config in datasets.items():\n",
    "    X, y = make_classification(\n",
    "        n_samples=400, n_features=6,\n",
    "        n_informative=config['n_informative'],\n",
    "        n_redundant=config['n_redundant'],\n",
    "        class_sep=config['class_sep'],\n",
    "        random_state=42\n",
    "    )\n",
    "\n",
    "    # User-facing labels are Portuguese; keep them as real UTF-8 characters.\n",
    "    print(f\"\\n📊 Dataset: {name}\")\n",
    "    print(f\"   Shape: {X.shape}\")\n",
    "    print(f\"   Classes: {np.unique(y)} - Distribuição: {np.bincount(y)}\")\n",
    "    print(f\"   Features informativas: {config['n_informative']}\")\n",
    "    print(f\"   Features redundantes: {config['n_redundant']}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "formal-feature-attribution",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}