In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 📊 03 - Visualization Development: Airlines Dashboard\n",
    "## Desenvolvimento de Visualizações Interativas para Dashboard\n",
    "\n",
    "**Objetivo**: Criar visualizações interativas e impactantes para o dashboard Streamlit, focando na experiência do usuário e insights acionáveis.\n",
    "\n",
    "**Autor**: [Seu Nome]  \n",
    "**Data**: [AAAA-MM-DD]  \n",
    "**Versão**: 1.0\n",
    "\n",
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 🔧 Setup e Importações"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standard library\n",
    "import json\n",
    "import warnings\n",
    "\n",
    "# Core data / plotting stack\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import plotly.express as px\n",
    "import plotly.figure_factory as ff\n",
    "import plotly.graph_objects as go\n",
    "import seaborn as sns\n",
    "from plotly.subplots import make_subplots\n",
    "\n",
    "# Advanced analysis\n",
    "from scipy import stats\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# Silence library warnings globally to keep the notebook output readable.\n",
    "# NOTE(review): this also hides deprecation warnings; narrow the filter if they matter.\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Visualization settings\n",
    "plt.style.use('seaborn-v0_8')\n",
    "pd.set_option('display.max_columns', None)\n",
    "\n",
    "# Semantic color palette for the dashboard.\n",
    "# Every role has its own hue: 'warning' is amber so it can be told apart\n",
    "# from the orange used by 'secondary' when both appear in the same view.\n",
    "DASHBOARD_COLORS = {\n",
    "    'primary': '#1f77b4',\n",
    "    'secondary': '#ff7f0e',\n",
    "    'success': '#2ca02c',\n",
    "    'danger': '#d62728',\n",
    "    'warning': '#ffc107',\n",
    "    'info': '#17a2b8',\n",
    "    'light': '#f8f9fa',\n",
    "    'dark': '#343a40'\n",
    "}\n",
    "\n",
    "# Per-airline palette; 'Others' is the fallback for airlines not listed here\n",
    "AIRLINE_COLORS = {\n",
    "    'SpiceJet': '#FF6B6B',\n",
    "    'Vistara': '#4ECDC4',\n",
    "    'AirAsia': '#45B7D1',\n",
    "    'GO_FIRST': '#96CEB4',\n",
    "    'Indigo': '#FFEAA7',\n",
    "    'Air_India': '#DDA0DD',\n",
    "    'Others': '#95A5A6'\n",
    "}\n",
    "\n",
    "print(\"🎨 Configurações de visualização carregadas com sucesso!\")\n",
    "print(f\"🎨 Paleta principal: {list(DASHBOARD_COLORS.keys())}\")\n",
    "print(f\"✈️ Paleta airlines: {list(AIRLINE_COLORS.keys())}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 📥 Carregamento e Preparação dos Dados"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the feature-enriched dataset produced by the previous notebook\n",
    "df = pd.read_csv('../data/processed/flights_with_features.csv')\n",
    "\n",
    "# Load the statistical analysis results (json is imported in the setup cell).\n",
    "# The encoding is explicit so the file reads the same on every platform.\n",
    "with open('../data/processed/statistical_analysis_results.json', 'r', encoding='utf-8') as f:\n",
    "    stats_results = json.load(f)\n",
    "\n",
    "print(f\"✅ Dataset carregado: {df.shape[0]} linhas × {df.shape[1]} colunas\")\n",
    "print(\"📊 Features adicionais disponíveis: price_category, duration_category, efficiency_score, etc.\")\n",
    "print(f\"🔬 Resultados estatísticos carregados: {len(stats_results)} categorias de análise\")\n",
    "\n",
    "# Check that the engineered features used by the charts below are present\n",
    "new_features = ['price_category', 'duration_category', 'efficiency_score', 'is_direct', 'is_premium_time']\n",
    "print(\"\\n🔍 Verificação das novas features:\")\n",
    "for feature in new_features:\n",
    "    if feature not in df.columns:\n",
    "        print(f\"   ❌ {feature}: Não encontrada\")\n",
    "        continue\n",
    "    print(f\"   ✅ {feature}: {df[feature].dtype}\")\n",
    "    # For text columns also list the distinct labels\n",
    "    if df[feature].dtype == 'object':\n",
    "        print(f\"      Valores: {df[feature].unique()}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 📊 1. Visualizações de Overview Executivo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the data behind the dashboard's headline metric cards\n",
    "def create_metrics_cards():\n",
    "    \"\"\"Assemble the headline KPI cards shown at the top of the dashboard.\n",
    "\n",
    "    Reads the module-level ``df`` and ``DASHBOARD_COLORS``.\n",
    "\n",
    "    Returns:\n",
    "        dict: card id -> dict with ``value``, ``label``, ``icon`` and ``color``.\n",
    "        The ``avg_price`` card also carries ``delta``: the mean's relative\n",
    "        deviation from the median, formatted as a signed percentage string.\n",
    "    \"\"\"\n",
    "    prices = df['price']\n",
    "    mean_price = prices.mean()\n",
    "    median_price = prices.median()\n",
    "    pct_over_median = (mean_price - median_price) / median_price * 100\n",
    "    direct_share = df['is_direct'].mean() * 100\n",
    "    mean_duration = df['duration'].mean()\n",
    "    price_spread = prices.max() - prices.min()\n",
    "\n",
    "    return {\n",
    "        'total_flights': dict(\n",
    "            value=len(df),\n",
    "            label='Total de Voos',\n",
    "            icon='✈️',\n",
    "            color=DASHBOARD_COLORS['primary'],\n",
    "        ),\n",
    "        'avg_price': dict(\n",
    "            value=f\"₹{mean_price:,.0f}\",\n",
    "            label='Preço Médio',\n",
    "            icon='💰',\n",
    "            color=DASHBOARD_COLORS['success'],\n",
    "            delta=f\"{pct_over_median:+.1f}%\",\n",
    "        ),\n",
    "        'airlines_count': dict(\n",
    "            value=df['airline'].nunique(),\n",
    "            label='Companhias',\n",
    "            icon='🏢',\n",
    "            color=DASHBOARD_COLORS['info'],\n",
    "        ),\n",
    "        'direct_flights_pct': dict(\n",
    "            value=f\"{direct_share:.1f}%\",\n",
    "            label='Voos Diretos',\n",
    "            icon='🎯',\n",
    "            color=DASHBOARD_COLORS['warning'],\n",
    "        ),\n",
    "        'avg_duration': dict(\n",
    "            value=f\"{mean_duration:.1f}h\",\n",
    "            label='Duração Média',\n",
    "            icon='⏱️',\n",
    "            color=DASHBOARD_COLORS['secondary'],\n",
    "        ),\n",
    "        'price_range': dict(\n",
    "            value=f\"₹{price_spread:,.0f}\",\n",
    "            label='Amplitude de Preços',\n",
    "            icon='📊',\n",
    "            color=DASHBOARD_COLORS['danger'],\n",
    "        ),\n",
    "    }\n",
    "\n",
    "# Generate the metrics and print a one-line summary per card\n",
    "dashboard_metrics = create_metrics_cards()\n",
    "\n",
    "print(\"📈 MÉTRICAS PRINCIPAIS DO DASHBOARD:\")\n",
    "print(\"=\" * 50)\n",
    "for metric in dashboard_metrics.values():\n",
    "    # Only cards that define a delta get the parenthesised suffix\n",
    "    suffix = f\" ({metric['delta']})\" if 'delta' in metric else \"\"\n",
    "    print(f\"{metric['icon']} {metric['label']}: {metric['value']}{suffix}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chart 1: Price distribution with mean/median markers\n",
    "def create_price_distribution_chart():\n",
    "    \"\"\"Build a two-row figure describing how ticket prices are distributed.\n",
    "\n",
    "    Row 1 is a 30-bin histogram of ``df['price']`` with vertical lines at the\n",
    "    mean and the median. Row 2 is a bar chart of flight counts per\n",
    "    ``price_category``.\n",
    "\n",
    "    Bars are ordered by each category's mean price (cheapest first), so the\n",
    "    success / warning / danger colors always track the price level instead of\n",
    "    whichever category happens to be the most frequent. The palette is cycled\n",
    "    when there are more than three categories.\n",
    "\n",
    "    Reads the module-level ``df`` and ``DASHBOARD_COLORS``.\n",
    "\n",
    "    Returns:\n",
    "        plotly.graph_objects.Figure: the assembled figure.\n",
    "    \"\"\"\n",
    "\n",
    "    # The first row keeps a secondary y-axis available; no trace uses it yet.\n",
    "    fig = make_subplots(\n",
    "        rows=2, cols=1,\n",
    "        subplot_titles=['Distribuição de Preços', 'Distribuição por Categoria'],\n",
    "        vertical_spacing=0.15,\n",
    "        specs=[[{\"secondary_y\": True}], [{\"type\": \"bar\"}]]\n",
    "    )\n",
    "\n",
    "    # Main histogram\n",
    "    fig.add_trace(\n",
    "        go.Histogram(\n",
    "            x=df['price'],\n",
    "            nbinsx=30,\n",
    "            name='Frequência',\n",
    "            opacity=0.7,\n",
    "            marker_color=DASHBOARD_COLORS['primary']\n",
    "        ),\n",
    "        row=1, col=1\n",
    "    )\n",
    "\n",
    "    # Mean and median reference lines\n",
    "    mean_price = df['price'].mean()\n",
    "    median_price = df['price'].median()\n",
    "\n",
    "    fig.add_vline(\n",
    "        x=mean_price,\n",
    "        line_dash=\"dash\",\n",
    "        line_color=DASHBOARD_COLORS['danger'],\n",
    "        annotation_text=f\"Média: ₹{mean_price:,.0f}\",\n",
    "        row=1, col=1\n",
    "    )\n",
    "\n",
    "    fig.add_vline(\n",
    "        x=median_price,\n",
    "        line_dash=\"dot\",\n",
    "        line_color=DASHBOARD_COLORS['success'],\n",
    "        annotation_text=f\"Mediana: ₹{median_price:,.0f}\",\n",
    "        row=1, col=1\n",
    "    )\n",
    "\n",
    "    # Flights per price category, ordered from cheapest to most expensive\n",
    "    category_order = (\n",
    "        df.groupby('price_category', observed=True)['price']\n",
    "        .mean()\n",
    "        .sort_values()\n",
    "        .index\n",
    "    )\n",
    "    price_cat_counts = df['price_category'].value_counts().reindex(category_order)\n",
    "\n",
    "    level_palette = [\n",
    "        DASHBOARD_COLORS['success'],\n",
    "        DASHBOARD_COLORS['warning'],\n",
    "        DASHBOARD_COLORS['danger']\n",
    "    ]\n",
    "    bar_colors = [level_palette[i % len(level_palette)] for i in range(len(price_cat_counts))]\n",
    "\n",
    "    fig.add_trace(\n",
    "        go.Bar(\n",
    "            x=price_cat_counts.index,\n",
    "            y=price_cat_counts.values,\n",
    "            name='Voos por Categoria',\n",
    "            marker_color=bar_colors,\n",
    "            text=price_cat_counts.values,\n",
    "            textposition='auto'\n",
    "        ),\n",
    "        row=2, col=1\n",
    "    )\n",
    "\n",
    "    # Layout\n",
    "    fig.update_layout(\n",
    "        height=700,\n",
    "        title_text=\"📊 Análise de Distribuição de Preços\",\n",
    "        title_x=0.5,\n",
    "        showlegend=True,\n",
    "        template=\"plotly_white\"\n",
    "    )\n",
    "\n",
    "    fig.update_xaxes(title_text=\"Preço (₹)\", row=1, col=1)\n",
    "    fig.update_yaxes(title_text=\"Frequência\", row=1, col=1)\n",
    "    fig.update_xaxes(title_text=\"Categoria de Preço\", row=2, col=1)\n",
    "    fig.update_yaxes(title_text=\"Número de Voos\", row=2, col=1)\n",
    "\n",
    "    return fig\n",
    "\n",
    "# Build and display the chart\n",
    "price_distribution_fig = create_price_distribution_chart()\n",
    "price_distribution_fig.show()\n",
    "\n",
    "print(\"✅ Gráfico de distribuição de preços criado\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chart 2: Interactive correlation heatmap\n",
    "def create_correlation_heatmap():\n",
    "    \"\"\"Render the pairwise correlation matrix of the key numeric features.\n",
    "\n",
    "    Correlations come from ``DataFrame.corr()`` over price, duration,\n",
    "    efficiency_score, is_direct and is_premium_time in the module-level ``df``.\n",
    "\n",
    "    Returns:\n",
    "        plotly.graph_objects.Figure: heatmap with each coefficient printed in its cell.\n",
    "    \"\"\"\n",
    "    key_columns = ['price', 'duration', 'efficiency_score', 'is_direct', 'is_premium_time']\n",
    "    corr_matrix = df[key_columns].corr()\n",
    "    labels = corr_matrix.columns\n",
    "\n",
    "    heatmap = go.Heatmap(\n",
    "        z=corr_matrix.values,\n",
    "        x=labels,\n",
    "        y=labels,\n",
    "        colorscale='RdBu',\n",
    "        zmid=0,  # center the diverging scale on zero correlation\n",
    "        text=corr_matrix.round(3).values,\n",
    "        texttemplate='%{text}',\n",
    "        textfont=dict(size=12),\n",
    "        hoverongaps=False,\n",
    "        hovertemplate='<b>%{y} vs %{x}</b><br>Correlação: %{z:.3f}<extra></extra>'\n",
    "    )\n",
    "\n",
    "    fig = go.Figure(data=heatmap)\n",
    "    fig.update_layout(\n",
    "        title=dict(text='🔗 Matriz de Correlação - Variáveis Chave', x=0.5, font=dict(size=16)),\n",
    "        height=500,\n",
    "        template='plotly_white'\n",
    "    )\n",
    "    return fig\n",
    "\n",
    "# Build and display the heatmap\n",
    "correlation_heatmap = create_correlation_heatmap()\n",
    "correlation_heatmap.show()\n",
    "\n",
    "print(\"✅ Heatmap de correlação criado\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 🏢 2. Visualizações de Análise por Airlines"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chart 3: Multidimensional airline comparison\n",
    "def create_airline_comparison_dashboard():\n",
    "    \"\"\"Build a 2x2 dashboard comparing airlines on price, share and efficiency.\n",
    "\n",
    "    Panels:\n",
    "        1. Mean price per airline (error bars = price standard deviation).\n",
    "        2. Market share pie, based on the number of flights.\n",
    "        3. Bubble chart of mean duration vs. mean efficiency score; bubble\n",
    "           AREA is proportional to the number of flights.\n",
    "        4. Number of direct flights per airline.\n",
    "\n",
    "    Reads the module-level ``df`` and ``AIRLINE_COLORS``.\n",
    "\n",
    "    Returns:\n",
    "        tuple: ``(fig, airline_stats)`` - the Plotly figure and the per-airline\n",
    "        aggregate DataFrame it was built from.\n",
    "    \"\"\"\n",
    "\n",
    "    def _airline_palette(names):\n",
    "        \"\"\"Map airline names to colors, falling back to the 'Others' color.\"\"\"\n",
    "        return [AIRLINE_COLORS.get(name, AIRLINE_COLORS['Others']) for name in names]\n",
    "\n",
    "    # Aggregate one row of statistics per airline\n",
    "    airline_stats = df.groupby('airline').agg({\n",
    "        'price': ['mean', 'std', 'count'],\n",
    "        'duration': 'mean',\n",
    "        'efficiency_score': 'mean',\n",
    "        'is_direct': 'sum'\n",
    "    }).round(2)\n",
    "\n",
    "    airline_stats.columns = ['Price_Mean', 'Price_Std', 'Flight_Count', 'Duration_Mean', 'Efficiency_Mean', 'Direct_Flights']\n",
    "    airline_stats = airline_stats.reset_index()\n",
    "\n",
    "    # Market share as a percentage of all counted flights\n",
    "    airline_stats['Market_Share'] = (airline_stats['Flight_Count'] / airline_stats['Flight_Count'].sum() * 100).round(1)\n",
    "\n",
    "    airline_colors = _airline_palette(airline_stats['airline'])\n",
    "\n",
    "    # Subplot grid\n",
    "    fig = make_subplots(\n",
    "        rows=2, cols=2,\n",
    "        subplot_titles=[\n",
    "            '💰 Preço Médio por Airline',\n",
    "            '📊 Market Share (%)',\n",
    "            '⚡ Eficiência (₹/hora)',\n",
    "            '🎯 Voos Diretos Oferecidos'\n",
    "        ],\n",
    "        specs=[\n",
    "            [{\"type\": \"bar\"}, {\"type\": \"pie\"}],\n",
    "            [{\"type\": \"scatter\"}, {\"type\": \"bar\"}]\n",
    "        ]\n",
    "    )\n",
    "\n",
    "    # 1. Mean price per airline, most expensive first\n",
    "    airline_stats_sorted = airline_stats.sort_values('Price_Mean', ascending=False)\n",
    "\n",
    "    fig.add_trace(\n",
    "        go.Bar(\n",
    "            x=airline_stats_sorted['airline'],\n",
    "            y=airline_stats_sorted['Price_Mean'],\n",
    "            name='Preço Médio',\n",
    "            marker_color=_airline_palette(airline_stats_sorted['airline']),\n",
    "            text=[f'₹{x:,.0f}' for x in airline_stats_sorted['Price_Mean']],\n",
    "            textposition='auto',\n",
    "            error_y=dict(\n",
    "                type='data',\n",
    "                array=airline_stats_sorted['Price_Std'],\n",
    "                visible=True\n",
    "            )\n",
    "        ),\n",
    "        row=1, col=1\n",
    "    )\n",
    "\n",
    "    # 2. Market share\n",
    "    fig.add_trace(\n",
    "        go.Pie(\n",
    "            labels=airline_stats['airline'],\n",
    "            values=airline_stats['Market_Share'],\n",
    "            name=\"Market Share\",\n",
    "            marker_colors=airline_colors,\n",
    "            textinfo='label+percent',\n",
    "            hovertemplate='<b>%{label}</b><br>Participação: %{value}%<br>Voos: %{customdata}<extra></extra>',\n",
    "            customdata=airline_stats['Flight_Count']\n",
    "        ),\n",
    "        row=1, col=2\n",
    "    )\n",
    "\n",
    "    # 3. Bubble chart: efficiency vs duration.\n",
    "    # Marker sizes are pixel values by default, so raw flight counts would give\n",
    "    # bubbles far larger than the plot. Scale by area instead, capping the\n",
    "    # largest bubble at max_bubble_px (Plotly's recommended sizeref formula).\n",
    "    max_bubble_px = 60\n",
    "    flight_counts = airline_stats['Flight_Count']\n",
    "    bubble_sizeref = 2.0 * max(flight_counts.max(), 1) / (max_bubble_px ** 2)\n",
    "\n",
    "    fig.add_trace(\n",
    "        go.Scatter(\n",
    "            x=airline_stats['Duration_Mean'],\n",
    "            y=airline_stats['Efficiency_Mean'],\n",
    "            mode='markers+text',\n",
    "            name='Eficiência',\n",
    "            text=airline_stats['airline'],\n",
    "            textposition='top center',\n",
    "            marker=dict(\n",
    "                size=flight_counts,\n",
    "                sizemode='area',\n",
    "                sizeref=bubble_sizeref,\n",
    "                sizemin=4,\n",
    "                color=airline_colors,\n",
    "                opacity=0.7,\n",
    "                line=dict(width=2, color='white')\n",
    "            ),\n",
    "            # The real flight count travels in customdata so the hover shows it\n",
    "            # unscaled.\n",
    "            customdata=flight_counts,\n",
    "            hovertemplate='<b>%{text}</b><br>Duração Média: %{x:.1f}h<br>Eficiência: ₹%{y:.0f}/h<br>Voos: %{customdata}<extra></extra>'\n",
    "        ),\n",
    "        row=2, col=1\n",
    "    )\n",
    "\n",
    "    # 4. Direct flights per airline\n",
    "    fig.add_trace(\n",
    "        go.Bar(\n",
    "            x=airline_stats['airline'],\n",
    "            y=airline_stats['Direct_Flights'],\n",
    "            name='Voos Diretos',\n",
    "            marker_color=airline_colors,\n",
    "            text=airline_stats['Direct_Flights'],\n",
    "            textposition='auto'\n",
    "        ),\n",
    "        row=2, col=2\n",
    "    )\n",
    "\n",
    "    # Overall layout\n",
    "    fig.update_layout(\n",
    "        height=800,\n",
    "        title_text=\"🏢 Dashboard Comparativo - Airlines\",\n",
    "        title_x=0.5,\n",
    "        showlegend=False,\n",
    "        template=\"plotly_white\"\n",
    "    )\n",
    "\n",
    "    # Axis titles (the pie panel has no axes)\n",
    "    fig.update_xaxes(title_text=\"Airline\", row=1, col=1, tickangle=-45)\n",
    "    fig.update_yaxes(title_text=\"Preço Médio (₹)\", row=1, col=1)\n",
    "\n",
    "    fig.update_xaxes(title_text=\"Duração Média (h)\", row=2, col=1)\n",
    "    fig.update_yaxes(title_text=\"Eficiência (₹/h)\", row=2, col=1)\n",
    "\n",
    "    fig.update_xaxes(title_text=\"Airline\", row=2, col=2, tickangle=-45)\n",
    "    fig.update_yaxes(title_text=\"Número de Voos Diretos\", row=2, col=2)\n",
    "\n",
    "    return fig, airline_stats\n",
    "\n",
    "# Build the airline dashboard\n",
    "airline_dashboard, airline_data = create_airline_comparison_dashboard()\n",
    "airline_dashboard.show()\n",
    "\n",
    "# Show the aggregated data behind the charts\n",
    "print(\"📊 RESUMO DAS AIRLINES:\")\n",
    "print(airline_data.round(1))\n",
    "\n",
    "print(\"\\n✅ Dashboard comparativo de airlines criado\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chart 4: Interactive violin plot with embedded box plot\n",
    "def create_advanced_price_distribution():\n",
    "    \"\"\"Draw one price violin per airline plus an overall mean reference line.\n",
    "\n",
    "    Each violin shows its inner box plot and mean line and is filled with the\n",
    "    airline's color from ``AIRLINE_COLORS`` (falling back to 'Others').\n",
    "\n",
    "    Reads the module-level ``df``, ``AIRLINE_COLORS`` and ``DASHBOARD_COLORS``.\n",
    "\n",
    "    Returns:\n",
    "        plotly.graph_objects.Figure: the violin figure.\n",
    "    \"\"\"\n",
    "\n",
    "    fig = go.Figure()\n",
    "\n",
    "    # One violin trace per airline\n",
    "    for airline in df['airline'].unique():\n",
    "        airline_prices = df.loc[df['airline'] == airline, 'price']\n",
    "\n",
    "        fig.add_trace(go.Violin(\n",
    "            y=airline_prices,\n",
    "            name=airline,\n",
    "            box_visible=True,\n",
    "            meanline_visible=True,\n",
    "            fillcolor=AIRLINE_COLORS.get(airline, AIRLINE_COLORS['Others']),\n",
    "            opacity=0.6,\n",
    "            x0=airline\n",
    "        ))\n",
    "\n",
    "    # Overall mean price as a horizontal reference line\n",
    "    overall_mean_price = df['price'].mean()\n",
    "    fig.add_hline(\n",
    "        y=overall_mean_price,\n",
    "        line_dash=\"dash\",\n",
    "        line_color=DASHBOARD_COLORS['danger'],\n",
    "        annotation_text=f\"Média Geral: ₹{overall_mean_price:,.0f}\",\n",
    "        annotation_position=\"top right\"\n",
    "    )\n",
    "\n",
    "    fig.update_layout(\n",
    "        title={\n",
    "            'text': '🎻 Distribuição de Preços por Airline',\n",
    "            'x': 0.5\n",
    "        },\n",
    "        xaxis_title=\"Companhia Aérea\",\n",
    "        yaxis_title=\"Preço (₹)\",\n",
    "        height=600,\n",
    "        showlegend=False,\n",
    "        template=\"plotly_white\"\n",
    "    )\n",
    "\n",
    "    return fig\n",
    "\n",
    "# Build and display the violin plot\n",
    "advanced_price_fig = create_advanced_price_distribution()\n",
    "advanced_price_fig.show()\n",
    "\n",
    "print(\"✅ Violin plot de distribuição de preços criado\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ⏰ 3. Visualizações de Análise Temporal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chart 5: Temporal heatmap - mean price by departure period and airline\n",
    "def create_temporal_heatmap():\n",
    "    \"\"\"Build a heatmap of mean ticket price per airline and departure period.\n",
    "\n",
    "    Columns follow the chronological ``time_order`` below. Departure periods\n",
    "    present in the data but missing from that list are appended at the end\n",
    "    instead of being dropped. Airline/period combinations with no flights are\n",
    "    left as gaps with an empty label.\n",
    "\n",
    "    Reads the module-level ``df``.\n",
    "\n",
    "    Returns:\n",
    "        tuple: ``(fig, pivot_data)`` - the Plotly figure and the airline x\n",
    "        departure-period table of rounded mean prices.\n",
    "    \"\"\"\n",
    "\n",
    "    # Pivot: rows = airline, columns = departure period, values = mean price\n",
    "    pivot_data = df.pivot_table(\n",
    "        values='price',\n",
    "        index='airline',\n",
    "        columns='departure_time',\n",
    "        aggfunc='mean'\n",
    "    ).round(0)\n",
    "\n",
    "    # Chronological column order; unknown periods go last so nothing is lost\n",
    "    time_order = ['Early_Morning', 'Morning', 'Afternoon', 'Evening', 'Night']\n",
    "    known_times = [t for t in time_order if t in pivot_data.columns]\n",
    "    other_times = [t for t in pivot_data.columns if t not in time_order]\n",
    "    pivot_data = pivot_data[known_times + other_times]\n",
    "\n",
    "    # Cell labels are built as strings: casting a NaN cell to int would print\n",
    "    # a meaningless number, so empty combinations get an empty label.\n",
    "    cell_labels = [\n",
    "        ['' if pd.isna(value) else f'₹{value:.0f}' for value in row]\n",
    "        for row in pivot_data.values\n",
    "    ]\n",
    "\n",
    "    fig = go.Figure(data=go.Heatmap(\n",
    "        z=pivot_data.values,\n",
    "        x=pivot_data.columns,\n",
    "        y=pivot_data.index,\n",
    "        colorscale='Viridis',\n",
    "        text=cell_labels,\n",
    "        texttemplate='%{text}',\n",
    "        textfont={\"size\": 10},\n",
    "        hoverongaps=False,\n",
    "        hovertemplate='<b>%{y}</b><br>Horário: %{x}<br>Preço Médio: ₹%{z:.0f}<extra></extra>',\n",
    "        # 'title.side' replaces the deprecated 'titleside' alias, which newer\n",
    "        # Plotly releases reject.\n",
    "        colorbar=dict(\n",
    "            title=dict(text=\"Preço Médio (₹)\", side=\"right\")\n",
    "        )\n",
    "    ))\n",
    "\n",
    "    fig.update_layout(\n",
    "        title={\n",
    "            'text': '🕐 Heatmap Temporal - Preços por Horário e Airline',\n",
    "            'x': 0.5\n",
    "        },\n",
    "        xaxis_title=\"Horário de Partida\",\n",
    "        yaxis_title=\"Companhia Aérea\",\n",
    "        height=500,\n",
    "        template=\"plotly_white\"\n",
    "    )\n",
    "\n",
    "    return fig, pivot_data\n",
    "\n",
    "# Build and display the temporal heatmap\n",
    "temporal_heatmap, temporal_data = create_temporal_heatmap()\n",
    "temporal_heatmap.show()\n",
    "\n",
    "print(\"📊 DADOS DO HEATMAP TEMPORAL:\")\n",
    "print(temporal_data)\n",
    "print(\"\\n✅ Heatmap temporal criado\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chart 6: Temporal pattern analysis\n",
    "def create_temporal_patterns_analysis():\n",
    "    \"\"\"Build a 2x2 figure describing how flights vary by departure period.\n",
    "\n",
    "    Panels:\n",
    "        1. Mean price (line, error bars = price std) with flight volume as\n",
    "           translucent bars on a secondary y-axis.\n",
    "        2. Number of flights.\n",
    "        3. Mean duration.\n",
    "        4. Percentage of direct flights.\n",
    "\n",
    "    Reads the module-level ``df`` and ``DASHBOARD_COLORS``.\n",
    "\n",
    "    Returns:\n",
    "        tuple: ``(fig, time_stats)`` - the Plotly figure and the\n",
    "        per-departure-period aggregate DataFrame.\n",
    "    \"\"\"\n",
    "\n",
    "    # Aggregate per departure period. Rounding is deferred until after the\n",
    "    # percentage conversion so Direct_Pct keeps one real decimal place.\n",
    "    time_stats = df.groupby('departure_time').agg({\n",
    "        'price': ['mean', 'std', 'count'],\n",
    "        'duration': 'mean',\n",
    "        'is_direct': ['sum', 'mean']\n",
    "    })\n",
    "\n",
    "    time_stats.columns = ['Price_Mean', 'Price_Std', 'Flight_Count', 'Duration_Mean', 'Direct_Count', 'Direct_Pct']\n",
    "    time_stats = time_stats.reset_index()\n",
    "    time_stats['Direct_Pct'] = (time_stats['Direct_Pct'] * 100).round(1)\n",
    "\n",
    "    rounded_columns = ['Price_Mean', 'Price_Std', 'Duration_Mean']\n",
    "    time_stats[rounded_columns] = time_stats[rounded_columns].round(2)\n",
    "\n",
    "    # Chronological ordering; periods missing from time_order map to NaN and\n",
    "    # therefore sort last.\n",
    "    time_order = ['Early_Morning', 'Morning', 'Afternoon', 'Evening', 'Night']\n",
    "    time_stats['time_order'] = time_stats['departure_time'].map({t: i for i, t in enumerate(time_order)})\n",
    "    time_stats = time_stats.sort_values('time_order')\n",
    "\n",
    "    # Subplot grid\n",
    "    fig = make_subplots(\n",
    "        rows=2, cols=2,\n",
    "        subplot_titles=[\n",
    "            '💰 Preço Médio por Horário',\n",
    "            '📊 Volume de Voos',\n",
    "            '⏱️ Duração Média',\n",
    "            '🎯 Percentual de Voos Diretos'\n",
    "        ],\n",
    "        specs=[[{\"secondary_y\": True}, {\"type\": \"bar\"}],\n",
    "               [{\"type\": \"bar\"}, {\"type\": \"bar\"}]]\n",
    "    )\n",
    "\n",
    "    # 1. Mean price with error bars\n",
    "    fig.add_trace(\n",
    "        go.Scatter(\n",
    "            x=time_stats['departure_time'],\n",
    "            y=time_stats['Price_Mean'],\n",
    "            mode='lines+markers',\n",
    "            name='Preço Médio',\n",
    "            line=dict(color=DASHBOARD_COLORS['primary'], width=3),\n",
    "            marker=dict(size=8),\n",
    "            error_y=dict(\n",
    "                type='data',\n",
    "                array=time_stats['Price_Std'],\n",
    "                visible=True\n",
    "            )\n",
    "        ),\n",
    "        row=1, col=1\n",
    "    )\n",
    "\n",
    "    # Flight volume overlay; secondary_y=True lets make_subplots pick the axis\n",
    "    fig.add_trace(\n",
    "        go.Bar(\n",
    "            x=time_stats['departure_time'],\n",
    "            y=time_stats['Flight_Count'],\n",
    "            name='Volume',\n",
    "            opacity=0.3,\n",
    "            marker_color=DASHBOARD_COLORS['secondary']\n",
    "        ),\n",
    "        row=1, col=1, secondary_y=True\n",
    "    )\n",
    "\n",
    "    # 2. Flight volume\n",
    "    fig.add_trace(\n",
    "        go.Bar(\n",
    "            x=time_stats['departure_time'],\n",
    "            y=time_stats['Flight_Count'],\n",
    "            name='Volume de Voos',\n",
    "            marker_color=DASHBOARD_COLORS['info'],\n",
    "            text=time_stats['Flight_Count'],\n",
    "            textposition='auto'\n",
    "        ),\n",
    "        row=1, col=2\n",
    "    )\n",
    "\n",
    "    # 3. Mean duration\n",
    "    fig.add_trace(\n",
    "        go.Bar(\n",
    "            x=time_stats['departure_time'],\n",
    "            y=time_stats['Duration_Mean'],\n",
    "            name='Duração Média',\n",
    "            marker_color=DASHBOARD_COLORS['warning'],\n",
    "            text=[f'{x:.1f}h' for x in time_stats['Duration_Mean']],\n",
    "            textposition='auto'\n",
    "        ),\n",
    "        row=2, col=1\n",
    "    )\n",
    "\n",
    "    # 4. Percentage of direct flights\n",
    "    fig.add_trace(\n",
    "        go.Bar(\n",
    "            x=time_stats['departure_time'],\n",
    "            y=time_stats['Direct_Pct'],\n",
    "            name='% Voos Diretos',\n",
    "            marker_color=DASHBOARD_COLORS['success'],\n",
    "            text=[f'{x:.1f}%' for x in time_stats['Direct_Pct']],\n",
    "            textposition='auto'\n",
    "        ),\n",
    "        row=2, col=2\n",
    "    )\n",
    "\n",
    "    # Layout\n",
    "    fig.update_layout(\n",
    "        height=800,\n",
    "        title_text=\"⏰ Análise Completa de Padrões Temporais\",\n",
    "        title_x=0.5,\n",
    "        showlegend=False,\n",
    "        template=\"plotly_white\"\n",
    "    )\n",
    "\n",
    "    # Axis formatting\n",
    "    fig.update_xaxes(tickangle=-45)\n",
    "    fig.update_yaxes(title_text=\"Preço Médio (₹)\", row=1, col=1)\n",
    "    fig.update_yaxes(title_text=\"Volume\", row=1, col=1, secondary_y=True)\n",
    "    fig.update_yaxes(title_text=\"Número de Voos\", row=1, col=2)\n",
    "    fig.update_yaxes(title_text=\"Duração (horas)\", row=2, col=1)\n",
    "    fig.update_yaxes(title_text=\"Percentual (%)\", row=2, col=2)\n",
    "\n",
    "    return fig, time_stats\n",
    "\n",
    "# Build and display the temporal analysis\n",
    "temporal_patterns, time_data = create_temporal_patterns_analysis()\n",
    "temporal_patterns.show()\n",
    "\n",
    "print(\"📊 DADOS TEMPORAIS DETALHADOS:\")\n",
    "print(time_data[['departure_time', 'Price_Mean', 'Flight_Count', 'Duration_Mean', 'Direct_Pct']])\n",
    "print(\"\\n✅ Análise de padrões temporais criada\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 🎯 4. Visualizações de Segmentação de Mercado"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chart 7: Clustering analysis (market segmentation)\n",
    "def create_market_segmentation_analysis():\n",
    "    \"\"\"Segment flights with K-means and plot the clusters in 3D.\n",
    "\n",
    "    Flights are clustered (k=4, fixed random_state) on standardized price,\n",
    "    duration and efficiency_score. Rows with a missing or infinite value in\n",
    "    any of those features are excluded, because KMeans cannot handle them.\n",
    "\n",
    "    Each cluster is labelled from its mean price and duration relative to the\n",
    "    overall price quartiles and median duration. Several clusters can receive\n",
    "    the same label; they then share one entry in ``cluster_name`` counts but\n",
    "    remain separate traces in the figure.\n",
    "\n",
    "    Reads the module-level ``df`` and ``DASHBOARD_COLORS``.\n",
    "\n",
    "    Returns:\n",
    "        tuple: ``(fig, df_clustered, cluster_stats)`` - the 3D scatter figure,\n",
    "        a copy of the clustered rows with ``cluster`` and ``cluster_name``\n",
    "        columns, and the per-cluster feature means.\n",
    "    \"\"\"\n",
    "\n",
    "    # Feature matrix, restricted to rows where every feature is finite\n",
    "    features_for_clustering = ['price', 'duration', 'efficiency_score']\n",
    "    features = df[features_for_clustering].replace([np.inf, -np.inf], np.nan)\n",
    "    valid_rows = features.notna().all(axis=1)\n",
    "    X = features[valid_rows]\n",
    "\n",
    "    # Standardize so no single feature dominates the distance metric\n",
    "    scaler = StandardScaler()\n",
    "    X_scaled = scaler.fit_transform(X)\n",
    "\n",
    "    # K-means\n",
    "    kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)\n",
    "    clusters = kmeans.fit_predict(X_scaled)\n",
    "\n",
    "    # Attach cluster ids to a copy of the clustered rows\n",
    "    df_clustered = df[valid_rows].copy()\n",
    "    df_clustered['cluster'] = clusters\n",
    "\n",
    "    # Name clusters from their mean characteristics; the reference thresholds\n",
    "    # do not depend on the cluster, so they are computed once.\n",
    "    cluster_stats = df_clustered.groupby('cluster')[features_for_clustering].mean()\n",
    "    high_price = df['price'].quantile(0.75)\n",
    "    low_price = df['price'].quantile(0.25)\n",
    "    median_duration = df['duration'].median()\n",
    "\n",
    "    cluster_names = {}\n",
    "    for i, row in cluster_stats.iterrows():\n",
    "        if row['price'] > high_price:\n",
    "            if row['duration'] < median_duration:\n",
    "                cluster_names[i] = 'Premium Express'\n",
    "            else:\n",
    "                cluster_names[i] = 'Premium Standard'\n",
    "        elif row['price'] < low_price:\n",
    "            cluster_names[i] = 'Budget'\n",
    "        else:\n",
    "            cluster_names[i] = 'Mid-Range'\n",
    "\n",
    "    df_clustered['cluster_name'] = df_clustered['cluster'].map(cluster_names)\n",
    "\n",
    "    # 3D scatter, one trace per cluster\n",
    "    fig = go.Figure()\n",
    "\n",
    "    cluster_colors = [DASHBOARD_COLORS['success'], DASHBOARD_COLORS['info'],\n",
    "                     DASHBOARD_COLORS['warning'], DASHBOARD_COLORS['danger']]\n",
    "\n",
    "    for i, cluster_name in cluster_names.items():\n",
    "        cluster_data = df_clustered[df_clustered['cluster'] == i]\n",
    "\n",
    "        # Vectorized hover labels (\"<airline> <flight>\") instead of a\n",
    "        # row-by-row Python loop, which is very slow on large frames.\n",
    "        hover_labels = cluster_data['airline'].astype(str) + ' ' + cluster_data['flight'].astype(str)\n",
    "\n",
    "        fig.add_trace(go.Scatter3d(\n",
    "            x=cluster_data['price'],\n",
    "            y=cluster_data['duration'],\n",
    "            z=cluster_data['efficiency_score'],\n",
    "            mode='markers',\n",
    "            name=cluster_name,\n",
    "            marker=dict(\n",
    "                size=5,\n",
    "                color=cluster_colors[i],\n",
    "                opacity=0.7,\n",
    "                line=dict(width=0.5, color='white')\n",
    "            ),\n",
    "            hovertemplate='<b>%{text}</b><br>' +\n",
    "                         'Preço: ₹%{x:.0f}<br>' +\n",
    "                         'Duração: %{y:.1f}h<br>' +\n",
    "                         'Eficiência: ₹%{z:.0f}/h<br>' +\n",
    "                         '<extra></extra>',\n",
    "            text=hover_labels\n",
    "        ))\n",
    "\n",
    "    # 3D layout\n",
    "    fig.update_layout(\n",
    "        title={\n",
    "            'text': '🎯 Segmentação de Mercado - Análise de Clusters 3D',\n",
    "            'x': 0.5\n",
    "        },\n",
    "        scene=dict(\n",
    "            xaxis_title='Preço (₹)',\n",
    "            yaxis_title='Duração (horas)',\n",
    "            zaxis_title='Eficiência (₹/hora)'\n",
    "        ),\n",
    "        height=700,\n",
    "        template=\"plotly_white\"\n",
    "    )\n",
    "\n",
    "    return fig, df_clustered, cluster_stats\n",
    "\n",
    "# Run the clustering analysis\n",
    "clustering_fig, df_with_clusters, cluster_summary = create_market_segmentation_analysis()\n",
    "clustering_fig.show()\n",
    "\n",
    "print(\"📊 RESUMO DOS CLUSTERS (SEGMENTOS DE MERCADO):\")\n",
    "print(cluster_summary.round(1))\n",
    "\n",
    "# Share of flights in each named segment\n",
    "cluster_distribution = df_with_clusters['cluster_name'].value_counts()\n",
    "print(\"\\n🎯 DISTRIBUIÇÃO DOS SEGMENTOS:\")\n",
    "for segment, count in cluster_distribution.items():\n",
    "    pct = count / len(df_with_clusters) * 100\n",
    "    print(f\"   {segment}: {count} voos ({pct:.1f}%)\")\n",
    "\n",
    "print(\"\\n✅ Análise de segmentação de mercado criada\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gráfico 8: Matriz de Comparação de Segmentos\n",
    "def create_segment_comparison_matrix():\n",
    "    \"\"\"Criar matriz de comparação entre segmentos\"\"\"\n",
    "    \n",
    "    # Análise por categoria de preço e duração\n",
    "    segment_analysis = df.groupby(['price_category', 'duration_category']).agg({\n",
    "        'price': ['count', 'mean'],\n",
    "        'efficiency_score': 'mean',\n",
    "        'is_direct': 'mean'\n",
    "    }).round(2)\n",
    "    \n",
    "    segment_analysis.columns = ['Flight_Count', 'Avg_Price', 'Avg_Efficiency', 'Direct_Rate']\n",
    "    segment_analysis = segment_analysis.reset_index()\n",
    "    \n",
    "    # Criar heatmap de contagem\n",
    "    count_matrix = df.pivot_table(\n",
    "        values='price',\n",
    "        index='price_category',\n",
    "        columns='duration_category',\n",
    "        aggfunc='count',\n",
    "        fill_value=0\n",
    "    )\n",
    "    \n",
    "    # Subplots para diferentes métricas\n",
    "    fig = make_subplots(\n",
    "        rows=2, cols=2,\n",
    "        subplot_titles=[\n",
    "            '📊 Volume de Voos por Segmento',\n",
    "            '💰 Preço Médio por Segmento',\n",
    "            '⚡ Eficiência por Segmento',\n",
    "            '🎯 Taxa de Voos Diretos por Segmento'\n",
    "        ],\n",
    "        specs=[[{\"type\": \"heatmap\"}, {\"type\": \"heatmap\"}],\n",
    "               [{\"type\": \"heatmap\"}, {\"type\": \"heatmap\"}]]\n",
    "    )\n",
    "    \n",
    "    # 1. Volume de voos\n",
    "    fig.add_trace(\n",
    "        go.Heatmap(\n",
    "            z=count_matrix.values,\n",
    "            x=count_matrix.columns,\n",
    "            y=count_matrix.index,\n",
    "            colorscale='Blues',\n",
    "            text=count_matrix.values,\n",
    "            texttemplate='%{text}',\n",
    "            textfont={\"size\": 12},\n",
    "            showscale=False\n",
    "        ),\n",
    "        row=1, col=1\n",
    "    )\n",
    "    \n",
    "    # 2. Preço médio\n",
    "    price_matrix = df.pivot_table(\n",
    "        values='price',\n",
    "        index='price_category',\n",
    "        columns='duration_category',\n",
    "        aggfunc='mean',\n",
    "        fill_value=0\n",
    "    )\n",
    "    \n",
    "    fig.add_trace(\n",
    "        go.Heatmap(\n",
    "            z=price_matrix.values,\n",
    "            x=price_matrix.columns,\n",
    "            y=price_matrix.index,\n",
    "            colorscale='Reds',\n",
    "            text=price_matrix.round(0).values.astype(int),\n",
    "            texttemplate='₹%{text}',\n",
    "            textfont={\"size\": 10},\n",
    "            showscale=False\n",
    "        ),\n",
    "        row=1, col=2\n",
    "    )\n",
    "    \n",
    "    # 3. Eficiência\n",
    "    efficiency_matrix = df.pivot_table(\n",
    "        values='efficiency_score',\n",
    "        index='price_category',\n",
    "        columns='duration_category',\n",
    "        aggfunc='mean',\n",
    "        fill_value=0\n",
    "    )\n",
    "    \n",
    "    fig.add_trace(\n",
    "        go.Heatmap(\n",
    "            z=efficiency_matrix.values,\n",
    "            x=efficiency_matrix.columns,\n",
    "            y=efficiency_matrix.index,\n",
    "            colorscale='Greens',\n",
    "            text=efficiency_matrix.round(0).values.astype(int),\n",
    "            texttemplate='₹%{text}/h',\n",
    "            textfont={\"size\": 10},\n",
    "            showscale=False\n",
    "        ),\n",
    "        row=2, col=1\n",
    "    )\n",
    "    \n",
    "    # 4. Taxa de voos diretos\n",
    "    direct_matrix = df.pivot_table(\n",
    "        values='is_direct',\n",
    "        index='price_category',\n",
    "        columns='duration_category',\n",
    "        aggfunc='mean',\n",
    "        fill_value=0\n",
    "    )\n",
    "    \n",
    "    fig.add_trace(\n",
    "        go.Heatmap(\n",
    "            z=direct_matrix.values * 100,\n",
    "            x=direct_matrix.columns,\n",
    "            y=direct_matrix.index,\n",
    "            colorscale='Purples',\n",
    "            text=(direct_matrix * 100).round(1).values,\n",
    "            texttemplate='%{text}%',\n",
    "            textfont={\"size\": 10},\n",
    "            showscale=False\n",
    "        ),\n",
    "        row=2, col=2\n",
    "    )\n",
    "    \n",
    "    # Layout\n",
    "    fig.update_layout(\n",
    "        height=800,\n",
    "        title_text=\"🎯 Matriz de Análise de Segmentos de Mercado\",\n",
    "        title_x=0.5,\n",
    "        template=\"plotly_white\"\n",
    "    )\n",
    "    \n",
    "    # Labels dos eixos\n",
    "    for i in range(1, 3):\n",
    "        for j in range(1, 3):\n",
    "            fig.update_xaxes(title_text=\"Categoria de Duração\", row=i, col=j)\n",
    "            fig.update_yaxes(title_text=\"Categoria de Preço\", row=i, col=j)\n",
    "    \n",
    "    return fig, segment_analysis\n",
    "\n",
    "# Criar matriz de segmentos\n",
    "segment_matrix, segment_data = create_segment_comparison_matrix()\n",
    "segment_matrix.show()\n",
    "\n",
    "print(\"📊 DADOS DETALHADOS DOS SEGMENTOS:\")\n",
    "print(segment_data)\n",
    "print(\"\\n✅ Matriz de comparação de segmentos criada\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 💡 5. Visualizações para Recomendações"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gráfico 9: Sistema de Recomendação Visual\n",
    "def create_recommendation_system():\n",
    "    \"\"\"Criar sistema visual de recomendações\"\"\"\n",
    "    \n",
    "    # Calcular scores de recomendação\n",
    "    df_rec = df.copy()\n",
    "    \n",
    "    # Score baseado em múltiplos fatores (0-100)\n",
    "    # Fator preço (invertido - menor preço = melhor score)\n",
    "    price_score = (1 - (df_rec['price'] - df_rec['price'].min()) / (df_rec['price'].max() - df_rec['price'].min())) * 30\n",
    "    \n",
    "    # Fator duração (invertido - menor duração = melhor score)\n",
    "    duration_score = (1 - (df_rec['duration'] - df_rec['duration'].min()) / (df_rec['duration'].max() - df_rec['duration'].min())) * 25\n",
    "    \n",
    "    # Fator voo direto\n",
    "    direct_score = df_rec['is_direct'] * 25\n",
    "    \n",
    "    # Fator horário premium (invertido para economia)\n",
    "    time_score = (1 - df_rec['is_premium_time']) * 20\n",
    "    \n",
    "    # Score total\n",
    "    df_rec['recommendation_score'] = (price_score + duration_score + direct_score + time_score).round(1)\n",
    "    \n",
    "    # Categorizar recomendações\n",
    "    df_rec['recommendation_category'] = pd.cut(\n",
    "        df_rec['recommendation_score'],\n",
    "        bins=[0, 40, 70, 100],\n",
    "        labels=['Basic', 'Good', 'Excellent']\n",
    "    )\n",
    "    \n",
    "    # Top 10 recomendações\n",
    "    top_recommendations = df_rec.nlargest(10, 'recommendation_score')\n",
    "    \n",
    "    # Criar visualização\n",
    "    fig = make_subplots(\n",
    "        rows=2, cols=2,\n",
    "        subplot_titles=[\n",
    "            '🌟 Top 10 Melhores Opções',\n",
    "            '📊 Distribuição de Scores',\n",
    "            '⚖️ Preço vs Score de Recomendação',\n",
    "            '🎯 Recomendações por Categoria'
        ],
        specs=[[{"type": "bar"}, {"type": "histogram"}],
               [{"type": "scatter"}, {"type": "pie"}]]
    )
    
    # 1. Top 10 melhores opções
    fig.add_trace(
        go.Bar(
            x=top_recommendations['recommendation_score'],
            y=[f"{row['airline']} {row['flight']}" for _, row in top_recommendations.iterrows()],
            orientation='h',
            name='Score',
            marker_color=DASHBOARD_COLORS['success'],
            text=[f"{score:.1f}" for score in top_recommendations['recommendation_score']],
            textposition='auto'
        ),
        row=1, col=1
    )
    
    # 2. Distribuição de scores
    fig.add_trace(
        go.Histogram(
            x=df_rec['recommendation_score'],
            nbinsx=20,
            name='Distribuição',
            marker_color=DASHBOARD_COLORS['info'],
            opacity=0.7
        ),
        row=1, col=2
    )
    
    # 3. Scatter: Preço vs Score
    color_map = {'Basic': DASHBOARD_COLORS['danger'], 
                 'Good': DASHBOARD_COLORS['warning'], 
                 'Excellent': DASHBOARD_COLORS['success']}
    
    for category in df_rec['recommendation_category'].unique():
        if pd.notna(category):
            cat_data = df_rec[df_rec['recommendation_category'] == category]
            fig.add_trace(
                go.Scatter(
                    x=cat_data['price'],
                    y=cat_data['recommendation_score'],
                    mode='markers',
                    name=category,
                    marker=dict(
                        color=color_map[category],
                        size=8,
                        opacity=0.7
                    ),
                    hovertemplate='<b>%{text}</b><br>Preço: ₹%{x:.0f}<br>Score: %{y:.1f}<extra></extra>',
                    text=[f"{row['airline']} {row['flight']}" for _, row in cat_data.iterrows()]
                ),
                row=2, col=1
            )
    
    # 4. Distribuição por categoria
    rec_distribution = df_rec['recommendation_category'].value_counts()
    fig.add_trace(
        go.Pie(
            labels=rec_distribution.index,
            values=rec_distribution.values,
            name="Categorias",
            marker_colors=[color_map.get(cat, '#gray') for cat in rec_distribution.index],
            textinfo='label+percent'
        ),
        row=2, col=2
    )
    
    # Layout
    fig.update_layout(
        height=900,
        title_text="💡 Sistema de Recomendações Inteligente",
        title_x=0.5,
        showlegend=True,
        template="plotly_white"
    )
    
    # Atualizar eixos
    fig.update_xaxes(title_text="Score de Recomendação", row=1, col=1)
    fig.update_yaxes(title_text="Voo", row=1, col=1)
    fig.update_xaxes(title_text="Score", row=1, col=2)
    fig.update_yaxes(title_text="Frequência", row=1, col=2)
    fig.update_xaxes(title_text="Preço (₹)", row=2, col=1)
    fig.update_yaxes(title_text="Score de Recomendação", row=2, col=1)
    
    return fig, df_rec, top_recommendations

# Criar sistema de recomendações
recommendation_fig, df_with_scores, best_flights = create_recommendation_system()
recommendation_fig.show()

print("🌟 TOP 10 RECOMENDAÇÕES:")
print("=" * 60)
for idx, (_, flight) in enumerate(best_flights.iterrows(), 1):
    print(f"{idx:2d}. {flight['airline']} {flight['flight']}")
    print(f"    💰 Preço: ₹{flight['price']:,.0f} | ⏱️ Duração: {flight['duration']:.1f}h | 🎯 Score: {flight['recommendation_score']:.1f}")
    print(f"    🛑 {'Direto' if flight['is_direct'] else 'Com parada'} | 🕐 {flight['departure_time']}")
    print()

# Estatísticas do sistema de recomendação
rec_stats = df_with_scores['recommendation_category'].value_counts()
print("📊 DISTRIBUIÇÃO DAS RECOMENDAÇÕES:")
for category, count in rec_stats.items():
    pct = count / len(df_with_scores) * 100
    print(f"   {category}: {count} voos ({pct:.1f}%)")

print("\\n✅ Sistema de recomendações criado")
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 📱 6. Componentes Específicos para Streamlit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Função para criar filtros dinâmicos (preparação para Streamlit)\n",
    "def create_filter_options():\n",
    "    \"\"\"Criar opções para filtros dinâmicos do Streamlit\"\"\"\n",
    "    \n",
    "    filter_options = {\n",
    "        'airlines': {\n",
    "            'options': sorted(df['airline'].unique().tolist()),\n",
    "            'default': df['airline'].unique().tolist(),\n",
    "            'type': 'multiselect',\n",
    "            'label': '✈️ Selecione as Companhias Aéreas'\n",
    "        },\n",
    "        'price_range': {\n",
    "            'min_value': float(df['price'].min()),\n",
    "            'max_value': float(df['price'].max()),\n",
    "            'default': (float(df['price'].min()), float(df['price'].max())),\n",
    "            'type': 'slider',\n",
    "            'label': '💰 Faixa de Preço (₹)'\n",
    "        },\n",
    "        'departure_times': {\n",
    "            'options': ['Early_Morning', 'Morning', 'Afternoon', 'Evening', 'Night'],\n",
    "            'default': ['Early_Morning', 'Morning', 'Afternoon', 'Evening', 'Night'],\n",
    "            'type': 'multiselect',\n",
    "            'label': '🕐 Horários de Partida'\n",
    "        },\n",
    "        'stops': {\n",
    "            'options': ['zero', 'one'],\n",
    "            'labels': ['Voos Diretos', 'Voos com Parada'],\n",
    "            'default': ['zero', 'one'],\n",
    "            'type': 'multiselect',\n",
    "            'label': '🛑 Tipo de Voo'\n",
    "        },\n",
    "        'duration_range': {\n",
    "            'min_value': float(df['duration'].min()),\n",
    "            'max_value': float(df['duration'].max()),\n",
    "            'default': (float(df['duration'].min()), float(df['duration'].max())),\n",
    "            'type': 'slider',\n",
    "            'label': '⏱️ Duração (horas)'\n",
    "        }\n",
    "    }\n",
    "    \n",
    "    return filter_options\n",
    "\n",
    "# Gerar opções de filtros\n",
    "streamlit_filters = create_filter_options()\n",
    "\n",
    "print(\"📱 CONFIGURAÇÕES PARA FILTROS DO STREAMLIT:\")\n",
    "print(\"=\" * 50)\n",
    "for filter_name, config in streamlit_filters.items():\n",
    "    print(f\"🔧 {filter_name}:\")\n",
    "    print(f\"   Tipo: {config['type']}\")\n",
    "    print(f\"   Label: {config['label']}\")\n",
    "    if 'options' in config:\n",
    "        print(f\"   Opções: {len(config['options'])} items\")\n",
    "    elif 'min_value' in config:\n",
    "        print(f\"   Range: {config['min_value']:.1f} - {config['max_value']:.1f}\")\n",
    "    print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Função para aplicar filtros (simulação para Streamlit)\n",
    "def apply_filters(data, filters_applied):\n",
    "    \"\"\"Aplicar filtros aos dados (função para usar no Streamlit)\"\"\"\n",
    "    \n",
    "    filtered_data = data.copy()\n",
    "    \n",
    "    # Filtro por companhias\n",
    "    if 'airlines' in filters_applied:\n",
    "        filtered_data = filtered_data[filtered_data['airline'].isin(filters_applied['airlines'])]\n",
    "    \n",
    "    # Filtro por faixa de preço\n",
    "    if 'price_range' in filters_applied:\n",
    "        min_price, max_price = filters_applied['price_range']\n",
    "        filtered_data = filtered_data[\n",
    "            (filtered_data['price'] >= min_price) & \n",
    "            (filtered_data['price'] <= max_price)\n",
    "        ]\n",
    "    \n",
    "    # Filtro por horários\n",
    "    if 'departure_times' in filters_applied:\n",
    "        filtered_data = filtered_data[filtered_data['departure_time'].isin(filters_applied['departure_times'])]\n",
    "    \n",
    "    # Filtro por paradas\n",
    "    if 'stops' in filters_applied:\n",
    "        filtered_data = filtered_data[filtered_data['stops'].isin(filters_applied['stops'])]\n",
    "    \n",
    "    # Filtro por duração\n",
    "    if 'duration_range' in filters_applied:\n",
    "        min_duration, max_duration = filters_applied['duration_range']\n",
    "        filtered_data = filtered_data[\n",
    "            (filtered_data['duration'] >= min_duration) & \n",
    "            (filtered_data['duration'] <= max_duration)\n",
    "        ]\n",
    "    \n",
    "    return filtered_data\n",
    "\n",
    "# Teste da função de filtros\n",
    "sample_filters = {\n",
    "    'airlines': ['Vistara', 'Air_India'],\n",
    "    'price_range': (10000, 15000),\n",
    "    'stops': ['zero']\n",
    "}\n",
    "\n",
    "filtered_sample = apply_filters(df, sample_filters)\n",
    "\n",
    "print(\"🧪 TESTE DE FILTROS:\")\n",
    "print(f\"Dataset original: {len(df)} voos\")\n",
    "print(f\"Dataset filtrado: {len(filtered_sample)} voos\")\n",
    "print(f\"Filtros aplicados: {list(sample_filters.keys())}\")\n",
    "print(\"\\n✅ Sistema de filtros testado com sucesso\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Função para criar visualizações responsivas (específica para Streamlit)\n",
    "def create_responsive_charts_config():\n",
    "    \"\"\"Configurações para charts responsivos no Streamlit\"\"\"\n",
    "    \n",
    "    responsive_config = {\n",
    "        'plotly_config': {\n",
    "            'displayModeBar': True,\n",
    "            'displaylogo': False,\n",
    "            'modeBarButtonsToRemove': [\n",
    "                'pan2d', 'lasso2d', 'select2d', 'autoScale2d'\n",
    "            ],\n",
    "            'toImageButtonOptions': {\n",
    "                'format': 'png',\n",
    "                'filename': 'airlines_chart',\n",
    "                'height': 500,\n",
    "                'width': 700,\n",
    "                'scale': 2\n",
    "            }\n",
    "        },\n",
    "        'layout_updates': {\n",
    "            'autosize': True,\n",
    "            'margin': dict(l=50, r=50, t=50, b=50),\n",
    "            'font': dict(size=12),\n",
    "            'template': 'plotly_white'\n",
    "        },\n",
    "        'chart_height': {\n",
    "            'small': 400,\n",
    "            'medium': 500,\n",
    "            'large': 600,\n",
    "            'extra_large': 800\n",
    "        }\n",
    "    }\n",
    "    \n",
    "    return responsive_config\n",
    "\n",
    "streamlit_chart_config = create_responsive_charts_config()\n",
    "\n",
    "print(\"📱 CONFIGURAÇÕES RESPONSIVAS PARA STREAMLIT:\")\n",
    "print(\"=\" * 50)\n",
    "print(\"🖼️ Plotly Config:\")\n",
    "for key, value in streamlit_chart_config['plotly_config'].items():\n",
    "    if key != 'modeBarButtonsToRemove':\n",
    "        print(f\"   {key}: {value}\")\n",
    "\n",
    "print(\"\\n📏 Alturas de Charts:\")\n",
    "for size, height in streamlit_chart_config['chart_height'].items():\n",
    "    print(f\"   {size}: {height}px\")\n",
    "\n",
    "print(\"\\n✅ Configurações responsivas criadas\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 💾 7. Exportação e Preparação para Dashboard"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compilar todas as funções de visualização para uso no Streamlit\n",
    "visualization_functions = {\n",
    "    'metrics': {\n",
    "        'function': 'create_metrics_cards',\n",
    "        'description': 'Cards de métricas principais',\n",
    "        'page': 'Overview'\n",
    "    },\n",
    "    'price_distribution': {\n",
    "        'function': 'create_price_distribution_chart',\n",
    "        'description': 'Distribuição de preços com densidade',\n",
    "        'page': 'Overview'\n",
    "    },\n",
    "    'correlation_heatmap': {\n",
    "        'function': 'create_correlation_heatmap',\n",
    "        'description': 'Heatmap de correlações',\n",
    "        'page': 'Overview'\n",
    "    },\n",
    "    'airline_dashboard': {\n",
    "        'function': 'create_airline_comparison_dashboard',\n",
    "        'description': 'Dashboard comparativo de airlines',\n",
    "        'page': 'Airlines Analysis'\n",
    "    },\n",
    "    'violin_plot': {\n",
    "        'function': 'create_advanced_price_distribution',\n",
    "        'description': 'Violin plot de distribuição',\n",
    "        'page': 'Airlines Analysis'\n",
    "    },\n",
    "    'temporal_heatmap': {\n",
    "        'function': 'create_temporal_heatmap',\n",
    "        'description': 'Heatmap temporal',\n",
    "        'page': 'Temporal Analysis'\n",
    "    },\n",
    "    'temporal_patterns': {\n",
    "        'function': 'create_temporal_patterns_analysis',\n",
    "        'description': 'Análise de padrões temporais',\n",
    "        'page': 'Temporal Analysis'\n",
    "    },\n",
    "    'market_segmentation': {\n",
    "        'function': 'create_market_segmentation_analysis',\n",
    "        'description': 'Análise de clustering 3D',\n",
    "        'page': 'Market Segmentation'\n",
    "    },\n",
    "    'segment_matrix': {\n",
    "        'function': 'create_segment_comparison_matrix',\n",
    "        'description': 'Matriz de comparação de segmentos',\n",
    "        'page': 'Market Segmentation'\n",
    "    },\n",
    "    'recommendation_system': {\n",
    "        'function': 'create_recommendation_system',\n",
    "        'description': 'Sistema de recomendações',\n",
    "        'page': 'Recommendations'\n",
    "    }\n",
    "}\n",
    "\n",
    "print(\"📊 INVENTÁRIO DE VISUALIZAÇÕES CRIADAS:\")\n",
    "print(\"=\" * 60)\n",
    "for viz_name, config in visualization_functions.items():\n",
    "    print(f\"📈 {viz_name}:\")\n",
    "    print(f\"   Função: {config['function']}()\")\n",
    "    print(f\"   Descrição: {config['description']}\")\n",
    "    print(f\"   Página: {config['page']}\")\n",
    "    print()\n",
    "\n",
    "print(f\"✅ Total: {len(visualization_functions)} visualizações criadas\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Salvar datasets finais para o dashboard\n",
    "print(\"💾 SALVANDO DATASETS FINAIS PARA DASHBOARD:\")\n",
    "print(\"=\" * 50)\n",
    "\n",
    "# 1. Dataset principal com todas as features\n",
    "final_dataset = df_with_scores[[\n",
    "    'airline', 'flight', 'source_city', 'departure_time', 'stops', \n",
    "    'arrival_time', 'destination_city', 'duration', 'price',\n",
    "    'price_category', 'duration_category', 'efficiency_score', \n",
    "    'is_direct', 'is_premium_time', 'recommendation_score', \n",
    "    'recommendation_category'\n",
    "]].copy()\n",
    "\n",
    "final_dataset.to_csv('../data/processed/dashboard_ready_data.csv', index=False)\n",
    "print(\"✅ Dataset principal salvo: dashboard_ready_data.csv\")\n",
    "\n",
    "# 2. Dados agregados para performance\n",
    "aggregated_data = {\n",
    "    'airline_stats': df.groupby('airline').agg({\n",
    "        'price': ['mean', 'std', 'count', 'min', 'max'],\n",
    "        'duration': ['mean', 'std'],\n",
    "        'efficiency_score': 'mean',\n",
    "        'is_direct': 'sum'\n",
    "    }).round(2),\n",
    "    \n",
    "    'temporal_stats': df.groupby('departure_time').agg({\n",
    "        'price': ['mean', 'std', 'count'],\n",
    "        'duration': 'mean',\n",
    "        'is_direct': ['sum', 'mean']\n",
    "    }).round(2),\n",
    "    \n",
    "    'overall_metrics': {\n",
    "        'total_flights': len(df),\n",
    "        'avg_price': float(df['price'].mean()),\n",
    "        'median_price': float(df['price'].median()),\n",
    "        'price_std': float(df['price'].std()),\n",
    "        'avg_duration': float(df['duration'].mean()),\n",
    "        'direct_flights_pct': float(df['is_direct'].mean() * 100),\n",
    "        'airlines_count': df['airline'].nunique()\n",
    "    }\n",
    "}\n",
    "\n",
    "# Salvar dados agregados\n",
    "import pickle\n",
    "with open('../data/processed/aggregated_dashboard_data.pkl', 'wb') as f:\n",
    "    pickle.dump(aggregated_data, f)\n",
    "print(\"✅ Dados agregados salvos: aggregated_dashboard_data.pkl\")\n",
    "\n",
    "# 3. Configurações de visualização\n",
    "dashboard_config = {\n",
    "    'colors': DASHBOARD_COLORS,\n",
    "    'airline_colors': AIRLINE_COLORS,\n",
    "    'filters': streamlit_filters,\n",
    "    'chart_config': streamlit_chart_config,\n",
    "    'visualizations': visualization_functions\n",
    "}\n",
    "\n",
    "import json\n",
    "with open('../data/processed/dashboard_config.json', 'w') as f:\n",
    "    json.dump(dashboard_config, f, indent=2, default=str)\n",
    "print(\"✅ Configurações salvas: dashboard_config.json\")\n",
    "\n",
    "print(f\"\\n🎯 RESUMO DOS ARQUIVOS GERADOS:\")\n",
    "print(f\"   📊 dashboard_ready_data.csv: {len(final_dataset)} linhas × {len(final_dataset.columns)} colunas\")\n",
    "print(f\"   🗃️ aggregated_dashboard_data.pkl: {len(aggregated_data)} categorias de dados\")\n",
    "print(f\"   ⚙️ dashboard_config.json: Configurações completas\")\n",
    "print(f\"\\n✅ Todos os dados preparados para o Streamlit Dashboard!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 📋 Resumo das Visualizações Criadas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Resumo final do que foi criado\n",
    "print(\"🎨 RESUMO COMPLETO - VISUALIZAÇÕES PARA DASHBOARD\")\n",
    "print(\"=\" * 70)\n",
    "\n",
    "summary_by_page = {\n",
    "    '📊 Overview': [\n",
    "        '🏷️ Cards de métricas principais (6 KPIs)',\n",
    "        '📈 Distribuição de preços com densidade e categorias',\n",
    "        '🔗 Heatmap de correlações interativo'\n",
    "    ],\n",
    "    '🏢 Airlines Analysis': [\n",
    "        '📊 Dashboard comparativo multidimensional (4 gráficos)',\n",
    "        '🎻 Violin plots de distribuição avançada',\n",
    "        '📈 Box plots com estatísticas detalhadas'\n",
    "    ],\n",
    "    '⏰ Temporal Analysis': [\n",
    "        '🔥 Heatmap temporal (horário × airline)',\n",
    "        '📊 Dashboard de padrões temporais (4 métricas)',\n",
    "        '📈 Análise de tendências por horário'\n",
    "    ],\n",
    "    '🎯 Market Segmentation': [\n",
    "        '🌐 Clustering 3D interativo',\n",
    "        '📊 Matriz de comparação de segmentos (4 heatmaps)',\n",
    "        '🏷️ Categorização automática de voos'\n",
    "    ],\n",
    "    '💡 Recommendations': [\n",
    "        '🌟 Sistema de scores de recomendação',\n",
    "        '🎯 Top 10 melhores opções',\n",
    "        '📊 Dashboard de recomendações (4 visualizações)'\n",
    "    ]\n",
    "}\n",
    "\n",
    "for page, features in summary_by_page.items():\n",
    "    print(f\"\\n{page}:\")\n",
    "    for feature in features:\n",
    "        print(f\"   {feature}\")\n",
    "\n",
    "print(f\"\\n🔧 RECURSOS TÉCNICOS IMPLEMENTADOS:\")\n",
    "print(f\"   ✅ Sistema de filtros dinâmicos (5 tipos)\")\n",
    "print(f\"   ✅ Configurações responsivas para mobile\")\n",
    "print(f\"   ✅ Paleta de cores personalizada\")\n",
    "print(f\"   ✅ Hover tooltips informativos\")\n",
    "print(f\"   ✅ Exportação de gráficos em alta qualidade\")\n",
    "print(f\"   ✅ Performance otimizada com dados agregados\")\n",
    "\n",
    "print(f\"\\n📊 ESTATÍSTICAS FINAIS:\")\n",
    "print(f\"   🎨 Total de funções de visualização: {len(visualization_functions)}\")\n",
    "print(f\"   📄 Páginas do dashboard: {len(summary_by_page)}\")\n",
    "print(f\"   🎯 Visualizações individuais: ~25 gráficos\")\n",
    "print(f\"   💾 Datasets preparados: 3 arquivos\")\n",
    "print(f\"   ⚙️ Configurações: 4 categorias\")\n",
    "\n",
    "print(f\"\\n🚀 PRÓXIMO PASSO:\")\n",
    "print(f\"   📝 Criar notebook 04_insights_validation.ipynb\")\n",
    "print(f\"   🔍 Validar insights com análises estatísticas\")\n",
    "print(f\"   📊 Gerar relatório executivo final\")\n",
    "print(f\"   🎯 Preparar apresentação dos resultados\")\n",
    "\n",
    "print(f\"\\n✅ Desenvolvimento de visualizações COMPLETO!\")\n",
    "print(f\"🎯 Pronto para implementar no Streamlit Dashboard\")\n",{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 📊 03 - Visualization Development: Airlines Dashboard\n",
    "## Desenvolvimento de Visualizações Interativas para Dashboard\n",
    "\n",
    "**Objetivo**: Criar visualizações interativas e impactantes para o dashboard Streamlit, focando na experiência do usuário e insights acionáveis.\n",
    "\n",
    "**Autor**: [Seu Nome]  \n",
    "**Data**: $(date +\"%Y-%m-%d\")  \n",
    "**Versão**: 1.0\n",
    "\n",
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 🔧 Setup e Importações"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Importações principais\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import plotly.express as px\n",
    "import plotly.graph_objects as go\n",
    "from plotly.subplots import make_subplots\n",
    "import plotly.figure_factory as ff\n",
    "\n",
    "# Importações para análises avançadas\n",
    "from scipy import stats\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn.decomposition import PCA\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Configurações de visualização\n",
    "plt.style.use('seaborn-v0_8')\n",
    "pd.set_option('display.max_columns', None)\n",
    "\n",
    "# Paleta de cores personalizada para o dashboard\n",
    "DASHBOARD_COLORS = {\n",
    "    'primary': '#1f77b4',\n",
    "    'secondary': '#ff7f0e', \n",
    "    'success': '#2ca02c',\n",
    "    'danger': '#d62728',\n",
    "    'warning': '#ff7f0e',\n",
    "    'info': '#17a2b8',\n",
    "    'light': '#f8f9fa',\n",
    "    'dark': '#343a40'\n",
    "}\n",
    "\n",
    "# Paleta para airlines\n",
    "AIRLINE_COLORS = {\n",
    "    'SpiceJet': '#FF6B6B',\n",
    "    'Vistara': '#4ECDC4', \n",
    "    'AirAsia': '#45B7D1',\n",
    "    'GO_FIRST': '#96CEB4',\n",
    "    'Indigo': '#FFEAA7',\n",
    "    'Air_India': '#DDA0DD',\n",
    "    'Others': '#95A5A6'\n",
    "}\n",
    "\n",
    "print(\"🎨 Configurações de visualização carregadas com sucesso!\")\n",
    "print(f\"🎨 Paleta principal: {list(DASHBOARD_COLORS.keys())}\")\n",
    "print(f\"✈️ Paleta airlines: {list(AIRLINE_COLORS.keys())}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 📥 Carregamento e Preparação dos Dados"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Carregar dados enriquecidos do notebook anterior\n",
    "df = pd.read_csv('../data/processed/flights_with_features.csv')\n",
    "\n",
    "# Carregar resultados estatísticos\n",
    "import json\n",
    "with open('../data/processed/statistical_analysis_results.json', 'r') as f:\n",
    "    stats_results = json.load(f)\n",
    "\n",
    "print(f\"✅ Dataset carregado: {df.shape[0]} linhas × {df.shape[1]} colunas\")\n",
    "print(f\"📊 Features adicionais disponíveis: price_category, duration_category, efficiency_score, etc.\")\n",
    "print(f\"🔬 Resultados estatísticos carregados: {len(stats_results)} categorias de análise\")\n",
    "\n",
    "# Verificar novas features\n",
    "new_features = ['price_category', 'duration_category', 'efficiency_score', 'is_direct', 'is_premium_time']\n",
    "print(f\"\\n🔍 Verificação das novas features:\")\n",
    "for feature in new_features:\n",
    "    if feature in df.columns:\n",
    "        print(f\"   ✅ {feature}: {df[feature].dtype}\")\n",
    "        if df[feature].dtype == 'object':\n",
    "            print(f\"      Valores: {df[feature].unique()}\")\n",
    "    else:\n",
    "        print(f\"   ❌ {feature}: Não encontrada\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 📊 1. Visualizações de Overview Executivo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Metric cards: headline KPIs for the top of the dashboard\n",
    "def create_metrics_cards():\n",
    "    \"\"\"Build the data behind the dashboard's headline metric cards.\n",
    "\n",
    "    Reads the module-level ``df`` and ``DASHBOARD_COLORS``.\n",
    "\n",
    "    Returns:\n",
    "        dict: card id -> {'value', 'label', 'icon', 'color'}; the 'avg_price'\n",
    "        card additionally has 'delta', the mean's offset from the median in percent.\n",
    "    \"\"\"\n",
    "    prices = df['price']\n",
    "    mean_price = prices.mean()\n",
    "    median_price = prices.median()\n",
    "\n",
    "    def card(value, label, icon, color_key, **extra):\n",
    "        # Extras (e.g. 'delta') are placed after the four standard keys\n",
    "        return {\n",
    "            'value': value,\n",
    "            'label': label,\n",
    "            'icon': icon,\n",
    "            'color': DASHBOARD_COLORS[color_key],\n",
    "            **extra\n",
    "        }\n",
    "\n",
    "    return {\n",
    "        'total_flights': card(len(df), 'Total de Voos', '✈️', 'primary'),\n",
    "        'avg_price': card(\n",
    "            f\"₹{mean_price:,.0f}\", 'Preço Médio', '💰', 'success',\n",
    "            delta=f\"{((mean_price - median_price) / median_price * 100):+.1f}%\"\n",
    "        ),\n",
    "        'airlines_count': card(df['airline'].nunique(), 'Companhias', '🏢', 'info'),\n",
    "        'direct_flights_pct': card(f\"{(df['is_direct'].mean() * 100):.1f}%\", 'Voos Diretos', '🎯', 'warning'),\n",
    "        'avg_duration': card(f\"{df['duration'].mean():.1f}h\", 'Duração Média', '⏱️', 'secondary'),\n",
    "        'price_range': card(f\"₹{prices.max() - prices.min():,.0f}\", 'Amplitude de Preços', '📊', 'danger')\n",
    "    }\n",
    "\n",
    "# Build the metric cards and print a plain-text summary\n",
    "dashboard_metrics = create_metrics_cards()\n",
    "\n",
    "print(\"📈 MÉTRICAS PRINCIPAIS DO DASHBOARD:\")\n",
    "print(\"=\" * 50)\n",
    "for key, metric in dashboard_metrics.items():\n",
    "    # Only cards that define a delta get the parenthesised suffix\n",
    "    delta_info = f\" ({metric['delta']})\" if 'delta' in metric else \"\"\n",
    "    print(f\"{metric['icon']} {metric['label']}: {metric['value']}{delta_info}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chart 1: price histogram plus flight counts per price category\n",
    "def create_price_distribution_chart():\n",
    "    \"\"\"Build a two-row figure describing how ticket prices are distributed.\n",
    "\n",
    "    Row 1 is a 30-bin histogram of ``df['price']`` with vertical markers for the\n",
    "    mean and the median. Row 2 is a bar chart with the number of flights in each\n",
    "    ``price_category``, ordered from the cheapest to the most expensive category\n",
    "    (by mean price) and coloured green -> orange -> red along that order.\n",
    "\n",
    "    Reads the module-level ``df`` and ``DASHBOARD_COLORS``.\n",
    "\n",
    "    Returns:\n",
    "        plotly.graph_objects.Figure: the assembled figure.\n",
    "    \"\"\"\n",
    "\n",
    "    # Row 1 reserves a secondary y-axis; no trace in this function uses it\n",
    "    fig = make_subplots(\n",
    "        rows=2, cols=1,\n",
    "        subplot_titles=['Distribuição de Preços', 'Distribuição por Categoria'],\n",
    "        vertical_spacing=0.15,\n",
    "        specs=[[{\"secondary_y\": True}], [{\"type\": \"bar\"}]]\n",
    "    )\n",
    "\n",
    "    # Main histogram\n",
    "    fig.add_trace(\n",
    "        go.Histogram(\n",
    "            x=df['price'],\n",
    "            nbinsx=30,\n",
    "            name='Frequência',\n",
    "            opacity=0.7,\n",
    "            marker_color=DASHBOARD_COLORS['primary']\n",
    "        ),\n",
    "        row=1, col=1\n",
    "    )\n",
    "\n",
    "    # Mean and median marker lines\n",
    "    mean_price = df['price'].mean()\n",
    "    median_price = df['price'].median()\n",
    "\n",
    "    # Anchor each label on the outer side of its line so the two annotations\n",
    "    # do not collide when mean and median are close together\n",
    "    mean_on_right = mean_price >= median_price\n",
    "\n",
    "    fig.add_vline(\n",
    "        x=mean_price,\n",
    "        line_dash=\"dash\",\n",
    "        line_color=DASHBOARD_COLORS['danger'],\n",
    "        annotation_text=f\"Média: ₹{mean_price:,.0f}\",\n",
    "        annotation_position=\"top right\" if mean_on_right else \"top left\",\n",
    "        row=1, col=1\n",
    "    )\n",
    "\n",
    "    fig.add_vline(\n",
    "        x=median_price,\n",
    "        line_dash=\"dot\",\n",
    "        line_color=DASHBOARD_COLORS['success'],\n",
    "        annotation_text=f\"Mediana: ₹{median_price:,.0f}\",\n",
    "        annotation_position=\"top left\" if mean_on_right else \"top right\",\n",
    "        row=1, col=1\n",
    "    )\n",
    "\n",
    "    # Bar chart per category. value_counts() sorts by frequency, so order the\n",
    "    # categories by their mean price instead: the colour ramp then tracks price\n",
    "    # level rather than bar height.\n",
    "    category_order = df.groupby('price_category')['price'].mean().dropna().sort_values().index\n",
    "    price_cat_counts = df['price_category'].value_counts().reindex(category_order)\n",
    "\n",
    "    # Spread the three ramp colours over however many categories exist\n",
    "    ramp = [DASHBOARD_COLORS['success'], DASHBOARD_COLORS['warning'], DASHBOARD_COLORS['danger']]\n",
    "    n_categories = len(price_cat_counts)\n",
    "    last = len(ramp) - 1\n",
    "    bar_colors = [\n",
    "        ramp[round(i * last / (n_categories - 1))] if n_categories > 1 else ramp[0]\n",
    "        for i in range(n_categories)\n",
    "    ]\n",
    "\n",
    "    fig.add_trace(\n",
    "        go.Bar(\n",
    "            x=price_cat_counts.index,\n",
    "            y=price_cat_counts.values,\n",
    "            name='Voos por Categoria',\n",
    "            marker_color=bar_colors,\n",
    "            text=price_cat_counts.values,\n",
    "            textposition='auto'\n",
    "        ),\n",
    "        row=2, col=1\n",
    "    )\n",
    "\n",
    "    # Layout\n",
    "    fig.update_layout(\n",
    "        height=700,\n",
    "        title_text=\"📊 Análise de Distribuição de Preços\",\n",
    "        title_x=0.5,\n",
    "        showlegend=True,\n",
    "        template=\"plotly_white\"\n",
    "    )\n",
    "\n",
    "    fig.update_xaxes(title_text=\"Preço (₹)\", row=1, col=1)\n",
    "    fig.update_yaxes(title_text=\"Frequência\", row=1, col=1)\n",
    "    fig.update_xaxes(title_text=\"Categoria de Preço\", row=2, col=1)\n",
    "    fig.update_yaxes(title_text=\"Número de Voos\", row=2, col=1)\n",
    "\n",
    "    return fig\n",
    "\n",
    "# Build and display the chart\n",
    "price_distribution_fig = create_price_distribution_chart()\n",
    "price_distribution_fig.show()\n",
    "\n",
    "print(\"✅ Gráfico de distribuição de preços criado\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chart 2: interactive correlation heatmap\n",
    "def create_correlation_heatmap():\n",
    "    \"\"\"Build an interactive heatmap of pairwise correlations between key columns.\n",
    "\n",
    "    Correlations come from ``DataFrame.corr()`` with its default settings over five\n",
    "    columns of the module-level ``df``; each cell is labelled with the coefficient\n",
    "    rounded to 3 decimals.\n",
    "\n",
    "    Returns:\n",
    "        plotly.graph_objects.Figure: the heatmap figure.\n",
    "    \"\"\"\n",
    "    key_columns = ['price', 'duration', 'efficiency_score', 'is_direct', 'is_premium_time']\n",
    "    correlations = df[key_columns].corr()\n",
    "    axis_labels = correlations.columns\n",
    "\n",
    "    heatmap = go.Heatmap(\n",
    "        z=correlations.values,\n",
    "        x=axis_labels,\n",
    "        y=axis_labels,\n",
    "        colorscale='RdBu',\n",
    "        zmid=0,  # centre the diverging colour scale on zero correlation\n",
    "        text=correlations.round(3).values,\n",
    "        texttemplate='%{text}',\n",
    "        textfont=dict(size=12),\n",
    "        hoverongaps=False,\n",
    "        hovertemplate='<b>%{y} vs %{x}</b><br>Correlação: %{z:.3f}<extra></extra>'\n",
    "    )\n",
    "\n",
    "    fig = go.Figure(data=[heatmap])\n",
    "    fig.update_layout(\n",
    "        title=dict(\n",
    "            text='🔗 Matriz de Correlação - Variáveis Chave',\n",
    "            x=0.5,\n",
    "            font=dict(size=16)\n",
    "        ),\n",
    "        height=500,\n",
    "        template=\"plotly_white\"\n",
    "    )\n",
    "\n",
    "    return fig\n",
    "\n",
    "# Build and display the heatmap\n",
    "correlation_heatmap = create_correlation_heatmap()\n",
    "correlation_heatmap.show()\n",
    "\n",
    "print(\"✅ Heatmap de correlação criado\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 🏢 2. Visualizações de Análise por Airlines"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chart 3: multi-dimensional airline comparison\n",
    "def create_airline_comparison_dashboard():\n",
    "    \"\"\"Build a 2x2 dashboard comparing airlines and return it with its data table.\n",
    "\n",
    "    Panels: mean price with standard-deviation error bars (bar), market share\n",
    "    (pie), mean efficiency vs. mean duration with bubbles scaled by flight count\n",
    "    (scatter) and number of direct flights (bar).\n",
    "\n",
    "    Reads the module-level ``df`` and ``AIRLINE_COLORS``.\n",
    "\n",
    "    Returns:\n",
    "        tuple: ``(fig, airline_stats)`` - the plotly figure and the per-airline\n",
    "        aggregate DataFrame (one row per airline).\n",
    "    \"\"\"\n",
    "    # Pixel diameter of the bubble for the airline with the most flights\n",
    "    max_bubble_px = 60\n",
    "\n",
    "    def colors_for(airlines):\n",
    "        # Dashboard colour per airline, falling back to the 'Others' colour\n",
    "        return [AIRLINE_COLORS.get(name, AIRLINE_COLORS['Others']) for name in airlines]\n",
    "\n",
    "    # Aggregate per airline\n",
    "    airline_stats = df.groupby('airline').agg({\n",
    "        'price': ['mean', 'std', 'count'],\n",
    "        'duration': 'mean',\n",
    "        'efficiency_score': 'mean',\n",
    "        'is_direct': 'sum'\n",
    "    }).round(2)\n",
    "\n",
    "    # Flatten the two-level column index produced by agg()\n",
    "    airline_stats.columns = ['Price_Mean', 'Price_Std', 'Flight_Count', 'Duration_Mean', 'Efficiency_Mean', 'Direct_Flights']\n",
    "    airline_stats = airline_stats.reset_index()\n",
    "\n",
    "    # Market share as a percentage of all counted flights\n",
    "    airline_stats['Market_Share'] = (airline_stats['Flight_Count'] / airline_stats['Flight_Count'].sum() * 100).round(1)\n",
    "\n",
    "    airline_colors = colors_for(airline_stats['airline'])\n",
    "    flight_counts = airline_stats['Flight_Count']\n",
    "\n",
    "    # Subplots\n",
    "    fig = make_subplots(\n",
    "        rows=2, cols=2,\n",
    "        subplot_titles=[\n",
    "            '💰 Preço Médio por Airline',\n",
    "            '📊 Market Share (%)', \n",
    "            '⚡ Eficiência (₹/hora)',\n",
    "            '🎯 Voos Diretos Oferecidos'\n",
    "        ],\n",
    "        specs=[\n",
    "            [{\"type\": \"bar\"}, {\"type\": \"pie\"}],\n",
    "            [{\"type\": \"scatter\"}, {\"type\": \"bar\"}]\n",
    "        ]\n",
    "    )\n",
    "\n",
    "    # 1. Mean price per airline, most expensive first\n",
    "    airline_stats_sorted = airline_stats.sort_values('Price_Mean', ascending=False)\n",
    "\n",
    "    fig.add_trace(\n",
    "        go.Bar(\n",
    "            x=airline_stats_sorted['airline'],\n",
    "            y=airline_stats_sorted['Price_Mean'],\n",
    "            name='Preço Médio',\n",
    "            marker_color=colors_for(airline_stats_sorted['airline']),\n",
    "            text=[f'₹{x:,.0f}' for x in airline_stats_sorted['Price_Mean']],\n",
    "            textposition='auto',\n",
    "            error_y=dict(\n",
    "                type='data',\n",
    "                array=airline_stats_sorted['Price_Std'],\n",
    "                visible=True\n",
    "            )\n",
    "        ),\n",
    "        row=1, col=1\n",
    "    )\n",
    "\n",
    "    # 2. Market share\n",
    "    fig.add_trace(\n",
    "        go.Pie(\n",
    "            labels=airline_stats['airline'],\n",
    "            values=airline_stats['Market_Share'],\n",
    "            name=\"Market Share\",\n",
    "            marker_colors=airline_colors,\n",
    "            textinfo='label+percent',\n",
    "            hovertemplate='<b>%{label}</b><br>Participação: %{value}%<br>Voos: %{customdata}<extra></extra>',\n",
    "            customdata=flight_counts\n",
    "        ),\n",
    "        row=1, col=2\n",
    "    )\n",
    "\n",
    "    # 3. Scatter: efficiency vs. duration, bubble area proportional to flight count.\n",
    "    # Raw counts must not be used as pixel sizes, so sizeref rescales them to make\n",
    "    # the largest bubble max_bubble_px wide; the hover text reads the real count\n",
    "    # from customdata instead of the marker size.\n",
    "    fig.add_trace(\n",
    "        go.Scatter(\n",
    "            x=airline_stats['Duration_Mean'],\n",
    "            y=airline_stats['Efficiency_Mean'],\n",
    "            mode='markers+text',\n",
    "            name='Eficiência',\n",
    "            text=airline_stats['airline'],\n",
    "            textposition='top center',\n",
    "            customdata=flight_counts,\n",
    "            marker=dict(\n",
    "                size=flight_counts,\n",
    "                sizemode='area',\n",
    "                sizeref=float(2.0 * flight_counts.max() / (max_bubble_px ** 2)),\n",
    "                sizemin=4,\n",
    "                color=airline_colors,\n",
    "                opacity=0.7,\n",
    "                line=dict(width=2, color='white')\n",
    "            ),\n",
    "            hovertemplate='<b>%{text}</b><br>Duração Média: %{x:.1f}h<br>Eficiência: ₹%{y:.0f}/h<br>Voos: %{customdata}<extra></extra>'\n",
    "        ),\n",
    "        row=2, col=1\n",
    "    )\n",
    "\n",
    "    # 4. Direct flights per airline\n",
    "    fig.add_trace(\n",
    "        go.Bar(\n",
    "            x=airline_stats['airline'],\n",
    "            y=airline_stats['Direct_Flights'],\n",
    "            name='Voos Diretos',\n",
    "            marker_color=airline_colors,\n",
    "            text=airline_stats['Direct_Flights'],\n",
    "            textposition='auto'\n",
    "        ),\n",
    "        row=2, col=2\n",
    "    )\n",
    "\n",
    "    # Overall layout\n",
    "    fig.update_layout(\n",
    "        height=800,\n",
    "        title_text=\"🏢 Dashboard Comparativo - Airlines\",\n",
    "        title_x=0.5,\n",
    "        showlegend=False,\n",
    "        template=\"plotly_white\"\n",
    "    )\n",
    "\n",
    "    # Axis titles (the pie panel has no axes)\n",
    "    fig.update_xaxes(title_text=\"Airline\", row=1, col=1, tickangle=-45)\n",
    "    fig.update_yaxes(title_text=\"Preço Médio (₹)\", row=1, col=1)\n",
    "\n",
    "    fig.update_xaxes(title_text=\"Duração Média (h)\", row=2, col=1)\n",
    "    fig.update_yaxes(title_text=\"Eficiência (₹/h)\", row=2, col=1)\n",
    "\n",
    "    fig.update_xaxes(title_text=\"Airline\", row=2, col=2, tickangle=-45)\n",
    "    fig.update_yaxes(title_text=\"Número de Voos Diretos\", row=2, col=2)\n",
    "\n",
    "    return fig, airline_stats\n",
    "\n",
    "# Build the airline dashboard\n",
    "airline_dashboard, airline_data = create_airline_comparison_dashboard()\n",
    "airline_dashboard.show()\n",
    "\n",
    "# Show the summary table\n",
    "print(\"📊 RESUMO DAS AIRLINES:\")\n",
    "print(airline_data.round(1))\n",
    "\n",
    "print(\"\\n✅ Dashboard comparativo de airlines criado\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chart 4: violin plot (with embedded box plot) of prices per airline\n",
    "def create_advanced_price_distribution():\n",
    "    \"\"\"Build a violin plot of ticket prices for each airline.\n",
    "\n",
    "    Adds one violin per distinct value of ``df['airline']`` (with inner box plot\n",
    "    and mean line) and a dashed horizontal line at the overall mean price.\n",
    "\n",
    "    Reads the module-level ``df``, ``AIRLINE_COLORS`` and ``DASHBOARD_COLORS``.\n",
    "\n",
    "    Returns:\n",
    "        plotly.graph_objects.Figure: the violin figure.\n",
    "    \"\"\"\n",
    "\n",
    "    fig = go.Figure()\n",
    "\n",
    "    # One violin per airline\n",
    "    airlines = df['airline'].unique()\n",
    "\n",
    "    for airline in airlines:\n",
    "        # Local name chosen so it does not shadow the notebook-level airline_data\n",
    "        airline_prices = df[df['airline'] == airline]['price']\n",
    "\n",
    "        fig.add_trace(go.Violin(\n",
    "            y=airline_prices,\n",
    "            name=airline,\n",
    "            box_visible=True,\n",
    "            meanline_visible=True,\n",
    "            fillcolor=AIRLINE_COLORS.get(airline, AIRLINE_COLORS['Others']),\n",
    "            opacity=0.6,\n",
    "            x0=airline\n",
    "        ))\n",
    "\n",
    "    # Reference line at the overall mean price\n",
    "    overall_mean = df['price'].mean()\n",
    "    fig.add_hline(\n",
    "        y=overall_mean,\n",
    "        line_dash=\"dash\",\n",
    "        line_color=DASHBOARD_COLORS['danger'],\n",
    "        annotation_text=f\"Média Geral: ₹{overall_mean:,.0f}\",\n",
    "        annotation_position=\"top right\"\n",
    "    )\n",
    "\n",
    "    fig.update_layout(\n",
    "        title={\n",
    "            'text': '🎻 Distribuição Avançada de Preços por Airline',\n",
    "            'x': 0.5\n",
    "        },\n",
    "        xaxis_title=\"Companhia Aérea\",\n",
    "        yaxis_title=\"Preço (₹)\",\n",
    "        height=600,\n",
    "        template=\"plotly_white\",\n",
    "        showlegend=False\n",
    "    )\n",
    "\n",
    "    fig.update_xaxes(tickangle=-45)\n",
    "\n",
    "    return fig\n",
    "\n",
    "# Build and display the violin plot\n",
    "violin_plot = create_advanced_price_distribution()\n",
    "violin_plot.show()\n",
    "\n",
    "print(\"✅ Violin plot de distribuição de preços criado\")\n",
    "