In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 📊 01 - Overview Dashboard Visualizations\n",
    "## Executive Summary and Key Performance Indicators\n",
    "\n",
    "**Objective**: Create high-level overview visualizations for the main Streamlit dashboard page\n",
    "\n",
    "---"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Core imports\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import plotly.express as px\n",
    "import plotly.graph_objects as go\n",
    "from plotly.subplots import make_subplots\n",
    "import json\n",
    "import warnings\n",
    "from datetime import datetime\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Professional color palette\n",
    "COLORS = {\n",
    "    'primary': '#1E3A8A',      # Deep Blue\n",
    "    'secondary': '#059669',    # Emerald Green\n",
    "    'accent': '#DC2626',       # Red\n",
    "    'warning': '#F59E0B',      # Amber\n",
    "    'info': '#0EA5E9',         # Sky Blue\n",
    "    'success': '#10B981',      # Green\n",
    "    'danger': '#EF4444',       # Red\n",
    "    'dark': '#1F2937',         # Gray 800\n",
    "    'light': '#F9FAFB',        # Gray 50\n",
    "}\n",
    "\n",
    "AIRLINE_PALETTE = {\n",
    "    'SpiceJet': '#FF6B35',\n",
    "    'Vistara': '#7209B7',\n",
    "    'AirAsia': '#FF0066',\n",
    "    'GO_FIRST': '#06D6A0',\n",
    "    'Indigo': '#003566',\n",
    "    'Air_India': '#B5179E',\n",
    "    'Others': '#6C757D'\n",
    "}\n",
    "\n",
    "PLOTLY_CONFIG = {\n",
    "    'displayModeBar': True,\n",
    "    'displaylogo': False,\n",
    "    'toImageButtonOptions': {\n",
    "        'format': 'png',\n",
    "        'filename': 'overview_chart',\n",
    "        'height': 800,\n",
    "        'width': 1200,\n",
    "        'scale': 2\n",
    "    }\n",
    "}\n",
    "\n",
    "print(\"🎨 Overview visualization setup completed!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load and prepare data\n",
    "try:\n",
    "    df = pd.read_csv('../data/processed/flights_with_features.csv')\n",
    "    print(f\"✅ Enhanced dataset loaded: {df.shape}\")\n",
    "except FileNotFoundError:\n",
    "    # Load raw data and create basic features\n",
    "    df = pd.read_csv('../data/raw/airlines_flights_data.csv')\n",
    "    print(f\"⚠️ Using raw dataset: {df.shape}\")\n",
    "    \n",
    "    # Create basic features for visualization\n",
    "    if 'duration' in df.columns and 'price' in df.columns:\n",
    "        df['efficiency_score'] = df['price'] / df['duration']\n",
    "    \n",
    "    if 'stops' in df.columns:\n",
    "        df['is_direct'] = (df['stops'] == 'zero').astype(int)\n",
    "    \n",
    "    if 'departure_time' in df.columns:\n",
    "        premium_times = ['Morning', 'Afternoon', 'Evening']\n",
    "        df['is_premium_time'] = df['departure_time'].isin(premium_times).astype(int)\n",
    "    \n",
    "    # Price categories\n",
    "    df['price_category'] = pd.qcut(df['price'], \n",
    "                                   q=3, \n",
    "                                   labels=['Budget', 'Mid-Range', 'Premium'])\n",
    "\n",
    "print(f\"📊 Dataset prepared with {df.columns.tolist()}\")\n",
    "print(f\"Airlines: {df['airline'].unique().tolist()}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_kpi_metrics():\n",
    "    \"\"\"Create comprehensive KPI metrics for overview dashboard\"\"\"\n",
    "    \n",
    "    metrics = {\n",
    "        'total_flights': {\n",
    "            'value': len(df),\n",
    "            'label': 'Total Flights',\n",
    "            'icon': '✈️',\n",
    "            'change': '+5.2%',\n",
    "            'color': COLORS['primary']\n",
    "        },\n",
    "        'avg_price': {\n",
    "            'value': f\"₹{df['price'].mean():,.0f}\",\n",
    "            'label': 'Average Price', \n",
    "            'icon': '💰',\n",
    "            'change': f\"{((df['price'].mean() - df['price'].median()) / df['price'].median() * 100):+.1f}%\",\n",
    "            'color': COLORS['success']\n",
    "        },\n",
    "        'airlines_count': {\n",
    "            'value': df['airline'].nunique(),\n",
    "            'label': 'Active Airlines',\n",
    "            'icon': '🏢', \n",
    "            'change': 'Stable',\n",
    "            'color': COLORS['info']\n",
    "        },\n",
    "        'avg_duration': {\n",
    "            'value': f\"{df['duration'].mean():.1f}h\",\n",
    "            'label': 'Avg Duration',\n",
    "            'icon': '⏱️',\n",
    "            'change': f\"{df['duration'].std():.1f}h std\",\n",
    "            'color': COLORS['warning']\n",
    "        },\n",
    "        'direct_flights': {\n",
    "            'value': f\"{(df['is_direct'].mean() * 100):.1f}%\",\n",
    "            'label': 'Direct Flights',\n",
    "            'icon': '🎯',\n",
    "            'change': '+2.1%',\n",
    "            'color': COLORS['secondary']\n",
    "        },\n",
    "        'price_range': {\n",
    "            'value': f\"₹{df['price'].min():,.0f} - ₹{df['price'].max():,.0f}\",\n",
    "            'label': 'Price Range',\n",
    "            'icon': '📊',\n",
    "            'change': f\"₹{df['price'].std():,.0f} std\",\n",
    "            'color': COLORS['accent']\n",
    "        }\n",
    "    }\n",
    "    \n",
    "    return metrics\n",
    "\n",
    "# Create and save KPI metrics\n",
    "kpi_metrics = create_kpi_metrics()\n",
    "\n",
    "print(\"📈 EXECUTIVE KPI DASHBOARD\")\n",
    "print(\"=\" * 50)\n",
    "for key, metric in kpi_metrics.items():\n",
    "    print(f\"{metric['icon']} {metric['label']}: {metric['value']} ({metric['change']})\")\n",
    "\n",
    "# Save for Streamlit\n",
    "import os\n",
    "os.makedirs('../output', exist_ok=True)\n",
    "with open('../output/overview_kpi_metrics.json', 'w') as f:\n",
    "    json.dump(kpi_metrics, f, indent=2)\n",
    "    \n",
    "print(\"\\n✅ KPI metrics saved for Streamlit integration\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_overview_dashboard():\n",
    "    \"\"\"Create comprehensive overview dashboard\"\"\"\n",
    "    \n",
    "    # Create subplot layout\n",
    "    fig = make_subplots(\n",
    "        rows=2, cols=3,\n",
    "        subplot_titles=[\n",
    "            '💰 Price Distribution Analysis',\n",
    "            '🏢 Market Share by Airlines', \n",
    "            '⏰ Flight Volume by Time Slots',\n",
    "            '🎯 Price vs Duration Correlation',\n",
    "            '📊 Efficiency Analysis',\n",
    "            '🔥 Direct vs Connecting Flights'\n",
    "        ],\n",
    "        specs=[\n",
    "            [{\"type\": \"histogram\"}, {\"type\": \"pie\"}, {\"type\": \"bar\"}],\n",
    "            [{\"type\": \"scatter\"}, {\"type\": \"box\"}, {\"type\": \"bar\"}]\n",
    "        ],\n",
    "        vertical_spacing=0.12,\n",
    "        horizontal_spacing=0.08\n",
    "    )\n",
    "    \n",
    "    # 1. Price Distribution\n",
    "    fig.add_trace(\n",
    "        go.Histogram(\n",
    "            x=df['price'],\n",
    "            nbinsx=30,\n",
    "            name='Price Distribution',\n",
    "            marker=dict(\n",
    "                color=COLORS['primary'],\n",
    "                opacity=0.7,\n",
    "                line=dict(color='white', width=1)\n",
    "            )\n",
    "        ),\n",
    "        row=1, col=1\n",
    "    )\n",
    "    \n",
    "    # Add statistical lines\n",
    "    for stat_name, value, color in [\n",
    "        ('Mean', df['price'].mean(), COLORS['danger']),\n",
    "        ('Median', df['price'].median(), COLORS['success'])\n",
    "    ]:\n",
    "        fig.add_vline(\n",
    "            x=value,\n",
    "            line_dash=\"dash\",\n",
    "            line_color=color,\n",
    "            annotation_text=f\"{stat_name}: ₹{value:,.0f}\",\n",
    "            row=1, col=1\n",
    "        )\n",
    "    \n",
    "    # 2. Market Share\n",
    "    market_share = df['airline'].value_counts()\n",
    "    colors = [AIRLINE_PALETTE.get(airline, AIRLINE_PALETTE['Others']) \n",
    "              for airline in market_share.index]\n",
    "    \n",
    "    fig.add_trace(\n",
    "        go.Pie(\n",
    "            labels=market_share.index,\n",
    "            values=market_share.values,\n",
    "            name='Market Share',\n",
    "            marker_colors=colors,\n",
    "            textinfo='label+percent',\n",
    "            hovertemplate='<b>%{label}</b><br>Flights: %{value}<br>Share: %{percent}<extra></extra>'\n",
    "        ),\n",
    "        row=1, col=2\n",
    "    )\n",
    "    \n",
    "    # 3. Time Slots Volume\n",
    "    time_volume = df['departure_time'].value_counts().sort_index()\n",
    "    \n",
    "    fig.add_trace(\n",
    "        go.Bar(\n",
    "            x=time_volume.index,\n",
    "            y=time_volume.values,\n",
    "            name='Flight Volume',\n",
    "            marker=dict(\n",
    "                color=time_volume.values,\n",
    "                colorscale='Viridis',\n",
    "                showscale=False\n",
    "            ),\n",
    "            text=time_volume.values,\n",
    "            textposition='outside'\n",
    "        ),\n",
    "        row=1, col=3\n",
    "    )\n",
    "    \n",
    "    # 4. Price vs Duration Scatter\n",
    "    for category in df['price_category'].unique():\n",
    "        if pd.notna(category):\n",
    "            cat_data = df[df['price_category'] == category]\n",
    "            color_map = {'Budget': COLORS['success'], 'Mid-Range': COLORS['warning'], 'Premium': COLORS['danger']}\n",
    "            \n",
    "            fig.add_trace(\n",
    "                go.Scatter(\n",
    "                    x=cat_data['duration'],\n",
    "                    y=cat_data['price'],\n",
    "                    mode='markers',\n",
    "                    name=f'{category} Flights',\n",
    "                    marker=dict(\n",
    "                        size=8,\n",
    "                        color=color_map.get(category, COLORS['dark']),\n",
    "                        opacity=0.6,\n",
    "                        line=dict(width=1, color='white')\n",
    "                    ),\n",
    "                    hovertemplate='<b>%{text}</b><br>Duration: %{x:.1f}h<br>Price: ₹%{y:,.0f}<extra></extra>',\n",
    "                    text=cat_data['airline']\n",
    "                ),\n",
    "                row=2, col=1\n",
    "            )\n",
    "    \n",
    "    # 5. Efficiency by Airline (Box Plot)\n",
    "    top_airlines = df['airline'].value_counts().head(6).index\n",
    "    for airline in top_airlines:\n",
    "        airline_data = df[df['airline'] == airline]\n",
    "        color = AIRLINE_PALETTE.get(airline, AIRLINE_PALETTE['Others'])\n",
    "        \n",
    "        fig.add_trace(\n",
    "            go.Box(\n",
    "                y=airline_data['efficiency_score'],\n",
    "                name=airline,\n",
    "                marker_color=color,\n",
    "                boxpoints='outliers'\n",
    "            ),\n",
    "            row=2, col=2\n",
    "        )\n",
    "    \n",
    "    # 6. Direct vs Connecting Flights\n",
    "    direct_analysis = df.groupby(['airline', 'is_direct']).size().unstack(fill_value=0)\n",
    "    direct_analysis.columns = ['Connecting', 'Direct']\n",
    "    top_airlines_direct = df['airline'].value_counts().head(5).index\n",
    "    direct_analysis_filtered = direct_analysis.loc[top_airlines_direct]\n",
    "    \n",
    "    fig.add_trace(\n",
    "        go.Bar(\n",
    "            x=direct_analysis_filtered.index,\n",
    "            y=direct_analysis_filtered['Direct'],\n",
    "            name='Direct Flights',\n",
    "            marker_color=COLORS['success']\n",
    "        ),\n",
    "        row=2, col=3\n",
    "    )\n",
    "    \n",
    "    fig.add_trace(\n",
    "        go.Bar(\n",
    "            x=direct_analysis_filtered.index,\n",
    "            y=direct_analysis_filtered['Connecting'],\n",
    "            name='Connecting Flights',\n",
    "            marker_color=COLORS['warning']\n",
    "        ),\n",
    "        row=2, col=3\n",
    "    )\n",
    "    \n",
    "    # Update layout\n",
    "    fig.update_layout(\n",
    "        height=800,\n",
    "        title={\n",
    "            'text': '📊 Airlines Market Overview Dashboard',\n",
    "            'x': 0.5,\n",
    "            'font': {'size': 24, 'color': COLORS['dark']}\n",
    "        },\n",
    "        template='plotly_white',\n",
    "        showlegend=True,\n",
    "        legend=dict(\n",
    "            orientation=\"h\",\n",
    "            yanchor=\"bottom\",\n",
    "            y=1.02,\n",
    "            xanchor=\"right\",\n",
    "            x=1\n",
    "        )\n",
    "    )\n",
    "    \n",
    "    # Update axes labels\n",
    "    fig.update_xaxes(title_text=\"Price (₹)\", row=1, col=1)\n",
    "    fig.update_yaxes(title_text=\"Frequency\", row=1, col=1)\n",
    "    \n",
    "    fig.update_xaxes(title_text=\"Time Slots\", row=1, col=3, tickangle=45)\n",
    "    fig.update_yaxes(title_text=\"Number of Flights\", row=1, col=3)\n",
    "    \n",
    "    fig.update_xaxes(title_text=\"Duration (hours)\", row=2, col=1)\n",
    "    fig.update_yaxes(title_text=\"Price (₹)\", row=2, col=1)\n",
    "    \n",
    "    fig.update_yaxes(title_text=\"Efficiency Score (₹/hour)\", row=2, col=2)\n",
    "    \n",
    "    fig.update_xaxes(title_text=\"Airlines\", row=2, col=3, tickangle=45)\n",
    "    fig.update_yaxes(title_text=\"Number of Flights\", row=2, col=3)\n",
    "    \n",
    "    return fig\n",
    "\n",
    "# Create overview dashboard\n",
    "overview_fig = create_overview_dashboard()\n",
    "overview_fig.show(config=PLOTLY_CONFIG)\n",
    "\n",
    "print(\"\\n✅ Overview dashboard created successfully!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create summary statistics for overview\n",
    "def create_overview_summary():\n",
    "    \"\"\"Generate comprehensive summary statistics\"\"\"\n",
    "    \n",
    "    summary = {\n",
    "        'dataset_info': {\n",
    "            'total_records': len(df),\n",
    "            'airlines_count': df['airline'].nunique(),\n",
    "            'time_slots': df['departure_time'].nunique(),\n",
    "            'data_quality': {\n",
    "                'missing_values': df.isnull().sum().sum(),\n",
    "                'duplicate_records': df.duplicated().sum(),\n",
    "                'completeness': f\"{((len(df) - df.isnull().sum().sum()) / (len(df) * len(df.columns)) * 100):.1f}%\"\n",
    "            }\n",
    "        },\n",
    "        'price_analysis': {\n",
    "            'mean': float(df['price'].mean()),\n",
    "            'median': float(df['price'].median()),\n",
    "            'std': float(df['price'].std()),\n",
    "            'min': float(df['price'].min()),\n",
    "            'max': float(df['price'].max()),\n",
    "            'q25': float(df['price'].quantile(0.25)),\n",
    "            'q75': float(df['price'].quantile(0.75)),\n",
    "            'iqr': float(df['price'].quantile(0.75) - df['price'].quantile(0.25))\n",
    "        },\n",
    "        'duration_analysis': {\n",
    "            'mean': float(df['duration'].mean()),\n",
    "            'median': float(df['duration'].median()),\n",
    "            'std': float(df['duration'].std()),\n",
    "            'min': float(df['duration'].min()),\n",
    "            'max': float(df['duration'].max())\n",
    "        },\n",
    "        'market_insights': {\n",
    "            'top_airline': df['airline'].value_counts().index[0],\n",
    "            'top_airline_share': float(df['airline'].value_counts().iloc[0] / len(df) * 100),\n",
    "            'peak_time_slot': df['departure_time'].value_counts().index[0],\n",
    "            'direct_flights_percentage': float(df['is_direct'].mean() * 100),\n",
    "            'premium_time_percentage': float(df['is_premium_time'].mean() * 100)\n",
    "        }\n",
    "    }\n",
    "    \n",
    "    return summary\n",
    "\n",
    "# Generate and save summary\n",
    "overview_summary = create_overview_summary()\n",
    "\n",
    "with open('../output/overview_summary.json', 'w') as f:\n",
    "    json.dump(overview_summary, f, indent=2)\n",
    "\n",
    "print(\"📊 OVERVIEW SUMMARY STATISTICS\")\n",
    "print(\"=\" * 50)\n",
    "print(f\"📈 Total Flights: {overview_summary['dataset_info']['total_records']:,}\")\n",
    "print(f\"🏢 Airlines: {overview_summary['dataset_info']['airlines_count']}\")\n",
    "print(f\"💰 Average Price: ₹{overview_summary['price_analysis']['mean']:,.0f}\")\n",
    "print(f\"⏱️ Average Duration: {overview_summary['duration_analysis']['mean']:.1f}h\")\n",
    "print(f\"🎯 Direct Flights: {overview_summary['market_insights']['direct_flights_percentage']:.1f}%\")\n",
    "print(f\"👑 Market Leader: {overview_summary['market_insights']['top_airline']} ({overview_summary['market_insights']['top_airline_share']:.1f}%)\")\n",
    "\n",
    "print(\"\\n✅ Overview summary statistics generated and saved\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export data for Streamlit overview page\n",
    "overview_data = {\n",
    "    'price_distribution': df['price'].tolist(),\n",
    "    'market_share': df['airline'].value_counts().to_dict(),\n",
    "    'time_distribution': df['departure_time'].value_counts().to_dict(),\n",
    "    'price_categories': df['price_category'].value_counts().to_dict(),\n",
    "    'airline_colors': AIRLINE_PALETTE\n",
    "}\n",
    "\n",
    "with open('../output/overview_data.json', 'w') as f:\n",
    "    json.dump(overview_data, f, indent=2)\n",
    "\n",
    "# Create correlation matrix for overview\n",
    "numeric_cols = ['price', 'duration', 'efficiency_score', 'is_direct', 'is_premium_time']\n",
    "correlation_matrix = df[numeric_cols].corr()\n",
    "\n",
    "correlation_fig = go.Figure(data=go.Heatmap(\n",
    "    z=correlation_matrix.values,\n",
    "    x=correlation_matrix.columns,\n",
    "    y=correlation_matrix.columns,\n",
    "    colorscale='RdBu_r',\n",
    "    zmid=0,\n",
    "    text=correlation_matrix.round(3).values,\n",
    "    texttemplate='%{text}',\n",
    "    textfont={\"size\": 12},\n",
    "    hovertemplate='<b>%{y}</b> vs <b>%{x}</b><br>Correlation: %{z:.3f}<extra></extra>'\n",
    "))\n",
    "\n",
    "correlation_fig.update_layout(\n",
    "    title='📊 Feature Correlation Matrix',\n",
    "    xaxis_title='Features',\n",
    "    yaxis_title='Features',\n",
    "    width=600,\n",
    "    height=500\n",
    ")\n",
    "\n",
    "correlation_fig.show(config=PLOTLY_CONFIG)\n",
    "\n",
    "print(\"\\n🔗 CORRELATION INSIGHTS:\")\n",
    "print(f\"Price-Duration correlation: {correlation_matrix.loc['price', 'duration']:.3f}\")\n",
    "print(f\"Price-Efficiency correlation: {correlation_matrix.loc['price', 'efficiency_score']:.3f}\")\n",
    "print(f\"Direct flights-Price correlation: {correlation_matrix.loc['is_direct', 'price']:.3f}\")\n",
    "\n",
    "print(\"\\n✅ Overview visualizations notebook completed!\")\n",
    "print(\"📁 Generated files:\")\n",
    "print(\"   • ../output/overview_kpi_metrics.json\")\n",
    "print(\"   • ../output/overview_summary.json\")\n",
    "print(\"   • ../output/overview_data.json\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

In [None]:
# Advanced Visualization Development: Airlines Dashboard
# Complete implementation of sophisticated data visualizations for portfolio

# ============================================================================
# SETUP AND ADVANCED CONFIGURATIONS
# ============================================================================

# Main imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff

# Statistical analysis and machine learning
from scipy import stats
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics import silhouette_score

# Utilities
import json
import warnings
from datetime import datetime, timedelta
import itertools
import os

# Suppress all warnings for the rest of the session to keep notebook output clean.
# NOTE(review): this filter is process-wide, so genuinely useful warnings are hidden too.
warnings.filterwarnings('ignore')

# Advanced visualization configurations
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*';
# older releases only ship the un-prefixed name. Pick whichever is installed so
# this cell does not raise OSError on an older Matplotlib.
_STYLE_CANDIDATES = ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
_available_style = next(
    (name for name in _STYLE_CANDIDATES if name in plt.style.available),
    None,
)
if _available_style is not None:
    plt.style.use(_available_style)
else:
    print("Warning: seaborn darkgrid style not available - using matplotlib default style")
sns.set_palette("husl")
pd.set_option('display.max_columns', None)  # None = never truncate columns when displaying frames
pd.set_option('display.max_rows', 100)

# Professional color palette (semantic role -> hex color) shared by every chart
COLORS = {
    'primary': '#1E3A8A',      # Deep Blue
    'secondary': '#059669',    # Emerald Green
    'accent': '#DC2626',       # Red
    'warning': '#F59E0B',      # Amber
    'info': '#0EA5E9',         # Sky Blue
    'success': '#10B981',      # Green
    'danger': '#EF4444',       # Red
    'dark': '#1F2937',         # Gray 800
    'light': '#F9FAFB',        # Gray 50
    'gradient_start': '#667EEA',
    'gradient_end': '#764BA2'
}

# Airline-specific color palette; 'Others' is the fallback entry for airlines
# that have no dedicated color.
AIRLINE_PALETTE = {
    'SpiceJet': '#FF6B35',      # Vibrant Orange
    'Vistara': '#7209B7',       # Purple
    'AirAsia': '#FF0066',       # Hot Pink
    'GO_FIRST': '#06D6A0',      # Mint Green
    'Indigo': '#003566',        # Navy Blue
    'Air_India': '#B5179E',     # Magenta
    'Others': '#6C757D'         # Gray
}

# Plotly configuration for professional quality: keeps the mode bar, hides the
# Plotly logo, removes the pan/lasso tools and sets the PNG export options.
PLOTLY_CONFIG = {
    'displayModeBar': True,
    'displaylogo': False,
    'modeBarButtonsToRemove': ['pan2d', 'lasso2d'],
    'toImageButtonOptions': {
        'format': 'png',
        'filename': 'custom_image',
        'height': 800,
        'width': 1200,
        'scale': 2
    }
}

print("Advanced configurations loaded successfully!")
print(f"Color palette: {len(COLORS)} colors")
print(f"Airline palette: {len(AIRLINE_PALETTE)} companies")

# ============================================================================
# DATA LOADING AND ADVANCED PREPARATION
# ============================================================================

# Data loading with a fallback: prefer the feature-enriched CSV and drop back to
# the raw export when the processed file has not been generated yet.
try:
    df = pd.read_csv('../data/processed/flights_with_features.csv')
except FileNotFoundError:
    print("Warning: Processed file not found, using raw data...")
    df = pd.read_csv('../data/raw/airlines_flights_data.csv')
else:
    print(f"Main dataset loaded: {df.shape}")

# Statistical results are optional: start from an empty dict when the JSON file
# is missing.
try:
    with open('../data/processed/statistical_analysis_results.json', 'r') as f:
        stats_results = json.load(f)
except FileNotFoundError:
    stats_results = {}
    print("Warning: Statistical results not found - will be calculated")
else:
    print(f"Statistical results loaded: {len(stats_results)} analyses")

# Advanced feature engineering
def create_advanced_features(df, random_state=42):
    """Return a copy of ``df`` enriched with derived columns for visualization.

    Every feature is added only when the columns it depends on are present, so
    partially populated inputs are tolerated. The input frame is not modified.

    Derived columns:
        efficiency_score     -- ``price / duration``. Rows whose duration is 0
                                get NaN instead of +/-inf.
        price_category       -- price terciles: Budget / Mid-Range / Premium.
        duration_category    -- duration terciles: Short / Medium / Long.
        is_direct            -- 1 when ``stops == 'zero'``, otherwise 0.
        is_premium_time      -- 1 when ``departure_time`` is Morning, Afternoon
                                or Evening, otherwise 0.
        attractiveness_score -- 0.4 * inverted min-max efficiency
                                + 0.3 * is_direct
                                + 0.2 * (1 - is_premium_time)
                                + 0.1 * uniform random jitter in [0, 1).

    Args:
        df: pandas DataFrame of flight records.
        random_state: Seed for the jitter term. The fixed default makes repeated
            runs reproducible; pass ``None`` to draw fresh entropy on each call.

    Returns:
        A new DataFrame holding the original columns plus whichever derived
        columns could be computed.

    Note:
        ``pd.qcut`` is called with its default duplicate handling, so it may
        raise ``ValueError`` when the tercile edges are not unique.
    """
    df_enhanced = df.copy()

    # Price efficiency per hour
    if 'duration' in df.columns and 'price' in df.columns:
        # Mask zero durations: price / 0 would be +/-inf, which then becomes the
        # max and flattens the min-max normalization below for every other row.
        nonzero_duration = df['duration'].where(df['duration'] != 0)
        df_enhanced['efficiency_score'] = df['price'] / nonzero_duration

    # Price categorization
    if 'price' in df.columns:
        df_enhanced['price_category'] = pd.qcut(df['price'],
                                               q=3,
                                               labels=['Budget', 'Mid-Range', 'Premium'])

    # Duration categorization
    if 'duration' in df.columns:
        df_enhanced['duration_category'] = pd.qcut(df['duration'],
                                                  q=3,
                                                  labels=['Short', 'Medium', 'Long'])

    # Direct flight indicator
    if 'stops' in df.columns:
        df_enhanced['is_direct'] = (df['stops'] == 'zero').astype(int)

    # Premium time indicator
    if 'departure_time' in df.columns:
        premium_times = ['Morning', 'Afternoon', 'Evening']
        df_enhanced['is_premium_time'] = df['departure_time'].isin(premium_times).astype(int)

    # Attractiveness score
    if all(col in df_enhanced.columns for col in ['efficiency_score', 'is_direct', 'is_premium_time']):
        efficiency = df_enhanced['efficiency_score']
        eff_min = efficiency.min()
        eff_range = efficiency.max() - eff_min

        # Normalize efficiency score (lower is better)
        if eff_range > 0:
            eff_normalized = 1 - (efficiency - eff_min) / eff_range
        else:
            # Constant (or all-missing) efficiency: the min-max formula would be
            # 0/0. Every known value equals the minimum, so it gets the best
            # score (1.0); missing values stay missing.
            eff_normalized = efficiency.where(efficiency.isna(), 1.0)

        # Local generator: reproducible and independent of the global NumPy state.
        rng = np.random.default_rng(random_state)

        df_enhanced['attractiveness_score'] = (
            eff_normalized * 0.4 +  # 40% price efficiency
            df_enhanced['is_direct'] * 0.3 +  # 30% direct flight
            (1 - df_enhanced['is_premium_time']) * 0.2 +  # 20% non-premium time
            rng.random(len(df_enhanced)) * 0.1  # 10% random factor
        )

    return df_enhanced

# Run the feature engineering step and rebind `df` to the enriched copy.
df = create_advanced_features(df)

# Derived columns the rest of the notebook expects; any of them may be absent
# when the source data lacks the columns it is computed from.
new_features = [
    'efficiency_score',
    'price_category',
    'duration_category',
    'is_direct',
    'is_premium_time',
    'attractiveness_score',
]

print("\nFEATURES CREATED:")
for feature in new_features:
    if feature not in df.columns:
        print(f"   ✗ {feature}: Not created")
        continue
    print(f"   ✓ {feature}: {df[feature].dtype}")

# Headline numbers for a quick sanity check of the prepared dataset.
print(f"\nFINAL DATASET SUMMARY:")
print(f"   • Rows: {len(df):,}")
print(f"   • Columns: {len(df.columns)}")
print(f"   • Airlines: {df['airline'].nunique()}")
print(f"   • Average Price: ₹{df['price'].mean():,.0f}")
print(f"   • Average Duration: {df['duration'].mean():.1f}h")

# ============================================================================
# 1. EXECUTIVE KPI DASHBOARD
# ============================================================================

def create_executive_kpi_metrics():
    """Assemble the headline KPI cards for the executive dashboard.

    Reads the module-level ``df`` and ``COLORS``.

    Returns:
        dict mapping a metric id to a card dict with the keys ``value``,
        ``label``, ``icon``, ``change`` and ``color``, in this order:
        total_flights, avg_price, market_leaders, efficiency_leader,
        direct_flights, peak_demand.

    Note:
        The ``change`` text of ``total_flights`` ('+5.2%') and
        ``direct_flights`` ('+2.1%') is a hard-coded literal, not derived
        from the data.
    """

    def build_card(value, label, icon, change, color_key):
        # Uniform card layout; key order matches the serialized JSON.
        return {
            'value': value,
            'label': label,
            'icon': icon,
            'change': change,
            'color': COLORS[color_key],
        }

    metrics = {}

    metrics['total_flights'] = build_card(
        len(df), 'Total Flights', '✈️', '+5.2%', 'primary')

    # "Change" for the average price is the mean's offset from the median, in percent.
    price_mean = df['price'].mean()
    price_median = df['price'].median()
    mean_vs_median_pct = (price_mean - price_median) / price_median * 100
    metrics['avg_price'] = build_card(
        f"₹{price_mean:,.0f}", 'Average Price', '💰',
        f"{mean_vs_median_pct:+.1f}%", 'success')

    metrics['market_leaders'] = build_card(
        df['airline'].nunique(), 'Active Airlines', '🏢', 'Stable', 'info')

    # Lowest mean efficiency_score wins; compute the per-airline means once.
    efficiency_by_airline = df.groupby('airline')['efficiency_score'].mean()
    metrics['efficiency_leader'] = build_card(
        efficiency_by_airline.idxmin(), 'Most Efficient', '⚡',
        f"₹{efficiency_by_airline.min():.0f}/hr", 'warning')

    metrics['direct_flights'] = build_card(
        f"{(df['is_direct'].mean() * 100):.1f}%", 'Direct Flights', '🎯',
        '+2.1%', 'secondary')

    # mode() may return several values on ties; the first one is used.
    busiest_slot = df['departure_time'].mode()[0]
    busiest_slot_flights = df[df['departure_time'] == busiest_slot].shape[0]
    metrics['peak_demand'] = build_card(
        busiest_slot, 'Peak Time Slot', '⏰',
        f"{busiest_slot_flights} flights", 'accent')

    return metrics

# Build the KPI cards, echo them to the notebook output, then persist them as JSON.
kpi_metrics = create_executive_kpi_metrics()

print("EXECUTIVE KPI DASHBOARD", "=" * 50, sep="\n")
for key, metric in kpi_metrics.items():
    print(f"{metric['icon']} {metric['label']}: {metric['value']} ({metric['change']})")

# Save metrics for Streamlit (create the output directory on first run).
os.makedirs('../output', exist_ok=True)
with open('../output/streamlit_kpi_metrics.json', mode='w') as f:
    json.dump(kpi_metrics, f, indent=2)

print("\n✅ KPI metrics saved for Streamlit integration")

# ============================================================================
# 2. ADVANCED PRICE ANALYSIS
# ============================================================================

def create_advanced_price_analysis():
    """Build the 2x2 "Advanced Price Analysis" figure.

    Panels:
        (1, 1) price histogram with a KDE curve on a secondary y-axis and
               dashed mean / median / quartile markers;
        (1, 2) number of flights per price category;
        (2, 1) per-airline price box plots (outliers only) for the six
               airlines with the most flights;
        (2, 2) price vs. attractiveness scatter, one trace per price category.

    Reads the module-level ``df``, ``COLORS`` and ``AIRLINE_PALETTE``.

    Returns:
        tuple: ``(fig, cat_performance)`` where ``cat_performance`` is a
        DataFrame with the columns ``price_category``, ``flight_count``,
        ``avg_price``, ``attractiveness`` and ``direct_rate``.
    """
    n_bins = 35  # shared by the histogram and the density-curve scaling
    prices = df['price']

    # Create professional subplot layout
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[
            '💰 Price Distribution with Statistical Overlays',
            '📊 Price Categories Performance',
            '🎯 Outlier Detection & Analysis',
            '📈 Price Trends by Market Segment'
        ],
        specs=[
            [{"secondary_y": True}, {"type": "bar"}],
            [{"type": "box"}, {"type": "scatter"}]
        ],
        vertical_spacing=0.12,
        horizontal_spacing=0.1
    )

    # 1. Main distribution with density
    fig.add_trace(
        go.Histogram(
            x=prices,
            nbinsx=n_bins,
            name='Price Frequency',
            opacity=0.7,
            marker=dict(
                color=COLORS['primary'],
                line=dict(color='white', width=1)
            )
        ),
        row=1, col=1
    )

    # Density curve. The unit-area KDE is multiplied by (n * nominal bin width)
    # so its magnitude is comparable to histogram counts.
    from scipy.stats import gaussian_kde
    density = gaussian_kde(prices)
    x_range = np.linspace(prices.min(), prices.max(), 100)
    density_values = density(x_range) * len(df) * (prices.max() - prices.min()) / n_bins

    # Axis placement comes from secondary_y=True; no explicit yaxis is needed.
    fig.add_trace(
        go.Scatter(
            x=x_range,
            y=density_values,
            mode='lines',
            name='Density Curve',
            line=dict(color=COLORS['danger'], width=3)
        ),
        row=1, col=1, secondary_y=True
    )

    # Statistical lines
    stats_lines = {
        'Mean': (prices.mean(), COLORS['warning']),
        'Median': (prices.median(), COLORS['success']),
        'Q75': (prices.quantile(0.75), COLORS['info']),
        'Q25': (prices.quantile(0.25), COLORS['secondary'])
    }

    for stat_name, (value, color) in stats_lines.items():
        fig.add_vline(
            x=value,
            line_dash="dash",
            line_color=color,
            annotation_text=f"{stat_name}: ₹{value:,.0f}",
            annotation_position="top",
            row=1, col=1
        )

    # 2. Performance by category
    cat_performance = df.groupby('price_category').agg({
        'price': ['count', 'mean'],
        'attractiveness_score': 'mean',
        'is_direct': 'mean'
    }).round(2)

    cat_performance.columns = ['flight_count', 'avg_price', 'attractiveness', 'direct_rate']
    cat_performance = cat_performance.reset_index()

    # One colour per category, keyed by name, so the bar and scatter panels
    # agree. The palette is cycled so more than three categories cannot raise.
    colors_cat = [COLORS['success'], COLORS['warning'], COLORS['danger']]
    category_colors = {
        category: colors_cat[position % len(colors_cat)]
        for position, category in enumerate(cat_performance['price_category'])
    }

    fig.add_trace(
        go.Bar(
            x=cat_performance['price_category'],
            y=cat_performance['flight_count'],
            name='Flight Volume',
            marker_color=[category_colors[c] for c in cat_performance['price_category']],
            # Plotly renders <br>, not "\n", as a line break in trace text
            text=[f"{count}<br>flights" for count in cat_performance['flight_count']],
            textposition='auto'
        ),
        row=1, col=2
    )

    # 3. Outlier analysis for the six airlines with the most flights
    top_airlines = df['airline'].value_counts().index[:6]
    for airline in top_airlines:
        airline_prices = df.loc[df['airline'] == airline, 'price']

        fig.add_trace(
            go.Box(
                y=airline_prices,
                name=airline,
                marker_color=AIRLINE_PALETTE.get(airline, AIRLINE_PALETTE['Others']),
                boxpoints='outliers',
                jitter=0.3,
                pointpos=-1.8
            ),
            row=2, col=1
        )

    # 4. Price vs Attractiveness by segment (same category order as the bars)
    for category, color in category_colors.items():
        cat_data = df[df['price_category'] == category]

        fig.add_trace(
            go.Scatter(
                x=cat_data['attractiveness_score'],
                y=cat_data['price'],
                mode='markers',
                name=f'{category} Segment',
                marker=dict(
                    size=8,
                    color=color,
                    opacity=0.6,
                    line=dict(width=1, color='white')
                ),
                hovertemplate='<b>%{text}</b><br>' +
                             'Price: ₹%{y:,.0f}<br>' +
                             'Attractiveness: %{x:.3f}<br>' +
                             '<extra></extra>',
                text=cat_data['airline']
            ),
            row=2, col=2
        )

    # Refined layout
    fig.update_layout(
        height=800,
        title={
            'text': '💎 Advanced Price Analysis Dashboard',
            'x': 0.5,
            'font': {'size': 24, 'color': COLORS['dark']}
        },
        template='plotly_white',
        font=dict(size=12),
        showlegend=True
    )

    # Update axes
    fig.update_xaxes(title_text="Price (₹)", row=1, col=1)
    fig.update_yaxes(title_text="Frequency", row=1, col=1)
    fig.update_yaxes(title_text="Density", secondary_y=True, row=1, col=1)

    fig.update_xaxes(title_text="Price Category", row=1, col=2)
    fig.update_yaxes(title_text="Number of Flights", row=1, col=2)

    fig.update_yaxes(title_text="Price (₹)", row=2, col=1)

    fig.update_xaxes(title_text="Attractiveness Score", row=2, col=2)
    fig.update_yaxes(title_text="Price (₹)", row=2, col=2)

    return fig, cat_performance

# Build the price analysis figure and echo the per-category summary table
advanced_price_fig, price_performance = create_advanced_price_analysis()
# advanced_price_fig.show(config=PLOTLY_CONFIG)

print(
    "\n📊 PERFORMANCE BY CATEGORY:",
    price_performance.to_string(index=False),
    "\n✅ Advanced price analysis created",
    sep="\n",
)

# ============================================================================
# 3. COMPREHENSIVE AIRLINE PERFORMANCE DASHBOARD
# ============================================================================

def create_comprehensive_airline_dashboard():
    """Build the 3x2 airline performance dashboard and its metrics table.

    Panels: price mean vs. volatility bubbles, market-share pie, mean
    efficiency bars, direct-flight rate bars, attractiveness vs. market share
    bubbles, and a radar chart for the three airlines with the most flights.

    Reads the module-level ``df``, ``COLORS`` and ``AIRLINE_PALETTE``.

    Returns:
        tuple: ``(fig, airline_metrics)`` where ``airline_metrics`` has one row
        per airline with the aggregated price / duration / efficiency /
        attractiveness / direct-flight columns plus ``market_share``,
        ``efficiency_rank`` and ``attractiveness_rank``.
    """
    max_bubble_px = 60  # diameter of the largest bubble in the positioning panel

    def _inverse_minmax_score(value, column):
        """Scale ``value`` to 0-100 within ``column`` so the minimum scores 100."""
        low, high = column.min(), column.max()
        spread = high - low
        if not spread > 0:
            # Every airline is tied (or only one exists): avoid 0/0 -> NaN
            return 100.0
        return 100 - (value - low) / spread * 100

    def _palette_for(airlines):
        """Return the palette colour for each airline, defaulting to 'Others'."""
        return [AIRLINE_PALETTE.get(name, AIRLINE_PALETTE['Others']) for name in airlines]

    # Prepare detailed metrics by airline
    airline_metrics = df.groupby('airline').agg({
        'price': ['mean', 'std', 'min', 'max', 'count'],
        'duration': ['mean', 'std'],
        'efficiency_score': ['mean', 'std'],
        'attractiveness_score': 'mean',
        'is_direct': ['sum', 'mean'],
        'is_premium_time': 'mean'
    }).round(2)

    airline_metrics.columns = [
        'price_mean', 'price_std', 'price_min', 'price_max', 'flight_count',
        'duration_mean', 'duration_std', 'efficiency_mean', 'efficiency_std',
        'attractiveness_mean', 'direct_count', 'direct_rate', 'premium_rate'
    ]
    airline_metrics = airline_metrics.reset_index()

    # Market share
    airline_metrics['market_share'] = (
        airline_metrics['flight_count'] / airline_metrics['flight_count'].sum() * 100
    ).round(1)

    # Performance rankings (lower efficiency score and higher attractiveness rank first)
    airline_metrics['efficiency_rank'] = airline_metrics['efficiency_mean'].rank()
    airline_metrics['attractiveness_rank'] = airline_metrics['attractiveness_mean'].rank(ascending=False)

    airline_colors = _palette_for(airline_metrics['airline'])

    # Create comprehensive dashboard
    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=[
            '💰 Price Performance Matrix',
            '🎯 Market Share & Volume',
            '⚡ Operational Efficiency',
            '📊 Service Quality Index',
            '🔥 Competitive Positioning',
            '📈 Performance Radar'
        ],
        specs=[
            [{"type": "scatter"}, {"type": "pie"}],
            [{"type": "bar"}, {"type": "bar"}],
            [{"type": "scatter"}, {"type": "scatterpolar"}]
        ],
        vertical_spacing=0.08,
        horizontal_spacing=0.1
    )

    # 1. Price Performance Matrix (one trace per airline so each gets a legend entry)
    for record, color in zip(airline_metrics.itertuples(index=False), airline_colors):
        fig.add_trace(
            go.Scatter(
                x=[record.price_mean],
                y=[record.price_std],
                mode='markers+text',
                name=record.airline,
                text=[record.airline],
                textposition='top center',
                marker=dict(
                    size=record.market_share * 2,
                    color=color,
                    opacity=0.7,
                    line=dict(width=2, color='white')
                ),
                hovertemplate=f'<b>{record.airline}</b><br>' +
                             'Avg Price: ₹%{x:,.0f}<br>' +
                             'Price Volatility: ₹%{y:.0f}<br>' +
                             f'Market Share: {record.market_share:.1f}%' +
                             '<extra></extra>'
            ),
            row=1, col=1
        )

    # 2. Market Share Pie Chart
    # Slices are fed raw flight counts so "Flights" in the hover is an actual
    # count and Plotly derives the share percentage itself.
    fig.add_trace(
        go.Pie(
            labels=airline_metrics['airline'],
            values=airline_metrics['flight_count'],
            name='Market Share',
            marker_colors=airline_colors,
            textinfo='label+percent',
            hovertemplate='<b>%{label}</b><br>Share: %{percent}<br>Flights: %{value:,}<extra></extra>'
        ),
        row=1, col=2
    )

    # 3. Operational Efficiency
    fig.add_trace(
        go.Bar(
            x=airline_metrics['airline'],
            y=airline_metrics['efficiency_mean'],
            name='Efficiency Score',
            marker=dict(color=airline_colors, opacity=0.8),
            text=[f'₹{x:.0f}/hr' for x in airline_metrics['efficiency_mean']],
            textposition='outside'
        ),
        row=2, col=1
    )

    # 4. Service Quality (Direct Flights Rate)
    direct_rate_pct = airline_metrics['direct_rate'] * 100
    fig.add_trace(
        go.Bar(
            x=airline_metrics['airline'],
            y=direct_rate_pct,
            name='Direct Flights %',
            marker=dict(
                color=direct_rate_pct,
                colorscale='RdYlGn',
                showscale=False
            ),
            text=[f'{x:.1f}%' for x in direct_rate_pct],
            textposition='outside'
        ),
        row=2, col=2
    )

    # 5. Competitive Positioning
    # Bubble area is proportional to flight count, scaled so the largest bubble
    # is max_bubble_px wide whatever the size of the dataset.
    fig.add_trace(
        go.Scatter(
            x=airline_metrics['attractiveness_mean'],
            y=airline_metrics['market_share'],
            mode='markers+text',
            name='Positioning',
            text=airline_metrics['airline'],
            textposition='top center',
            marker=dict(
                size=airline_metrics['flight_count'],
                sizemode='area',
                sizeref=2.0 * airline_metrics['flight_count'].max() / (max_bubble_px ** 2),
                sizemin=4,
                color=airline_colors,
                opacity=0.7,
                line=dict(width=2, color='white')
            ),
            hovertemplate='<b>%{text}</b><br>' +
                         'Attractiveness: %{x:.3f}<br>' +
                         'Market Share: %{y:.1f}%<br>' +
                         '<extra></extra>'
        ),
        row=3, col=1
    )

    # 6. Performance Radar for top 3 airlines (by flight count)
    categories = ['Price Score', 'Efficiency', 'Direct Flights', 'Market Share', 'Attractiveness']

    for _, airline_row in airline_metrics.nlargest(3, 'flight_count').iterrows():
        airline = airline_row['airline']

        # Normalize scores for radar chart (0-100 scale)
        values = [
            # Lower price is better
            _inverse_minmax_score(airline_row['price_mean'], airline_metrics['price_mean']),
            # Lower efficiency score is better
            _inverse_minmax_score(airline_row['efficiency_mean'], airline_metrics['efficiency_mean']),
            airline_row['direct_rate'] * 100,
            # Doubled for visibility, capped at the top of the radial axis
            min(airline_row['market_share'] * 2, 100),
            # assumes attractiveness_score lies in 0-1 - TODO confirm
            airline_row['attractiveness_mean'] * 100
        ]

        fig.add_trace(
            go.Scatterpolar(
                # Repeat the first point so the outline closes, not just the fill
                r=values + values[:1],
                theta=categories + categories[:1],
                fill='toself',
                name=airline,
                line_color=AIRLINE_PALETTE.get(airline, AIRLINE_PALETTE['Others']),
                opacity=0.6
            ),
            row=3, col=2
        )

    # Layout configuration
    fig.update_layout(
        height=1200,
        title={
            'text': '🏢 Comprehensive Airline Performance Dashboard',
            'x': 0.5,
            'font': {'size': 26, 'color': COLORS['dark']}
        },
        template='plotly_white',
        showlegend=True
    )

    # Update axes
    fig.update_xaxes(title_text="Average Price (₹)", row=1, col=1)
    fig.update_yaxes(title_text="Price Volatility (₹)", row=1, col=1)

    fig.update_xaxes(title_text="Airlines", row=2, col=1, tickangle=45)
    fig.update_yaxes(title_text="Efficiency Score (₹/hour)", row=2, col=1)

    fig.update_xaxes(title_text="Airlines", row=2, col=2, tickangle=45)
    fig.update_yaxes(title_text="Direct Flights (%)", row=2, col=2)

    fig.update_xaxes(title_text="Attractiveness Score", row=3, col=1)
    fig.update_yaxes(title_text="Market Share (%)", row=3, col=1)

    fig.update_polars(radialaxis=dict(visible=True, range=[0, 100]), row=3, col=2)

    return fig, airline_metrics

# Build the airline dashboard; the metrics table is printed and exported below
airline_dashboard_fig, airline_performance = create_comprehensive_airline_dashboard()
# airline_dashboard_fig.show(config=PLOTLY_CONFIG)

print("\n🏢 AIRLINE PERFORMANCE SUMMARY:", "=" * 60, sep="\n")
print(
    airline_performance.loc[
        :,
        ['airline', 'flight_count', 'market_share', 'price_mean',
         'efficiency_mean', 'direct_rate'],
    ].to_string(index=False)
)

# Export the complete metrics table (every column, not just the printed ones)
airline_performance.to_csv('../output/airline_performance_metrics.csv', index=False)
print("\n✅ Airline performance data saved")

# ============================================================================
# 4. TEMPORAL INTELLIGENCE DASHBOARD
# ============================================================================

def create_temporal_intelligence_dashboard():
    """Build the 3x2 time-slot analysis dashboard and its supporting tables.

    Panels: airline x time-slot average price heatmap, flight volume vs.
    average price per slot, mean efficiency per slot, direct-flight rate per
    slot, volume vs. attractiveness with peak slots highlighted, and mean
    attractiveness per slot sorted descending.

    Reads the module-level ``df`` and ``COLORS``.

    Returns:
        tuple: ``(fig, temporal_analysis, peak_analysis)``.
        ``temporal_analysis`` has one row per (departure_time, airline) with
        ``avg_price``, ``flight_count``, ``price_volatility``,
        ``avg_duration``, ``efficiency``, ``direct_flights`` and
        ``attractiveness``. ``peak_analysis`` has one row per departure_time
        with ``volume``, ``attractiveness`` and the boolean ``is_peak``
        (volume above the 75th percentile of slot volumes).
    """
    max_bubble_px = 60  # diameter of the largest bubble in the demand panel

    # Detailed temporal analysis
    temporal_analysis = df.groupby(['departure_time', 'airline']).agg({
        'price': ['mean', 'count', 'std'],
        'duration': 'mean',
        'efficiency_score': 'mean',
        'is_direct': 'sum',
        'attractiveness_score': 'mean'
    }).round(2)

    temporal_analysis.columns = [
        'avg_price', 'flight_count', 'price_volatility',
        'avg_duration', 'efficiency', 'direct_flights', 'attractiveness'
    ]
    temporal_analysis = temporal_analysis.reset_index()

    # Create price heatmap data. Airline/slot pairs without flights stay NaN so
    # they render as gaps instead of a misleading ₹0 that skews the colour scale.
    price_heatmap_data = df.pivot_table(
        values='price',
        index='airline',
        columns='departure_time',
        aggfunc='mean'
    )

    # Logical time ordering. Slots missing from the list are appended rather
    # than dropped, so no data silently disappears from the heatmap.
    time_order = ['Early_Morning', 'Morning', 'Afternoon', 'Evening', 'Night']
    known_slots = [slot for slot in time_order if slot in price_heatmap_data.columns]
    extra_slots = [slot for slot in price_heatmap_data.columns if slot not in time_order]
    price_heatmap_data = price_heatmap_data[known_slots + extra_slots]

    # Cell labels: rounded rupee amount, blank where there is no data
    heatmap_text = [
        ['' if pd.isna(price) else f'₹{int(round(price))}' for price in row]
        for row in price_heatmap_data.to_numpy()
    ]

    # Create advanced temporal dashboard
    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=[
            '🔥 Price Heatmap: Airlines vs Time Slots',
            '📈 Temporal Demand Patterns',
            '⚡ Efficiency Trends Throughout Day',
            '🎯 Direct Flights Availability',
            '📊 Peak Hours Analysis',
            '🌟 Time-based Recommendations Score'
        ],
        specs=[
            [{"type": "heatmap"}, {"type": "scatter"}],
            [{"type": "bar"}, {"type": "bar"}],
            [{"type": "scatter"}, {"type": "bar"}]
        ],
        vertical_spacing=0.08,
        horizontal_spacing=0.1
    )

    # 1. Price heatmap
    fig.add_trace(
        go.Heatmap(
            z=price_heatmap_data.values,
            x=price_heatmap_data.columns,
            y=price_heatmap_data.index,
            colorscale='RdYlBu_r',
            text=heatmap_text,
            texttemplate='%{text}',
            textfont={"size": 10},
            hoverongaps=False,  # no hover box on cells without flights
            hovertemplate='<b>%{y}</b><br>Time: %{x}<br>Avg Price: ₹%{z:,.0f}<extra></extra>',
            colorbar=dict(title="Average Price (₹)", x=0.48)
        ),
        row=1, col=1
    )

    # 2. Temporal demand patterns
    time_demand = df.groupby('departure_time').agg({
        'price': ['count', 'mean']
    }).round(2)
    time_demand.columns = ['flight_count', 'avg_price']
    time_demand = time_demand.reset_index()

    # Bubble area is proportional to flight count, scaled so the largest bubble
    # is max_bubble_px wide whatever the size of the dataset.
    fig.add_trace(
        go.Scatter(
            x=time_demand['flight_count'],
            y=time_demand['avg_price'],
            mode='markers+lines+text',
            text=time_demand['departure_time'],
            textposition='top center',
            marker=dict(
                size=time_demand['flight_count'],
                sizemode='area',
                sizeref=2.0 * time_demand['flight_count'].max() / (max_bubble_px ** 2),
                sizemin=4,
                color=time_demand['avg_price'],
                colorscale='Viridis',
                showscale=False,
                line=dict(width=2, color='white')
            ),
            line=dict(color=COLORS['primary'], width=2),
            name='Demand Pattern'
        ),
        row=1, col=2
    )

    # 3. Efficiency trends
    time_efficiency = df.groupby('departure_time')['efficiency_score'].mean().reset_index()

    fig.add_trace(
        go.Bar(
            x=time_efficiency['departure_time'],
            y=time_efficiency['efficiency_score'],
            name='Efficiency Score',
            marker=dict(
                color=time_efficiency['efficiency_score'],
                colorscale='RdYlGn_r',
                showscale=False
            ),
            text=[f'₹{x:.0f}/hr' for x in time_efficiency['efficiency_score']],
            textposition='outside'
        ),
        row=2, col=1
    )

    # 4. Direct flights availability
    direct_availability = df.groupby('departure_time').agg({
        'is_direct': ['sum', 'count']
    })
    direct_availability.columns = ['direct_count', 'total_count']
    direct_availability['direct_rate'] = (
        direct_availability['direct_count'] / direct_availability['total_count'] * 100
    ).round(1)
    direct_availability = direct_availability.reset_index()

    fig.add_trace(
        go.Bar(
            x=direct_availability['departure_time'],
            y=direct_availability['direct_rate'],
            name='Direct Flights %',
            marker_color=COLORS['success'],
            text=[f'{x:.1f}%' for x in direct_availability['direct_rate']],
            textposition='outside'
        ),
        row=2, col=2
    )

    # 5. Peak hours analysis
    peak_analysis = df.groupby('departure_time').agg({
        'price': 'count',
        'attractiveness_score': 'mean'
    }).reset_index()
    peak_analysis.columns = ['departure_time', 'volume', 'attractiveness']

    # Identify peaks: slots whose volume exceeds the 75th percentile
    peak_threshold = peak_analysis['volume'].quantile(0.75)
    peak_analysis['is_peak'] = peak_analysis['volume'] > peak_threshold

    colors_peak = [COLORS['danger'] if is_peak else COLORS['info']
                   for is_peak in peak_analysis['is_peak']]

    fig.add_trace(
        go.Scatter(
            x=peak_analysis['volume'],
            y=peak_analysis['attractiveness'],
            mode='markers+text',
            text=peak_analysis['departure_time'],
            textposition='top center',
            marker=dict(
                size=15,
                color=colors_peak,
                opacity=0.8,
                line=dict(width=2, color='white')
            ),
            name='Peak Analysis'
        ),
        row=3, col=1
    )

    # 6. Time-based recommendation score
    recommendation_score = df.groupby('departure_time')['attractiveness_score'].mean().reset_index()
    recommendation_score = recommendation_score.sort_values('attractiveness_score', ascending=False)

    fig.add_trace(
        go.Bar(
            x=recommendation_score['departure_time'],
            y=recommendation_score['attractiveness_score'],
            name='Recommendation Score',
            marker=dict(
                color=recommendation_score['attractiveness_score'],
                colorscale='RdYlGn',
                showscale=True,
                colorbar=dict(title="Score", x=1.02)
            ),
            text=[f'{x:.3f}' for x in recommendation_score['attractiveness_score']],
            textposition='outside'
        ),
        row=3, col=2
    )

    # Layout configuration
    fig.update_layout(
        height=1200,
        title={
            'text': 'Temporal Intelligence Dashboard - Advanced Time Analytics',
            'x': 0.5,
            'font': {'size': 26, 'color': COLORS['dark']}
        },
        template='plotly_white',
        showlegend=False
    )

    # Update axes
    fig.update_yaxes(title_text="Airlines", row=1, col=1)
    fig.update_xaxes(title_text="Time Slots", row=1, col=1)

    fig.update_xaxes(title_text="Flight Volume", row=1, col=2)
    fig.update_yaxes(title_text="Average Price (₹)", row=1, col=2)

    fig.update_xaxes(title_text="Time Slots", row=2, col=1, tickangle=45)
    fig.update_yaxes(title_text="Efficiency (₹/hour)", row=2, col=1)

    fig.update_xaxes(title_text="Time Slots", row=2, col=2, tickangle=45)
    fig.update_yaxes(title_text="Direct Flights (%)", row=2, col=2)

    fig.update_xaxes(title_text="Flight Volume", row=3, col=1)
    fig.update_yaxes(title_text="Attractiveness Score", row=3, col=1)

    fig.update_xaxes(title_text="Time Slots", row=3, col=2, tickangle=45)
    fig.update_yaxes(title_text="Recommendation Score", row=3, col=2)

    return fig, temporal_analysis, peak_analysis

# Build the temporal dashboard together with its two supporting tables
temporal_fig, temporal_data, peak_hours = create_temporal_intelligence_dashboard()
# temporal_fig.show(config=PLOTLY_CONFIG)

print("\nTEMPORAL INTELLIGENCE ANALYSIS:", "=" * 50, "\nPEAK HOURS IDENTIFIED:", sep="\n")

# List every slot flagged as a peak with its volume and attractiveness
peak_times = peak_hours.loc[peak_hours['is_peak'], 'departure_time'].tolist()
for time_slot in peak_times:
    slot_rows = peak_hours[peak_hours['departure_time'] == time_slot]
    volume = slot_rows['volume'].iloc[0]
    attractiveness = slot_rows['attractiveness'].iloc[0]
    print(f"  • {time_slot}: {volume} flights (Score: {attractiveness:.3f})")

# Export the per-(slot, airline) table as CSV
temporal_data.to_csv('../output/temporal_analysis.csv', index=False)
print("\nTemporal intelligence data saved")

# ============================================================================
# 5. ADVANCED CLUSTERING AND MARKET SEGMENTATION
# ============================================================================

def create_advanced_clustering_analysis():
    """Cluster flights with K-Means and build the 3x2 segmentation dashboard.

    Steps: pick the feature columns present in ``df``, standardise them, fit
    K-Means for k = 2..7, keep the k with the highest silhouette score, label
    each cluster with a segment name derived from its mean price and duration,
    and project the scaled features to 2D with PCA for plotting.

    Reads the module-level ``df``, ``COLORS`` and ``AIRLINE_PALETTE`` and
    expects ``StandardScaler``, ``KMeans``, ``silhouette_score`` and ``PCA`` to
    be in scope.

    Returns:
        tuple: ``(fig, df_clustered, cluster_analysis, cluster_names)`` where
        ``df_clustered`` is a copy of ``df`` with ``cluster`` and
        ``cluster_name`` columns, ``cluster_analysis`` has one row per cluster
        (``avg_price``, ``size``, ``avg_duration``, ``dominant_airline``) and
        ``cluster_names`` maps cluster id to segment name. Several cluster ids
        may share the same segment name.
    """
    # Prepare data for clustering
    clustering_features = ['price', 'duration', 'efficiency_score',
                          'is_direct', 'is_premium_time']

    # Check available features
    available_features = [f for f in clustering_features if f in df.columns]
    print(f"Available features for clustering: {available_features}")

    if len(available_features) < 3:
        print("Creating basic features for clustering...")
        available_features = ['price', 'duration']
        if 'stops' in df.columns:
            # NOTE: this writes the column into the module-level df
            df['is_direct'] = (df['stops'] == 'zero').astype(int)
            available_features.append('is_direct')

    X = df[available_features].copy()

    # Handle missing values
    X = X.fillna(X.mean())

    # Normalize data
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Fit every candidate k, recording inertia (elbow curve) and silhouette.
    # The silhouette score is computed on the full scaled data set each time.
    inertias = []
    silhouette_scores = []
    k_range = range(2, 8)

    for k in k_range:
        kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
        kmeans.fit(X_scaled)
        inertias.append(kmeans.inertia_)
        silhouette_scores.append(silhouette_score(X_scaled, kmeans.labels_))

    # Choose optimal k (highest silhouette score)
    optimal_k = k_range[np.argmax(silhouette_scores)]
    print(f"Optimal number of clusters: {optimal_k} (Silhouette Score: {max(silhouette_scores):.3f})")

    # Apply clustering with optimal k
    kmeans_final = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
    clusters = kmeans_final.fit_predict(X_scaled)

    # Add clusters to dataframe
    df_clustered = df.copy()
    df_clustered['cluster'] = clusters

    # Cluster analysis
    cluster_analysis = df_clustered.groupby('cluster').agg({
        'price': ['mean', 'count'],
        'duration': 'mean',
        'airline': lambda x: x.mode()[0] if len(x.mode()) > 0 else 'Mixed'
    }).round(2)

    cluster_analysis.columns = ['avg_price', 'size', 'avg_duration', 'dominant_airline']
    cluster_analysis = cluster_analysis.reset_index()

    # Name clusters based on characteristics (thresholds computed once)
    price_q25 = df['price'].quantile(0.25)
    price_q75 = df['price'].quantile(0.75)
    duration_median = df['duration'].median()

    cluster_names = {}
    for _, row in cluster_analysis.iterrows():
        is_short = row['avg_duration'] < duration_median

        if row['avg_price'] > price_q75:
            segment = 'Premium Express' if is_short else 'Premium Comfort'
        elif row['avg_price'] < price_q25:
            segment = 'Budget Conscious'
        else:
            segment = 'Value Seeker' if is_short else 'Standard'
        cluster_names[row['cluster']] = segment

    df_clustered['cluster_name'] = df_clustered['cluster'].map(cluster_names)

    # PCA for 2D visualization
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)

    # Segment names can repeat across cluster ids, so every panel iterates the
    # distinct names and looks colours up by name. This gives one trace per
    # segment and identical colours in the pie, 3D and PCA panels.
    cluster_sizes = df_clustered['cluster_name'].value_counts()
    palette = px.colors.qualitative.Set3
    segment_colors = {
        segment: palette[position % len(palette)]
        for position, segment in enumerate(cluster_sizes.index)
    }

    # Create visualizations
    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=[
            'Elbow Method & Silhouette Analysis',
            'Cluster Distribution & Characteristics',
            '3D Cluster Visualization',
            'PCA Cluster Projection',
            'Cluster Performance Matrix',
            'Airlines Distribution by Cluster'
        ],
        specs=[
            [{"secondary_y": True}, {"type": "pie"}],
            [{"type": "scatter3d"}, {"type": "scatter"}],
            [{"type": "heatmap"}, {"type": "bar"}]
        ],
        vertical_spacing=0.08
    )

    # 1. Elbow Method
    fig.add_trace(
        go.Scatter(
            x=list(k_range),
            y=inertias,
            mode='lines+markers',
            name='Inertia',
            line=dict(color=COLORS['primary']),
            marker=dict(size=8)
        ),
        row=1, col=1
    )

    # Axis placement comes from secondary_y=True; no explicit yaxis is needed.
    fig.add_trace(
        go.Scatter(
            x=list(k_range),
            y=silhouette_scores,
            mode='lines+markers',
            name='Silhouette Score',
            line=dict(color=COLORS['success']),
            marker=dict(size=8)
        ),
        row=1, col=1, secondary_y=True
    )

    # 2. Cluster distribution
    fig.add_trace(
        go.Pie(
            labels=cluster_sizes.index,
            values=cluster_sizes.values,
            name='Cluster Distribution',
            marker_colors=[segment_colors[segment] for segment in cluster_sizes.index],
            textinfo='label+percent+value',
            hovertemplate='<b>%{label}</b><br>Flights: %{value}<br>Percentage: %{percent}<extra></extra>'
        ),
        row=1, col=2
    )

    # 3. 3D visualization of clusters (needs three feature axes)
    has_three_features = len(available_features) >= 3
    if has_three_features:
        for segment in cluster_sizes.index:
            cluster_data = df_clustered[df_clustered['cluster_name'] == segment]

            fig.add_trace(
                go.Scatter3d(
                    x=cluster_data[available_features[0]],
                    y=cluster_data[available_features[1]],
                    z=cluster_data[available_features[2]],
                    mode='markers',
                    name=segment,
                    marker=dict(
                        size=4,
                        color=segment_colors[segment],
                        opacity=0.7
                    )
                ),
                row=2, col=1
            )

    # 4. PCA Projection
    for segment in cluster_sizes.index:
        # Positional boolean mask: X_pca rows are in the same order as df_clustered
        cluster_mask = (df_clustered['cluster_name'] == segment).to_numpy()

        fig.add_trace(
            go.Scatter(
                x=X_pca[cluster_mask, 0],
                y=X_pca[cluster_mask, 1],
                mode='markers',
                name=f'{segment} (PCA)',
                marker=dict(
                    size=8,
                    color=segment_colors[segment],
                    opacity=0.7,
                    line=dict(width=1, color='white')
                ),
                showlegend=False
            ),
            row=2, col=2
        )

    # 5. Performance matrix of clusters (segment x airline flight counts)
    performance_matrix = df_clustered.groupby(['cluster_name', 'airline']).size().unstack(fill_value=0)

    fig.add_trace(
        go.Heatmap(
            z=performance_matrix.values,
            x=performance_matrix.columns,
            y=performance_matrix.index,
            colorscale='Blues',
            showscale=False,
            text=performance_matrix.values,
            texttemplate='%{text}',
            hovertemplate='<b>%{y}</b><br>Airline: %{x}<br>Flights: %{z}<extra></extra>'
        ),
        row=3, col=1
    )

    # 6. Airlines distribution by cluster (same counts as the matrix, as bars)
    for airline in performance_matrix.columns:
        fig.add_trace(
            go.Bar(
                x=performance_matrix.index,
                y=performance_matrix[airline],
                name=airline,
                marker_color=AIRLINE_PALETTE.get(airline, AIRLINE_PALETTE['Others'])
            ),
            row=3, col=2
        )

    # Layout configuration
    fig.update_layout(
        height=1400,
        title={
            'text': 'Advanced Market Segmentation & Clustering Analysis',
            'x': 0.5,
            'font': {'size': 26, 'color': COLORS['dark']}
        },
        template='plotly_white',
        showlegend=True
    )

    # Update axes
    fig.update_xaxes(title_text="Number of Clusters (k)", row=1, col=1)
    fig.update_yaxes(title_text="Inertia", row=1, col=1)
    fig.update_yaxes(title_text="Silhouette Score", secondary_y=True, row=1, col=1)

    if has_three_features:
        fig.update_scenes(
            xaxis_title=available_features[0],
            yaxis_title=available_features[1],
            zaxis_title=available_features[2],
            row=2, col=1
        )

    fig.update_xaxes(title_text=f"PC1 ({pca.explained_variance_ratio_[0]:.1%} variance)", row=2, col=2)
    fig.update_yaxes(title_text=f"PC2 ({pca.explained_variance_ratio_[1]:.1%} variance)", row=2, col=2)

    fig.update_xaxes(title_text="Airlines", row=3, col=1)
    fig.update_yaxes(title_text="Cluster Segments", row=3, col=1)

    fig.update_xaxes(title_text="Cluster Segments", row=3, col=2)
    fig.update_yaxes(title_text="Number of Flights", row=3, col=2)

    return fig, df_clustered, cluster_analysis, cluster_names

# Run the clustering pipeline; the returned tables drive the summary below
clustering_fig, df_with_clusters, cluster_stats, cluster_mapping = create_advanced_clustering_analysis()
# clustering_fig.show(config=PLOTLY_CONFIG)

print("\nCLUSTER ANALYSIS SUMMARY:", "=" * 50, cluster_stats.to_string(index=False), sep="\n")

# One short profile per cluster id: size, mean price, mean duration, top airline
print("\nCLUSTER CHARACTERISTICS:")
for cluster_id, cluster_name in cluster_mapping.items():
    cluster_data = df_with_clusters[df_with_clusters['cluster'] == cluster_id]
    print(
        f"\n{cluster_name} (Cluster {cluster_id}):",
        f"  • Size: {len(cluster_data)} flights ({len(cluster_data)/len(df)*100:.1f}%)",
        f"  • Avg Price: ₹{cluster_data['price'].mean():.0f}",
        f"  • Avg Duration: {cluster_data['duration'].mean():.1f}h",
        f"  • Top Airline: {cluster_data['airline'].mode()[0]}",
        sep="\n",
    )

print("\nAdvanced clustering analysis completed")

# ============================================================================
# 6. INTELLIGENT RECOMMENDATION SYSTEM
# ============================================================================

def _inverse_min_max_score(values, scale=100.0):
    """Rescale a numeric Series to [0, scale] so that the smallest value scores highest.

    Args:
        values: numeric pandas Series.
        scale: score given to the minimum value (the maximum value scores 0).

    Returns:
        A Series aligned with ``values``. When every value is identical (or the
        Series holds no valid numbers) the range is zero, so instead of dividing
        by zero each non-null entry receives the neutral midpoint ``scale / 2``.
    """
    highest = values.max()
    span = highest - values.min()
    if not span > 0:  # also true when span is NaN
        return pd.Series(scale / 2.0, index=values.index).where(values.notna())
    return (highest - values) / span * scale


def create_intelligent_recommendation_system(data=None, random_state=42):
    """Score every flight on several criteria and build the recommendation dashboard.

    Despite the section title, no model is trained here: each flight receives a
    weighted sum of four hand-crafted scores (price, duration, convenience and
    airline reputation), which is then bucketed into tiers and plotted.

    Args:
        data: DataFrame of flights to score. It must provide the columns
            'price', 'duration', 'is_direct', 'is_premium_time', 'airline',
            'efficiency_score' and 'departure_time'. When omitted, the
            module-level ``df_with_clusters`` is used if it exists, otherwise
            the module-level ``df``. The input frame is copied, never mutated.
        random_state: seed for the random component (up to 20 points) of the
            convenience score. The default makes repeated runs reproducible;
            pass ``None`` to draw fresh noise on every call.

    Returns:
        tuple ``(fig, df_rec, recommendations)`` where ``fig`` is the 3x2 Plotly
        dashboard, ``df_rec`` is the scored copy of the data (with the columns
        'price_score', 'duration_score', 'convenience_score',
        'reputation_score', 'recommendation_score' and 'recommendation_tier'),
        and ``recommendations`` maps each user profile name to its ten
        best-scoring flights.
    """
    if data is None:
        data = df_with_clusters if 'df_with_clusters' in globals() else df
    df_rec = data.copy()

    # 1-2. Price and duration scores (0-100, cheaper / shorter = higher score)
    df_rec['price_score'] = _inverse_min_max_score(df_rec['price'])
    df_rec['duration_score'] = _inverse_min_max_score(df_rec['duration'])

    # 3. Convenience score. The random share is drawn from a seeded generator so
    #    rankings, tiers and the exported CSV do not change between runs.
    # NOTE(review): assumes is_direct / is_premium_time are 0/1 flags - confirm.
    rng = np.random.default_rng(random_state)
    df_rec['convenience_score'] = (
        df_rec['is_direct'] * 50 +  # Direct flight worth 50 points
        (1 - df_rec['is_premium_time']) * 30 +  # Non-premium time worth 30 points
        rng.random(len(df_rec)) * 20  # Random factor for variability
    )

    # 4. Airline reputation: up to 50 points from market share plus up to 50
    #    points from the airline's mean efficiency_score (lowest mean scores highest).
    # NOTE(review): this treats a lower efficiency_score as better - confirm.
    airline_reputation = df_rec.groupby('airline').agg({
        'price': 'count',
        'efficiency_score': 'mean'
    })
    airline_reputation['market_share'] = (
        airline_reputation['price'] / airline_reputation['price'].sum()
    )
    airline_reputation['reputation_score'] = (
        airline_reputation['market_share'] * 50 +
        _inverse_min_max_score(airline_reputation['efficiency_score'], scale=50)
    )

    df_rec = df_rec.merge(
        airline_reputation[['reputation_score']].reset_index(),
        on='airline'
    )

    # 5. Composite recommendation score (weights sum to 1, so the score stays in 0-100)
    weights = {
        'price': 0.35,
        'duration': 0.25,
        'convenience': 0.20,
        'reputation': 0.20
    }

    df_rec['recommendation_score'] = (
        df_rec['price_score'] * weights['price'] +
        df_rec['duration_score'] * weights['duration'] +
        df_rec['convenience_score'] * weights['convenience'] +
        df_rec['reputation_score'] * weights['reputation']
    ).round(2)

    # Categorize recommendations. include_lowest keeps a score of exactly 0 in
    # the 'Poor' tier instead of turning it into NaN.
    df_rec['recommendation_tier'] = pd.cut(
        df_rec['recommendation_score'],
        bins=[0, 40, 70, 85, 100],
        labels=['Poor', 'Fair', 'Good', 'Excellent'],
        include_lowest=True
    )

    # Top recommendations by different profiles
    recommendations = {
        'budget_conscious': df_rec.nlargest(10, 'price_score'),
        'time_sensitive': df_rec.nlargest(10, 'duration_score'),
        'convenience_seeker': df_rec.nlargest(10, 'convenience_score'),
        'balanced': df_rec.nlargest(10, 'recommendation_score')
    }

    # Create recommendation dashboard
    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=[
            'Top Recommendations Overview',
            'Recommendation Distribution',
            'Price vs Recommendation Score',
            'Multi-Criteria Analysis',
            'Personalized Recommendations',
            'Recommendation Trends by Airline'
        ],
        specs=[
            [{"type": "bar"}, {"type": "pie"}],
            [{"type": "scatter"}, {"type": "scatterpolar"}],
            [{"type": "bar"}, {"type": "bar"}]
        ],
        vertical_spacing=0.1
    )

    # 1. Top 10 overall recommendations. Several flights can share the same
    #    airline and departure slot; the rank prefix keeps every category label
    #    unique so the bars do not overlap at the same axis position.
    top_10 = recommendations['balanced']
    top_10_labels = [
        f"#{rank} {airline} ({departure})"
        for rank, (airline, departure) in enumerate(
            zip(top_10['airline'], top_10['departure_time']), start=1
        )
    ]

    fig.add_trace(
        go.Bar(
            x=top_10['recommendation_score'],
            y=top_10_labels,
            orientation='h',
            name='Top Recommendations',
            marker=dict(
                color=top_10['recommendation_score'],
                colorscale='RdYlGn',
                showscale=True,
                colorbar=dict(title="Score", x=0.48)
            ),
            text=[f"{score:.1f}" for score in top_10['recommendation_score']],
            textposition='inside'
        ),
        row=1, col=1
    )

    # 2. Distribution by tier
    tier_distribution = df_rec['recommendation_tier'].value_counts()
    tier_colors = {'Excellent': COLORS['success'], 'Good': COLORS['info'],
                   'Fair': COLORS['warning'], 'Poor': COLORS['danger']}

    fig.add_trace(
        go.Pie(
            labels=tier_distribution.index,
            values=tier_distribution.values,
            name='Recommendation Tiers',
            marker_colors=[tier_colors.get(tier, COLORS['dark']) for tier in tier_distribution.index],
            textinfo='label+percent+value'
        ),
        row=1, col=2
    )

    # 3. Price vs recommendation score, one trace per tier actually present
    for tier in df_rec['recommendation_tier'].dropna().unique():
        tier_data = df_rec[df_rec['recommendation_tier'] == tier]

        fig.add_trace(
            go.Scatter(
                x=tier_data['price'],
                y=tier_data['recommendation_score'],
                mode='markers',
                name=tier,
                marker=dict(
                    size=8,
                    color=tier_colors.get(tier, COLORS['dark']),
                    opacity=0.7,
                    line=dict(width=1, color='white')
                ),
                hovertemplate='<b>%{text}</b><br>' +
                             'Price: ₹%{x:,.0f}<br>' +
                             'Score: %{y:.1f}<br>' +
                             'Tier: ' + tier + '<br>' +
                             '<extra></extra>',
                text=tier_data['airline']
            ),
            row=2, col=1
        )

    # 4. Multi-criteria radar chart for the three airlines with the best mean score
    top_airlines_rec = df_rec.groupby('airline')['recommendation_score'].mean().nlargest(3)

    criteria = ['Price Score', 'Duration Score', 'Convenience Score', 'Reputation Score']
    criteria_columns = ['price_score', 'duration_score', 'convenience_score', 'reputation_score']

    for airline in top_airlines_rec.index:
        airline_data = df_rec[df_rec['airline'] == airline]
        values = [airline_data[column].mean() for column in criteria_columns]

        fig.add_trace(
            go.Scatterpolar(
                r=values,
                theta=criteria,
                fill='toself',
                name=airline,
                line_color=AIRLINE_PALETTE.get(airline, AIRLINE_PALETTE['Others']),
                opacity=0.6
            ),
            row=2, col=2
        )

    # 5. Average of the score each user profile ranks by
    profile_metric = {
        'budget_conscious': 'price_score',
        'time_sensitive': 'duration_score',
        'convenience_seeker': 'convenience_score',
        'balanced': 'recommendation_score'
    }
    profiles = list(profile_metric)
    profile_scores = [df_rec[column].mean() for column in profile_metric.values()]

    fig.add_trace(
        go.Bar(
            x=profiles,
            y=profile_scores,
            name='Profile Scores',
            marker=dict(
                color=profile_scores,
                colorscale='Viridis',
                showscale=False
            ),
            text=[f'{score:.1f}' for score in profile_scores],
            textposition='outside'
        ),
        row=3, col=1
    )

    # 6. Recommendation trends by airline
    airline_rec_trends = df_rec.groupby('airline').agg({
        'recommendation_score': 'mean',
        'price_score': 'mean',
        'duration_score': 'mean',
        'convenience_score': 'mean'
    }).round(1)

    fig.add_trace(
        go.Bar(
            x=airline_rec_trends.index,
            y=airline_rec_trends['recommendation_score'],
            name='Overall Score',
            marker_color=[AIRLINE_PALETTE.get(airline, AIRLINE_PALETTE['Others'])
                         for airline in airline_rec_trends.index],
            text=airline_rec_trends['recommendation_score'].values,
            textposition='outside'
        ),
        row=3, col=2
    )

    # Layout configuration
    fig.update_layout(
        height=1400,
        title={
            'text': 'Intelligent Flight Recommendation System',
            'x': 0.5,
            'font': {'size': 28, 'color': COLORS['dark']}
        },
        template='plotly_white',
        showlegend=True
    )

    # Update axes
    fig.update_xaxes(title_text="Recommendation Score", row=1, col=1)
    fig.update_yaxes(title_text="Flight Options", row=1, col=1)

    fig.update_xaxes(title_text="Price (₹)", row=2, col=1)
    fig.update_yaxes(title_text="Recommendation Score", row=2, col=1)

    fig.update_polars(radialaxis=dict(visible=True, range=[0, 100]), row=2, col=2)

    fig.update_xaxes(title_text="User Profiles", row=3, col=1, tickangle=45)
    fig.update_yaxes(title_text="Average Score", row=3, col=1)

    fig.update_xaxes(title_text="Airlines", row=3, col=2, tickangle=45)
    fig.update_yaxes(title_text="Recommendation Score", row=3, col=2)

    return fig, df_rec, recommendations

# Build the recommendation dashboard and keep its three results: the figure, the
# scored flight frame and the per-profile top-10 tables.
(
    recommendation_fig,
    df_recommendations,
    user_recommendations,
) = create_intelligent_recommendation_system()
# recommendation_fig.show(config=PLOTLY_CONFIG)

print("\nINTELLIGENT RECOMMENDATION SYSTEM", "=" * 60, sep="\n")

# Five best flights by composite score, two lines of text per flight
print("\nTOP 5 OVERALL RECOMMENDATIONS:")
top_overall = df_recommendations.nlargest(5, 'recommendation_score')
for i, (_, flight) in enumerate(top_overall.iterrows(), start=1):
    print(
        f"{i}. {flight['airline']} - {flight['departure_time']}",
        f"   Score: {flight['recommendation_score']:.1f} | Price: ₹{flight['price']:,.0f} | Duration: {flight['duration']:.1f}h",
        sep="\n",
    )

# First three entries of every profile-specific ranking
print("\nPERSONALIZED RECOMMENDATIONS:")
for profile in user_recommendations:
    recs = user_recommendations[profile]
    print(f"\n{profile.replace('_', ' ').title()} (Top 3):")
    for i, (_, flight) in enumerate(recs.iloc[:3].iterrows(), start=1):
        print(f"  {i}. {flight['airline']} - ₹{flight['price']:,.0f} ({flight['duration']:.1f}h)")

# Persist the columns the Streamlit app reads; the row index is not written
df_recommendations.loc[
    :,
    ['airline', 'departure_time', 'price', 'duration',
     'recommendation_score', 'recommendation_tier'],
].to_csv('../output/flight_recommendations.csv', index=False)

print("\nIntelligent recommendation system created and data saved")

# ============================================================================
# 7. FINAL DATA EXPORT AND SUMMARY
# ============================================================================

# Export all visualizations data for Streamlit
export_data = {
    'kpi_metrics': kpi_metrics,
    'price_performance': price_performance.to_dict('records'),
    'airline_performance': airline_performance.to_dict('records'),
    'temporal_analysis': temporal_data.to_dict('records'),
    'cluster_analysis': cluster_stats.to_dict('records'),
    'cluster_mapping': cluster_mapping,
    'recommendation_summary': {
        'total_flights': len(df_recommendations),
        'avg_score': df_recommendations['recommendation_score'].mean(),
        'tiers