<a href="https://colab.research.google.com/github/nhahub/NHA-115/blob/main/Air_Quality_LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install Dependencies

In [None]:
# -*- coding: utf-8 -*-
"""Egypt Air Quality Dashboard with Enhanced AI"""

!pip install transformers torch accelerate bitsandbytes
!pip install pyodbc sqlalchemy
!pip install gradio
!pip install pandas plotly
!pip install pymssql pyodbc sqlalchemy requests
!pip install pytz

# Install ODBC Driver for SQL Server in Colab
!curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
!curl https://packages.microsoft.com/config/ubuntu/20.04/prod.list > /etc/apt/sources.list.d/mssql-release.list
!apt-get update
!ACCEPT_EULA=Y apt-get install -y msodbcsql18
!apt-get install -y unixodbc-dev

# Verify installation
!odbcinst -q -d

print("‚úÖ ODBC Driver 18 for SQL Server installed successfully!")

# Set Egypt Timezone

In [None]:
import os
import time
import pytz
from datetime import datetime

# Set Egypt timezone
# Exporting TZ and calling tzset() switches the process-wide local time
# to Africa/Cairo.
# NOTE(review): time.tzset() exists only on Unix platforms - fine for Colab,
# confirm before running this cell elsewhere.
os.environ['TZ'] = 'Africa/Cairo'
time.tzset()

# Verify timezone
# Print the current timezone-aware Cairo time, including the zone
# abbreviation and UTC offset, as a visual confirmation.
egypt_tz = pytz.timezone('Africa/Cairo')
current_time = datetime.now(egypt_tz)
print(f"üïê Current Egypt Time: {current_time.strftime('%Y-%m-%d %H:%M:%S %Z%z')}")

# Import Libraries

In [None]:
import pyodbc
import pandas as pd
from sqlalchemy import create_engine
import urllib
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import gc
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import gradio as gr
import re
from datetime import datetime, timedelta
import random

# Database Connection

In [None]:
class HybridSynapseConnection:
    """Run a SQL query through the first backend that works.

    Backends are tried in order: pymssql, pyodbc (ODBC Driver 18), and
    finally a locally generated, randomised data set so the rest of the
    notebook keeps working when no database is reachable.
    """

    # Baseline values per region used by the simulated fallback.
    _BASE_READINGS = {
        "Red Sea": {"pm25": 9.0, "pm10": 20.0, "no2": 7.0, "co2": 300, "temp": 28.0, "humidity": 37.0},
        "Delta": {"pm25": 27.0, "pm10": 60.0, "no2": 22.0, "co2": 340, "temp": 25.5, "humidity": 60.0},
        "Greater Cairo": {"pm25": 56.0, "pm10": 110.0, "no2": 63.0, "co2": 510, "temp": 25.0, "humidity": 40.0},
        "Sinai": {"pm25": 11.0, "pm10": 24.0, "no2": 5.0, "co2": 280, "temp": 30.0, "humidity": 31.0},
        "New Valley": {"pm25": 24.0, "pm10": 52.0, "no2": 9.0, "co2": 340, "temp": 33.0, "humidity": 21.0},
        "Upper Egypt": {"pm25": 23.0, "pm10": 49.0, "no2": 13.0, "co2": 315, "temp": 31.0, "humidity": 25.0},
        "North Coast": {"pm25": 8.0, "pm10": 19.0, "no2": 5.0, "co2": 300, "temp": 25.0, "humidity": 69.0},
        "Canal Cities": {"pm25": 17.0, "pm10": 40.0, "no2": 21.0, "co2": 355, "temp": 27.0, "humidity": 51.0},
    }

    def __init__(self, server, database, username, password):
        """Store the connection settings; no connection is opened here."""
        self.server = server
        self.database = database
        self.username = username
        self.password = password
        # Label of the backend that served the latest successful query
        # ("Pymssql", "Pyodbc" or "Simulated"); None until a query succeeds.
        self.connection_method = None

    def execute_query(self, query):
        """Return the query result from the first backend that yields one.

        Args:
            query: SQL text handed unchanged to each backend.

        Returns:
            Whatever the successful backend returned (a DataFrame for the
            built-in backends).

        Raises:
            Exception: if every backend returned None or raised.
        """
        # (label, callable, is_live). The flag replaces the former
        # comparison on a name derived from __name__, which never matched
        # for the fallback (the derived label had a leading space) and so
        # reported simulated data as a real connection.
        backends = (
            ("Pymssql", self._try_pymssql, True),
            ("Pyodbc", self._try_pyodbc, True),
            ("Simulated", self._get_simulated_real_data, False),
        )

        last_error = None
        for label, backend, is_live in backends:
            try:
                result = backend(query)
            except Exception as exc:
                last_error = exc
                continue
            if result is None:
                continue
            self.connection_method = label
            if is_live:
                print(f"‚úÖ Connected using: {label}")
            return result

        raise Exception("All connection methods failed") from last_error

    def _try_pymssql(self, query):
        """Run *query* through pymssql; return a DataFrame, or None on failure."""
        try:
            import pymssql
            conn = pymssql.connect(
                server=self.server,
                user=self.username,
                password=self.password,
                database=self.database
            )
        except Exception as exc:
            print(f"pymssql connection failed: {exc}")
            return None

        try:
            return pd.read_sql(query, conn)
        except Exception as exc:
            print(f"pymssql query failed: {exc}")
            return None
        finally:
            # Always release the connection, including when the read fails.
            conn.close()

    def _try_pyodbc(self, query):
        """Run *query* through pyodbc; return a DataFrame, or None on failure."""
        try:
            import pyodbc
            conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={self.server};DATABASE={self.database};UID={self.username};PWD={self.password}"
            conn = pyodbc.connect(conn_str)
        except Exception as exc:
            print(f"pyodbc connection failed: {exc}")
            return None

        try:
            return pd.read_sql(query, conn)
        except Exception as exc:
            print(f"pyodbc query failed: {exc}")
            return None
        finally:
            # Always release the connection, including when the read fails.
            conn.close()

    def _get_simulated_real_data(self, query):
        """Build a randomised one-row-per-region DataFrame.

        *query* is accepted only so the method matches the backend call
        signature; it is not used. Values differ on every call because
        they are drawn with ``random``.
        """
        # Get current Egypt time
        egypt_tz = pytz.timezone('Africa/Cairo')
        current_time = datetime.now(egypt_tz)

        # Fraction of the day elapsed, in [0, 1).
        time_factor = (current_time.hour / 24.0) + (current_time.minute / 1440.0)
        # Relative spread of the pollutant values: narrowest (7.5%) at midday
        # and widest (15%) at midnight.
        # NOTE(review): the original comment said "Peak around midday", which
        # is the opposite of what this formula does - confirm the intent.
        variation = 0.15 * (0.5 + 0.5 * abs(time_factor - 0.5) / 0.5)

        data = []
        for region, values in self._BASE_READINGS.items():
            data.append({
                'Region': region,
                'Avg_PM2_5': max(1, values["pm25"] * (1 + random.uniform(-variation, variation))),
                'Avg_PM10': max(1, values["pm10"] * (1 + random.uniform(-variation, variation))),
                'Avg_NO2': max(1, values["no2"] * (1 + random.uniform(-variation, variation))),
                'Avg_CO2': max(250, values["co2"] * (1 + random.uniform(-variation/3, variation/3))),
                'Avg_Temperature': values["temp"] * (1 + random.uniform(-0.08, 0.08)),
                'Avg_Humidity': max(10, min(95, values["humidity"] * (1 + random.uniform(-0.15, 0.15)))),
                'Readings_Count': random.randint(45, 180),
                'Period_Start': current_time - timedelta(days=30),
                'Period_End': current_time
            })

        df = pd.DataFrame(data)
        print("üîÑ Using real-time simulated data (will change with each analysis)")
        print(f"üìÖ Last updated (Egypt Time): {current_time.strftime('%Y-%m-%d %H:%M:%S %Z')}")
        return df

# ==================== CONNECTION SETTINGS ====================
# The password is read from the environment so that no secret is stored in the
# notebook. Set SYNAPSE_PASSWORD (and optionally SYNAPSE_SERVER,
# SYNAPSE_DATABASE, SYNAPSE_USERNAME) before running this cell.
# NOTE(review): a password was previously committed in this cell - it should
# be rotated on the server.
import os

synapse = HybridSynapseConnection(
    server=os.environ.get("SYNAPSE_SERVER", "iotsynaps.sql.azuresynapse.net"),
    database=os.environ.get("SYNAPSE_DATABASE", "iotsqlpool"),
    username=os.environ.get("SYNAPSE_USERNAME", "sqladminuser"),
    password=os.environ.get("SYNAPSE_PASSWORD", ""),
)

if not synapse.password:
    # Without a password both live backends fail and execute_query falls back
    # to simulated data, so the notebook still runs.
    print("SYNAPSE_PASSWORD is not set - live connections will fail and simulated data will be used.")

print("‚úÖ Hybrid connection ready - will try multiple methods to get real data!")

# Database Queries

In [None]:
class AirQualityQueries:
    """Read-side helpers over the air-quality table.

    Every method goes through ``db_connection.execute_query`` and works on
    the DataFrame it returns.
    """

    # Regions reported when the data yields no usable 'Region' column.
    FALLBACK_REGIONS = ("Red Sea", "Delta", "Greater Cairo", "Sinai", "New Valley",
                        "Upper Egypt", "North Coast", "Canal Cities")

    # Normalised pollutant key -> summary column holding its average.
    POLLUTANT_COLUMNS = {
        'pm25': 'Avg_PM2_5',
        'pm10': 'Avg_PM10',
        'no2': 'Avg_NO2',
        'co2': 'Avg_CO2',
    }
    DEFAULT_POLLUTANT_COLUMN = 'Avg_PM2_5'

    _SUMMARY_QUERY = "SELECT * FROM dbo.IoT_AirQuality"

    def __init__(self, db_connection):
        """Keep a reference to an object exposing ``execute_query(sql)``."""
        self.db = db_connection

    def _pollutant_column(self, pollutant):
        """Map a pollutant name such as 'pm25' or 'PM2.5' to its column.

        Unknown names fall back to the PM2.5 column, which is what every
        call used before the ``pollutant`` argument was honoured.
        """
        key = str(pollutant).lower().replace('.', '').replace('_', '').replace(' ', '')
        column = self.POLLUTANT_COLUMNS.get(key)
        if column is None:
            print(f"Unknown pollutant {pollutant!r}; using {self.DEFAULT_POLLUTANT_COLUMN}")
            return self.DEFAULT_POLLUTANT_COLUMN
        return column

    def get_air_quality_summary(self, region=None, days=30):
        """Return the summary rows, optionally restricted to one region.

        Args:
            region: region name; ``None`` or "All Regions" keeps every row.
            days: accepted for interface compatibility; the query itself is
                not restricted by it.

        Raises:
            Exception: re-raised after logging when the query fails.
        """
        try:
            result = self.db.execute_query(self._SUMMARY_QUERY)

            if region and region != "All Regions":
                result = result[result['Region'] == region]

            print(f"‚úÖ Retrieved {len(result)} regions from database")
            return result

        except Exception as e:
            print(f"‚ùå Query failed: {e}")
            raise

    def get_regional_comparison(self, days=30):
        """Return the unfiltered table for cross-region comparison.

        ``days`` is accepted for interface compatibility and is not used.
        """
        try:
            return self.db.execute_query(self._SUMMARY_QUERY)
        except Exception as e:
            print(f"‚ùå Comparison query failed: {e}")
            raise

    def get_pollutant_trends(self, region, pollutant='pm25', days=30):
        """Return a synthetic daily trend for one pollutant in one region.

        The series is generated, not queried: each of the last ``days``
        dates gets the region's current average for ``pollutant`` with a
        random +/-20% variation, plus a random readings count.

        Returns:
            DataFrame with columns 'Date', 'Avg_Pollutant', 'Readings'.
        """
        try:
            egypt_tz = pytz.timezone('Africa/Cairo')
            current_time = datetime.now(egypt_tz)
            dates = [(current_time - timedelta(days=x)).date() for x in range(days, 0, -1)]

            # Honour the requested pollutant (previously always PM2.5).
            column = self._pollutant_column(pollutant)

            summary = self.get_air_quality_summary(region, days)
            if not summary.empty:
                base_value = summary.iloc[0][column]
            else:
                # No row for the region: use a neutral baseline.
                base_value = 20.0

            data = [
                {
                    'Date': date,
                    'Avg_Pollutant': max(1, base_value * (1 + random.uniform(-0.2, 0.2))),
                    'Readings': random.randint(5, 25)
                }
                for date in dates
            ]

            return pd.DataFrame(data)

        except Exception as e:
            print(f"‚ùå Trend query failed: {e}")
            raise

    def get_health_recommendations_data(self, region, days=7):
        """Return a one-row frame of the pollutant levels used for health advice.

        'High_Pollution_Days' is 1 when the region's PM2.5 average exceeds
        35, else 0. An empty DataFrame is returned when the region has no
        row.
        """
        try:
            summary = self.get_air_quality_summary(region, days)
            if summary.empty:
                return pd.DataFrame()

            row = summary.iloc[0]
            return pd.DataFrame([{
                'Region': region,
                'Recent_PM2_5': row['Avg_PM2_5'],
                'Recent_PM10': row['Avg_PM10'],
                'Recent_NO2': row['Avg_NO2'],
                'High_Pollution_Days': 1 if row['Avg_PM2_5'] > 35 else 0
            }])
        except Exception as e:
            print(f"‚ùå Health data query failed: {e}")
            raise

    def get_available_regions(self):
        """Return the distinct region names, or the built-in list on failure."""
        try:
            summary = self.get_air_quality_summary()
            if not summary.empty and 'Region' in summary.columns:
                regions = summary['Region'].unique().tolist()
                print(f"‚úÖ Found {len(regions)} regions in data: {regions}")
                return regions

            regions = list(self.FALLBACK_REGIONS)
            print(f"‚ö†Ô∏è Using fallback regions: {regions}")
            return regions
        except Exception as e:
            print(f"‚ùå Error fetching regions: {e}")
            return list(self.FALLBACK_REGIONS)

# Initialize queries
# Module-level query helper bound to the `synapse` connection object created
# earlier in the notebook; the analyzer defined below receives it.
aq_queries = AirQualityQueries(synapse)
print("‚úÖ Database queries ready!")

# Enhanced Mistral Analyzer

In [None]:
class EnhancedMistralAnalyzer:
    """Prompt parsing and text generation on top of Mistral-7B-Instruct.

    The constructor loads the 4-bit quantised model immediately; the
    ``extract_*`` / ``detect_*`` helpers are pure string functions that
    need no model.
    """

    # Keyword -> canonical region. The first keyword found in the prompt
    # wins, so insertion order is significant.
    REGION_KEYWORDS = {
        "red sea": "Red Sea",
        "delta": "Delta",
        "greater cairo": "Greater Cairo",
        "cairo": "Greater Cairo",
        "sinai": "Sinai",
        "new valley": "New Valley",
        "upper egypt": "Upper Egypt",
        "north coast": "North Coast",
        "canal cities": "Canal Cities",
        "canal": "Canal Cities",
        "all regions": "All Regions",
        "all": "All Regions"
    }

    # Query type -> keywords counted (as substrings) to score that type.
    # Order decides ties: the earliest type with the top score wins.
    QUERY_KEYWORDS = {
        'air_quality': (
            'air quality', 'pollution', 'pm2.5', 'pm10', 'no2', 'co2',
            'pollutant', 'aqi', 'air pollution', 'quality of air',
            'health risk', 'pollution level', 'air index'
        ),
        'temperature': (
            'temperature', 'temp', 'hot', 'cold', 'weather', 'climate',
            'degrees', 'celsius', 'warm', 'cool'
        ),
        'region_info': (
            'region', 'area', 'location', 'place', 'city', 'red sea', 'delta',
            'greater cairo', 'sinai', 'new valley', 'upper egypt', 'north coast',
            'canal cities', 'egypt'
        ),
        'data_request': (
            'data', 'statistics', 'numbers', 'values', 'readings', 'measurements',
            'last week', 'recent', 'current', 'today', 'yesterday'
        ),
        'visualization': (
            'graph', 'chart', 'plot', 'visual', 'visualization', 'map',
            'show me', 'display', 'see the data'
        ),
        'health_impact': (
            'health', 'medical', 'impact', 'risk', 'sensitive', 'vulnerable',
            'children', 'elderly', 'asthma', 'copd', 'respiratory', 'patients',
            'recommendations', 'advice', 'precautions'
        ),
    }

    DEFAULT_DAYS = 30
    # Upper bound for a parsed period, guarding date arithmetic downstream
    # against absurd user input.
    MAX_DAYS = 3650
    # A health answer shorter than this many words triggers one more attempt.
    MIN_HEALTH_RESPONSE_WORDS = 200

    # "<number> <unit>" patterns with the number of days one unit is worth.
    _COUNTED_PERIODS = (
        (re.compile(r'(\d+)[\s-]*days?\b'), 1),
        (re.compile(r'(\d+)[\s-]*weeks?\b'), 7),
        (re.compile(r'(\d+)[\s-]*months?\b'), 30),
    )
    # Phrases without a number and the period they stand for.
    _NAMED_PERIODS = (
        (re.compile(r'last\s+week'), 7),
        (re.compile(r'past\s+week'), 7),
        (re.compile(r'last\s+month'), 30),
        (re.compile(r'past\s+month'), 30),
        (re.compile(r'recent'), 14),
        (re.compile(r'current'), 7),
    )

    def __init__(self):
        """Set the model name and load tokenizer, model and pipeline."""
        self.model_name = "mistralai/Mistral-7B-Instruct-v0.1"
        self.tokenizer = None
        self.model = None
        self.pipeline = None
        self.load_model()

    def load_model(self):
        """Load the tokenizer, the 4-bit NF4 quantised model and the pipeline.

        Raises:
            Exception: re-raised after logging when any loading step fails.
        """
        print("üîÑ Loading Mistral 7B model with speed optimizations...")

        try:
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_use_double_quant=True,
            )

            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                trust_remote_code=True
            )

            # Generation needs a pad token; reuse EOS when none is defined.
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                quantization_config=quantization_config,
                torch_dtype=torch.float16,
                device_map="auto",
                trust_remote_code=True,
                low_cpu_mem_usage=True
            )

            # OPTIMIZED PIPELINE FOR SPEED
            self.pipeline = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                torch_dtype=torch.float16,
                device_map="auto",
                max_new_tokens=1024,  # REDUCED from 2048 for speed
                do_sample=True,
                temperature=0.5,      # LOWER for more focused responses
                top_p=0.85,
                repetition_penalty=1.05
            )

            print("‚úÖ Mistral 7B loaded with speed optimizations!")
            print(f"üìä Max tokens: 1024 (balanced speed/quality)")

        except Exception as e:
            print(f"‚ùå Error loading Mistral model: {e}")
            raise

    def extract_region_from_prompt(self, user_prompt):
        """Return the first region whose keyword occurs in the prompt.

        Matching is a case-insensitive substring test in the order of
        ``REGION_KEYWORDS``; "All Regions" is returned when nothing matches.
        """
        prompt_lower = user_prompt.lower()

        for keyword, region in self.REGION_KEYWORDS.items():
            if keyword in prompt_lower:
                return region

        return "All Regions"  # Default to all regions

    def extract_days_from_prompt(self, user_prompt):
        """Return the time period, in days, that the prompt asks for.

        Recognises "<n> day(s)", "<n> week(s)" and "<n> month(s)" (a month
        counts as 30 days), then the phrases "last/past week" (7),
        "last/past month" (30), "recent" (14) and "current" (7). The result
        is clamped to ``[1, MAX_DAYS]``; 30 is returned when nothing matches.
        """
        prompt_lower = user_prompt.lower()

        # Explicit numbers take priority over vague phrases.
        for pattern, days_per_unit in self._COUNTED_PERIODS:
            match = pattern.search(prompt_lower)
            if match:
                requested = int(match.group(1)) * days_per_unit
                return max(1, min(requested, self.MAX_DAYS))

        for pattern, period in self._NAMED_PERIODS:
            if pattern.search(prompt_lower):
                return period

        return self.DEFAULT_DAYS  # Default to 30 days

    def detect_query_type(self, user_prompt):
        """Classify the prompt by counting keyword hits per category.

        Returns:
            One of the ``QUERY_KEYWORDS`` keys, or 'general' when no
            keyword of any category occurs in the prompt.
        """
        prompt_lower = user_prompt.lower()

        # Count matches for each category
        scores = {
            query_type: sum(1 for keyword in keywords if keyword in prompt_lower)
            for query_type, keywords in self.QUERY_KEYWORDS.items()
        }

        primary_type = max(scores, key=scores.get)
        if scores[primary_type] == 0:
            return 'general'

        return primary_type

    def generate_health_impact_response(self, data_context, user_prompt, region, days, max_attempts=2):
        """Generate a health-impact analysis, retrying a bounded number of times.

        Args:
            data_context: text block with the measurements to analyse.
            user_prompt: the user's question.
            region, days: accepted for interface compatibility; not used in
                the prompt.
            max_attempts: how many generations to try when the answer is
                shorter than ``MIN_HEALTH_RESPONSE_WORDS`` words (at least 1).

        Returns:
            The first sufficiently long answer, otherwise the longest short
            one; an error string when generation raises.
        """

        health_prompt = f"""<s>[INST] You are an environmental health expert. Analyze ALL regions in this data. Be CONCISE but comprehensive.

CRITICAL INSTRUCTIONS:
- Analyze ALL 8 regions, group by risk level
- Use CORRECT data formatting (don't mix temperature with humidity)
- Keep responses UNDER 800 words for speed
- Focus on MOST IMPORTANT health impacts
- Provide SPECIFIC recommendations

DATA:
{data_context}

USER REQUEST: {user_prompt}

Provide a COMPLETE analysis with:
1. Summary table with correct risk levels
2. Regional analysis grouped by risk
3. Specific recommendations for vulnerable groups
4. MUST COMPLETE ALL SECTIONS

IMPORTANT: Ensure the response is COMPLETE and doesn't cut off. [/INST]"""

        try:
            print(f"üè• Generating OPTIMIZED health analysis...")

            # A loop with a hard limit replaces the former unbounded
            # recursion, which could re-run the model until the interpreter's
            # recursion limit was hit.
            attempts = max(1, int(max_attempts))
            best_text = ""
            for attempt in range(1, attempts + 1):
                # FASTER GENERATION SETTINGS
                response = self.pipeline(
                    health_prompt,
                    max_new_tokens=1024,  # Reduced for speed
                    temperature=0.5,      # More focused
                    do_sample=True,
                    top_p=0.85,
                    repetition_penalty=1.05,
                    return_full_text=False,
                    pad_token_id=self.tokenizer.eos_token_id
                )

                generated_text = response[0]['generated_text']

                if len(generated_text.strip().split()) >= self.MIN_HEALTH_RESPONSE_WORDS:
                    best_text = generated_text
                    break

                # Too short: remember the longest candidate seen so far.
                if len(generated_text) > len(best_text):
                    best_text = generated_text
                if attempt < attempts:
                    print("‚ö†Ô∏è Response too short, regenerating...")

            print(f"‚úÖ Health analysis complete: {len(best_text)} characters")
            return best_text

        except Exception as e:
            return f"‚ùå Error: {str(e)}"

    def generate_response(self, data_context, user_prompt, query_type, region, days):
        """Generate the answer for a prompt of the given query type.

        'health_impact' queries are delegated to
        ``generate_health_impact_response``; every other type uses one
        short generic prompt. Returns an error string when generation raises.
        """

        if query_type == 'health_impact':
            return self.generate_health_impact_response(data_context, user_prompt, region, days)

        # OPTIMIZED PROMPT FOR OTHER QUERY TYPES
        base_prompt = f"""<s>[INST] Provide a CONCISE analysis of this data. Be specific but efficient.

DATA: {data_context}
QUESTION: {user_prompt}

Keep response under 600 words. Focus on key insights: [/INST]"""

        try:
            response = self.pipeline(
                base_prompt,
                max_new_tokens=800,  # Reduced for speed
                temperature=0.5,
                do_sample=True,
                top_p=0.85,
                return_full_text=False
            )
            return response[0]['generated_text']

        except Exception as e:
            return f"‚ùå Error: {str(e)}"

    def generate_air_quality_summary(self, data_context, user_prompt):
        """Parse the prompt, generate the answer and report the elapsed time.

        The elapsed time is only measured and printed; no timeout is
        enforced.

        Returns:
            Tuple ``(response_text, region, days)`` where region and days
            are derived from ``user_prompt`` alone.
        """
        import time
        start_time = time.time()

        region = self.extract_region_from_prompt(user_prompt)
        days = self.extract_days_from_prompt(user_prompt)
        query_type = self.detect_query_type(user_prompt)

        print(f"üîç Starting analysis: {region}, {days} days, {query_type}")

        response = self.generate_response(data_context, user_prompt, query_type, region, days)

        elapsed = time.time() - start_time
        print(f"‚è±Ô∏è Analysis completed in {elapsed:.1f} seconds")

        return response, region, days

# Initialize optimized analyzer
# Constructing EnhancedMistralAnalyzer loads the model (its __init__ calls
# load_model). If loading raises, mistral_analyzer is set to None instead of
# aborting the notebook, so later cells must cope with a missing analyzer.
try:
    mistral_analyzer = EnhancedMistralAnalyzer()
    print("üéâ OPTIMIZED Mistral analyzer ready! Target: 2-3 minute responses")
except Exception as e:
    print(f"‚ö†Ô∏è Optimized Mistral failed: {e}")
    mistral_analyzer = None

# Enhanced Analyzer

In [None]:
class EnhancedAirQualityAnalyzer:
    """Combine database queries, the language model and Plotly charts."""

    def __init__(self, db_queries, mistral_analyzer=None):
        """Store collaborators and the static per-region descriptions.

        Args:
            db_queries: object providing the ``get_*`` query methods used
                below.
            mistral_analyzer: language-model wrapper, or ``None`` when the
                model could not be loaded.
        """
        self.queries = db_queries
        self.mistral = mistral_analyzer
        self.region_context = {
            "Red Sea": "Coastal region with tourism and shipping activities. Known for clean air but affected by maritime emissions.",
            "Delta": "Agricultural region with high population density. Air quality affected by agricultural burning and urban pollution.",
            "Greater Cairo": "Urban metropolitan area with heavy traffic and industry. Typically has the highest pollution levels in Egypt.",
            "Sinai": "Desert region with dust storms and tourism. Air quality affected by natural dust and limited industrial activity.",
            "New Valley": "Desert oasis with agricultural activities. Generally good air quality with occasional dust storms.",
            "Upper Egypt": "Southern region with mixed urban and rural areas. Moderate pollution levels with seasonal variations.",
            "North Coast": "Mediterranean coastal region. Good air quality with marine influences.",
            "Canal Cities": "Urban areas along Suez Canal with shipping and industry. Moderate pollution from maritime and industrial activities."
        }

    def prepare_comprehensive_context(self, region=None, days=30):
        """Render the summary data as the text block given to the model.

        Args:
            region: a region name for a detailed single-region block;
                ``None`` or "All Regions" lists every region.
            days: period length mentioned in the text.

        Returns:
            The context string, or a fixed "no data" sentence when the
            summary is empty.
        """
        summary_df = self.queries.get_air_quality_summary(region, days)

        if summary_df.empty:
            return "No data available for the specified criteria."

        # Get Egypt time
        egypt_tz = pytz.timezone('Africa/Cairo')
        current_time = datetime.now(egypt_tz)

        context = f"üìä ENVIRONMENTAL DATA ANALYSIS (Egypt Time: {current_time.strftime('%Y-%m-%d %H:%M:%S %Z')})\n\n"

        if region and region != "All Regions":
            context += f"üìç **Analysis for {region}**\n"
            context += f"üìÖ **Period:** Last {days} days\n"
            context += f"üìù **Region Profile:** {self.region_context.get(region, 'General region')}\n\n"

            # Single region detailed data
            row = summary_df.iloc[0]
            context += f"""**Detailed Metrics for {region}:**
‚Ä¢ üå°Ô∏è Temperature: {row['Avg_Temperature']:.1f} ¬∞C
‚Ä¢ üíß Humidity: {row['Avg_Humidity']:.1f} %
‚Ä¢ üå´Ô∏è PM2.5: {row['Avg_PM2_5']:.1f} Œºg/m¬≥
‚Ä¢ üè≠ PM10: {row['Avg_PM10']:.1f} Œºg/m¬≥
‚Ä¢ üöó NO2: {row['Avg_NO2']:.1f} Œºg/m¬≥
‚Ä¢ üåø CO2: {row['Avg_CO2']:.1f} ppm
‚Ä¢ üìà Data Points: {row['Readings_Count']} readings

"""
        else:
            context += f"üåç **Analysis for All Egyptian Regions**\n"
            context += f"üìÖ **Period:** Last {days} days\n\n"

            # All regions summary
            for _, row in summary_df.iterrows():
                context += f"""**{row['Region']}:**
‚Ä¢ PM2.5: {row['Avg_PM2_5']:.1f} Œºg/m¬≥ | PM10: {row['Avg_PM10']:.1f} Œºg/m¬≥
‚Ä¢ Temp: {row['Avg_Temperature']:.1f} ¬∞C | Humidity: {row['Avg_Humidity']:.1f}%
‚Ä¢ NO2: {row['Avg_NO2']:.1f} Œºg/m¬≥ | CO2: {row['Avg_CO2']:.1f} ppm
‚Ä¢ Readings: {row['Readings_Count']}

"""

        # Add interpretation guidelines
        context += """
üìã **INTERPRETATION GUIDELINES (WHO Standards):**
- üå°Ô∏è Temperature: Comfortable range 20-30¬∞C
- üíß Humidity: Comfortable range 30-60%
- üå´Ô∏è PM2.5: Good (<12), Moderate (12-35), Poor (>35) Œºg/m¬≥
- üè≠ PM10: Good (<50), Moderate (50-100), Poor (>100) Œºg/m¬≥
- üöó NO2: Good (<40), Poor (>40) Œºg/m¬≥
- üåø CO2: Typical outdoor levels 300-500 ppm

üîç **Key Insights:**
- Higher temperatures can increase ozone formation
- Low humidity with high PM levels indicates dust storms
- High NO2 typically indicates traffic pollution
- Regional variations reflect local activities and geography
"""

        return context

    def create_visualization(self, region, days=30, show_visualization=True):
        """Build the Plotly figure for the selected region.

        "All Regions" yields a 2x2 grid of bar charts (PM2.5, temperature,
        PM10, NO2); a single region yields a PM2.5 trend line with the
        daily readings count on a secondary axis.

        Returns:
            The figure, or ``None`` when charts are disabled, no data is
            available, or building the figure raises.
        """
        if not show_visualization:
            return None

        try:
            if region == "All Regions":
                comparison_df = self.queries.get_regional_comparison(days)
                if comparison_df.empty:
                    return None

                # Create comprehensive comparison chart
                fig = make_subplots(
                    rows=2, cols=2,
                    subplot_titles=('PM2.5 Levels', 'Temperature', 'PM10 Levels', 'NO2 Levels'),
                    vertical_spacing=0.12
                )

                # (column, trace name, colour, grid row, grid col) - listed in
                # the same order as the subplot titles above.
                panels = (
                    ('Avg_PM2_5', 'PM2.5', 'coral', 1, 1),
                    ('Avg_Temperature', 'Temperature', 'gold', 1, 2),
                    ('Avg_PM10', 'PM10', 'lightcoral', 2, 1),
                    ('Avg_NO2', 'NO2', 'lightseagreen', 2, 2),
                )
                for column, trace_name, color, grid_row, grid_col in panels:
                    fig.add_trace(
                        go.Bar(x=comparison_df['Region'], y=comparison_df[column],
                              name=trace_name, marker_color=color),
                        row=grid_row, col=grid_col
                    )

                fig.update_layout(
                    height=600,
                    title_text=f"Air Quality Metrics Across Egyptian Regions (Last {days} days)",
                    showlegend=False
                )

                return fig

            # Show multiple trends for single region
            trend_data = self.queries.get_pollutant_trends(region, 'pm25', days)
            if trend_data.empty:
                return None

            fig = make_subplots(
                specs=[[{"secondary_y": True}]],
                subplot_titles=(f'Air Quality Trends in {region} (Last {days} days)',)
            )

            fig.add_trace(
                go.Scatter(x=trend_data['Date'], y=trend_data['Avg_Pollutant'],
                          name='PM2.5', line=dict(color='coral', width=3)),
                secondary_y=False,
            )

            fig.add_trace(
                go.Bar(x=trend_data['Date'], y=trend_data['Readings'],
                      name='Daily Readings', opacity=0.3, marker_color='lightblue'),
                secondary_y=True,
            )

            fig.update_xaxes(title_text="Date")
            fig.update_yaxes(title_text="PM2.5 (Œºg/m¬≥)", secondary_y=False)
            fig.update_yaxes(title_text="Number of Readings", secondary_y=True)
            fig.update_layout(height=500)

            return fig
        except Exception as e:
            print(f"Visualization error: {e}")
            return None

    def generate_comprehensive_analysis(self, user_prompt, show_visualization=True):
        """Answer a free-text question and optionally build a chart.

        Region and period are parsed from the prompt first, so the model
        runs exactly once on the matching context. (Previously a full
        generation was run on all data only to obtain those two values,
        and its text was discarded.)

        Returns:
            Dict with keys 'analysis' (text) and 'visualization' (figure
            or ``None``).
        """
        if not self.mistral:
            return {
                'analysis': "‚ùå AI analyzer not available",
                'visualization': None
            }

        region = self.mistral.extract_region_from_prompt(user_prompt)
        days = self.mistral.extract_days_from_prompt(user_prompt)

        specific_context = self.prepare_comprehensive_context(region, days)
        final_analysis, _, _ = self.mistral.generate_air_quality_summary(specific_context, user_prompt)
        visualization = self.create_visualization(region, days, show_visualization)

        return {
            'analysis': final_analysis,
            'visualization': visualization
        }

# Initialize enhanced analyzer
# Module-level analyzer used by the UI callback. mistral_analyzer may be None
# (model loading failed); generate_comprehensive_analysis then returns an
# "AI analyzer not available" message instead of an analysis.
analyzer = EnhancedAirQualityAnalyzer(aq_queries, mistral_analyzer)
print("‚úÖ Enhanced analyzer ready with automatic parameter detection!")

# Build and Launch UI

In [None]:
# Egyptian regions
EGYPT_REGIONS = ["Red Sea", "Delta", "Greater Cairo", "Sinai", "New Valley",
                 "Upper Egypt", "North Coast", "Canal Cities"]

def analyze_air_quality(prompt, show_visualization):
    """UI callback: run the analysis for one prompt.

    Args:
        prompt: the user's free-text question.
        show_visualization: whether a chart should be produced.

    Returns:
        Tuple ``(analysis_text, figure_or_None)``. Errors are reported in
        the text instead of being raised, so the UI always gets a value.
    """
    # An empty prompt would otherwise trigger a full model run on nothing.
    if not prompt or not prompt.strip():
        return "Please enter a question about Egyptian air quality.", None

    try:
        result = analyzer.generate_comprehensive_analysis(
            user_prompt=prompt,
            show_visualization=show_visualization
        )
        return result['analysis'], result['visualization']
    except Exception as e:
        return f"‚ùå Error: {str(e)}", None

# ==================== BUILD THE SIMPLIFIED UI ====================
# Long literals are named up front so the layout code below stays readable.
PROMPT_PLACEHOLDER = "\n".join([
    "Examples:",
    "‚Ä¢ 'Show me air quality in Greater Cairo for the past week'",
    "‚Ä¢ 'Compare pollution levels across all regions'",
    "‚Ä¢ 'What are the health risks in Delta region last month?'",
    "‚Ä¢ 'Display temperature trends in Red Sea for recent days'",
])

# Each row is [prompt text, visualization checkbox state].
EXAMPLE_ROWS = [
    ["Analyze air quality in Greater Cairo for the past 7 days and provide health recommendations for children and elderly", True],
    ["Compare PM2.5 levels across all Egyptian regions for the last month and identify the three most polluted areas", True],
    ["Show me temperature and humidity trends in Red Sea region for the past 30 days with visualization", True],
    ["What are the main pollution sources in Delta region and how do they compare to Canal Cities? Focus on NO2 levels.", False],
    ["Provide detailed health impact analysis for sensitive groups in all regions based on recent air quality data", True]
]

with gr.Blocks(theme=gr.themes.Soft(), title="Egypt AI Air Quality Analyst") as demo:

    # Page heading and sub-heading
    gr.Markdown("# üá™üá¨ Egypt AI Air Quality Analyst")
    gr.Markdown("### ü§ñ Egypt Air Quality Intelligent Analysis")

    # Free-text question box
    with gr.Row():
        with gr.Column(scale=3):
            prompt_input = gr.Textbox(
                lines=4,
                label="üí¨ Ask anything about Egyptian air quality",
                placeholder=PROMPT_PLACEHOLDER,
            )

    # Controls: chart toggle on the left, submit button on the right
    with gr.Row():
        with gr.Column(scale=1):
            visualization_toggle = gr.Checkbox(
                label="üìä Show Visualization Charts",
                value=True
            )

        with gr.Column(scale=1):
            analyze_btn = gr.Button("üöÄ Analyze with AI", variant="primary", size="lg")

    # Text result
    with gr.Row():
        output_text = gr.Textbox(
            label="üìã AI Analysis Results",
            lines=12,
            max_lines=20,
            show_copy_button=True
        )

    # Chart result
    with gr.Row():
        output_plot = gr.Plot(
            label="üìä Air Quality Visualization",
            show_label=True
        )

    # Clickable sample questions that fill both inputs
    ui_inputs = [prompt_input, visualization_toggle]
    examples = gr.Examples(
        examples=EXAMPLE_ROWS,
        inputs=ui_inputs
    )

    # Run the analysis callback when the button is pressed
    analyze_btn.click(
        fn=analyze_air_quality,
        inputs=[prompt_input, visualization_toggle],
        outputs=[output_text, output_plot]
    )

# ==================== LAUNCH THE UI ====================
# Start the Gradio server with a public share link. If that launch raises,
# retry once with default settings (no share link, no debug mode).
print("üöÄ LAUNCHING ENHANCED AI AIR QUALITY ANALYST...")
print("‚è≥ Starting server with Egypt timezone...")

try:
    # NOTE(review): per the Gradio docs debug=True blocks the main thread, so
    # the "Dashboard is running" line below is presumably printed only after
    # the server stops - confirm whether that is intended.
    demo.launch(share=True, debug=True)
    print("‚úÖ Dashboard is running! Check the URL above.")
except Exception as e:
    print(f"‚ùå Launch error: {e}")
    print("üîÑ Trying alternative launch method...")
    # An exception raised by this second attempt is not caught.
    demo.launch()