## 1. Import Libraries

In [19]:
import pandas as pd
import numpy as np
import warnings
import os
import json
import joblib
from datetime import datetime

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Geospatial libraries
# Each optional library sets a *_AVAILABLE flag; the map cells below check
# FOLIUM_AVAILABLE before doing any folium work.
try:
    import folium
    from folium import plugins
    from folium.plugins import HeatMap, HeatMapWithTime, MarkerCluster, FastMarkerCluster
    FOLIUM_AVAILABLE = True
    print("‚úÖ Folium available for interactive maps")
except ImportError:
    print("‚ö†Ô∏è Folium not installed. Run: pip install folium")
    FOLIUM_AVAILABLE = False

try:
    import geopandas as gpd
    from shapely.geometry import Point
    GEOPANDAS_AVAILABLE = True
    print("‚úÖ GeoPandas available")
except ImportError:
    print("‚ö†Ô∏è GeoPandas not installed. Run: pip install geopandas")
    GEOPANDAS_AVAILABLE = False

try:
    import branca.colormap as cm
    BRANCA_AVAILABLE = True
    print("‚úÖ Branca colormap available")
except ImportError:
    # Previously silent: report the missing package like the other two libraries.
    print("‚ö†Ô∏è Branca not installed. Run: pip install branca")
    BRANCA_AVAILABLE = False

# NOTE(review): this silences every warning for the rest of the notebook.
warnings.filterwarnings('ignore')

# Only claim success when every optional import actually succeeded.
if FOLIUM_AVAILABLE and GEOPANDAS_AVAILABLE and BRANCA_AVAILABLE:
    print("\n‚úÖ All libraries imported successfully!")
else:
    print("\n‚ö†Ô∏è Some optional libraries are missing - see the messages above")

‚úÖ Folium available for interactive maps
‚úÖ GeoPandas available
‚úÖ Branca colormap available

‚úÖ All libraries imported successfully!


## 2. Load Data and Model Predictions

In [20]:
# Read the dataset (path is relative to the notebook's working directory)
df = pd.read_csv('data/final/enviroscan_final_dataset.csv')
n_records = len(df)
unique_counts = df[['location_id', 'state', 'district']].nunique()

# Headline counts
print("üìä DATASET LOADED")
print("=" * 50)
print(f"   Total Records: {n_records:,}")
print(f"   Unique Locations: {unique_counts['location_id']}")
print(f"   Unique States: {unique_counts['state']}")
print(f"   Unique Districts: {unique_counts['district']}")

# Coordinate extent covered by the stations
lat_values = df['latitude']
lon_values = df['longitude']
print(f"\nüìç Location Data:")
print(f"   Latitude range: {lat_values.min():.4f} to {lat_values.max():.4f}")
print(f"   Longitude range: {lon_values.min():.4f} to {lon_values.max():.4f}")

# Share of records attributed to each pollution source
print(f"\nüè≠ Pollution Sources:")
source_counts = df['pollution_source'].value_counts()
for source_name, n_rows in source_counts.items():
    share = n_rows / n_records * 100
    print(f"   {source_name}: {n_rows:,} ({share:.1f}%)")

df.head()

üìä DATASET LOADED
   Total Records: 106,369
   Unique Locations: 49
   Unique States: 19
   Unique Districts: 49

üìç Location Data:
   Latitude range: 8.5149 to 31.6200
   Longitude range: 72.5919 to 94.0990

üè≠ Pollution Sources:
   Vehicular: 42,445 (39.9%)
   Industrial: 23,372 (22.0%)
   Agricultural: 21,389 (20.1%)
   Natural: 12,061 (11.3%)
   Burning: 7,102 (6.7%)


Unnamed: 0,state,district,location_id,location_name,latitude,longitude,datetime_ist,hour,day_of_week,month,...,roads_count,industrial_distance_m,industrial_area_sqm,agricultural_distance_m,agricultural_area_sqm,dump_sites_distance_m,dump_sites_count,pollution_source,source_encoded,confidence
0,Andhra Pradesh,Tirupati,5649,"Tirumala, Tirupati - APPCB",13.67,79.35,2025-11-08 15:15:00+05:30,15,5,11,...,67,396.9,7470.18,9999.0,0.0,976.01,3,Natural,4,Low
1,Andhra Pradesh,Tirupati,5649,"Tirumala, Tirupati - APPCB",13.67,79.35,2025-11-08 15:30:00+05:30,15,5,11,...,67,396.9,7470.18,9999.0,0.0,976.01,3,Burning,3,Low
2,Andhra Pradesh,Tirupati,5649,"Tirumala, Tirupati - APPCB",13.67,79.35,2025-11-08 15:45:00+05:30,15,5,11,...,67,396.9,7470.18,9999.0,0.0,976.01,3,Burning,3,Low
3,Andhra Pradesh,Tirupati,5649,"Tirumala, Tirupati - APPCB",13.67,79.35,2025-11-08 16:00:00+05:30,16,5,11,...,67,396.9,7470.18,9999.0,0.0,976.01,3,Burning,3,Low
4,Andhra Pradesh,Tirupati,5649,"Tirumala, Tirupati - APPCB",13.67,79.35,2025-11-08 16:15:00+05:30,16,5,11,...,67,396.9,7470.18,9999.0,0.0,976.01,3,Burning,3,Low


In [21]:
# Convert the timestamp column once, then derive the calendar date and hour from it
parsed_timestamps = pd.to_datetime(df['datetime_ist'])
df['datetime_ist'] = parsed_timestamps
df['date'] = parsed_timestamps.dt.date
df['hour'] = parsed_timestamps.dt.hour

# Calculate AQI proxy (simplified)
# Using PM2.5 as primary indicator
def calculate_aqi_category(pm25):
    """
    Map a PM2.5 value to an AQI category label.

    Parameters:
    - pm25: PM2.5 reading (numeric); may be None or NaN for a missing reading

    Returns:
    - One of 'Good', 'Satisfactory', 'Moderate', 'Poor', 'Very Poor',
      'Severe', or 'Unknown' when the reading is missing.
    """
    # NaN compares False against every threshold, so without this guard a
    # missing reading would fall through to the final branch and be labelled
    # 'Severe'. `x != x` is True only for NaN.
    if pm25 is None or pm25 != pm25:
        return 'Unknown'

    # (inclusive upper bound, label), checked in ascending order
    breakpoints = (
        (30, 'Good'),
        (60, 'Satisfactory'),
        (90, 'Moderate'),
        (120, 'Poor'),
        (250, 'Very Poor'),
    )
    for upper_bound, label in breakpoints:
        if pm25 <= upper_bound:
            return label
    return 'Severe'

pm25_values = df['pm25']

# Label every record with its AQI category
df['aqi_category'] = pm25_values.map(calculate_aqi_category)

# Severity score: PM2.5 divided by 3, bounded to the 0-100 range
df['severity_score'] = (pm25_values / 3).clip(0, 100)

print("‚úÖ Data preprocessing complete")
print(f"\nüìä AQI Category Distribution:")
category_counts = df['aqi_category'].value_counts()
print(category_counts)

‚úÖ Data preprocessing complete

üìä AQI Category Distribution:
aqi_category
Satisfactory    35881
Good            27381
Moderate        25016
Poor             9999
Very Poor        6680
Severe           1412
Name: count, dtype: int64


## 3. Define Visualization Configuration

In [22]:
# Map view defaults: centre point [lat, lon] and initial zoom level
INDIA_CENTER = [20.5937, 78.9629]
DEFAULT_ZOOM = 5

# Hex colour used for each pollution source
SOURCE_COLORS = {
    'Industrial': '#e74c3c',      # red
    'Vehicular': '#3498db',       # blue
    'Agricultural': '#2ecc71',    # green
    'Natural': '#9b59b6',         # purple
    'Burning': '#f39c12'          # orange
}

# Icon name associated with each pollution source
SOURCE_ICONS = {
    'Industrial': 'industry',
    'Vehicular': 'car',
    'Agricultural': 'leaf',
    'Natural': 'tree',
    'Burning': 'fire'
}

# Hex colour used for each AQI category, ordered from best to worst
AQI_COLORS = {
    'Good': '#00e400',
    'Satisfactory': '#92d050',
    'Moderate': '#ffff00',
    'Poor': '#ff7e00',
    'Very Poor': '#ff0000',
    'Severe': '#99004c'
}

print("‚úÖ Visualization configuration defined")
print(f"\nüé® Source Colors:")
for source_name, hex_code in SOURCE_COLORS.items():
    print(f"   {source_name}: {hex_code}")

‚úÖ Visualization configuration defined

üé® Source Colors:
   Industrial: #e74c3c
   Vehicular: #3498db
   Agricultural: #2ecc71
   Natural: #9b59b6
   Burning: #f39c12


## 4. Create Location Summary Data

In [23]:
# One row per monitoring station: pollutant/weather means, the most frequent
# pollution source, and the number of records behind the averages.
station_keys = ['location_id', 'location_name', 'state', 'district', 'latitude', 'longitude']

location_summary = (
    df.groupby(station_keys)
    .agg(
        avg_pm25=('pm25', 'mean'),
        avg_pm10=('pm10', 'mean'),
        avg_no2=('no2', 'mean'),
        avg_co=('co', 'mean'),
        avg_so2=('so2', 'mean'),
        avg_o3=('o3', 'mean'),
        avg_temp=('temperature', 'mean'),
        avg_humidity=('humidity', 'mean'),
        # most frequent label in the group, or 'Unknown' when there is none
        dominant_source=('pollution_source', lambda x: x.mode()[0] if len(x.mode()) > 0 else 'Unknown'),
        avg_severity=('severity_score', 'mean'),
        record_count=('datetime_ist', 'count'),
    )
    .reset_index()
)

# AQI category of each station's average PM2.5
location_summary['aqi_category'] = location_summary['avg_pm25'].map(calculate_aqi_category)

print("üìç LOCATION SUMMARY CREATED")
print("=" * 50)
print(f"   Total Locations: {len(location_summary)}")
print(f"\n   Dominant Source Distribution:")
dominant_counts = location_summary['dominant_source'].value_counts()
for source_name, n_stations in dominant_counts.items():
    print(f"      {source_name}: {n_stations}")

location_summary.head()

üìç LOCATION SUMMARY CREATED
   Total Locations: 49

   Dominant Source Distribution:
      Vehicular: 21
      Industrial: 13
      Agricultural: 8
      Natural: 4
      Burning: 3


Unnamed: 0,location_id,location_name,state,district,latitude,longitude,avg_pm25,avg_pm10,avg_no2,avg_co,avg_so2,avg_o3,avg_temp,avg_humidity,dominant_source,avg_severity,record_count,aqi_category
0,17,"R K Puram, Delhi - DPCC",Haryana,Faridabad,28.563262,77.186937,243.27668,368.576594,74.301918,0.7,18.417165,38.55812,17.880632,70.5,Vehicular,74.983926,2530,Very Poor
1,5408,"Secretariat, Amaravati - APPCB",Andhra Pradesh,Vijayawada,16.515083,80.518167,61.967304,129.09448,25.401249,0.574947,5.984246,35.40896,22.530446,68.346497,Agricultural,20.655768,2355,Moderate
2,5542,"Civil Line, Jalandhar - PPCB",Punjab,Jalandhar,31.321907,75.578914,72.361215,148.680271,27.436668,1.132228,13.006317,45.276114,19.559174,68.497721,Vehicular,24.120405,2437,Moderate
3,5544,"Model Town, Patiala - PPCB",Punjab,Patiala,30.349388,76.366642,60.81675,159.297557,13.937404,1.129886,25.316411,19.096508,15.716005,64.99405,Industrial,20.27225,2431,Moderate
4,5546,Tata Stadium - Jorapokhar - JSPCB,Jharkhand,Dhanbad,23.707909,86.41467,1.501211,10.346918,0.0,0.000134,0.34322,1.035294,2.495517,10.906214,Natural,0.500404,1379,Good


## 5. Basic Pollution Heatmap

In [24]:
# Heatmap of average severity per monitoring station.
if FOLIUM_AVAILABLE:
    # Create base map
    pollution_heatmap = folium.Map(
        location=INDIA_CENTER,
        zoom_start=DEFAULT_ZOOM,
        tiles='CartoDB positron'
    )

    # Prepare heatmap data [lat, lon, intensity].
    # Rows with a missing value are dropped because HeatMap rejects NaNs.
    heat_data = (
        location_summary[['latitude', 'longitude', 'avg_severity']]
        .dropna()
        .values.tolist()
    )

    # Add heatmap layer
    HeatMap(
        heat_data,
        min_opacity=0.3,
        max_zoom=13,
        radius=25,
        blur=15,
        gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'yellow', 0.8: 'orange', 1: 'red'}
    ).add_to(pollution_heatmap)

    # Add title
    title_html = '''
    <div style="position: fixed;
                top: 10px; left: 50px; width: 300px;
                background-color: white; padding: 10px;
                border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.3);
                z-index: 9999; font-family: Arial;">
        <h4 style="margin: 0;">üå°Ô∏è Pollution Intensity Heatmap</h4>
        <p style="margin: 5px 0 0 0; font-size: 12px;">Based on PM2.5 severity scores</p>
    </div>
    '''
    pollution_heatmap.get_root().html.add_child(folium.Element(title_html))

    print("‚úÖ Basic Pollution Heatmap created")
else:
    print("‚ö†Ô∏è Folium not available")

# Jupyter only renders the value of the cell's final top-level expression; a
# bare name nested inside the `if` block above is never displayed.
# None (folium missing) renders nothing.
pollution_heatmap if FOLIUM_AVAILABLE else None

‚úÖ Basic Pollution Heatmap created


## 6. Source-Specific Marker Map

In [25]:
# Station markers coloured by dominant pollution source, one toggleable layer per source.
if FOLIUM_AVAILABLE:
    # Create base map with dark theme
    source_map = folium.Map(
        location=INDIA_CENTER,
        zoom_start=DEFAULT_ZOOM,
        tiles='CartoDB dark_matter'
    )

    # Create feature groups for each source (for layer control)
    source_groups = {}
    for source in SOURCE_COLORS.keys():
        source_groups[source] = folium.FeatureGroup(name=f"üè≠ {source}")

    # Add markers for each location
    for idx, row in location_summary.iterrows():
        source = row['dominant_source']
        color = SOURCE_COLORS.get(source, '#95a5a6')

        # Create popup content
        popup_html = f"""
        <div style="font-family: Arial; width: 200px;">
            <h4 style="margin: 0; color: {color};">{row['location_name']}</h4>
            <p style="margin: 5px 0;"><b>State:</b> {row['state']}</p>
            <p style="margin: 5px 0;"><b>District:</b> {row['district']}</p>
            <hr style="margin: 5px 0;">
            <p style="margin: 5px 0;"><b>Dominant Source:</b> {source}</p>
            <p style="margin: 5px 0;"><b>AQI Category:</b> {row['aqi_category']}</p>
            <p style="margin: 5px 0;"><b>Avg PM2.5:</b> {row['avg_pm25']:.1f} ¬µg/m¬≥</p>
            <p style="margin: 5px 0;"><b>Avg PM10:</b> {row['avg_pm10']:.1f} ¬µg/m¬≥</p>
            <p style="margin: 5px 0;"><b>Records:</b> {row['record_count']:,}</p>
        </div>
        """

        # Add circle marker; a source without its own layer (e.g. 'Unknown')
        # goes straight onto the map.
        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=8,
            popup=folium.Popup(popup_html, max_width=250),
            color=color,
            fill=True,
            fillColor=color,
            fillOpacity=0.7,
            weight=2
        ).add_to(source_groups[source] if source in source_groups else source_map)

    # Add all feature groups to map
    for group in source_groups.values():
        group.add_to(source_map)

    # Add layer control
    folium.LayerControl(collapsed=False).add_to(source_map)

    # Add legend
    legend_html = '''
    <div style="position: fixed;
                bottom: 50px; right: 50px;
                background-color: white; padding: 15px;
                border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.3);
                z-index: 9999; font-family: Arial;">
        <h4 style="margin: 0 0 10px 0;">Pollution Sources</h4>
        <p style="margin: 5px 0;"><span style="color: #e74c3c;">‚óè</span> Industrial</p>
        <p style="margin: 5px 0;"><span style="color: #3498db;">‚óè</span> Vehicular</p>
        <p style="margin: 5px 0;"><span style="color: #2ecc71;">‚óè</span> Agricultural</p>
        <p style="margin: 5px 0;"><span style="color: #9b59b6;">‚óè</span> Natural</p>
        <p style="margin: 5px 0;"><span style="color: #f39c12;">‚óè</span> Burning</p>
    </div>
    '''
    source_map.get_root().html.add_child(folium.Element(legend_html))

    print("‚úÖ Source-Specific Marker Map created")
else:
    print("‚ö†Ô∏è Folium not available")

# Jupyter only renders the value of the cell's final top-level expression; a
# bare name nested inside the `if` block above is never displayed.
# None (folium missing) renders nothing.
source_map if FOLIUM_AVAILABLE else None

‚úÖ Source-Specific Marker Map created


## 7. High-Risk Zones Map with AQI Gradient

In [26]:
# Station markers coloured by AQI category and sized by average severity.
if FOLIUM_AVAILABLE:
    # Create base map
    risk_map = folium.Map(
        location=INDIA_CENTER,
        zoom_start=DEFAULT_ZOOM,
        tiles='CartoDB positron'
    )

    # Create feature groups for AQI categories
    aqi_groups = {}
    for category in AQI_COLORS.keys():
        aqi_groups[category] = folium.FeatureGroup(name=f"{category}")

    # Add markers with size based on severity
    for idx, row in location_summary.iterrows():
        category = row['aqi_category']
        color = AQI_COLORS.get(category, '#95a5a6')

        # Radius based on severity (5-20)
        radius = 5 + (row['avg_severity'] / 100) * 15

        popup_html = f"""
        <div style="font-family: Arial; width: 180px;">
            <h4 style="margin: 0;">{row['location_name']}</h4>
            <p><b>AQI Category:</b> <span style="color: {color}; font-weight: bold;">{category}</span></p>
            <p><b>PM2.5:</b> {row['avg_pm25']:.1f} ¬µg/m¬≥</p>
            <p><b>PM10:</b> {row['avg_pm10']:.1f} ¬µg/m¬≥</p>
            <p><b>Source:</b> {row['dominant_source']}</p>
        </div>
        """

        # A category without its own layer is added directly to the map
        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=radius,
            popup=folium.Popup(popup_html, max_width=200),
            color=color,
            fill=True,
            fillColor=color,
            fillOpacity=0.6,
            weight=2
        ).add_to(aqi_groups.get(category, risk_map))

    # Add all groups
    for group in aqi_groups.values():
        group.add_to(risk_map)

    # Add layer control
    folium.LayerControl(collapsed=False).add_to(risk_map)

    # Add AQI legend
    aqi_legend_html = '''
    <div style="position: fixed;
                bottom: 50px; right: 50px;
                background-color: white; padding: 15px;
                border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.3);
                z-index: 9999; font-family: Arial;">
        <h4 style="margin: 0 0 10px 0;">AQI Categories</h4>
        <p style="margin: 3px 0;"><span style="color: #00e400;">‚óè</span> Good (0-30)</p>
        <p style="margin: 3px 0;"><span style="color: #92d050;">‚óè</span> Satisfactory (31-60)</p>
        <p style="margin: 3px 0;"><span style="color: #ffff00;">‚óè</span> Moderate (61-90)</p>
        <p style="margin: 3px 0;"><span style="color: #ff7e00;">‚óè</span> Poor (91-120)</p>
        <p style="margin: 3px 0;"><span style="color: #ff0000;">‚óè</span> Very Poor (121-250)</p>
        <p style="margin: 3px 0;"><span style="color: #99004c;">‚óè</span> Severe (250+)</p>
    </div>
    '''
    risk_map.get_root().html.add_child(folium.Element(aqi_legend_html))

    print("‚úÖ High-Risk Zones Map created")
else:
    print("‚ö†Ô∏è Folium not available")

# Jupyter only renders the value of the cell's final top-level expression; a
# bare name nested inside the `if` block above is never displayed.
# None (folium missing) renders nothing.
risk_map if FOLIUM_AVAILABLE else None

‚úÖ High-Risk Zones Map created


## 8. State-Level Aggregated Map

In [27]:
# One row per state, aggregated over every record in that state: mean
# coordinates and pollutant levels, number of distinct stations, and the most
# frequent pollution source.
state_summary = (
    df.groupby('state')
    .agg(
        latitude=('latitude', 'mean'),
        longitude=('longitude', 'mean'),
        avg_pm25=('pm25', 'mean'),
        avg_pm10=('pm10', 'mean'),
        num_locations=('location_id', 'nunique'),
        # most frequent label in the group, or 'Unknown' when there is none
        dominant_source=('pollution_source', lambda x: x.mode()[0] if len(x.mode()) > 0 else 'Unknown'),
    )
    .reset_index()
)

# AQI category of each state's average PM2.5
state_summary['aqi_category'] = state_summary['avg_pm25'].map(calculate_aqi_category)

print("üìä STATE-LEVEL SUMMARY")
print("=" * 50)
state_summary.sort_values('avg_pm25', ascending=False)

üìä STATE-LEVEL SUMMARY


Unnamed: 0,state,latitude,longitude,avg_pm25,avg_pm10,num_locations,dominant_source,aqi_category
6,Haryana,29.45523,76.98247,147.098349,241.147792,3,Vehicular,Very Poor
10,Madhya Pradesh,23.77696,77.870691,101.052739,170.910665,4,Vehicular,Poor
15,Odisha,20.924238,85.532722,78.016587,167.212843,3,Vehicular,Moderate
17,Rajasthan,25.422423,74.724222,68.519443,126.512363,3,Vehicular,Moderate
11,Maharashtra,20.392802,76.213771,67.644788,117.248876,3,Agricultural,Moderate
0,Andhra Pradesh,15.898495,80.98639,65.312162,114.341712,3,Vehicular,Moderate
16,Punjab,30.93302,75.555915,63.718136,132.386965,5,Vehicular,Moderate
3,Bihar,25.427695,86.086406,58.982087,97.461412,2,Agricultural,Satisfactory
5,Gujarat,22.423817,72.74835,57.396373,124.142873,4,Vehicular,Satisfactory
1,Arunachal Pradesh,27.103358,93.679645,52.7517,99.55957,1,Natural,Satisfactory


In [28]:
# One marker per state, coloured by AQI category and sized by station count.
if FOLIUM_AVAILABLE:
    # State-level map (uses the shared DEFAULT_ZOOM like the other maps)
    state_map = folium.Map(
        location=INDIA_CENTER,
        zoom_start=DEFAULT_ZOOM,
        tiles='CartoDB positron'
    )

    # Add state markers
    for idx, row in state_summary.iterrows():
        color = AQI_COLORS.get(row['aqi_category'], '#95a5a6')
        source_color = SOURCE_COLORS.get(row['dominant_source'], '#95a5a6')

        # Size based on number of locations
        radius = 10 + row['num_locations'] * 2

        popup_html = f"""
        <div style="font-family: Arial; width: 200px;">
            <h3 style="margin: 0; color: {color};">{row['state']}</h3>
            <hr>
            <p><b>Monitoring Stations:</b> {row['num_locations']}</p>
            <p><b>Avg PM2.5:</b> {row['avg_pm25']:.1f} ¬µg/m¬≥</p>
            <p><b>Avg PM10:</b> {row['avg_pm10']:.1f} ¬µg/m¬≥</p>
            <p><b>AQI:</b> <span style="color: {color};">{row['aqi_category']}</span></p>
            <p><b>Main Source:</b> <span style="color: {source_color};">{row['dominant_source']}</span></p>
        </div>
        """

        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=radius,
            popup=folium.Popup(popup_html, max_width=220),
            color=color,
            fill=True,
            fillColor=color,
            fillOpacity=0.6,
            weight=3
        ).add_to(state_map)

        # Add state label (name truncated to its first 10 characters)
        folium.Marker(
            location=[row['latitude'], row['longitude']],
            icon=folium.DivIcon(
                html=f'<div style="font-size: 10px; font-weight: bold; color: #333;">{row["state"][:10]}</div>'
            )
        ).add_to(state_map)

    print("‚úÖ State-Level Map created")
else:
    print("‚ö†Ô∏è Folium not available")

# Jupyter only renders the value of the cell's final top-level expression; a
# bare name nested inside the `if` block above is never displayed.
# None (folium missing) renders nothing.
state_map if FOLIUM_AVAILABLE else None

‚úÖ State-Level Map created


## 9. Interactive Clustered Marker Map

In [29]:
# Station markers grouped into a single MarkerCluster layer.
if FOLIUM_AVAILABLE:
    # Create clustered map for better performance with many markers
    cluster_map = folium.Map(
        location=INDIA_CENTER,
        zoom_start=DEFAULT_ZOOM,
        tiles='OpenStreetMap'
    )

    # Create marker cluster
    marker_cluster = MarkerCluster(name="Monitoring Stations")

    for idx, row in location_summary.iterrows():
        source = row['dominant_source']
        color = SOURCE_COLORS.get(source, 'gray')

        # Create custom icon: a coloured disc showing the source's first letter
        icon_html = f'''
        <div style="background-color: {color};
                    width: 30px; height: 30px;
                    border-radius: 50%;
                    display: flex;
                    align-items: center;
                    justify-content: center;
                    border: 2px solid white;
                    box-shadow: 0 2px 5px rgba(0,0,0,0.3);">
            <span style="color: white; font-size: 12px; font-weight: bold;">
                {source[0]}
            </span>
        </div>
        '''

        popup_html = f"""
        <div style="font-family: Arial; width: 220px;">
            <h4 style="margin: 0 0 10px 0; color: {color};">{row['location_name']}</h4>
            <table style="width: 100%; font-size: 12px;">
                <tr><td><b>State</b></td><td>{row['state']}</td></tr>
                <tr><td><b>District</b></td><td>{row['district']}</td></tr>
                <tr><td><b>Source</b></td><td style="color: {color};">{source}</td></tr>
                <tr><td><b>AQI</b></td><td>{row['aqi_category']}</td></tr>
                <tr><td><b>PM2.5</b></td><td>{row['avg_pm25']:.1f} ¬µg/m¬≥</td></tr>
                <tr><td><b>PM10</b></td><td>{row['avg_pm10']:.1f} ¬µg/m¬≥</td></tr>
                <tr><td><b>NO‚ÇÇ</b></td><td>{row['avg_no2']:.1f} ¬µg/m¬≥</td></tr>
                <tr><td><b>Temp</b></td><td>{row['avg_temp']:.1f}¬∞C</td></tr>
                <tr><td><b>Humidity</b></td><td>{row['avg_humidity']:.1f}%</td></tr>
            </table>
        </div>
        """

        folium.Marker(
            location=[row['latitude'], row['longitude']],
            popup=folium.Popup(popup_html, max_width=250),
            icon=folium.DivIcon(html=icon_html, icon_size=(30, 30), icon_anchor=(15, 15))
        ).add_to(marker_cluster)

    marker_cluster.add_to(cluster_map)

    # Add layer control
    folium.LayerControl().add_to(cluster_map)

    print("‚úÖ Clustered Marker Map created")
else:
    print("‚ö†Ô∏è Folium not available")

# Jupyter only renders the value of the cell's final top-level expression; a
# bare name nested inside the `if` block above is never displayed.
# None (folium missing) renders nothing.
cluster_map if FOLIUM_AVAILABLE else None

‚úÖ Clustered Marker Map created


## 10. Comprehensive Dashboard Map

In [30]:
# Multi-layer dashboard: three base tile sets plus heatmap, source-marker and
# high-risk-zone overlays, all switchable from the layer control.
if FOLIUM_AVAILABLE:
    # Create comprehensive dashboard map with multiple layers
    dashboard_map = folium.Map(
        location=INDIA_CENTER,
        zoom_start=DEFAULT_ZOOM,
        tiles=None  # We'll add custom tile layers
    )

    # Add multiple tile layers
    folium.TileLayer('CartoDB positron', name='Light').add_to(dashboard_map)
    folium.TileLayer('CartoDB dark_matter', name='Dark').add_to(dashboard_map)
    folium.TileLayer('OpenStreetMap', name='Street').add_to(dashboard_map)

    # Layer 1: Heatmap
    # Rows with a missing value are dropped because HeatMap rejects NaNs.
    heatmap_layer = folium.FeatureGroup(name='üå°Ô∏è Pollution Heatmap')
    heat_data = (
        location_summary[['latitude', 'longitude', 'avg_severity']]
        .dropna()
        .values.tolist()
    )
    HeatMap(
        heat_data,
        min_opacity=0.3,
        radius=20,
        blur=15,
        gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'yellow', 0.8: 'orange', 1: 'red'}
    ).add_to(heatmap_layer)
    heatmap_layer.add_to(dashboard_map)

    # Layer 2: Source markers (hidden until enabled in the layer control)
    source_layer = folium.FeatureGroup(name='üìç Source Markers', show=False)
    for idx, row in location_summary.iterrows():
        source = row['dominant_source']
        color = SOURCE_COLORS.get(source, '#95a5a6')
        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=6,
            popup=f"{row['location_name']}: {source}",
            color=color,
            fill=True,
            fillColor=color,
            fillOpacity=0.7
        ).add_to(source_layer)
    source_layer.add_to(dashboard_map)

    # Layer 3: AQI risk zones (only stations rated Poor or worse)
    risk_layer = folium.FeatureGroup(name='‚ö†Ô∏è Risk Zones', show=False)
    high_risk = location_summary[location_summary['aqi_category'].isin(['Poor', 'Very Poor', 'Severe'])]
    for idx, row in high_risk.iterrows():
        color = AQI_COLORS.get(row['aqi_category'], '#ff0000')
        folium.Circle(
            location=[row['latitude'], row['longitude']],
            radius=15000,  # 15km radius
            popup=f"{row['location_name']}: {row['aqi_category']}",
            color=color,
            fill=True,
            fillColor=color,
            fillOpacity=0.3
        ).add_to(risk_layer)
    risk_layer.add_to(dashboard_map)

    # Add layer control
    folium.LayerControl(collapsed=False).add_to(dashboard_map)

    # Add comprehensive legend. The AQI list now includes 'Satisfactory',
    # which was missing even though it is one of the AQI_COLORS categories.
    legend_html = '''
    <div style="position: fixed;
                bottom: 20px; left: 20px;
                background-color: white; padding: 15px;
                border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.3);
                z-index: 9999; font-family: Arial; font-size: 12px;
                max-height: 400px; overflow-y: auto;">
        <h4 style="margin: 0 0 10px 0;">üó∫Ô∏è EnviroScan Dashboard</h4>

        <p style="margin: 10px 0 5px 0; font-weight: bold;">Pollution Sources:</p>
        <p style="margin: 2px 0;"><span style="color: #e74c3c;">‚óè</span> Industrial</p>
        <p style="margin: 2px 0;"><span style="color: #3498db;">‚óè</span> Vehicular</p>
        <p style="margin: 2px 0;"><span style="color: #2ecc71;">‚óè</span> Agricultural</p>
        <p style="margin: 2px 0;"><span style="color: #9b59b6;">‚óè</span> Natural</p>
        <p style="margin: 2px 0;"><span style="color: #f39c12;">‚óè</span> Burning</p>

        <p style="margin: 10px 0 5px 0; font-weight: bold;">AQI Categories:</p>
        <p style="margin: 2px 0;"><span style="color: #00e400;">‚óè</span> Good</p>
        <p style="margin: 2px 0;"><span style="color: #92d050;">‚óè</span> Satisfactory</p>
        <p style="margin: 2px 0;"><span style="color: #ffff00;">‚óè</span> Moderate</p>
        <p style="margin: 2px 0;"><span style="color: #ff7e00;">‚óè</span> Poor</p>
        <p style="margin: 2px 0;"><span style="color: #ff0000;">‚óè</span> Very Poor</p>
        <p style="margin: 2px 0;"><span style="color: #99004c;">‚óè</span> Severe</p>
    </div>
    '''
    dashboard_map.get_root().html.add_child(folium.Element(legend_html))

    print("‚úÖ Comprehensive Dashboard Map created")
else:
    print("‚ö†Ô∏è Folium not available")

# Jupyter only renders the value of the cell's final top-level expression; a
# bare name nested inside the `if` block above is never displayed.
# None (folium missing) renders nothing.
dashboard_map if FOLIUM_AVAILABLE else None

‚úÖ Comprehensive Dashboard Map created


## 11. Filter Functions for Dashboard

In [31]:
def create_filtered_map(data, filter_state=None, filter_source=None, filter_aqi=None):
    """
    Create a filtered map based on user selections

    Parameters:
    - data: location-level DataFrame; the columns read here are
      'location_name', 'state', 'dominant_source', 'aqi_category',
      'avg_pm25', 'latitude' and 'longitude'
    - filter_state: State name to filter
    - filter_source: Pollution source to filter
    - filter_aqi: AQI category to filter

    A filter left as None (or any other falsy value) is not applied.

    Returns:
    - folium.Map showing the matching locations, or None (after printing a
      warning) when no location matches.
    """
    import html  # stdlib; local import keeps the function self-contained

    # Boolean indexing returns new frames, so `data` itself is never modified.
    filtered = data
    if filter_state:
        filtered = filtered[filtered['state'] == filter_state]
    if filter_source:
        filtered = filtered[filtered['dominant_source'] == filter_source]
    if filter_aqi:
        filtered = filtered[filtered['aqi_category'] == filter_aqi]

    # folium refuses NaN coordinates, so rows without a position cannot be drawn.
    filtered = filtered.dropna(subset=['latitude', 'longitude'])

    if len(filtered) == 0:
        print("‚ö†Ô∏è No data matching filters")
        return None

    # Calculate center
    center_lat = filtered['latitude'].mean()
    center_lon = filtered['longitude'].mean()

    # Create map
    filtered_map = folium.Map(
        location=[center_lat, center_lon],
        zoom_start=7,
        tiles='CartoDB positron'
    )

    # With several locations a fixed zoom can leave markers off-screen (e.g. a
    # source filter spanning the whole country), so fit the view to them.
    # max_zoom stops the view zooming in too far on tightly grouped points.
    if len(filtered) > 1:
        filtered_map.fit_bounds(
            [
                [float(filtered['latitude'].min()), float(filtered['longitude'].min())],
                [float(filtered['latitude'].max()), float(filtered['longitude'].max())],
            ],
            max_zoom=10
        )

    # Add markers
    for _, row in filtered.iterrows():
        source = row['dominant_source']
        color = SOURCE_COLORS.get(source, '#95a5a6')

        # Popup text is rendered as HTML, so data values are escaped first.
        popup_html = f"""
        <b>{html.escape(str(row['location_name']))}</b><br>
        State: {html.escape(str(row['state']))}<br>
        Source: {html.escape(str(source))}<br>
        PM2.5: {row['avg_pm25']:.1f}<br>
        AQI: {html.escape(str(row['aqi_category']))}
        """

        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=8,
            popup=popup_html,
            color=color,
            fill=True,
            fillColor=color,
            fillOpacity=0.7
        ).add_to(filtered_map)

    # Add title describing the active filters
    filter_text = []
    if filter_state:
        filter_text.append(f"State: {filter_state}")
    if filter_source:
        filter_text.append(f"Source: {filter_source}")
    if filter_aqi:
        filter_text.append(f"AQI: {filter_aqi}")

    title = " | ".join(filter_text) if filter_text else "All Data"

    # The filter values come from user selections; escape them before they are
    # embedded in the page.
    title_html = f'''
    <div style="position: fixed; top: 10px; left: 50px;
                background-color: white; padding: 10px;
                border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.3);
                z-index: 9999;">
        <b>Filter:</b> {html.escape(title)}<br>
        <small>Showing {len(filtered)} locations</small>
    </div>
    '''
    filtered_map.get_root().html.add_child(folium.Element(title_html))

    return filtered_map

# List the values each filter argument can take
print("‚úÖ Filter function defined")
print("\nüìä Available Filters:")
state_options = location_summary['state'].unique().tolist()
source_options = location_summary['dominant_source'].unique().tolist()
aqi_options = location_summary['aqi_category'].unique().tolist()
print(f"   States: {state_options}")
print(f"   Sources: {source_options}")
print(f"   AQI Categories: {aqi_options}")

‚úÖ Filter function defined

üìä Available Filters:
   States: ['Haryana', 'Andhra Pradesh', 'Punjab', 'Jharkhand', 'Rajasthan', 'Maharashtra', 'Madhya Pradesh', 'Gujarat', 'Kerala', 'Bihar', 'Mizoram', 'Meghalaya', 'Nagaland', 'Karnataka', 'Arunachal Pradesh', 'Chhattisgarh', 'Sikkim', 'Odisha', 'Assam']
   Sources: ['Vehicular', 'Agricultural', 'Industrial', 'Natural', 'Burning']
   AQI Categories: ['Very Poor', 'Moderate', 'Good', 'Poor', 'Satisfactory']


In [32]:
# Example: Filter by state
if FOLIUM_AVAILABLE:
    # Get the state with most locations for demo
    top_state = location_summary['state'].value_counts().index[0]

    print(f"üìç Showing filtered map for: {top_state}")
    filtered_demo = create_filtered_map(location_summary, filter_state=top_state)

# Jupyter only renders the value of the cell's final top-level expression; a
# bare name nested inside the `if` block above is never displayed.
# None (folium missing) renders nothing.
filtered_demo if FOLIUM_AVAILABLE else None

üìç Showing filtered map for: Punjab


In [33]:
# Example: Filter by pollution source - Industrial
if FOLIUM_AVAILABLE:
    print("üìç Showing Industrial pollution sources only")
    industrial_map = create_filtered_map(location_summary, filter_source='Industrial')

# Jupyter only renders the value of the cell's final top-level expression; a
# bare name nested inside the `if` block above is never displayed.
# None (folium missing) renders nothing.
industrial_map if FOLIUM_AVAILABLE else None

üìç Showing Industrial pollution sources only


## 12. Export Maps

In [34]:
# Make sure the output folder exists before anything is written to it
os.makedirs('maps', exist_ok=True)

print("üíæ EXPORTING MAPS")
print("=" * 50)

if not FOLIUM_AVAILABLE:
    print("‚ö†Ô∏è Folium not available, cannot export maps")
else:
    # (output file name, map object) pairs, written in this order
    export_plan = [
        ('pollution_heatmap.html', pollution_heatmap),
        ('source_markers.html', source_map),
        ('risk_zones.html', risk_map),
        ('state_level.html', state_map),
        ('clustered_markers.html', cluster_map),
        ('dashboard_map.html', dashboard_map),
    ]

    for filename, map_obj in export_plan:
        filepath = f'maps/{filename}'
        map_obj.save(filepath)
        print(f"   ‚úÖ Saved: {filepath}")

    print(f"\n‚úÖ All maps exported to 'maps/' directory")
    print("\nüìÅ Files can be embedded in web dashboard using <iframe>")

üíæ EXPORTING MAPS
   ‚úÖ Saved: maps/pollution_heatmap.html
   ‚úÖ Saved: maps/source_markers.html
   ‚úÖ Saved: maps/risk_zones.html
   ‚úÖ Saved: maps/state_level.html
   ‚úÖ Saved: maps/clustered_markers.html
   ‚úÖ Saved: maps/dashboard_map.html

‚úÖ All maps exported to 'maps/' directory

üìÅ Files can be embedded in web dashboard using <iframe>


In [35]:
# Write the per-station summary table for the dashboard
location_summary.to_csv('data/location_summary.csv', index=False)
print("‚úÖ Location summary exported to data/location_summary.csv")

# Build a GeoJSON FeatureCollection with one Point feature per station.
# GeoJSON coordinates are ordered [longitude, latitude].
geojson_data = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [station['longitude'], station['latitude']]
            },
            "properties": {
                "location_id": station['location_id'],
                "location_name": station['location_name'],
                "state": station['state'],
                "district": station['district'],
                "dominant_source": station['dominant_source'],
                "aqi_category": station['aqi_category'],
                "avg_pm25": round(station['avg_pm25'], 2),
                "avg_pm10": round(station['avg_pm10'], 2),
                "avg_severity": round(station['avg_severity'], 2)
            }
        }
        for _, station in location_summary.iterrows()
    ]
}

with open('maps/locations.geojson', 'w') as geojson_file:
    json.dump(geojson_data, geojson_file, indent=2)

print("‚úÖ GeoJSON exported to maps/locations.geojson")

‚úÖ Location summary exported to data/location_summary.csv
‚úÖ GeoJSON exported to maps/locations.geojson


## 13. Summary

In [36]:
# Closing report: dataset size, the maps built above, and the files written
banner = "=" * 70

print("\n" + banner)
print("üéâ GEOSPATIAL VISUALIZATION COMPLETE!")
print(banner)

print("\nüìä DATA SUMMARY:")
print(f"   ‚Ä¢ Total Records: {len(df):,}")
print(f"   ‚Ä¢ Unique Locations: {len(location_summary)}")
print(f"   ‚Ä¢ States Covered: {df['state'].nunique()}")
print(f"   ‚Ä¢ Districts Covered: {df['district'].nunique()}")

print("\nüó∫Ô∏è MAPS CREATED:")
for map_line in (
    "   1. Pollution Heatmap - Intensity visualization",
    "   2. Source Markers - Color-coded by pollution source",
    "   3. Risk Zones - AQI-based gradient visualization",
    "   4. State Level - Aggregated state view",
    "   5. Clustered Markers - Performance-optimized",
    "   6. Dashboard Map - Multi-layer comprehensive view",
):
    print(map_line)

print("\nüíæ EXPORTED FILES:")
for file_line in (
    "   ‚Ä¢ maps/pollution_heatmap.html",
    "   ‚Ä¢ maps/source_markers.html",
    "   ‚Ä¢ maps/risk_zones.html",
    "   ‚Ä¢ maps/state_level.html",
    "   ‚Ä¢ maps/clustered_markers.html",
    "   ‚Ä¢ maps/dashboard_map.html",
    "   ‚Ä¢ maps/locations.geojson",
    "   ‚Ä¢ data/location_summary.csv",
):
    print(file_line)

print("\nüåê WEB INTEGRATION:")
print("   Use <iframe src='maps/dashboard_map.html'></iframe> to embed")

print("\n" + banner)
print("‚úÖ Ready for dashboard integration!")
print(banner)


üéâ GEOSPATIAL VISUALIZATION COMPLETE!

üìä DATA SUMMARY:
   ‚Ä¢ Total Records: 106,369
   ‚Ä¢ Unique Locations: 49
   ‚Ä¢ States Covered: 19
   ‚Ä¢ Districts Covered: 49

üó∫Ô∏è MAPS CREATED:
   1. Pollution Heatmap - Intensity visualization
   2. Source Markers - Color-coded by pollution source
   3. Risk Zones - AQI-based gradient visualization
   4. State Level - Aggregated state view
   5. Clustered Markers - Performance-optimized
   6. Dashboard Map - Multi-layer comprehensive view

üíæ EXPORTED FILES:
   ‚Ä¢ maps/pollution_heatmap.html
   ‚Ä¢ maps/source_markers.html
   ‚Ä¢ maps/risk_zones.html
   ‚Ä¢ maps/state_level.html
   ‚Ä¢ maps/clustered_markers.html
   ‚Ä¢ maps/dashboard_map.html
   ‚Ä¢ maps/locations.geojson
   ‚Ä¢ data/location_summary.csv

üåê WEB INTEGRATION:
   Use <iframe src='maps/dashboard_map.html'></iframe> to embed

‚úÖ Ready for dashboard integration!
