In [5]:
!pip install folium

Collecting folium
  Using cached folium-0.20.0-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting branca>=0.6.0 (from folium)
  Using cached branca-0.8.2-py3-none-any.whl.metadata (1.7 kB)
Collecting jinja2>=2.9 (from folium)
  Using cached jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)
Collecting requests (from folium)
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting xyzservices (from folium)
  Using cached xyzservices-2025.11.0-py3-none-any.whl.metadata (4.3 kB)
Collecting MarkupSafe>=2.0 (from jinja2>=2.9->folium)
  Downloading markupsafe-3.0.3-cp312-cp312-win_amd64.whl.metadata (2.8 kB)
Collecting charset_normalizer<4,>=2 (from requests->folium)
  Using cached charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl.metadata (38 kB)
Collecting idna<4,>=2.5 (from requests->folium)
  Using cached idna-3.11-py3-none-any.whl.metadata (8.4 kB)
Collecting urllib3<3,>=1.21.1 (from requests->folium)
  Using cached urllib3-2.6.2-py3-none-any.whl.metadata (6.6 kB)
Colle

In [6]:
# Geospatial Mapping and Heatmap Visualization
# ================================================
# Module 5: Interactive pollution mapping with Folium

# =================================================
# 1. IMPORTS
# =================================================
import pandas as pd
import numpy as np
import folium
from folium import plugins
from folium.plugins import HeatMap, MarkerCluster, FastMarkerCluster
import joblib
import warnings
warnings.filterwarnings('ignore')
from datetime import datetime
import json

# =================================================
# 2. LOAD DATA AND MODELS
# =================================================

# Load the dataset
DATA_PATH = r"D:\\codes\\brain tumor project yz\\infosys\\Data_set\\Main_data_set\\data_for_training.csv"
df = pd.read_csv(DATA_PATH, encoding="latin1")

# Clean column names
df.columns = (
    df.columns
    .str.strip()
    .str.lower()
    .str.replace(" ", "_")
    .str.replace("(", "")
    .str.replace(")", "")
    .str.replace("¬∞", "")
)

print(f"‚úÖ Dataset loaded: {df.shape}")

# Load trained models and metadata
try:
    rf_model = joblib.load("random_forest_pollution.pkl")
    metadata = joblib.load("model_metadata.pkl")
    print("‚úÖ Models and metadata loaded successfully")
except:
    print("‚ö†Ô∏è Models not found. Using existing predictions from dataset.")
    metadata = {'label_mapping': {0: 'Industrial', 1: 'Vehicular', 2: 'Residential'}}

# =================================================
# 3. DATA PREPARATION
# =================================================

# Ensure we have necessary columns
required_cols = ['latitude', 'longitude', 'pollution_source', 'aqi_value']
df_map = df[required_cols + ['city', 'timestamp'] if 'timestamp' in df.columns else required_cols + ['city']].copy()

# Handle missing values
df_map = df_map.dropna(subset=['latitude', 'longitude', 'aqi_value'])

# Parse timestamp if available
if 'timestamp' in df_map.columns:
    df_map['timestamp'] = pd.to_datetime(df_map['timestamp'], errors='coerce')
    df_map['date'] = df_map['timestamp'].dt.date
    df_map['month'] = df_map['timestamp'].dt.month
    df_map['year'] = df_map['timestamp'].dt.year
else:
    df_map['date'] = 'Unknown'

# Map pollution sources to readable names
source_mapping = {
    0: 'Industrial',
    1: 'Vehicular', 
    2: 'Residential'
}
df_map['source_name'] = df_map['pollution_source'].map(source_mapping)

# Define AQI severity categories
def get_aqi_category(aqi):
    if aqi <= 50:
        return 'Good', '#00e400'
    elif aqi <= 100:
        return 'Moderate', '#ffff00'
    elif aqi <= 150:
        return 'Unhealthy for Sensitive Groups', '#ff7e00'
    elif aqi <= 200:
        return 'Unhealthy', '#ff0000'
    elif aqi <= 300:
        return 'Very Unhealthy', '#8f3f97'
    else:
        return 'Hazardous', '#7e0023'

df_map['aqi_category'], df_map['aqi_color'] = zip(*df_map['aqi_value'].apply(get_aqi_category))

print(f"‚úÖ Prepared {len(df_map)} locations for mapping")
print(f"\nSource distribution:")
print(df_map['source_name'].value_counts())

# =================================================
# 4. ICON AND COLOR MAPPING
# =================================================

# Define markers for each pollution source
source_icons = {
    'Industrial': {
        'icon': 'industry',
        'color': 'red',
        'prefix': 'fa'
    },
    'Vehicular': {
        'icon': 'car',
        'color': 'blue',
        'prefix': 'fa'
    },
    'Residential': {
        'icon': 'home',
        'color': 'green',
        'prefix': 'fa'
    }
}

# =================================================
# 5. CREATE BASE MAP FUNCTION
# =================================================

def create_base_map(center_lat=None, center_lon=None, zoom=6):
    """Create a base folium map"""
    
    if center_lat is None or center_lon is None:
        center_lat = df_map['latitude'].mean()
        center_lon = df_map['longitude'].mean()
    
    # Create map with multiple tile layers
    m = folium.Map(
        location=[center_lat, center_lon],
        zoom_start=zoom,
        tiles='OpenStreetMap'
    )
    
    # Add alternative tile layers
    folium.TileLayer('CartoDB positron', name='Light Map').add_to(m)
    folium.TileLayer('CartoDB dark_matter', name='Dark Map').add_to(m)
    
    return m

# =================================================
# 6. CREATE POLLUTION HEATMAP
# =================================================

def create_pollution_heatmap(data, save_path='pollution_heatmap.html'):
    """
    Create an interactive pollution heatmap
    """
    
    print("\nüó∫Ô∏è Creating Pollution Heatmap...")
    
    # Create base map
    m = create_base_map()
    
    # Prepare heatmap data (latitude, longitude, weight=AQI)
    heat_data = [
        [row['latitude'], row['longitude'], row['aqi_value']] 
        for idx, row in data.iterrows()
    ]
    
    # Add heatmap layer
    HeatMap(
        heat_data,
        name='Pollution Heatmap',
        min_opacity=0.3,
        max_opacity=0.8,
        radius=15,
        blur=20,
        gradient={
            0.0: 'blue',
            0.3: 'lime',
            0.5: 'yellow',
            0.7: 'orange',
            1.0: 'red'
        }
    ).add_to(m)
    
    # Add layer control
    folium.LayerControl().add_to(m)
    
    # Add title
    title_html = '''
    <div style="position: fixed; 
                top: 10px; left: 50px; width: 300px; height: 60px; 
                background-color: white; border:2px solid grey; z-index:9999; 
                font-size:16px; font-weight: bold; padding: 10px">
        <p style="margin: 0;">üåç Air Pollution Heatmap</p>
        <p style="margin: 0; font-size: 12px; font-weight: normal;">
            Red areas indicate higher pollution levels
        </p>
    </div>
    '''
    m.get_root().html.add_child(folium.Element(title_html))
    
    # Save map
    m.save(save_path)
    print(f"‚úÖ Heatmap saved to: {save_path}")
    
    return m

# =================================================
# 7. CREATE SOURCE-SPECIFIC MARKERS MAP
# =================================================

def create_source_markers_map(data, save_path='pollution_sources_map.html'):
    """
    Create map with source-specific markers
    """
    
    print("\nüìç Creating Source-Specific Markers Map...")
    
    # Create base map
    m = create_base_map()
    
    # Create feature groups for each source type
    source_groups = {}
    for source in data['source_name'].unique():
        if pd.notna(source):
            source_groups[source] = folium.FeatureGroup(name=f'{source} Sources')
    
    # Add markers for each location
    for idx, row in data.iterrows():
        source = row['source_name']
        if pd.isna(source):
            continue
            
        # Get icon details
        icon_details = source_icons.get(source, {
            'icon': 'info-sign',
            'color': 'gray',
            'prefix': 'glyphicon'
        })
        
        # Create popup content
        popup_html = f"""
        <div style="width: 200px;">
            <h4 style="margin: 0 0 10px 0; color: {icon_details['color']};">
                {source} Pollution Source
            </h4>
            <hr style="margin: 5px 0;">
            <b>City:</b> {row.get('city', 'Unknown')}<br>
            <b>AQI:</b> {row['aqi_value']:.1f}<br>
            <b>Category:</b> {row['aqi_category']}<br>
            <b>Location:</b> {row['latitude']:.4f}, {row['longitude']:.4f}<br>
            {f"<b>Date:</b> {row.get('date', 'N/A')}<br>" if 'date' in row else ""}
        </div>
        """
        
        # Create marker with custom icon
        folium.Marker(
            location=[row['latitude'], row['longitude']],
            popup=folium.Popup(popup_html, max_width=300),
            tooltip=f"{source}: AQI {row['aqi_value']:.1f}",
            icon=folium.Icon(
                color=icon_details['color'],
                icon=icon_details['icon'],
                prefix=icon_details['prefix']
            )
        ).add_to(source_groups[source])
    
    # Add all feature groups to map
    for group in source_groups.values():
        group.add_to(m)
    
    # Add layer control
    folium.LayerControl(collapsed=False).add_to(m)
    
    # Add legend
    legend_html = '''
    <div style="position: fixed; 
                bottom: 50px; right: 50px; width: 200px; 
                background-color: white; border:2px solid grey; z-index:9999; 
                font-size:14px; padding: 10px">
        <p style="margin: 0; font-weight: bold;">Pollution Sources</p>
        <hr style="margin: 5px 0;">
        <p style="margin: 5px 0;"><i class="fa fa-industry" style="color:red"></i> Industrial</p>
        <p style="margin: 5px 0;"><i class="fa fa-car" style="color:blue"></i> Vehicular</p>
        <p style="margin: 5px 0;"><i class="fa fa-home" style="color:green"></i> Residential</p>
    </div>
    '''
    m.get_root().html.add_child(folium.Element(legend_html))
    
    # Save map
    m.save(save_path)
    print(f"‚úÖ Source markers map saved to: {save_path}")
    
    return m

# =================================================
# 8. CREATE COMBINED MAP WITH BOTH HEATMAP AND MARKERS
# =================================================

def create_combined_map(data, save_path='combined_pollution_map.html'):
    """
    Create comprehensive map with heatmap and source markers
    """
    
    print("\nüåê Creating Combined Map...")
    
    # Create base map
    m = create_base_map()
    
    # Add heatmap layer
    heat_data = [
        [row['latitude'], row['longitude'], row['aqi_value']] 
        for idx, row in data.iterrows()
    ]
    
    HeatMap(
        heat_data,
        name='Pollution Heatmap',
        min_opacity=0.2,
        max_opacity=0.6,
        radius=15,
        blur=20,
        gradient={
            0.0: 'blue',
            0.3: 'lime',
            0.5: 'yellow',
            0.7: 'orange',
            1.0: 'red'
        },
        show=True
    ).add_to(m)
    
    # Create marker clusters for each source type
    for source in data['source_name'].unique():
        if pd.isna(source):
            continue
        
        source_data = data[data['source_name'] == source]
        icon_details = source_icons.get(source, {
            'icon': 'info-sign',
            'color': 'gray',
            'prefix': 'glyphicon'
        })
        
        # Create marker cluster for this source
        marker_cluster = MarkerCluster(
            name=f'{source} Sources',
            show=True
        ).add_to(m)
        
        # Add markers
        for idx, row in source_data.iterrows():
            popup_html = f"""
            <div style="width: 250px;">
                <h4 style="margin: 0 0 10px 0; color: {icon_details['color']};">
                    {source} Pollution
                </h4>
                <hr style="margin: 5px 0;">
                <b>City:</b> {row.get('city', 'Unknown')}<br>
                <b>AQI Value:</b> <span style="color: {row['aqi_color']}; font-weight: bold;">
                    {row['aqi_value']:.1f}
                </span><br>
                <b>AQI Category:</b> {row['aqi_category']}<br>
                <b>Coordinates:</b> ({row['latitude']:.4f}, {row['longitude']:.4f})<br>
                {f"<b>Date:</b> {row.get('date', 'N/A')}<br>" if 'date' in row else ""}
            </div>
            """
            
            folium.Marker(
                location=[row['latitude'], row['longitude']],
                popup=folium.Popup(popup_html, max_width=300),
                tooltip=f"{source}: AQI {row['aqi_value']:.1f}",
                icon=folium.Icon(
                    color=icon_details['color'],
                    icon=icon_details['icon'],
                    prefix=icon_details['prefix']
                )
            ).add_to(marker_cluster)
    
    # Add layer control
    folium.LayerControl(collapsed=False).add_to(m)
    
    # Add comprehensive legend
    legend_html = '''
    <div style="position: fixed; 
                bottom: 50px; right: 50px; width: 250px; 
                background-color: white; border:2px solid grey; z-index:9999; 
                font-size:13px; padding: 15px; border-radius: 5px;">
        <h4 style="margin: 0 0 10px 0;">Legend</h4>
        <hr style="margin: 5px 0;">
        
        <p style="margin: 5px 0; font-weight: bold;">Pollution Sources:</p>
        <p style="margin: 3px 0;"><i class="fa fa-industry" style="color:red"></i> Industrial</p>
        <p style="margin: 3px 0;"><i class="fa fa-car" style="color:blue"></i> Vehicular</p>
        <p style="margin: 3px 0;"><i class="fa fa-home" style="color:green"></i> Residential</p>
        
        <hr style="margin: 10px 0;">
        <p style="margin: 5px 0; font-weight: bold;">AQI Categories:</p>
        <p style="margin: 3px 0;"><span style="color: #00e400;">‚óè</span> Good (0-50)</p>
        <p style="margin: 3px 0;"><span style="color: #ffff00;">‚óè</span> Moderate (51-100)</p>
        <p style="margin: 3px 0;"><span style="color: #ff7e00;">‚óè</span> Unhealthy for Sensitive (101-150)</p>
        <p style="margin: 3px 0;"><span style="color: #ff0000;">‚óè</span> Unhealthy (151-200)</p>
        <p style="margin: 3px 0;"><span style="color: #8f3f97;">‚óè</span> Very Unhealthy (201-300)</p>
        <p style="margin: 3px 0;"><span style="color: #7e0023;">‚óè</span> Hazardous (301+)</p>
    </div>
    '''
    m.get_root().html.add_child(folium.Element(legend_html))
    
    # Add title
    title_html = '''
    <div style="position: fixed; 
                top: 10px; left: 50px; width: 400px; 
                background-color: white; border:2px solid grey; z-index:9999; 
                font-size:16px; font-weight: bold; padding: 15px; border-radius: 5px;">
        <h3 style="margin: 0;">üåç Air Pollution Monitoring Dashboard</h3>
        <p style="margin: 5px 0; font-size: 12px; font-weight: normal;">
            Interactive map showing pollution sources and severity levels
        </p>
    </div>
    '''
    m.get_root().html.add_child(folium.Element(title_html))
    
    # Save map
    m.save(save_path)
    print(f"‚úÖ Combined map saved to: {save_path}")
    
    return m

# =================================================
# 9. CREATE FILTERED MAPS
# =================================================

def create_filtered_map(data, source_filter=None, city_filter=None, 
                       aqi_threshold=None, date_filter=None,
                       save_path='filtered_pollution_map.html'):
    """
    Create map with applied filters
    """
    
    print(f"\nüîç Creating Filtered Map...")
    
    # Apply filters
    filtered_data = data.copy()
    
    if source_filter:
        filtered_data = filtered_data[filtered_data['source_name'] == source_filter]
        print(f"   Filtered by source: {source_filter}")
    
    if city_filter:
        filtered_data = filtered_data[filtered_data['city'] == city_filter]
        print(f"   Filtered by city: {city_filter}")
    
    if aqi_threshold:
        filtered_data = filtered_data[filtered_data['aqi_value'] >= aqi_threshold]
        print(f"   Filtered by AQI >= {aqi_threshold}")
    
    if date_filter and 'date' in filtered_data.columns:
        filtered_data = filtered_data[filtered_data['date'] == date_filter]
        print(f"   Filtered by date: {date_filter}")
    
    print(f"   Locations after filtering: {len(filtered_data)}")
    
    if len(filtered_data) == 0:
        print("‚ö†Ô∏è No data matches the filters!")
        return None
    
    # Create map with filtered data
    return create_combined_map(filtered_data, save_path)

# =================================================
# 10. CREATE HIGH-RISK ZONES MAP
# =================================================

def create_high_risk_zones_map(data, risk_threshold=150, 
                               save_path='high_risk_zones.html'):
    """
    Highlight high-risk pollution zones
    """
    
    print(f"\n‚ö†Ô∏è Creating High-Risk Zones Map (AQI >= {risk_threshold})...")
    
    # Filter high-risk locations
    high_risk = data[data['aqi_value'] >= risk_threshold].copy()
    
    print(f"   Found {len(high_risk)} high-risk locations")
    
    if len(high_risk) == 0:
        print("‚úÖ No high-risk zones found!")
        return None
    
    # Create base map
    m = create_base_map()
    
    # Add intense heatmap for high-risk zones
    heat_data = [
        [row['latitude'], row['longitude'], row['aqi_value']] 
        for idx, row in high_risk.iterrows()
    ]
    
    HeatMap(
        heat_data,
        name='High-Risk Zones',
        min_opacity=0.5,
        max_opacity=0.9,
        radius=20,
        blur=15,
        gradient={
            0.0: 'orange',
            0.5: 'red',
            1.0: 'darkred'
        }
    ).add_to(m)
    
    # Add warning markers for critical locations
    critical = high_risk[high_risk['aqi_value'] >= 200]
    
    for idx, row in critical.iterrows():
        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=10,
            popup=f"""
                <b>‚ö†Ô∏è CRITICAL POLLUTION ZONE</b><br>
                AQI: {row['aqi_value']:.1f}<br>
                Source: {row['source_name']}<br>
                City: {row.get('city', 'Unknown')}
            """,
            color='darkred',
            fill=True,
            fillColor='red',
            fillOpacity=0.7
        ).add_to(m)
    
    # Add warning box
    warning_html = '''
    <div style="position: fixed; 
                top: 10px; left: 50px; width: 350px; 
                background-color: #ffcccc; border:3px solid red; z-index:9999; 
                font-size:14px; padding: 15px; border-radius: 5px;">
        <h3 style="margin: 0; color: red;">‚ö†Ô∏è HIGH-RISK POLLUTION ZONES</h3>
        <p style="margin: 5px 0;">
            Areas with AQI ‚â• 150 (Unhealthy levels)<br>
            Immediate action recommended
        </p>
    </div>
    '''
    m.get_root().html.add_child(folium.Element(warning_html))
    
    folium.LayerControl().add_to(m)
    m.save(save_path)
    print(f"‚úÖ High-risk zones map saved to: {save_path}")
    
    return m

# =================================================
# 11. GENERATE ALL MAPS
# =================================================

print("\n" + "="*60)
print("GENERATING GEOSPATIAL VISUALIZATIONS")
print("="*60)

# 1. Basic Heatmap
map1 = create_pollution_heatmap(df_map)

# 2. Source Markers Map
map2 = create_source_markers_map(df_map)

# 3. Combined Map (Main Dashboard Map)
map3 = create_combined_map(df_map, save_path='main_dashboard_map.html')

# 4. High-Risk Zones
map4 = create_high_risk_zones_map(df_map, risk_threshold=150)

# 5. Example: Filtered by Industrial Sources
map5 = create_filtered_map(
    df_map, 
    source_filter='Industrial',
    save_path='industrial_pollution_map.html'
)

# 6. Example: High AQI locations
map6 = create_filtered_map(
    df_map,
    aqi_threshold=100,
    save_path='high_aqi_locations.html'
)

print("\n" + "="*60)
print("‚úÖ ALL MAPS GENERATED SUCCESSFULLY!")
print("="*60)
print("\nGenerated maps:")
print("1. pollution_heatmap.html - Basic pollution heatmap")
print("2. pollution_sources_map.html - Source-specific markers")
print("3. main_dashboard_map.html - Combined view (recommended for dashboard)")
print("4. high_risk_zones.html - High-risk pollution zones")
print("5. industrial_pollution_map.html - Industrial sources only")
print("6. high_aqi_locations.html - Locations with AQI > 100")

# =================================================
# 12. CREATE SUMMARY STATISTICS
# =================================================

print("\n" + "="*60)
print("POLLUTION STATISTICS SUMMARY")
print("="*60)

print(f"\nüìä Overall Statistics:")
print(f"   Total Locations: {len(df_map)}")
print(f"   Average AQI: {df_map['aqi_value'].mean():.2f}")
print(f"   Max AQI: {df_map['aqi_value'].max():.2f}")
print(f"   Min AQI: {df_map['aqi_value'].min():.2f}")

print(f"\nüè≠ By Pollution Source:")
for source in df_map['source_name'].unique():
    if pd.notna(source):
        source_data = df_map[df_map['source_name'] == source]
        print(f"   {source}:")
        print(f"      Count: {len(source_data)}")
        print(f"      Avg AQI: {source_data['aqi_value'].mean():.2f}")

print(f"\n‚ö†Ô∏è Risk Categories:")
for category in df_map['aqi_category'].unique():
    count = len(df_map[df_map['aqi_category'] == category])
    print(f"   {category}: {count} locations")

print("\n" + "="*60)
print("üéâ GEOSPATIAL VISUALIZATION MODULE COMPLETE!")
print("="*60)

‚úÖ Dataset loaded: (22264, 34)
‚ö†Ô∏è Models not found. Using existing predictions from dataset.
‚úÖ Prepared 22264 locations for mapping

Source distribution:
Series([], Name: count, dtype: int64)

GENERATING GEOSPATIAL VISUALIZATIONS

üó∫Ô∏è Creating Pollution Heatmap...
‚úÖ Heatmap saved to: pollution_heatmap.html

üìç Creating Source-Specific Markers Map...
‚úÖ Source markers map saved to: pollution_sources_map.html

üåê Creating Combined Map...
‚úÖ Combined map saved to: main_dashboard_map.html

‚ö†Ô∏è Creating High-Risk Zones Map (AQI >= 150)...
   Found 2582 high-risk locations
‚úÖ High-risk zones map saved to: high_risk_zones.html

üîç Creating Filtered Map...
   Filtered by source: Industrial
   Locations after filtering: 0
‚ö†Ô∏è No data matches the filters!

üîç Creating Filtered Map...
   Filtered by AQI >= 100
   Locations after filtering: 4070

üåê Creating Combined Map...
‚úÖ Combined map saved to: high_aqi_locations.html

‚úÖ ALL MAPS GENERATED SUCCESSFULLY!

Gen