In [11]:
# Cell 1: Setup and Initialize Everything
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Make the project's sibling `utils/` directory importable from this notebook.
# The path is normalised so the inserted/printed entry carries no `..` segment,
# and the insert is guarded so re-running this cell does not stack duplicate
# entries at the front of sys.path.
current_dir = os.getcwd()
utils_dir = os.path.normpath(os.path.join(current_dir, '..', 'utils'))
if os.path.isdir(utils_dir):
    if utils_dir not in sys.path:
        sys.path.insert(0, utils_dir)
    print(f"‚úÖ Added utils to path: {utils_dir}")
else:
    # The project imports that follow will fail if this branch is taken.
    print(f"‚ùå Utils directory not found at: {utils_dir}")

# Import our modules
from config import Config
from gee_auth import initialize_earth_engine
from gee_data_extractor import GEEDataExtractor
from weather_api import WeatherAPI
from soil_data_collector import SoilDataCollector

# Run banner: title, rule, timestamp of this collection run, and stated goal.
collection_stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
banner_lines = (
    "üåæ PUNJAB SMART CROP ADVISORY - COMPLETE DATA COLLECTION",
    "=" * 65,
    f"üìÖ Collection Date: {collection_stamp}",
    "üéØ Goal: Collect real satellite, weather & soil data for Punjab",
)
for banner_line in banner_lines:
    print(banner_line)

# Ask the project Config to create its directories.
Config.create_directories()


‚úÖ Added utils to path: c:\Users\DELL\OneDrive\Desktop\SIH2\Punjab_Crop_Advisory\notebooks\..\utils
üåæ PUNJAB SMART CROP ADVISORY - COMPLETE DATA COLLECTION
üìÖ Collection Date: 2025-09-01 02:37:22
üéØ Goal: Collect real satellite, weather & soil data for Punjab
‚úÖ All directories created


In [15]:
# Cell 2: Initialize All Systems
print("\nüîß INITIALIZING ALL SYSTEMS")
print("=" * 35)

# Single probe point reused by the weather and soil smoke tests below
# (the status lines at the end of the cell label it "Ludhiana").
probe_lat, probe_lon = 30.9010, 75.8573

# 1) Earth Engine: the helper's return value is kept as the success flag.
print("1Ô∏è‚É£ Google Earth Engine...")
gee_success = initialize_earth_engine()

# 2) Weather client, then a one-point request; any non-None reply counts as success.
print("\n2Ô∏è‚É£ Weather API System...")
weather_api = WeatherAPI()
test_weather = weather_api.get_weather_for_location(probe_lat, probe_lon)
weather_success = test_weather is not None
weather_label = '‚úÖ Working' if weather_success else '‚ùå Failed'
print(f"   Weather API Status: {weather_label}")

# 3) Soil collector, checked the same way at the same point.
print("\n3Ô∏è‚É£ Soil Data Collection System...")
soil_collector = SoilDataCollector()
test_soil = soil_collector.get_soilgrids_data(probe_lat, probe_lon)
soil_success = test_soil is not None
soil_label = '‚úÖ Working' if soil_success else '‚ùå Failed'
print(f"   Soil Data Status: {soil_label}")

# 4) Satellite extractor (constructed regardless of the flags above).
print("\n4Ô∏è‚É£ Satellite Data Extractor...")
gee_extractor = GEEDataExtractor()

# One status line per subsystem.
status_rows = (
    ("üõ∞Ô∏è Google Earth Engine", gee_success),
    ("üå§Ô∏è Weather APIs", weather_success),
    ("üå± Soil Data", soil_success),
)
print("\nüìä SYSTEM STATUS:")
for status_label, status_ok in status_rows:
    status_mark = '‚úÖ' if status_ok else '‚ùå'
    print(f"   {status_label}: {status_mark}")

# Echo one sample value from each smoke test when the reply is truthy.
if test_weather:
    probe_temperature = test_weather['temperature']
    print(f"   üå°Ô∏è Current temp in Ludhiana: {probe_temperature:.1f}¬∞C")
if test_soil:
    probe_ph = test_soil['pH']
    print(f"   üß™ Sample pH for Ludhiana: {probe_ph:.1f}")



üîß INITIALIZING ALL SYSTEMS
1Ô∏è‚É£ Google Earth Engine...
üõ∞Ô∏è Initializing Google Earth Engine...
üîë Service Account: airflow-earth-engine-reader@smart-crop-advisory.iam.gserviceaccount.com
üîë Project ID: smart-crop-advisory
‚úÖ Google Earth Engine initialized successfully!

2Ô∏è‚É£ Weather API System...
   Weather API Status: ‚úÖ Working

3Ô∏è‚É£ Soil Data Collection System...
‚úÖ Google Earth Engine initialized successfully!

2Ô∏è‚É£ Weather API System...
   Weather API Status: ‚úÖ Working

3Ô∏è‚É£ Soil Data Collection System...
SoilGrids API error: HTTPSConnectionPool(host='rest.soilgrids.org', port=443): Max retries exceeded with url: /soilgrids/v2.0/properties/query?lon=75.8573&lat=30.901&property=phh2o&property=soc&property=sand&property=silt&property=clay&property=nitrogen&depth=0-5cm (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x00000221ABCD6710>: Failed to resolve 'rest.soilgrids.org' ([Errno 11001] getaddrinfo failed)")), using Pu

In [14]:
# Force reload the config module so edits made to config.py after the kernel
# first imported it take effect without restarting.
# NOTE(review): this is a troubleshooting cell; its statement order matters
# (reload config -> patch Config -> reload gee_auth), so keep it intact if it
# is moved. It also rebinds `Config`, `initialize_earth_engine` and
# `gee_success`, which earlier cells already defined.
import importlib
import config
importlib.reload(config)
# Rebind the notebook-level name to the class defined by the reloaded module.
from config import Config

# Test Google Earth Engine Configuration: show the configured credentials path
# and whether a file exists there relative to the current working directory.
print("üîç Testing RELOADED GEE Configuration...")
print(f"Config GEE Path: {Config.GEE_SERVICE_ACCOUNT_JSON}")
print(f"File exists: {os.path.exists(Config.GEE_SERVICE_ACCOUNT_JSON)}")

# If the configured path does not exist, override it in memory with a path
# relative to the working directory. This changes the attribute for the current
# kernel session only; config.py on disk is not modified.
if not os.path.exists(Config.GEE_SERVICE_ACCOUNT_JSON):
    Config.GEE_SERVICE_ACCOUNT_JSON = "../config/google_earth_engine.json"
    print(f"‚úÖ Fixed path: {Config.GEE_SERVICE_ACCOUNT_JSON}")
    print(f"File exists now: {os.path.exists(Config.GEE_SERVICE_ACCOUNT_JSON)}")

# Now reload and test GEE auth.
# NOTE(review): presumably gee_auth reads Config.GEE_SERVICE_ACCOUNT_JSON, which
# would be why it is reloaded only after the override above — confirm in gee_auth.py.
import gee_auth
importlib.reload(gee_auth)
from gee_auth import initialize_earth_engine

print("\nüõ∞Ô∏è Testing Google Earth Engine initialization...")
# Overwrites the gee_success flag with the result of this re-initialisation.
gee_success = initialize_earth_engine()
print(f"GEE Success: {gee_success}")

üîç Testing RELOADED GEE Configuration...
Config GEE Path: config/service-account.json
File exists: False
‚úÖ Fixed path: ../config/google_earth_engine.json
File exists now: True

üõ∞Ô∏è Testing Google Earth Engine initialization...
üõ∞Ô∏è Initializing Google Earth Engine...
üîë Service Account: airflow-earth-engine-reader@smart-crop-advisory.iam.gserviceaccount.com
üîë Project ID: smart-crop-advisory
‚úÖ Google Earth Engine initialized successfully!
GEE Success: True
‚úÖ Google Earth Engine initialized successfully!
GEE Success: True


In [16]:
# Cell 3: Create Punjab Farm Plot Network
print("\nüó∫Ô∏è CREATING PUNJAB FARM PLOT NETWORK")
print("=" * 42)

# Ask the extractor for 50 plot records.
farm_plots = gee_extractor.create_punjab_farm_plots(num_plots=50)

# Tabulate only the identifying/location fields of each record; any other
# keys on the records (e.g. the geometry used later) stay out of the table.
plot_fields = ('plot_id', 'latitude', 'longitude', 'district')
plot_records = [
    {field: record[field] for field in plot_fields}
    for record in farm_plots
]
plots_df = pd.DataFrame(plot_records)

print(f"‚úÖ Created {len(plots_df)} farm plots across Punjab")

# Latitude / longitude extent of the generated plots.
print(f"\nüìç Geographic Coverage:")
lat_values = plots_df['latitude']
print(f"   Latitude: {lat_values.min():.3f}¬∞N to {lat_values.max():.3f}¬∞N")
lon_values = plots_df['longitude']
print(f"   Longitude: {lon_values.min():.3f}¬∞E to {lon_values.max():.3f}¬∞E")

# Number of plots per district, most frequent first.
district_counts = plots_df['district'].value_counts()
print(f"\nüèòÔ∏è District Distribution:")
for district, count in district_counts.items():
    print(f"   {district}: {count} plots")

# Persist the plot table without the DataFrame index.
plots_csv_path = '../data/raw/punjab_farm_plots.csv'
plots_df.to_csv(plots_csv_path, index=False)
print(f"\nüíæ Saved: data/raw/punjab_farm_plots.csv")

# First five rows as a quick visual check.
print(f"\nüîç Sample Farm Plots:")
sample_plots = plots_df.head()
print(sample_plots)



üó∫Ô∏è CREATING PUNJAB FARM PLOT NETWORK
‚úÖ Created 50 farm plots across Punjab districts
‚úÖ Created 50 farm plots across Punjab

üìç Geographic Coverage:
   Latitude: 30.009¬∞N to 32.021¬∞N
   Longitude: 74.625¬∞E to 76.869¬∞E

üèòÔ∏è District Distribution:
   Mohali: 9 plots
   Patiala: 8 plots
   Bathinda: 8 plots
   Jalandhar: 6 plots
   Gurdaspur: 5 plots
   Kapurthala: 5 plots
   Ludhiana: 5 plots
   Amritsar: 4 plots

üíæ Saved: data/raw/punjab_farm_plots.csv

üîç Sample Farm Plots:
  plot_id   latitude  longitude    district
0  PB_001  31.961465  75.484315   Gurdaspur
1  PB_002  31.211218  75.428835  Kapurthala
2  PB_003  31.311807  75.436676   Jalandhar
3  PB_004  30.577882  76.499115     Patiala
4  PB_005  30.715975  76.694073      Mohali


In [17]:
# Cell 4: Collect Real Satellite Data
print(f"\nüõ∞Ô∏è COLLECTING REAL SATELLITE DATA (SENTINEL-2)")
print("=" * 52)

# Date window passed to the Sentinel-2 extractor.
end_date = '2024-08-31'
start_date = '2024-06-01'  # Growing season

print(f"üìÖ Satellite Data Period: {start_date} to {end_date}")
print(f"üéØ Processing {len(farm_plots)} farm plots...")

satellite_data = []          # exactly one record per plot, in farm_plots order
successful_extractions = 0   # records whose data_source is 'Sentinel2_Real'

for idx, plot in enumerate(farm_plots):
    plot_id = plot['plot_id']
    geometry = plot['geometry']

    # Identity columns shared by the success and the failure record.
    plot_info = {
        'plot_id': plot_id,
        'latitude': plot['latitude'],
        'longitude': plot['longitude'],
        'district': plot['district'],
    }

    print(f"üîÑ Processing {idx+1}/{len(farm_plots)}: {plot_id}", end=" ... ")

    # Build the record inside the try block but append it only once, after the
    # try/except. Previously the record was appended *before* the
    # `sat_data['data_source']` lookup, so an extractor result without that key
    # raised inside the try and the plot was stored a second time as 'Failed',
    # leaving duplicate plot_id rows in satellite_df.
    try:
        sat_data = gee_extractor.extract_ndvi_sentinel2(geometry, start_date, end_date)
        # Extractor fields are spread last so they win on any key clash,
        # as before. A non-mapping result (e.g. None) raises here and is
        # recorded as a failure below.
        plot_data = {**plot_info, **sat_data}
    except Exception as e:
        print(f"‚ùå Error: {str(e)[:30]}...")
        # Keep the plot in the table with null indices so row counts line up.
        plot_data = {
            **plot_info,
            'ndvi_mean': np.nan,
            'ndwi_mean': np.nan,
            'data_source': 'Failed',
        }
    else:
        # A result without a data_source is labelled explicitly so it still
        # shows up in the data-source breakdown below.
        plot_data.setdefault('data_source', 'Unknown')
        if plot_data['data_source'] == 'Sentinel2_Real':
            successful_extractions += 1
            print("‚úÖ Real data")
        else:
            print("‚ö†Ô∏è Synthetic")

    satellite_data.append(plot_data)

# Create satellite DataFrame
satellite_df = pd.DataFrame(satellite_data)

print(f"\nüìä SATELLITE DATA COLLECTION RESULTS:")
print(f"   ‚úÖ Total processed: {len(satellite_data)}")
print(f"   üõ∞Ô∏è Real Sentinel-2: {successful_extractions}")
# Series.mean() skips NaN, so failed plots do not affect these averages.
print(f"   üìà Average NDVI: {satellite_df['ndvi_mean'].mean():.3f}")
print(f"   üíß Average NDWI: {satellite_df['ndwi_mean'].mean():.3f}")

# Data source breakdown
source_counts = satellite_df['data_source'].value_counts()
print(f"\nüìä Data Sources:")
for source, count in source_counts.items():
    print(f"   {source}: {count} plots")

# Save satellite data
satellite_df.to_csv('../data/raw/punjab_satellite_data.csv', index=False)
print(f"\nüíæ Saved: data/raw/punjab_satellite_data.csv")

# Display sample
print(f"\nüîç Sample Satellite Data:")
display_cols = ['plot_id', 'district', 'ndvi_mean', 'ndwi_mean', 'data_source']
print(satellite_df[display_cols].head())



üõ∞Ô∏è COLLECTING REAL SATELLITE DATA (SENTINEL-2)
üìÖ Satellite Data Period: 2024-06-01 to 2024-08-31
üéØ Processing 50 farm plots...
üîÑ Processing 1/50: PB_001 ... Sentinel-2 extraction error: 'NoneType' object is not subscriptable, using synthetic data
‚ö†Ô∏è Synthetic
üîÑ Processing 2/50: PB_002 ... Sentinel-2 extraction error: 'NoneType' object is not subscriptable, using synthetic data
‚ö†Ô∏è Synthetic
üîÑ Processing 2/50: PB_002 ... Sentinel-2 extraction error: 'NoneType' object is not subscriptable, using synthetic data
‚ö†Ô∏è Synthetic
üîÑ Processing 3/50: PB_003 ... Sentinel-2 extraction error: 'NoneType' object is not subscriptable, using synthetic data
‚ö†Ô∏è Synthetic
üîÑ Processing 3/50: PB_003 ... Sentinel-2 extraction error: 'NoneType' object is not subscriptable, using synthetic data
‚ö†Ô∏è Synthetic
üîÑ Processing 4/50: PB_004 ... Sentinel-2 extraction error: 'NoneType' object is not subscriptable, using synthetic data
‚ö†Ô∏è Synthetic
üîÑ Processing 4/50:

In [18]:
# Cell 5: Collect Weather Data for All Plots
print(f"\nüå§Ô∏è COLLECTING WEATHER DATA FOR ALL PLOTS")
print("=" * 45)

# One call to the weather client for the whole plot table.
weather_df = weather_api.get_weather_for_multiple_locations(plots_df)

# Headline statistics, one column at a time.
print(f"\nüìä WEATHER DATA SUMMARY:")
temperature_col = weather_df['temperature']
print(f"   üå°Ô∏è Temperature range: {temperature_col.min():.1f}¬∞C to {temperature_col.max():.1f}¬∞C")
humidity_col = weather_df['humidity']
print(f"   üíß Humidity range: {humidity_col.min():.0f}% to {humidity_col.max():.0f}%")
rainy_plot_count = (weather_df['rainfall'] > 0).sum()
print(f"   üåßÔ∏è Plots with rain: {rainy_plot_count}")
mean_wind_speed = weather_df['wind_speed'].mean()
print(f"   üí® Avg wind speed: {mean_wind_speed:.1f} km/h")

# How many plots each weather source served.
weather_sources = weather_df['data_source'].value_counts()
print(f"\nüìä Weather Data Sources:")
for source, count in weather_sources.items():
    print(f"   {source}: {count} plots")

# Persist the weather table without the DataFrame index.
weather_csv_path = '../data/raw/punjab_weather_data.csv'
weather_df.to_csv(weather_csv_path, index=False)
print(f"\nüíæ Saved: data/raw/punjab_weather_data.csv")

# First five rows of the key columns as a quick visual check.
print(f"\nüîç Sample Weather Data:")
weather_display_cols = ['plot_id', 'temperature', 'humidity', 'rainfall', 'data_source']
weather_sample = weather_df[weather_display_cols].head()
print(weather_sample)



üå§Ô∏è COLLECTING WEATHER DATA FOR ALL PLOTS
üå§Ô∏è Collecting weather data...
‚úÖ Weather data collected for 50 locations
üìä Data sources used: {'OpenWeatherMap': 50}

üìä WEATHER DATA SUMMARY:
   üå°Ô∏è Temperature range: 22.4¬∞C to 24.4¬∞C
   üíß Humidity range: 93% to 98%
   üåßÔ∏è Plots with rain: 15
   üí® Avg wind speed: 3.5 km/h

üìä Weather Data Sources:
   OpenWeatherMap: 50 plots

üíæ Saved: data/raw/punjab_weather_data.csv

üîç Sample Weather Data:
  plot_id  temperature  humidity  rainfall     data_source
0  PB_001        22.53        96      0.00  OpenWeatherMap
1  PB_002        23.27        97      1.24  OpenWeatherMap
2  PB_003        23.04        97      0.60  OpenWeatherMap
3  PB_004        23.21        96      0.00  OpenWeatherMap
4  PB_005        23.17        94      0.00  OpenWeatherMap
‚úÖ Weather data collected for 50 locations
üìä Data sources used: {'OpenWeatherMap': 50}

üìä WEATHER DATA SUMMARY:
   üå°Ô∏è Temperature range: 22.4¬∞C to 24.4¬∞C


In [19]:
# Cell 6: Collect Comprehensive Soil Data
print(f"\nüå± COLLECTING COMPREHENSIVE SOIL DATA")
print("=" * 40)

# One call to the soil collector for the whole plot table.
soil_df = soil_collector.collect_soil_data_for_plots(plots_df)

# Min/max of the three headline soil properties, one column at a time.
print(f"\nüìä SOIL DATA SUMMARY:")
ph_col = soil_df['pH']
print(f"   üß™ pH range: {ph_col.min():.1f} to {ph_col.max():.1f}")
carbon_col = soil_df['organic_carbon']
print(f"   üåø Organic Carbon: {carbon_col.min():.2f}% to {carbon_col.max():.2f}%")
nitrogen_col = soil_df['N_available']
print(f"   üçÉ Nitrogen availability: {nitrogen_col.min():.0f} to {nitrogen_col.max():.0f} kg/ha")
print(f"   üè• Soil Health Status:")

# Number of plots in each soil-health class.
health_counts = soil_df['soil_health_status'].value_counts()
for status, count in health_counts.items():
    print(f"      {status}: {count} plots")

# Number of plots served by each soil data source.
soil_sources = soil_df['data_source'].value_counts()
print(f"\nüìä Soil Data Sources:")
for source, count in soil_sources.items():
    print(f"   {source}: {count}")

# Persist the soil table without the DataFrame index.
soil_csv_path = '../data/raw/punjab_soil_data.csv'
soil_df.to_csv(soil_csv_path, index=False)
print(f"\nüíæ Saved: data/raw/punjab_soil_data.csv")

# First five rows of the key columns as a quick visual check.
print(f"\nüîç Sample Soil Data:")
soil_display_cols = ['plot_id', 'pH', 'organic_carbon', 'N_available', 'soil_health_status', 'data_source']
soil_sample = soil_df[soil_display_cols].head()
print(soil_sample)



üå± COLLECTING COMPREHENSIVE SOIL DATA
üå± Collecting soil data from global sources...
   Processing PB_001... SoilGrids API error: HTTPSConnectionPool(host='rest.soilgrids.org', port=443): Max retries exceeded with url: /soilgrids/v2.0/properties/query?lon=75.48431496086756&lat=31.961464826625406&property=phh2o&property=soc&property=sand&property=silt&property=clay&property=nitrogen&depth=0-5cm (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x00000221ABF15810>: Failed to resolve 'rest.soilgrids.org' ([Errno 11001] getaddrinfo failed)")), using Punjab model
‚úÖ
   Processing PB_002... SoilGrids API error: HTTPSConnectionPool(host='rest.soilgrids.org', port=443): Max retries exceeded with url: /soilgrids/v2.0/properties/query?lon=75.42883532770341&lat=31.21121798229296&property=phh2o&property=soc&property=sand&property=silt&property=clay&property=nitrogen&depth=0-5cm (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x0000022

In [20]:
# Cell 7: Generate Realistic Crop Yield Data
# Section banner: blank line, heading, then a 42-character rule.
print("\nüåæ GENERATING REALISTIC CROP YIELD DATA", "=" * 42, sep="\n")

def generate_realistic_yield_data(plots=None, satellite=None, soil=None, weather=None, seed=None):
    """Generate synthetic crop-yield records from satellite, soil and weather data.

    For every plot, a per-crop base yield is scaled by an NDVI factor, a soil
    health factor, a heat-stress factor and a random year-to-year factor, for
    the years 2022-2024 and the crops Wheat, Rice and Cotton.

    Args:
        plots: DataFrame with a 'plot_id' column. Defaults to the notebook
            global `plots_df`.
        satellite: DataFrame with 'plot_id' and 'ndvi_mean'. Defaults to the
            notebook global `satellite_df`.
        soil: DataFrame with 'plot_id' and 'soil_health_status'. Defaults to
            the notebook global `soil_df`.
        weather: DataFrame with 'plot_id', 'temperature' and 'rainfall'.
            Defaults to the notebook global `weather_df`.
        seed: Optional seed for a private random generator, making the output
            reproducible. When None, NumPy's global generator is used, so the
            result follows any prior `np.random.seed(...)` call.

    Returns:
        DataFrame with columns plot_id, year, crop_type, yield_kg_per_hectare,
        sowing_date and harvest_date. Dates are 'YYYY-MM-DD' strings.

    Raises:
        KeyError: if a plot in `plots` has no row in one of the other frames.
    """
    # Fall back to the notebook-level frames so existing zero-argument calls
    # keep working.
    plots = plots_df if plots is None else plots
    satellite = satellite_df if satellite is None else satellite
    soil = soil_df if soil is None else soil
    weather = weather_df if weather is None else weather

    # np.random (module) and RandomState expose the same normal()/randint().
    rng = np.random if seed is None else np.random.RandomState(seed)

    def _first_row_per_plot(frame):
        # Index by plot_id, keeping the first row per plot, so each lookup is a
        # label access instead of a full boolean scan of the frame.
        return frame.drop_duplicates(subset='plot_id').set_index('plot_id')

    sat_by_plot = _first_row_per_plot(satellite)
    soil_by_plot = _first_row_per_plot(soil)
    weather_by_plot = _first_row_per_plot(weather)

    yield_data = []

    # Punjab major crops and their typical yields (kg/hectare)
    crop_base_yields = {
        'Wheat': 4200,
        'Rice': 5800,
        'Cotton': 480  # Cotton in kg/hectare (fiber)
    }
    soil_factors = {'Good': 1.1, 'Medium': 1.0, 'Poor': 0.85}

    print("üìä Generating yield data based on:")
    print("   ‚Ä¢ Satellite NDVI (crop health)")
    print("   ‚Ä¢ Soil health status")
    print("   ‚Ä¢ Weather conditions")
    print("   ‚Ä¢ Punjab crop patterns")

    for plot_id in plots['plot_id']:
        sat_row = sat_by_plot.loc[plot_id]
        soil_row = soil_by_plot.loc[plot_id]
        weather_row = weather_by_plot.loc[plot_id]

        # Missing NDVI falls back to 0.6.
        ndvi = sat_row['ndvi_mean'] if pd.notna(sat_row['ndvi_mean']) else 0.6
        soil_health = soil_row['soil_health_status']
        temperature = weather_row['temperature']

        # NDVI 0.2 -> 0.0 and 0.7 -> 1.0 before clamping to [0.5, 1.5].
        ndvi_factor = max(0.5, min(1.5, (ndvi - 0.2) / 0.5))
        # An unrecognised or missing status is treated as neutral (1.0)
        # rather than aborting the whole run.
        soil_factor = soil_factors.get(soil_health, 1.0)

        # Temperature stress: 2% loss per degree above 35, capped at 20%.
        temp_factor = 1.0 if temperature < 35 else max(0.8, 1.0 - (temperature - 35) * 0.02)

        # Generate yields for 3 years and 3 crops
        for year in [2022, 2023, 2024]:
            for crop, base_yield in crop_base_yields.items():

                # Year-to-year variation. The draw order (factor, sowing day,
                # harvest day) is kept fixed so seeded runs are stable.
                year_factor = rng.normal(1.0, 0.1)

                final_yield = (base_yield *
                               ndvi_factor *
                               soil_factor *
                               temp_factor *
                               year_factor)

                # Floor at 40% of the base yield. The crop-specific penalties
                # below are applied afterwards and can push below this floor.
                final_yield = max(base_yield * 0.4, final_yield)

                # Crop-specific adjustments
                if crop == 'Cotton' and temperature > 38:  # Cotton heat tolerance
                    final_yield *= 0.9
                elif crop == 'Rice' and weather_row['rainfall'] < 1:  # Rice needs water
                    final_yield *= 0.85

                # Rice is sown in June and harvested in October of the same
                # year; every other crop is sown in November and harvested the
                # following April.
                # NOTE(review): Cotton inherits the November -> April calendar,
                # unchanged from the previous version. Cotton is normally a
                # summer-sown crop — confirm whether it needs its own calendar.
                is_rice = crop == 'Rice'
                sow_month = 6 if is_rice else 11
                harvest_month = 10 if is_rice else 4
                harvest_year = year if is_rice else year + 1

                sow_day = rng.randint(1, 20)
                harvest_day = rng.randint(1, 25)

                yield_data.append({
                    'plot_id': plot_id,
                    'year': year,
                    'crop_type': crop,
                    'yield_kg_per_hectare': round(final_yield, 1),
                    # Months are zero-padded so the strings are valid ISO
                    # dates ('2022-06-05', not '2022-6-05').
                    'sowing_date': f'{year}-{sow_month:02d}-{sow_day:02d}',
                    'harvest_date': f'{harvest_year}-{harvest_month:02d}-{harvest_day:02d}'
                })

    return pd.DataFrame(yield_data)

# Generate yield data
# Seed NumPy's global generator first: the yield generator draws its random
# factors from it, so without a seed every run writes a different CSV.
YIELD_RANDOM_SEED = 42
np.random.seed(YIELD_RANDOM_SEED)
yield_df = generate_realistic_yield_data()

print(f"\nüìä CROP YIELD DATA SUMMARY:")
print(f"   üìà Total yield records: {len(yield_df):,}")
print(f"   üåæ Crops: {list(yield_df['crop_type'].unique())}")
# .tolist() converts NumPy integers to plain ints so the list prints as
# [2022, 2023, 2024] rather than [np.int64(2022), ...].
print(f"   üìÖ Years: {sorted(yield_df['year'].unique().tolist())}")

# Average yields by crop, highest first
avg_yields = yield_df.groupby('crop_type')['yield_kg_per_hectare'].mean().sort_values(ascending=False)
print(f"\nüìä Average Yields (kg/hectare):")
for crop, yield_val in avg_yields.items():
    print(f"   {crop:<8}: {yield_val:>6.0f} kg/ha")

# Mean yield per (crop, soil health) pair; the result is indexed by that pair.
yield_soil_health = yield_df.merge(
    soil_df[['plot_id', 'soil_health_status']],
    on='plot_id'
).groupby(['crop_type', 'soil_health_status'])['yield_kg_per_hectare'].mean()

print(f"\nüìä Yield by Soil Health (kg/ha):")
for crop in yield_df['crop_type'].unique():
    print(f"   {crop}:")
    for health in ['Good', 'Medium', 'Poor']:
        # Not every crop/health combination necessarily occurs in the data.
        if (crop, health) in yield_soil_health.index:
            yield_val = yield_soil_health[(crop, health)]
            print(f"      {health}: {yield_val:.0f} kg/ha")

# Save yield data
yield_df.to_csv('../data/raw/punjab_crop_yields.csv', index=False)
print(f"\nüíæ Saved: data/raw/punjab_crop_yields.csv")



üåæ GENERATING REALISTIC CROP YIELD DATA
üìä Generating yield data based on:
   ‚Ä¢ Satellite NDVI (crop health)
   ‚Ä¢ Soil health status
   ‚Ä¢ Weather conditions
   ‚Ä¢ Punjab crop patterns

üìä CROP YIELD DATA SUMMARY:
   üìà Total yield records: 450
   üåæ Crops: ['Wheat', 'Rice', 'Cotton']
   üìÖ Years: [np.int64(2022), np.int64(2023), np.int64(2024)]

üìä Average Yields (kg/hectare):
   Rice    :   5129 kg/ha
   Wheat   :   4256 kg/ha
   Cotton  :    482 kg/ha

üìä Yield by Soil Health (kg/ha):
   Wheat:
      Good: 4951 kg/ha
      Medium: 4495 kg/ha
      Poor: 3693 kg/ha
   Rice:
      Good: 6068 kg/ha
      Medium: 5316 kg/ha
      Poor: 4425 kg/ha
   Cotton:
      Good: 558 kg/ha
      Medium: 495 kg/ha
      Poor: 426 kg/ha

üíæ Saved: data/raw/punjab_crop_yields.csv


In [22]:
# Cell 8: Create Interactive Maps and Visualizations
# folium is already imported by the notebook's first cell, so on a fresh kernel
# it must be installed before that cell runs; the unpinned `%pip install` that
# used to sit here came too late to help and has been dropped. The import is
# kept so this cell still runs on its own in an already-prepared kernel.
import folium

# Section banner (previously printed twice by a duplicated pair of lines).
print(f"\nüó∫Ô∏è CREATING INTERACTIVE MAPS AND VISUALIZATIONS")
print("=" * 52)

def create_comprehensive_punjab_map(satellite=None, soil=None, weather=None):
    """Build a folium map with one colour-coded marker per farm plot.

    Each row of the satellite table becomes a circle marker coloured by NDVI
    (green > 0.7, orange > 0.5, red otherwise, gray when NDVI is missing) with
    a popup combining the satellite, soil and weather values for that plot.

    Args:
        satellite: DataFrame with 'plot_id', 'latitude', 'longitude',
            'district', 'ndvi_mean', 'ndwi_mean' and 'data_source'. Defaults
            to the notebook global `satellite_df`.
        soil: DataFrame with 'plot_id', 'soil_health_status', 'pH',
            'organic_carbon', 'N_available' and 'data_source'. Defaults to the
            notebook global `soil_df`.
        weather: DataFrame with 'plot_id', 'temperature', 'humidity',
            'rainfall' and 'data_source'. Defaults to the notebook global
            `weather_df`.

    Returns:
        The populated folium.Map.

    Raises:
        KeyError: if a plot in `satellite` has no row in `soil` or `weather`.
    """
    # Fall back to the notebook-level frames so existing zero-argument calls
    # keep working.
    satellite = satellite_df if satellite is None else satellite
    soil = soil_df if soil is None else soil
    weather = weather_df if weather is None else weather

    def _first_row_per_plot(frame):
        # Index by plot_id, keeping the first row per plot, so each lookup is a
        # label access instead of a full boolean scan of the frame.
        return frame.drop_duplicates(subset='plot_id').set_index('plot_id')

    def _format_index(value):
        # Missing values are shown as "N/A" instead of the literal "nan".
        return f"{value:.3f}" if pd.notna(value) else "N/A"

    soil_by_plot = _first_row_per_plot(soil)
    weather_by_plot = _first_row_per_plot(weather)

    # Center on Punjab
    punjab_center = [30.9, 75.8]

    # Create base map
    m = folium.Map(
        location=punjab_center,
        zoom_start=8,
        tiles='OpenStreetMap'
    )

    # Add farm plots with multi-layer information
    for _, row in satellite.iterrows():
        plot_id = row['plot_id']

        # Matching soil and weather rows for this plot
        soil_info = soil_by_plot.loc[plot_id]
        weather_info = weather_by_plot.loc[plot_id]

        # Color code by NDVI (crop health)
        ndvi = row['ndvi_mean']
        if pd.notna(ndvi):
            if ndvi > 0.7:
                color = 'green'
                health = 'Healthy'
            elif ndvi > 0.5:
                color = 'orange'
                health = 'Moderate'
            else:
                color = 'red'
                health = 'Stressed'
        else:
            color = 'gray'
            health = 'No Data'

        ndvi_text = _format_index(ndvi)
        ndwi_text = _format_index(row['ndwi_mean'])

        # Create comprehensive popup
        popup_html = f"""
        <div style='width: 300px; font-family: Arial;'>
            <h4>üåæ Farm Plot: {plot_id}</h4>
            <hr>
            <b>üìç Location:</b><br>
            ‚Ä¢ Coordinates: {row['latitude']:.4f}¬∞N, {row['longitude']:.4f}¬∞E<br>
            ‚Ä¢ District: {row['district']}<br>

            <hr><b>üõ∞Ô∏è Satellite Data:</b><br>
            ‚Ä¢ NDVI: {ndvi_text} ({health})<br>
            ‚Ä¢ NDWI: {ndwi_text}<br>
            ‚Ä¢ Data Source: {row['data_source']}<br>

            <hr><b>üå± Soil Health:</b><br>
            ‚Ä¢ Status: {soil_info['soil_health_status']}<br>
            ‚Ä¢ pH: {soil_info['pH']:.1f}<br>
            ‚Ä¢ Organic Carbon: {soil_info['organic_carbon']:.2f}%<br>
            ‚Ä¢ N Available: {soil_info['N_available']:.0f} kg/ha<br>
            ‚Ä¢ Data Source: {soil_info['data_source']}<br>

            <hr><b>üå§Ô∏è Weather Data:</b><br>
            ‚Ä¢ Temperature: {weather_info['temperature']:.1f}¬∞C<br>
            ‚Ä¢ Humidity: {weather_info['humidity']:.0f}%<br>
            ‚Ä¢ Rainfall: {weather_info['rainfall']:.1f}mm<br>
            ‚Ä¢ Data Source: {weather_info['data_source']}<br>
        </div>
        """

        # Add marker to map
        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=8,
            popup=folium.Popup(popup_html, max_width=350),
            color='black',
            weight=1,
            fillColor=color,
            fillOpacity=0.8
        ).add_to(m)

    # Add legend (fixed-position HTML overlay; the date shown is the day the
    # map was generated).
    legend_html = f'''
    <div style="position: fixed; 
                bottom: 50px; left: 50px; width: 200px; height: 140px; 
                background-color: white; border:2px solid grey; z-index:9999; 
                font-size:12px; padding: 10px; box-shadow: 3px 3px 10px rgba(0,0,0,0.3);">
    <h4>üåæ Punjab Smart Crop Advisory</h4>
    <p><b>Crop Health (NDVI):</b></p>
    <p><span style="color:green;">‚óè</span> Healthy (>0.7)</p>
    <p><span style="color:orange;">‚óè</span> Moderate (0.5-0.7)</p>
    <p><span style="color:red;">‚óè</span> Stressed (<0.5)</p>
    <p><span style="color:gray;">‚óè</span> No Data</p>
    <small>üìÖ Data: {datetime.now().strftime('%Y-%m-%d')}</small>
    </div>
    '''
    m.get_root().html.add_child(folium.Element(legend_html))

    return m

# Build the map from the tables collected in the earlier cells.
print("üó∫Ô∏è Creating interactive map with all data layers...")
punjab_map = create_comprehensive_punjab_map()

# Write the map to an HTML file at map_filename.
map_filename = '../data/processed/punjab_comprehensive_farm_map.html'
punjab_map.save(map_filename)

# Confirmation lines, in order.
for map_status_line in (
    "‚úÖ Interactive map created!",
    f"üó∫Ô∏è Saved: {map_filename}",
    "üí° Open this file in your browser to explore the data",
):
    print(map_status_line)

# A bare trailing expression makes the notebook render the map inline.
punjab_map



üó∫Ô∏è CREATING INTERACTIVE MAPS AND VISUALIZATIONS
üó∫Ô∏è Creating interactive map with all data layers...
‚úÖ Interactive map created!
üó∫Ô∏è Saved: ../data/processed/punjab_comprehensive_farm_map.html
üí° Open this file in your browser to explore the data


In [11]:
# Cell 9: Display the Interactive Map and Final Summary
print("üó∫Ô∏è DISPLAYING INTERACTIVE MAP")
print("=" * 32)

# Rebuild the map and render it; any failure is reported, together with the
# "Trust" hint, instead of stopping the cell.
try:
    punjab_map = create_comprehensive_punjab_map()
    display(punjab_map)
    for shown_line in (
        "‚úÖ Map should be displayed above!",
        "üí° If you don't see the map, click 'Trust' at the top of this notebook",
    ):
        print(shown_line)
except Exception as map_error:
    print(f"‚ùå Error displaying map: {map_error}")
    print("üí° Please click 'Trust' at the top of this notebook to enable interactive content")

# Final summary banner.
print("\nüéØ DATA COLLECTION COMPLETE - FINAL SUMMARY")
print("=" * 48)

# Files written by the earlier cells, grouped under the heading they are
# listed beneath.
dataset_listing = {
    "üìÅ Raw Data": (
        "data/raw/punjab_farm_plots.csv",
        "data/raw/punjab_satellite_data.csv",
        "data/raw/punjab_weather_data.csv",
        "data/raw/punjab_soil_data.csv",
        "data/raw/punjab_crop_yields.csv",
    ),
    "üìÅ Processed Data": (
        "data/processed/punjab_comprehensive_farm_map.html",
    ),
}
print("üìä DATASETS CREATED:")
for section_title, section_paths in dataset_listing.items():
    print(f"   {section_title}:")
    for listed_path in section_paths:
        print(f"      ‚Ä¢ {listed_path}")

# Closing lines pointing at the next notebook.
closing_lines = (
    "\n‚úÖ DATA COLLECTION COMPLETE!",
    "üéØ Ready for Feature Engineering and Model Training!",
    "üìö Next: Open notebooks/02_Feature_Engineering_EDA.ipynb",
)
for closing_line in closing_lines:
    print(closing_line)

üó∫Ô∏è DISPLAYING INTERACTIVE MAP


‚úÖ Map should be displayed above!
üí° If you don't see the map, click 'Trust' at the top of this notebook

üéØ DATA COLLECTION COMPLETE - FINAL SUMMARY
üìä DATASETS CREATED:
   üìÅ Raw Data:
      ‚Ä¢ data/raw/punjab_farm_plots.csv
      ‚Ä¢ data/raw/punjab_satellite_data.csv
      ‚Ä¢ data/raw/punjab_weather_data.csv
      ‚Ä¢ data/raw/punjab_soil_data.csv
      ‚Ä¢ data/raw/punjab_crop_yields.csv
   üìÅ Processed Data:
      ‚Ä¢ data/processed/punjab_comprehensive_farm_map.html

‚úÖ DATA COLLECTION COMPLETE!
üéØ Ready for Feature Engineering and Model Training!
üìö Next: Open notebooks/02_Feature_Engineering_EDA.ipynb
