In [1]:
# OpenStreetMap Feature Extraction for Air Quality Analysis
# This script extracts nearby physical features (roads, industrial zones, dumps, agricultural fields)
# from OpenStreetMap using OSMnx library in small batches with error handling

import pandas as pd
import numpy as np
import osmnx as ox
import geopandas as gpd
from shapely.geometry import Point, box
import warnings
import time
from typing import Dict, List, Tuple
import sys

warnings.filterwarnings('ignore')

# ============================================================================
# CONFIGURATION
# ============================================================================

BATCH_SIZE = 5
SEARCH_RADIUS = 5000  # meters = 5KM
OUTPUT_FILE = "Air_Quality_with_OSM_Features_5KM.xlsx"
INPUT_FILE = "Air Quality Dataset 2021-2023.xlsx"

# ============================================================================
# FEATURE EXTRACTION FUNCTIONS
# ============================================================================

def extract_roads(lat: float, lon: float, radius: int) -> Dict:
    """Extract road features from OSM"""
    try:
        roads = ox.features_from_point(
            (lat, lon),
            tags={'highway': True},
            dist=radius
        )
        
        if len(roads) > 0:
            road_names = [roads.index.get_level_values(0).unique()[i] 
                         for i in range(min(5, len(roads.index.get_level_values(0).unique())))]
            return {
                'count': len(roads),
                'examples': ', '.join(str(r)[:40] for r in road_names),
                'status': 'success'
            }
        return {'count': 0, 'examples': '', 'status': 'success'}
    except Exception as e:
        return {'count': 0, 'examples': f"Error: {str(e)[:30]}", 'status': 'failed'}

def extract_industrial_zones(lat: float, lon: float, radius: int) -> Dict:
    """Extract industrial facilities from OSM"""
    try:
        tags = {'industrial': True, 'landuse': 'industrial'}
        industrial = ox.features_from_point((lat, lon), tags=tags, dist=radius)
        
        if len(industrial) > 0:
            ind_names = industrial.index.get_level_values(0).unique()[:5]
            return {
                'count': len(industrial),
                'examples': ', '.join(str(i)[:35] for i in ind_names),
                'status': 'success'
            }
        return {'count': 0, 'examples': '', 'status': 'success'}
    except Exception as e:
        return {'count': 0, 'examples': f"Error: {str(e)[:30]}", 'status': 'failed'}

def extract_dump_sites(lat: float, lon: float, radius: int) -> Dict:
    """Extract waste management and dump sites from OSM"""
    try:
        tags = {'waste': True}
        dumps = ox.features_from_point((lat, lon), tags=tags, dist=radius)
        
        if len(dumps) > 0:
            dump_names = dumps.index.get_level_values(0).unique()[:5]
            return {
                'count': len(dumps),
                'examples': ', '.join(str(d)[:35] for d in dump_names),
                'status': 'success'
            }
        return {'count': 0, 'examples': '', 'status': 'success'}
    except Exception as e:
        return {'count': 0, 'examples': f"Error: {str(e)[:30]}", 'status': 'failed'}

def extract_agricultural_fields(lat: float, lon: float, radius: int) -> Dict:
    """Extract agricultural areas from OSM"""
    try:
        tags = {'landuse': ['farmland', 'farm', 'agricultural', 'grass', 'meadow']}
        agriculture = ox.features_from_point((lat, lon), tags=tags, dist=radius)
        
        if len(agriculture) > 0:
            agr_names = agriculture.index.get_level_values(0).unique()[:5]
            return {
                'count': len(agriculture),
                'examples': ', '.join(str(a)[:35] for a in agr_names),
                'status': 'success'
            }
        return {'count': 0, 'examples': '', 'status': 'success'}
    except Exception as e:
        return {'count': 0, 'examples': f"Error: {str(e)[:30]}", 'status': 'failed'}

def extract_all_features(lat: float, lon: float, radius: int) -> Dict:
    """Extract all features for a location"""
    features = {
        'roads': extract_roads(lat, lon, radius),
        'industrial': extract_industrial_zones(lat, lon, radius),
        'dumps': extract_dump_sites(lat, lon, radius),
        'agriculture': extract_agricultural_fields(lat, lon, radius)
    }
    return features

# ============================================================================
# MAIN PROCESSING
# ============================================================================

def main():
    print(f"\n{'='*80}")
    print("OSM FEATURE EXTRACTION FOR AIR QUALITY ANALYSIS")
    print(f"{'='*80}\n")
    
    # Load dataset
    print(f"Loading dataset from {INPUT_FILE}...")
    df = pd.read_excel(INPUT_FILE, sheet_name=0)
    print(f"✓ Loaded {len(df)} records\n")
    
    # Get unique locations
    unique_locations = df[['Latitude', 'Longitude', 'Location', 'State / Union Territory',
                           'City / town / village']].drop_duplicates(
                           subset=['Latitude', 'Longitude']).reset_index(drop=True)
    
    print(f"Unique locations to process: {len(unique_locations)}")
    print(f"Processing parameters:")
    print(f"  - Batch size: {BATCH_SIZE}")
    print(f"  - Search radius: {SEARCH_RADIUS}m")
    print(f"\n{'='*80}\n")
    
    # Initialize results
    features_data = []
    total_batches = (len(unique_locations) - 1) // BATCH_SIZE + 1
    
    # Process in batches
    for batch_num in range(0, len(unique_locations), BATCH_SIZE):
        batch = unique_locations.iloc[batch_num:batch_num + BATCH_SIZE]
        batch_idx = (batch_num // BATCH_SIZE) + 1
        
        print(f"Batch {batch_idx}/{total_batches}:")
        print(f"Processing locations {batch_num + 1} to {min(batch_num + BATCH_SIZE, len(unique_locations))}\n")
        
        for idx, row in batch.iterrows():
            lat = row['Latitude']
            lon = row['Longitude']
            location_name = row['Location']
            city = row['City / town / village']
            state = row['State / Union Territory']
            
            try:
                # Extract features
                print(f"  Querying {city}, {state} ({lat:.4f}, {lon:.4f})...", end='', flush=True)
                features = extract_all_features(lat, lon, SEARCH_RADIUS)
                
                # Compile record
                record = {
                    'Location': location_name,
                    'City': city,
                    'State': state,
                    'Latitude': lat,
                    'Longitude': lon,
                    'Roads_count': features['roads']['count'],
                    'Roads_examples': features['roads']['examples'],
                    'Industrial_zones_count': features['industrial']['count'],
                    'Industrial_examples': features['industrial']['examples'],
                    'Dump_sites_count': features['dumps']['count'],
                    'Dump_examples': features['dumps']['examples'],
                    'Agricultural_fields_count': features['agriculture']['count'],
                    'Agricultural_examples': features['agriculture']['examples'],
                    'Query_status': 'Success'
                }
                
                features_data.append(record)
                
                print(f" ✓ Roads:{features['roads']['count']} | "
                      f"Industrial:{features['industrial']['count']} | "
                      f"Dumps:{features['dumps']['count']} | "
                      f"Agriculture:{features['agriculture']['count']}")
                
            except Exception as e:
                print(f" ✗ Error: {str(e)[:50]}")
                record = {
                    'Location': location_name,
                    'City': city,
                    'State': state,
                    'Latitude': lat,
                    'Longitude': lon,
                    'Roads_count': 0,
                    'Roads_examples': '',
                    'Industrial_zones_count': 0,
                    'Industrial_examples': '',
                    'Dump_sites_count': 0,
                    'Dump_examples': '',
                    'Agricultural_fields_count': 0,
                    'Agricultural_examples': '',
                    'Query_status': f'Failed: {str(e)[:40]}'
                }
                features_data.append(record)
            
            # Small delay between queries to avoid rate limiting
            time.sleep(0.5)
        
        print(f"\nBatch completed. Waiting before next batch...\n")
        time.sleep(2)
    
    # Create features dataframe
    features_df = pd.DataFrame(features_data)
    
    # Print summary statistics
    print(f"\n{'='*80}")
    print("EXTRACTION SUMMARY")
    print(f"{'='*80}\n")
    
    print(f"Total locations processed: {len(features_df)}")
    successful = (features_df['Query_status'] == 'Success').sum()
    print(f"Successful queries: {successful}/{len(features_df)}")
    
    print(f"\n{'Feature Statistics':^80}")
    print(f"{'-'*80}")
    print(f"{'Feature':<20} {'Total':<12} {'Mean':<12} {'Max':<12}")
    print(f"{'-'*80}")
    print(f"{'Roads':<20} {features_df['Roads_count'].sum():<12.0f} "
          f"{features_df['Roads_count'].mean():<12.1f} {features_df['Roads_count'].max():<12.0f}")
    print(f"{'Industrial':<20} {features_df['Industrial_zones_count'].sum():<12.0f} "
          f"{features_df['Industrial_zones_count'].mean():<12.1f} "
          f"{features_df['Industrial_zones_count'].max():<12.0f}")
    print(f"{'Dump Sites':<20} {features_df['Dump_sites_count'].sum():<12.0f} "
          f"{features_df['Dump_sites_count'].mean():<12.1f} {features_df['Dump_sites_count'].max():<12.0f}")
    print(f"{'Agriculture':<20} {features_df['Agricultural_fields_count'].sum():<12.0f} "
          f"{features_df['Agricultural_fields_count'].mean():<12.1f} "
          f"{features_df['Agricultural_fields_count'].max():<12.0f}")
    print(f"{'-'*80}\n")
    
    # Merge with original data
    print("Merging extracted features with original air quality data...")
    
    # Merge on latitude and longitude
    merged_df = df.merge(
        features_df[['Latitude', 'Longitude', 'Roads_count', 'Industrial_zones_count',
                     'Dump_sites_count', 'Agricultural_fields_count', 'Query_status']],
        on=['Latitude', 'Longitude'],
        how='left'
    )
    
    # Fill NaN values for unmatched records (should be minimal)
    merged_df['Roads_count'].fillna(0, inplace=True)
    merged_df['Industrial_zones_count'].fillna(0, inplace=True)
    merged_df['Dump_sites_count'].fillna(0, inplace=True)
    merged_df['Agricultural_fields_count'].fillna(0, inplace=True)
    merged_df['Query_status'].fillna('Not processed', inplace=True)
    
    # Create additional derived features
    merged_df['Urban_density_score'] = (merged_df['Roads_count'] / merged_df['Roads_count'].max()).round(2)
    merged_df['Industrial_presence'] = (merged_df['Industrial_zones_count'] > 0).astype(int)
    merged_df['Pollution_source_risk'] = (
        (merged_df['Industrial_zones_count'] * 0.4 + 
         merged_df['Dump_sites_count'] * 0.3 + 
         merged_df['Roads_count'] * 0.3) / 100
    ).round(2)
    merged_df['Green_area_ratio'] = (
        merged_df['Agricultural_fields_count'] / 
        (merged_df['Roads_count'] + merged_df['Agricultural_fields_count'])
    ).round(2)
    
    # Save to Excel
    print(f"\nSaving merged data to {OUTPUT_FILE}...")
    with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
        merged_df.to_excel(writer, sheet_name='All Data', index=False)
        features_df.to_excel(writer, sheet_name='OSM Features', index=False)
        
        # Summary statistics sheet
        summary_stats = pd.DataFrame({
            'Metric': ['Total Records', 'Unique Locations', 'Successful Queries',
                      'Mean Roads', 'Mean Industrial', 'Mean Dumps', 'Mean Agriculture',
                      'Max Roads', 'Max Industrial', 'Max Dumps', 'Max Agriculture'],
            'Value': [len(merged_df), len(features_df), successful,
                     f"{features_df['Roads_count'].mean():.1f}",
                     f"{features_df['Industrial_zones_count'].mean():.1f}",
                     f"{features_df['Dump_sites_count'].mean():.1f}",
                     f"{features_df['Agricultural_fields_count'].mean():.1f}",
                     features_df['Roads_count'].max(),
                     features_df['Industrial_zones_count'].max(),
                     features_df['Dump_sites_count'].max(),
                     features_df['Agricultural_fields_count'].max()]
        })
        summary_stats.to_excel(writer, sheet_name='Summary', index=False)
    
    print(f"✓ Successfully saved to {OUTPUT_FILE}\n")
    
    # Display sample results
    print(f"{'Sample Results (First 15 locations):':^80}")
    print(merged_df[['City / town / village', 'State / Union Territory', 'Roads_count',
                     'Industrial_zones_count', 'Dump_sites_count', 'Agricultural_fields_count',
                     'Pollution_source_risk', 'Green_area_ratio']].head(15))
    
    print(f"\n{'='*80}")
    print("✓ Processing Complete!")
    print(f"{'='*80}\n")
    
    return merged_df, features_df

if __name__ == "__main__":
    merged_df, features_df = main()


OSM FEATURE EXTRACTION FOR AIR QUALITY ANALYSIS

Loading dataset from Air Quality Dataset 2021-2023.xlsx...
✓ Loaded 1068 records

Unique locations to process: 544
Processing parameters:
  - Batch size: 5
  - Search radius: 5000m


Batch 1/109:
Processing locations 1 to 5

  Querying Amaravati, Andhra Pradesh (16.4942, 80.5106)... ✓ Roads:2332 | Industrial:0 | Dumps:0 | Agriculture:89
  Querying Anatapur, Andhra Pradesh (14.6824, 77.6017)... ✓ Roads:5915 | Industrial:3 | Dumps:0 | Agriculture:2
  Querying Chittor, Andhra Pradesh (13.2162, 79.1051)... ✓ Roads:1986 | Industrial:4 | Dumps:0 | Agriculture:9
  Querying Eluru, Andhra Pradesh (16.7104, 81.1154)... ✓ Roads:4515 | Industrial:2 | Dumps:0 | Agriculture:1
  Querying Guntur, Andhra Pradesh (16.2915, 80.4542)... ✓ Roads:7442 | Industrial:4 | Dumps:0 | Agriculture:37

Batch completed. Waiting before next batch...

Batch 2/109:
Processing locations 6 to 10

  Querying Kadapa, Andhra Pradesh (14.4753, 78.8217)... ✓ Roads:3546 | Indust

In [1]:
import pandas as pd
import geopandas as gpd
import osmnx as ox
from shapely.geometry import Point
import time

SEARCH_RADIUS = 5000  # 5 km
INPUT_FILE = "Air_Quality_with_OSM_Features_5KM.xlsx"  # your merged file [file:2]

# 1) Load main data and unique points
df = pd.read_excel(INPUT_FILE, sheet_name="All Data")
unique_locs = df[["Latitude", "Longitude"]].drop_duplicates().reset_index(drop=True)  # [file:2]

# 2) Empty GeoDataFrames for each feature type (WGS84)
roads_gdf   = gpd.GeoDataFrame(columns=["osm_id", "geometry"], crs="EPSG:4326")
inds_gdf    = gpd.GeoDataFrame(columns=["osm_id", "geometry"], crs="EPSG:4326")
dumps_gdf   = gpd.GeoDataFrame(columns=["osm_id", "geometry"], crs="EPSG:4326")

def append_features(gdf_base, features):
    """Append unique geometries from an OSMnx features GeoDataFrame into gdf_base."""
    if features is None or features.empty:
        return gdf_base
    tmp = features.reset_index()
    osm_ids = tmp["osmid"] if "osmid" in tmp.columns else tmp[tmp.columns[0]]
    out = gpd.GeoDataFrame(
        {
            "osm_id": osm_ids,
            "geometry": tmp["geometry"]
        },
        crs=features.crs
    )
    # drop duplicates by osm_id to avoid many repeats from overlapping buffers
    out = out.drop_duplicates(subset="osm_id")
    return pd.concat([gdf_base, out], ignore_index=True)

for i, row in unique_locs.iterrows():
    lat, lon = row["Latitude"], row["Longitude"]
    print(f"{i+1}/{len(unique_locs)}: ({lat:.4f}, {lon:.4f})")

    # Roads
    try:
        roads = ox.features_from_point((lat, lon), tags={"highway": True}, dist=SEARCH_RADIUS)
        roads_gdf = append_features(roads_gdf, roads)
    except Exception as e:
        print("  Roads error:", e)

    # Industrial zones
    try:
        ind = ox.features_from_point((lat, lon), tags={"industrial": True, "landuse": "industrial"}, dist=SEARCH_RADIUS)
        inds_gdf = append_features(inds_gdf, ind)
    except Exception as e:
        print("  Industrial error:", e)

    # Dumps/waste
    try:
        dumps = ox.features_from_point((lat, lon), tags={"waste": True}, dist=SEARCH_RADIUS)
        dumps_gdf = append_features(dumps_gdf, dumps)
    except Exception as e:
        print("  Dumps error:", e)

    time.sleep(0.5)

# 3) Drop remaining duplicate geometries by osm_id once
roads_gdf = roads_gdf.drop_duplicates(subset="osm_id").reset_index(drop=True)
inds_gdf  = inds_gdf.drop_duplicates(subset="osm_id").reset_index(drop=True)
dumps_gdf = dumps_gdf.drop_duplicates(subset="osm_id").reset_index(drop=True)

# 4) Reproject to metric CRS (EPSG:3857) for distance calculations
roads_gdf_m = roads_gdf.to_crs(3857)
inds_gdf_m  = inds_gdf.to_crs(3857)
dumps_gdf_m = dumps_gdf.to_crs(3857)

# 5) Save to disk for reuse
roads_gdf_m.to_file("osm_roads_5km.gpkg", layer="roads", driver="GPKG")
inds_gdf_m.to_file("osm_industries_5km.gpkg", layer="industries", driver="GPKG")
dumps_gdf_m.to_file("osm_dumps_5km.gpkg", layer="dumps", driver="GPKG")


1/544: (16.4942, 80.5106)
  Industrial error: No matching features. Check query location, tags, and log.
  Dumps error: No matching features. Check query location, tags, and log.
2/544: (14.6824, 77.6017)
  Dumps error: No matching features. Check query location, tags, and log.
3/544: (13.2162, 79.1051)
  Dumps error: No matching features. Check query location, tags, and log.
4/544: (16.7104, 81.1154)
  Dumps error: No matching features. Check query location, tags, and log.
5/544: (16.2915, 80.4542)
  Dumps error: No matching features. Check query location, tags, and log.
6/544: (14.4753, 78.8217)
  Dumps error: No matching features. Check query location, tags, and log.
7/544: (16.9437, 82.2351)
  Dumps error: No matching features. Check query location, tags, and log.
8/544: (15.8309, 78.0425)
  Dumps error: No matching features. Check query location, tags, and log.
9/544: (14.4494, 79.9874)
  Dumps error: No matching features. Check query location, tags, and log.
10/544: (15.5059, 80.

In [2]:
import pandas as pd
import geopandas as gpd
import osmnx as ox
import time

SEARCH_RADIUS = 5000  # 5 km
INPUT_FILE = "Air_Quality_with_OSM_Features_5KM.xlsx"

# 1) Load main data and unique points
df = pd.read_excel(INPUT_FILE, sheet_name="All Data")  # has lat/lon for all sites [file:2]
unique_locs = df[["Latitude", "Longitude"]].drop_duplicates().reset_index(drop=True)

# 2) Empty GeoDataFrame for agriculture (WGS84)
agri_gdf = gpd.GeoDataFrame(columns=["osm_id", "geometry"], crs="EPSG:4326")

def append_features(gdf_base, features):
    if features is None or features.empty:
        return gdf_base
    tmp = features.reset_index()
    osm_ids = tmp["osmid"] if "osmid" in tmp.columns else tmp[tmp.columns[0]]
    out = gpd.GeoDataFrame(
        {"osm_id": osm_ids, "geometry": tmp["geometry"]},
        crs=features.crs
    )
    out = out.drop_duplicates(subset="osm_id")
    return pd.concat([gdf_base, out], ignore_index=True)

# 3) Query only agricultural / green landuse
# farmland, meadows, orchards, vineyards, grass etc. are common agriculture/green tags [web:8][web:12]
agri_tags = {
    "landuse": [
        "farmland", "farm", "meadow", "orchard", "vineyard", "paddy", "grass"
    ]
}

for i, row in unique_locs.iterrows():
    lat, lon = row["Latitude"], row["Longitude"]
    print(f"{i+1}/{len(unique_locs)}: ({lat:.4f}, {lon:.4f})")

    try:
        agri = ox.features_from_point((lat, lon), tags=agri_tags, dist=SEARCH_RADIUS)  # [web:17]
        agri_gdf = append_features(agri_gdf, agri)
    except Exception as e:
        print("  Agriculture error:", e)

    time.sleep(0.5)

# 4) Deduplicate and project to metric CRS
agri_gdf = agri_gdf.drop_duplicates(subset="osm_id").reset_index(drop=True)
agri_gdf_m = agri_gdf.to_crs(3857)

# 5) Save only agriculture layer
agri_gdf_m.to_file("osm_agriculture_5km.gpkg", layer="agriculture", driver="GPKG")


1/544: (16.4942, 80.5106)
2/544: (14.6824, 77.6017)
3/544: (13.2162, 79.1051)
4/544: (16.7104, 81.1154)
5/544: (16.2915, 80.4542)
6/544: (14.4753, 78.8217)
7/544: (16.9437, 82.2351)
8/544: (15.8309, 78.0425)
9/544: (14.4494, 79.9874)
10/544: (15.5059, 80.0499)
  Agriculture error: No matching features. Check query location, tags, and log.
11/544: (17.0050, 81.7805)
12/544: (18.2949, 83.8939)
13/544: (13.6316, 79.4232)
14/544: (16.5115, 80.6160)
15/544: (17.6936, 83.2921)
16/544: (18.1141, 83.4114)
17/544: (27.0980, 93.6237)
  Agriculture error: No matching features. Check query location, tags, and log.
18/544: (27.1309, 93.7097)
  Agriculture error: No matching features. Check query location, tags, and log.
19/544: (26.4800, 90.5580)
20/544: (26.7851, 91.5145)
21/544: (27.4845, 94.9019)
22/544: (26.5108, 93.9744)
23/544: (26.1806, 91.7539)
24/544: (27.2864, 95.6702)
25/544: (26.3314, 92.7525)
26/544: (26.3534, 91.3984)
  Agriculture error: No matching features. Check query location, ta

In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

SEARCH_RADIUS = 5000  # m
INPUT_FILE = "Air_Quality_with_OSM_Features_5KM.xlsx"

# a) Read main AQ + weather data
df = pd.read_excel(INPUT_FILE, sheet_name="All Data")  # has Lat/Long, AQ, weather, counts etc. [file:2]

# b) Turn stations into GeoDataFrame (WGS84)
gdf_pts = gpd.GeoDataFrame(
    df.copy(),
    geometry=gpd.points_from_xy(df["Longitude"], df["Latitude"]),
    crs="EPSG:4326"
)

# c) Reproject points to metric CRS (same as saved OSM layers)
gdf_pts = gdf_pts.to_crs(3857)

# d) Load OSM feature layers (already in 3857 from your previous step)
roads_gdf_m = gpd.read_file("osm_roads_5km.gpkg", layer="roads")
inds_gdf_m  = gpd.read_file("osm_industries_5km.gpkg", layer="industries")
dumps_gdf_m = gpd.read_file("osm_dumps_5km.gpkg", layer="dumps")
agri_gdf_m  = gpd.read_file("osm_agriculture_5km.gpkg", layer="agriculture")


In [3]:
def nearest_distance(points, targets, max_dist=None):
    """
    Compute distance (meters) from each point to nearest geometry in targets.
    points and targets must be in the same projected CRS (e.g. EPSG:3857).
    """
    if targets.empty:
        return pd.Series([None] * len(points), index=points.index)

    # GeoSeries of point geometries
    pts = points.geometry

    # spatial index
    sidx = targets.sindex

    # query_nearest: pass GeoSeries, get back arrays of indices
    # (for shapely 2.x via geopandas 0.13+)
    idx_src, idx_tgt = sidx.nearest(pts, return_distance=False)

    # idx_src: indices in pts; idx_tgt: corresponding indices in targets
    # Build a Series mapping point index -> nearest target index
    nearest_target_idx = pd.Series(idx_tgt, index=pts.index[idx_src])

    # Now compute distances
    dists = []
    for i in pts.index:
        if i not in nearest_target_idx.index:
            dists.append(None)
            continue
        j = nearest_target_idx.loc[i]
        d = pts.loc[i].distance(targets.geometry.iloc[j])
        if max_dist is not None and d > max_dist:
            dists.append(None)
        else:
            dists.append(d)

    return pd.Series(dists, index=pts.index)


# Add distance columns (meters)
gdf_pts["dist_nearest_road_m"]   = nearest_distance(gdf_pts, roads_gdf_m)
gdf_pts["dist_nearest_ind_m"]    = nearest_distance(gdf_pts, inds_gdf_m)
gdf_pts["dist_nearest_dump_m"]   = nearest_distance(gdf_pts, dumps_gdf_m)
gdf_pts["dist_nearest_agri_m"]   = nearest_distance(gdf_pts, agri_gdf_m)


In [7]:
# Start from gdf_pts with distance columns already added
df_full = gdf_pts.copy()

# Drop geometry only (for CSV / ML)
df_full = df_full.drop(columns="geometry")

# Just save everything
df_full.to_csv("Air Quality Dataset 2021-2023.csv", index=False)
