In [5]:
# OpenAQ API key.
# The key is a credential and must never be written into the notebook: it ends
# up in version control and in every shared copy. Provide it through the
# OPENAQ_API_KEY environment variable, or put a line `OPENAQ_API_KEY=...` in a
# local, git-ignored `.env` file next to this notebook.
# NOTE(review): a key was previously committed in this cell; treat it as
# compromised and rotate it in the OpenAQ account settings.
import os
from dotenv import load_dotenv

load_dotenv()  # Load environment variables from .env file if present

OPENAQ_API_KEY = os.environ.get("OPENAQ_API_KEY", "").strip()
if not OPENAQ_API_KEY:
    # Fail here with a clear message instead of later with an opaque HTTP 401.
    raise RuntimeError(
        "OPENAQ_API_KEY is not set. Export it in your shell or add it to a "
        "local .env file before running this notebook."
    )

# Drop every cached module whose name begins with "openaq" so that the next
# `import openaq` statement loads the package (and its submodules) afresh.
import sys

modules_to_remove = [name for name in list(sys.modules) if name.startswith('openaq')]
for mod in modules_to_remove:
    sys.modules.pop(mod)

print("Cleaned up openaq modules from cache")

Cleaned up openaq modules from cache


In [7]:
"""
OpenAQ API v3 - Collect Air Quality Data for Erode District, Tamil Nadu
Parameters: PM2.5, PM10, NO₂, CO, SO₂, O₃, temperature, humidity, wind speed, wind direction
Time Period: Last 30 days

Using the Official OpenAQ Python SDK (v0.6.0+)
Documentation: https://python.openaq.org/
"""

import datetime
import time
import os
import pandas as pd
from openaq import OpenAQ

# Search centre: approximate middle of Erode District, Tamil Nadu
# (roughly 11.34°N, 77.72°E).
ERODE_LATITUDE = 11.34
ERODE_LONGITUDE = 77.72
RADIUS_METERS = 25000  # search radius around the centre point (25 km)

# Query window: the 30 days that end at the current instant, expressed in UTC.
date_to = datetime.datetime.now(tz=datetime.timezone.utc)
date_from = date_to + datetime.timedelta(days=-30)

# Echo the query configuration before any request is made.
print(
    f"Fetching data from {date_from.isoformat()} to {date_to.isoformat()}",
    f"Location: Erode District ({ERODE_LATITUDE}, {ERODE_LONGITUDE})",
    f"Search radius: {RADIUS_METERS/1000} km",
    "-" * 60,
    sep="\n",
)

# Parameters to collect. Each key is the canonical category written to the
# output; each value lists the lower-case spellings accepted for it.
WANTED_KEYWORDS = {
    "pm25": ["pm25", "pm2.5", "pm2_5"],
    "pm10": ["pm10"],
    "no2": ["no2", "nitrogen dioxide"],
    "co": ["co", "carbon monoxide"],
    "so2": ["so2", "sulfur dioxide", "sulphur dioxide"],
    "o3": ["o3", "ozone"],
    "temperature": ["temperature", "temp", "at"],
    "humidity": ["humidity", "rh", "relativehumidity", "relative humidity"],
    "wind_speed": ["windspeed", "wind_speed", "ws", "wind speed"],
    "wind_direction": ["winddirection", "wind_direction", "wd", "wind direction"]
}

def is_wanted_parameter(param_name):
    """Map a raw parameter name onto one of the WANTED_KEYWORDS categories.

    Matching is case-insensitive and ignores surrounding whitespace. An exact
    keyword match is tried first across all categories. Failing that, a keyword
    is accepted as a prefix only when the character that follows it is not a
    letter or digit (e.g. "temp_c", "pm10 mass"). This boundary rule keeps
    short keywords from swallowing unrelated parameters: "co2" is not "co" and
    "atmospheric pressure" is not "at" (temperature).

    Args:
        param_name: Parameter name as reported for a sensor; may be None or
            empty.

    Returns:
        The matching category key (e.g. "pm25"), or None when the name is
        empty or matches no category.
    """
    if not param_name:
        return None
    param_lower = str(param_name).lower().strip()
    if not param_lower:
        return None

    # Pass 1: exact matches win, regardless of category order.
    for category, keywords in WANTED_KEYWORDS.items():
        if param_lower in keywords:
            return category

    # Pass 2: keyword followed by a separator. Equality was ruled out above,
    # so param_lower is strictly longer than kw and the index is in range.
    for category, keywords in WANTED_KEYWORDS.items():
        for kw in keywords:
            if param_lower.startswith(kw) and not param_lower[len(kw)].isalnum():
                return category
    return None

def get_sensor_param_info(sensor):
    """Extract (name, display name, units) of the parameter a sensor measures.

    The sensor's ``parameter`` attribute may be a dict (keys ``name``,
    ``display_name``, ``units``) or an object exposing the same attributes.
    When no parameter name can be found there, the first whitespace-separated
    token of ``sensor.name`` is used as the name and the full sensor name as
    the display name.

    Args:
        sensor: Sensor object; ``parameter`` and ``name`` are both optional.

    Returns:
        Tuple ``(param_name, param_display, param_units)``. Missing or None
        fields are returned as empty strings, and the display name falls back
        to the parameter name when it is not available.
    """
    param_name = ""
    param_display = ""
    param_units = ""

    param = getattr(sensor, 'parameter', None)
    if isinstance(param, dict):
        # `or ""` also normalises keys that are present but hold None.
        param_name = param.get('name') or ""
        param_display = param.get('display_name') or ""
        param_units = param.get('units') or ""
    elif param is not None:
        param_name = getattr(param, 'name', None) or ""
        param_display = getattr(param, 'display_name', None) or ""
        param_units = getattr(param, 'units', None) or ""

    # If still empty, try the sensor name which often contains param info
    sensor_name = getattr(sensor, 'name', None)
    if not param_name and sensor_name:
        tokens = sensor_name.split()
        # A whitespace-only name yields no tokens; leave the fields empty
        # instead of raising IndexError.
        if tokens:
            param_name = tokens[0]
            param_display = sensor_name

    # Same fallback for dict and object responses: show the raw name when no
    # display name is available.
    if not param_display:
        param_display = param_name

    return param_name, param_display, param_units

# Create the OpenAQ client, authenticated with the key defined earlier.
API_KEY = OPENAQ_API_KEY
client = OpenAQ(api_key=API_KEY)

# Step 1: look up every monitoring station inside the search circle.
print("\n1. Fetching locations around Erode District...")
try:
    # The query is assembled inside the try so that any failure while building
    # or sending it is reported by the handler below.
    location_query = {
        "coordinates": (ERODE_LATITUDE, ERODE_LONGITUDE),
        "radius": RADIUS_METERS,
        "limit": 1000,
    }
    loc_response = client.locations.list(**location_query)
    locations = loc_response.results
    print(f"   Found {len(locations)} monitoring location(s)")
except Exception as exc:
    # Best effort: report the problem and carry on with no locations.
    print(f"   Error fetching locations: {exc}")
    locations = []

# Print id, name, coordinates and (when present) locality of each station.
for loc in locations:
    print(f"\n   Location ID: {loc.id}")
    print(f"   Name: {loc.name}")
    coords = loc.coordinates
    print(f"   Coordinates: ({coords.latitude}, {coords.longitude})")
    locality = getattr(loc, 'locality', None)
    if locality:
        print(f"   Locality: {locality}")

# Step 2: For each location, get sensors and fetch measurements
print("\n2. Fetching measurements for each location...")
all_data = []

MEASUREMENTS_PAGE_SIZE = 1000  # rows requested per measurements page
REQUEST_PAUSE_SECONDS = 0.1    # pause between consecutive page requests

# The whole collection runs inside try/finally so the client connection is
# closed even if the loop is interrupted (e.g. kernel interrupt) or raises.
try:
    for loc in locations:
        loc_id = loc.id
        loc_name = loc.name
        loc_lat = loc.coordinates.latitude
        loc_lon = loc.coordinates.longitude

        print(f"\n   Processing location: {loc_name} (ID: {loc_id})")

        # Get sensors for this location
        try:
            sensors_response = client.locations.sensors(loc_id)
            sensors = sensors_response.results
            print(f"   Found {len(sensors)} sensor(s)")
        except Exception as e:
            print(f"   Error fetching sensors: {e}")
            continue

        # Resolve each sensor's parameter info and category once; the listing
        # and the fetch loop below both use it.
        sensor_infos = []
        for sensor in sensors:
            param_name, param_display, param_units = get_sensor_param_info(sensor)
            matched_category = is_wanted_parameter(param_name)
            sensor_infos.append(
                (sensor.id, param_name, param_display, param_units, matched_category)
            )

        # Show all available parameters at this location
        print("   Available parameters:")
        for _, param_name, param_display, param_units, matched_category in sensor_infos:
            match_indicator = " ✓" if matched_category else ""
            print(f"      - {param_display} ({param_name}) [{param_units}]{match_indicator}")

        # Fetch measurements only for sensors whose parameter we want
        for sensor_id, param_name, param_display, param_units, matched_category in sensor_infos:
            if not matched_category:
                continue

            print(f"\n      Fetching: {param_display} (Sensor ID: {sensor_id})")

            # Fetch measurements for this sensor with pagination
            page = 1
            sensor_records = 0

            while True:
                try:
                    meas_response = client.measurements.list(
                        sensors_id=sensor_id,
                        datetime_from=date_from,
                        datetime_to=date_to,
                        page=page,
                        limit=MEASUREMENTS_PAGE_SIZE
                    )
                    results = meas_response.results

                    if not results:
                        break

                    for m in results:
                        # Extract the start of the measurement period. Values
                        # that are absent stay None rather than becoming the
                        # string "None".
                        period = getattr(m, 'period', None)
                        dt_from = getattr(period, 'datetime_from', None) if period else None
                        utc_value = getattr(dt_from, 'utc', None)
                        local_value = getattr(dt_from, 'local', None)
                        datetime_utc = str(utc_value) if utc_value is not None else None
                        datetime_local = str(local_value) if local_value is not None else None

                        record = {
                            "location_id": loc_id,
                            "location_name": loc_name,
                            "sensor_id": sensor_id,
                            "parameter": matched_category,
                            "parameter_original": param_name,
                            "parameter_display": param_display,
                            "value": m.value if hasattr(m, 'value') else None,
                            "unit": param_units,
                            "datetime_utc": datetime_utc,
                            "datetime_local": datetime_local,
                            "latitude": loc_lat,
                            "longitude": loc_lon,
                        }
                        all_data.append(record)
                        sensor_records += 1

                    # A page shorter than the requested size is the last one;
                    # stop here instead of spending a request on an empty page.
                    if len(results) < MEASUREMENTS_PAGE_SIZE:
                        break

                    page += 1
                    time.sleep(REQUEST_PAUSE_SECONDS)  # Rate limiting

                except Exception as e:
                    # Best effort: keep the pages already collected for this
                    # sensor and move on to the next one.
                    print(f"      Error fetching measurements (page {page}): {e}")
                    break

            if sensor_records > 0:
                print(f"         Retrieved {sensor_records} measurement(s)")
            else:
                print(f"         No measurements found for this time range")
finally:
    # Close the client connection
    client.close()

# Step 3: turn the collected records into a DataFrame, report on it, and
# write it to disk when it holds at least one row.
print("\n" + "=" * 60)
print("3. Building DataFrame...")
df = pd.DataFrame(all_data)
print(f"   Total records fetched: {len(df)}")

if len(df) == 0:
    # Nothing collected: list likely causes and possible next steps.
    print(
        "\n   No data found for the specified location and time range.",
        "   This could mean:",
        "   - No monitoring stations exist within the search radius",
        "   - No data available for the last 30 days",
        "   - The parameters you're looking for aren't measured at these stations",
        "\n   Consider trying:",
        "   - Increasing the search radius",
        "   - Searching in a different location",
        "   - Checking the OpenAQ website for available stations in your area",
        sep="\n",
    )
else:
    print("\n   Data Preview:")
    print(df.head(10))

    print("\n   Parameter distribution:")
    print(df["parameter"].value_counts())

    print("\n   Data summary by parameter:")
    values_by_parameter = df.groupby("parameter")["value"]
    summary = values_by_parameter.agg(["count", "mean", "min", "max"])
    print(summary)

    # Make sure the target directory exists, then export without the index.
    output_file = "data/raw/erode_aq_last30days.csv"
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    df.to_csv(output_file, index=False)
    print(f"\n4. Saved data to '{output_file}'")

Fetching data from 2025-11-25T04:03:58.814568+00:00 to 2025-12-25T04:03:58.814568+00:00
Location: Erode District (11.34, 77.72)
Search radius: 25.0 km
------------------------------------------------------------

1. Fetching locations around Erode District...
   Found 2 monitoring location(s)

   Location ID: 6920
   Name: Manali Village, Chennai - TNPCB
   Coordinates: (11.258244, 77.552429)

   Location ID: 3409527
   Name: SIPCOT Industrial Park, Perundurai - TNPCB
   Coordinates: (11.258242, 77.552761)

2. Fetching measurements for each location...

   Processing location: Manali Village, Chennai - TNPCB (ID: 6920)
   Found 6 sensor(s)
   Available parameters:
      - PM10 (pm10) [µg/m³] ✓
      - NO₂ mass (no2) [µg/m³] ✓
      - CO mass (co) [µg/m³] ✓
      - PM2.5 (pm25) [µg/m³] ✓
      - O₃ mass (o3) [µg/m³] ✓
      - SO₂ mass (so2) [µg/m³] ✓

      Fetching: PM10 (Sensor ID: 19871)
         No measurements found for this time range

      Fetching: NO₂ mass (Sensor ID: 19868)
 