# Air Quality Stations - Location Extraction

This notebook extracts the list of air quality monitoring stations along with their latitude and longitude coordinates from the India AQ station JSON file.

## 1. Import Required Libraries

Import necessary libraries including json for reading JSON files and pandas for data manipulation.

In [2]:
import json
import pandas as pd

## 2. Load JSON Data

Read the india_aq_station.json file and load the JSON data into a Python object.

In [5]:
# Load the JSON file with error handling
try:
    with open('india_aq_station.json', 'r', encoding='utf-8') as file:
        content = file.read()
        
        # Remove BOM (Byte Order Mark) and other invisible characters at the start
        content = content.lstrip('\ufeff\uffef')
        
        # Remove the special character at the beginning
        if content.startswith('̦'):
            content = content[1:]
        
        print(f"File size: {len(content)} characters")
        print(f"First 200 characters:\n{content[:200]}\n")
        
        # Try to parse JSON
        if content.strip():
            data = json.loads(content)
            print(f"Data type: {type(data)}")
            
            # Check structure
            if isinstance(data, dict):
                print(f"Top-level keys: {list(data.keys())}")
                if 'map' in data and 'station_list' in data['map']:
                    print(f"Number of stations: {len(data['map']['station_list'])}")
        else:
            print("Error: The JSON file is empty!")
            data = None
except json.JSONDecodeError as e:
    print(f"JSON Decode Error: {e}")
    data = None
except FileNotFoundError:
    print("Error: File 'india_aq_station.json' not found!")
    data = None

File size: 10519653 characters
First 200 characters:
{"map": {"timestamp": "28-01-2026 13:00:50", "station_list": [{"ip_address": ["59.99.208.110", "13.126.127.74", "10.247.72.142", "117.197.205.143", "117.197.201.59", "117.197.195.114", "117.197.195.23

Data type: <class 'dict'>
Top-level keys: ['map', 'status']
Number of stations: 567


## 3. Extract Station Information

Parse the JSON structure to extract station names, latitude, and longitude coordinates.

In [7]:
# Extract station information
stations_list = []

# The data structure is: data['map']['station_list']
if data and isinstance(data, dict) and 'map' in data and 'station_list' in data['map']:
    station_list = data['map']['station_list']
    
    for station in station_list:
        if isinstance(station, dict):
            station_info = {
                'station_name': station.get('name', station.get('station_name', 'Unknown')),
                'station_id': station.get('id', station.get('station_id', None)),
                'latitude': station.get('lat', station.get('latitude', None)),
                'longitude': station.get('lon', station.get('longitude', station.get('long', None))),
                'city': station.get('city', None),
                'state': station.get('state', None)
            }
            stations_list.append(station_info)

print(f"Total stations extracted: {len(stations_list)}")

Total stations extracted: 567


## 4. Create DataFrame with Station Details

Create a pandas DataFrame containing station names and their corresponding latitude/longitude coordinates.

In [8]:
# Create DataFrame
df_stations = pd.DataFrame(stations_list)

# Display basic information about the DataFrame
print(f"DataFrame shape: {df_stations.shape}")
print(f"\nColumn names: {df_stations.columns.tolist()}")
print(f"\nFirst few rows:")
df_stations.head()

DataFrame shape: (567, 6)

Column names: ['station_name', 'station_id', 'latitude', 'longitude', 'city', 'state']

First few rows:


Unnamed: 0,station_name,station_id,latitude,longitude,city,state
0,"SIDCO Kurichi, Coimbatore - TNPCB",site_5094,10.942451,76.978996,,
1,"Urban, Chamarajanagar - KSPCB",site_5124,11.55358,76.55521,,
2,"MD University, Rohtak - HSPCB",site_147,28.52123,76.37138,,
3,"IESD Banaras Hindu University, Varanasi - UPPCB",site_5468,25.262326,82.995408,,
4,"Sirifort, Delhi - CPCB",site_119,28.5504249,77.2159377,,


## 5. Display Stations with Coordinates

Display the complete list of stations with their coordinates in a formatted table.

In [9]:
# Display all stations with their coordinates
print("=== Air Quality Monitoring Stations with Coordinates ===\n")
df_stations

=== Air Quality Monitoring Stations with Coordinates ===



Unnamed: 0,station_name,station_id,latitude,longitude,city,state
0,"SIDCO Kurichi, Coimbatore - TNPCB",site_5094,10.942451,76.978996,,
1,"Urban, Chamarajanagar - KSPCB",site_5124,11.55358,76.55521,,
2,"MD University, Rohtak - HSPCB",site_147,28.52123,76.37138,,
3,"IESD Banaras Hindu University, Varanasi - UPPCB",site_5468,25.262326,82.995408,,
4,"Sirifort, Delhi - CPCB",site_119,28.5504249,77.2159377,,
...,...,...,...,...,...,...
562,"IIT Delhi, Delhi - IITM",site_6060,28.542460,77.191651,,
563,"Khunmoh, Srinagar - JKPCC",site_6061,34.06313,74.96017,,
564,"Khrew, Pampore - JKPCC",site_6062,34.031606,75.009712,,
565,"Mahashweta Nagar, Ujjain - MPPCB",site_6063,23.1633321,75.800225,,


In [13]:
# Summary statistics
print("=== Summary ===")
print(f"Total number of stations: {len(df_stations)}")
print(f"\nStations with valid coordinates: {df_stations[['latitude', 'longitude']].notna().all(axis=1).sum()}")
print(f"Stations missing coordinates: {df_stations[['latitude', 'longitude']].isna().any(axis=1).sum()}")

# Convert latitude and longitude to numeric
df_stations['latitude'] = pd.to_numeric(df_stations['latitude'], errors='coerce')
df_stations['longitude'] = pd.to_numeric(df_stations['longitude'], errors='coerce')

# Display coordinate ranges
if not df_stations['latitude'].isna().all():
    print(f"\nLatitude range: {df_stations['latitude'].min():.4f} to {df_stations['latitude'].max():.4f}")
    print(f"Longitude range: {df_stations['longitude'].min():.4f} to {df_stations['longitude'].max():.4f}")

# Additional statistics by state/city if available
if 'state' in df_stations.columns and df_stations['state'].notna().any():
    print(f"\nNumber of unique states: {df_stations['state'].nunique()}")
    print(f"\nTop 5 states by station count:")
    print(df_stations['state'].value_counts().head())

if 'city' in df_stations.columns and df_stations['city'].notna().any():
    print(f"\nNumber of unique cities: {df_stations['city'].nunique()}")

=== Summary ===
Total number of stations: 567

Stations with valid coordinates: 567
Stations missing coordinates: 0

Latitude range: 8.5149 to 34.0662
Longitude range: 70.9092 to 94.6366


In [14]:
# Optional: Export to CSV for further use
output_file = 'station_locations.csv'
df_stations.to_csv(output_file, index=False)
print(f"Station locations exported to: {output_file}")

Station locations exported to: station_locations.csv
