In [1]:
!pip install requests pandas numpy shapely scikit-learn tqdm



In [2]:
import requests
import pandas as pd
import numpy as np
import math
import time
import os
from tqdm import tqdm
from sklearn.neighbors import BallTree

# Put your keys here
WAQI_TOKEN = "4c0735999b909ffa3580a1ffea0044acb2b671f1"
OPENWEATHER_API_KEY = "b9ccc55124c700e32d7baf7f4e0b74ac"

# Files
GRID_FILE = "locations_25km_india.csv"
FINAL_OUTPUT = "module1_waqi_weather_osm_india.csv"

# Settings
SLEEP = 0.15
OSM_RADIUS_M = 500
WEATHER_ROUND_PRECISION = 1

In [3]:
df_grid = pd.read_csv(GRID_FILE)
print("Grid points:", len(df_grid))
df_grid.head()

Grid points: 5056


Unnamed: 0,id,lat,lon
0,pt_00001,8.093018,77.443716
1,pt_00002,8.317596,77.222009
2,pt_00003,8.317596,77.448974
3,pt_00004,8.317596,77.675939
4,pt_00005,8.317596,77.902904


In [4]:
def get_waqi_pollution(lat, lon):
    url = f"https://api.waqi.info/feed/geo:{lat};{lon}/"
    params = {"token": WAQI_TOKEN}

    try:
        r = requests.get(url, params=params, timeout=20)
        r.raise_for_status()
        data = r.json()
        
        if data.get("status") != "ok":
            return None
        
        iaqi = data["data"].get("iaqi", {})
        
        return {
            "aqi": data["data"].get("aqi"),
            "pm25": iaqi.get("pm25", {}).get("v"),
            "pm10": iaqi.get("pm10", {}).get("v"),
            "no2": iaqi.get("no2", {}).get("v"),
            "so2": iaqi.get("so2", {}).get("v"),
            "co": iaqi.get("co", {}).get("v"),
            "o3": iaqi.get("o3", {}).get("v")
        }
    except Exception as e:
        return None

In [7]:
weather_cache = {}

def get_weather(lat, lon):
    key = (round(lat, WEATHER_ROUND_PRECISION), round(lon, WEATHER_ROUND_PRECISION))
    if key in weather_cache:
        return weather_cache[key]

    url = "https://api.openweathermap.org/data/2.5/weather"
    params = {"lat": lat, "lon": lon, "appid": OPENWEATHER_API_KEY, "units": "metric"}

    try:
        r = requests.get(url, params=params, timeout=20)
        r.raise_for_status()
        d = r.json()
        result = {
            "temp_c": d.get("main", {}).get("temp"),
            "humidity": d.get("main", {}).get("humidity"),
            "wind_speed": d.get("wind", {}).get("speed"),
            "wind_deg": d.get("wind", {}).get("deg"),
            "weather_desc": d.get("weather", [{}])[0].get("description")
        }
    except:
        result = {
            "temp_c": None, "humidity": None,
            "wind_speed": None, "wind_deg": None,
            "weather_desc": None
        }

    weather_cache[key] = result
    return result


In [9]:
weather_cache = {}

def get_weather(lat, lon):
    key = (round(lat, WEATHER_ROUND_PRECISION), round(lon, WEATHER_ROUND_PRECISION))
    if key in weather_cache:
        return weather_cache[key]

    url = "https://api.openweathermap.org/data/2.5/weather"
    params = {"lat": lat, "lon": lon, "appid": OPENWEATHER_API_KEY, "units": "metric"}

    try:
        r = requests.get(url, params=params, timeout=20)
        r.raise_for_status()
        d = r.json()
        result = {
            "temp_c": d.get("main", {}).get("temp"),
            "humidity": d.get("main", {}).get("humidity"),
            "wind_speed": d.get("wind", {}).get("speed"),
            "wind_deg": d.get("wind", {}).get("deg"),
            "weather_desc": d.get("weather", [{}])[0].get("description")
        }
    except:
        result = {
            "temp_c": None, "humidity": None,
            "wind_speed": None, "wind_deg": None,
            "weather_desc": None
        }

    weather_cache[key] = result
    return result

In [11]:
osm_cache = {}

def get_osm(lat, lon, radius=OSM_RADIUS_M):
    key = (round(lat,2), round(lon,2))
    if key in osm_cache:
        return osm_cache[key]

    lat_min = lat - (radius/111111)
    lat_max = lat + (radius/111111)
    lon_factor = radius/(111111 * max(0.0001, abs(math.cos(math.radians(lat)))))
    lon_min = lon - lon_factor
    lon_max = lon + lon_factor

    bbox = f"{lat_min},{lon_min},{lat_max},{lon_max}"

    query = f"""
    [out:json][timeout:25];
    (
      way["highway"]({bbox});
      way["landuse"="industrial"]({bbox});
      way["landuse"="farmland"]({bbox});
      node["amenity"="waste_disposal"]({bbox});
      node["landuse"="landfill"]({bbox});
    );
    out body;
    """

    try:
        r = requests.post("https://overpass-api.de/api/interpreter", data=query, timeout=30)
        data = r.json()
    except:
        data = {"elements": []}

    features = {"roads": 0, "industrial": 0, "farmland": 0, "dump": 0}

    for el in data.get("elements", []):
        tags = el.get("tags", {}) or {}
        if "highway" in tags:
            features["roads"] += 1
        if tags.get("landuse") == "industrial":
            features["industrial"] += 1
        if tags.get("landuse") == "farmland":
            features["farmland"] += 1
        if tags.get("amenity") == "waste_disposal" or tags.get("landuse") == "landfill":
            features["dump"] += 1

    osm_cache[key] = features
    return features

In [13]:
CHECKPOINT_FILE = "module1_checkpoint.csv"

records = []

# --------------------------
# RESUME LOGIC
# --------------------------
if os.path.exists(CHECKPOINT_FILE):
    df_prev = pd.read_csv(CHECKPOINT_FILE)
    processed_ids = set(df_prev["id"])
    records = df_prev.to_dict("records")
    print(f"Resuming from checkpoint — {len(processed_ids)} rows already completed.")
else:
    processed_ids = set()
    records = []
    print("No checkpoint found — starting fresh.")


# --------------------------
# MAIN LOOP WITH CHECKPOINTS
# --------------------------
for i, r in tqdm(df_grid.iterrows(), total=len(df_grid), desc="Processing grid"):
    lat = r["lat"]
    lon = r["lon"]
    row_id = r["id"]

    # Skip if already processed
    if row_id in processed_ids:
        continue

    # ----------------------
    # Pollution (WAQI)
    # ----------------------
    p = get_waqi_pollution(lat, lon) or {
        "aqi": None, "pm25": None, "pm10": None,
        "no2": None, "so2": None, "co": None, "o3": None
    }

    # ----------------------
    # Weather (cached)
    # ----------------------
    w = get_weather(lat, lon)

    # ----------------------
    # OSM land features (cached)
    # ----------------------
    o = get_osm(lat, lon)

    # Merge into one row
    rec = {
        "id": row_id,
        "lat": lat,
        "lon": lon,
        **p,
        **w,
        **o
    }
    records.append(rec)
    processed_ids.add(row_id)

    # ----------------------
    # CHECKPOINT every 30 rows
    # ----------------------
    if i % 30 == 0 and i > 0:
        pd.DataFrame(records).to_csv(CHECKPOINT_FILE, index=False)
        print(f"Checkpoint saved at row {i} — total saved: {len(records)}")

    time.sleep(SLEEP)


# --------------------------
# FINAL SAVE
# --------------------------
df_final = pd.DataFrame(records)
df_final.to_csv(FINAL_OUTPUT, index=False)
print("Processing complete!")
print("Saved final dataset:", FINAL_OUTPUT)

Resuming from checkpoint — 1261 rows already completed.


Processing grid:  26%|███████████████                                            | 1290/5056 [02:23<1:27:02,  1.39s/it]

Checkpoint saved at row 1290 — total saved: 1291


Processing grid:  26%|███████████████▍                                           | 1321/5056 [04:25<3:09:34,  3.05s/it]

Checkpoint saved at row 1320 — total saved: 1321


Processing grid:  27%|███████████████▊                                           | 1351/5056 [06:41<3:55:49,  3.82s/it]

Checkpoint saved at row 1350 — total saved: 1351


Processing grid:  27%|████████████████                                           | 1381/5056 [08:44<4:34:05,  4.47s/it]

Checkpoint saved at row 1380 — total saved: 1381


Processing grid:  28%|████████████████▍                                          | 1411/5056 [11:42<6:04:36,  6.00s/it]

Checkpoint saved at row 1410 — total saved: 1411


Processing grid:  29%|████████████████▊                                          | 1441/5056 [14:37<5:10:07,  5.15s/it]

Checkpoint saved at row 1440 — total saved: 1441


Processing grid:  29%|█████████████████▏                                         | 1471/5056 [17:11<4:55:20,  4.94s/it]

Checkpoint saved at row 1470 — total saved: 1471


Processing grid:  30%|█████████████████▌                                         | 1501/5056 [19:16<4:25:27,  4.48s/it]

Checkpoint saved at row 1500 — total saved: 1501


Processing grid:  30%|█████████████████▊                                         | 1531/5056 [21:58<4:56:49,  5.05s/it]

Checkpoint saved at row 1530 — total saved: 1531


Processing grid:  31%|██████████████████▏                                        | 1561/5056 [24:02<2:59:01,  3.07s/it]

Checkpoint saved at row 1560 — total saved: 1561


Processing grid:  31%|██████████████████▌                                        | 1591/5056 [25:56<3:01:59,  3.15s/it]

Checkpoint saved at row 1590 — total saved: 1591


Processing grid:  32%|██████████████████▉                                        | 1621/5056 [28:06<3:31:48,  3.70s/it]

Checkpoint saved at row 1620 — total saved: 1621


Processing grid:  33%|███████████████████▎                                       | 1651/5056 [29:59<2:55:20,  3.09s/it]

Checkpoint saved at row 1650 — total saved: 1651


Processing grid:  33%|███████████████████▌                                       | 1681/5056 [32:17<3:35:13,  3.83s/it]

Checkpoint saved at row 1680 — total saved: 1681


Processing grid:  34%|███████████████████▉                                       | 1711/5056 [34:35<4:48:57,  5.18s/it]

Checkpoint saved at row 1710 — total saved: 1711


Processing grid:  34%|████████████████████▎                                      | 1741/5056 [36:41<4:59:56,  5.43s/it]

Checkpoint saved at row 1740 — total saved: 1741


Processing grid:  35%|████████████████████▋                                      | 1771/5056 [38:32<4:37:23,  5.07s/it]

Checkpoint saved at row 1770 — total saved: 1771


Processing grid:  36%|█████████████████████                                      | 1801/5056 [40:42<4:50:25,  5.35s/it]

Checkpoint saved at row 1800 — total saved: 1801


Processing grid:  36%|█████████████████████▎                                     | 1831/5056 [42:52<3:41:24,  4.12s/it]

Checkpoint saved at row 1830 — total saved: 1831


Processing grid:  37%|█████████████████████▋                                     | 1861/5056 [44:55<3:27:13,  3.89s/it]

Checkpoint saved at row 1860 — total saved: 1861


Processing grid:  37%|██████████████████████                                     | 1891/5056 [47:06<3:16:13,  3.72s/it]

Checkpoint saved at row 1890 — total saved: 1891


Processing grid:  38%|██████████████████████▍                                    | 1921/5056 [48:47<2:28:20,  2.84s/it]

Checkpoint saved at row 1920 — total saved: 1921


Processing grid:  39%|██████████████████████▊                                    | 1951/5056 [50:34<2:51:22,  3.31s/it]

Checkpoint saved at row 1950 — total saved: 1951


Processing grid:  39%|███████████████████████                                    | 1981/5056 [52:16<2:29:13,  2.91s/it]

Checkpoint saved at row 1980 — total saved: 1981


Processing grid:  40%|███████████████████████▍                                   | 2011/5056 [54:15<3:05:39,  3.66s/it]

Checkpoint saved at row 2010 — total saved: 2011


Processing grid:  40%|███████████████████████▊                                   | 2041/5056 [56:24<4:03:22,  4.84s/it]

Checkpoint saved at row 2040 — total saved: 2041


Processing grid:  41%|████████████████████████▏                                  | 2071/5056 [58:35<2:33:38,  3.09s/it]

Checkpoint saved at row 2070 — total saved: 2071


Processing grid:  42%|███████████████████████▋                                 | 2101/5056 [1:00:34<3:04:46,  3.75s/it]

Checkpoint saved at row 2100 — total saved: 2101


Processing grid:  42%|████████████████████████                                 | 2131/5056 [1:02:35<4:02:47,  4.98s/it]

Checkpoint saved at row 2130 — total saved: 2131


Processing grid:  43%|████████████████████████▎                                | 2161/5056 [1:04:28<2:57:54,  3.69s/it]

Checkpoint saved at row 2160 — total saved: 2161


Processing grid:  43%|████████████████████████▋                                | 2191/5056 [1:06:54<3:50:42,  4.83s/it]

Checkpoint saved at row 2190 — total saved: 2191


Processing grid:  44%|█████████████████████████                                | 2221/5056 [1:09:31<4:24:09,  5.59s/it]

Checkpoint saved at row 2220 — total saved: 2221


Processing grid:  45%|████████████████████████▍                              | 2251/5056 [2:00:15<82:11:57, 105.50s/it]

Checkpoint saved at row 2250 — total saved: 2251


Processing grid:  45%|█████████████████████████▋                               | 2281/5056 [2:02:25<3:06:30,  4.03s/it]

Checkpoint saved at row 2280 — total saved: 2281


Processing grid:  46%|██████████████████████████                               | 2311/5056 [2:04:48<3:32:07,  4.64s/it]

Checkpoint saved at row 2310 — total saved: 2311


Processing grid:  46%|██████████████████████████▍                              | 2341/5056 [2:07:30<3:23:03,  4.49s/it]

Checkpoint saved at row 2340 — total saved: 2341


Processing grid:  47%|██████████████████████████▋                              | 2371/5056 [2:10:13<5:03:40,  6.79s/it]

Checkpoint saved at row 2370 — total saved: 2371


Processing grid:  47%|███████████████████████████                              | 2401/5056 [2:12:46<4:41:01,  6.35s/it]

Checkpoint saved at row 2400 — total saved: 2401


Processing grid:  48%|███████████████████████████▍                             | 2431/5056 [2:14:41<2:02:34,  2.80s/it]

Checkpoint saved at row 2430 — total saved: 2431


Processing grid:  49%|███████████████████████████▋                             | 2461/5056 [2:16:29<3:22:59,  4.69s/it]

Checkpoint saved at row 2460 — total saved: 2461


Processing grid:  49%|████████████████████████████                             | 2491/5056 [2:18:09<2:42:28,  3.80s/it]

Checkpoint saved at row 2490 — total saved: 2491


Processing grid:  50%|████████████████████████████▍                            | 2521/5056 [2:19:33<2:00:19,  2.85s/it]

Checkpoint saved at row 2520 — total saved: 2521


Processing grid:  50%|████████████████████████████▊                            | 2551/5056 [2:21:29<3:06:03,  4.46s/it]

Checkpoint saved at row 2550 — total saved: 2551


Processing grid:  51%|█████████████████████████████                            | 2581/5056 [2:23:02<1:57:50,  2.86s/it]

Checkpoint saved at row 2580 — total saved: 2581


Processing grid:  52%|█████████████████████████████▍                           | 2611/5056 [2:24:37<1:59:14,  2.93s/it]

Checkpoint saved at row 2610 — total saved: 2611


Processing grid:  52%|█████████████████████████████▊                           | 2641/5056 [2:26:21<2:38:13,  3.93s/it]

Checkpoint saved at row 2640 — total saved: 2641


Processing grid:  53%|██████████████████████████████                           | 2671/5056 [2:28:16<2:16:05,  3.42s/it]

Checkpoint saved at row 2670 — total saved: 2671


Processing grid:  53%|███████████████████████████████▌                           | 2701/5056 [2:30:05<07:35,  5.17it/s]

Checkpoint saved at row 2700 — total saved: 2701


Processing grid:  54%|███████████████████████████████▊                           | 2731/5056 [2:30:10<06:50,  5.67it/s]

Checkpoint saved at row 2730 — total saved: 2731


Processing grid:  55%|████████████████████████████████▏                          | 2761/5056 [2:30:15<06:38,  5.76it/s]

Checkpoint saved at row 2760 — total saved: 2761


Processing grid:  55%|████████████████████████████████▌                          | 2791/5056 [2:30:19<06:35,  5.72it/s]

Checkpoint saved at row 2790 — total saved: 2791


Processing grid:  56%|████████████████████████████████▉                          | 2821/5056 [2:30:24<06:27,  5.77it/s]

Checkpoint saved at row 2820 — total saved: 2821


Processing grid:  56%|█████████████████████████████████▎                         | 2851/5056 [2:30:29<06:20,  5.80it/s]

Checkpoint saved at row 2850 — total saved: 2851


Processing grid:  57%|█████████████████████████████████▌                         | 2881/5056 [2:30:34<06:20,  5.72it/s]

Checkpoint saved at row 2880 — total saved: 2881


Processing grid:  58%|█████████████████████████████████▉                         | 2911/5056 [2:30:39<06:13,  5.74it/s]

Checkpoint saved at row 2910 — total saved: 2911


Processing grid:  58%|██████████████████████████████████▎                        | 2941/5056 [2:30:44<06:05,  5.78it/s]

Checkpoint saved at row 2940 — total saved: 2941


Processing grid:  59%|██████████████████████████████████▋                        | 2971/5056 [2:30:49<05:55,  5.87it/s]

Checkpoint saved at row 2970 — total saved: 2971


Processing grid:  59%|███████████████████████████████████                        | 3001/5056 [2:30:54<05:57,  5.75it/s]

Checkpoint saved at row 3000 — total saved: 3001


Processing grid:  60%|███████████████████████████████████▎                       | 3031/5056 [2:30:59<05:51,  5.77it/s]

Checkpoint saved at row 3030 — total saved: 3031


Processing grid:  61%|███████████████████████████████████▋                       | 3061/5056 [2:31:04<05:41,  5.85it/s]

Checkpoint saved at row 3060 — total saved: 3061


Processing grid:  61%|████████████████████████████████████                       | 3091/5056 [2:31:09<05:42,  5.74it/s]

Checkpoint saved at row 3090 — total saved: 3091


Processing grid:  62%|████████████████████████████████████▍                      | 3121/5056 [2:31:14<05:34,  5.78it/s]

Checkpoint saved at row 3120 — total saved: 3121


Processing grid:  62%|████████████████████████████████████▊                      | 3151/5056 [2:31:19<05:33,  5.72it/s]

Checkpoint saved at row 3150 — total saved: 3151


Processing grid:  63%|█████████████████████████████████████                      | 3181/5056 [2:31:24<05:22,  5.82it/s]

Checkpoint saved at row 3180 — total saved: 3181


Processing grid:  64%|█████████████████████████████████████▍                     | 3211/5056 [2:31:29<05:18,  5.79it/s]

Checkpoint saved at row 3210 — total saved: 3211


Processing grid:  64%|█████████████████████████████████████▊                     | 3241/5056 [2:31:34<05:16,  5.74it/s]

Checkpoint saved at row 3240 — total saved: 3241


Processing grid:  65%|██████████████████████████████████████▏                    | 3271/5056 [2:31:39<05:13,  5.68it/s]

Checkpoint saved at row 3270 — total saved: 3271


Processing grid:  65%|██████████████████████████████████████▌                    | 3301/5056 [2:31:44<05:03,  5.79it/s]

Checkpoint saved at row 3300 — total saved: 3301


Processing grid:  66%|██████████████████████████████████████▊                    | 3331/5056 [2:31:49<05:00,  5.75it/s]

Checkpoint saved at row 3330 — total saved: 3331


Processing grid:  66%|███████████████████████████████████████▏                   | 3361/5056 [2:31:54<04:55,  5.73it/s]

Checkpoint saved at row 3360 — total saved: 3361


Processing grid:  67%|███████████████████████████████████████▌                   | 3391/5056 [2:31:58<04:43,  5.87it/s]

Checkpoint saved at row 3390 — total saved: 3391


Processing grid:  68%|███████████████████████████████████████▉                   | 3421/5056 [2:32:03<04:44,  5.76it/s]

Checkpoint saved at row 3420 — total saved: 3421


Processing grid:  68%|████████████████████████████████████████▎                  | 3451/5056 [2:32:08<04:40,  5.73it/s]

Checkpoint saved at row 3450 — total saved: 3451


Processing grid:  69%|████████████████████████████████████████▌                  | 3481/5056 [2:32:13<04:35,  5.71it/s]

Checkpoint saved at row 3480 — total saved: 3481


Processing grid:  69%|████████████████████████████████████████▉                  | 3511/5056 [2:32:18<04:29,  5.74it/s]

Checkpoint saved at row 3510 — total saved: 3511


Processing grid:  70%|█████████████████████████████████████████▎                 | 3541/5056 [2:32:23<04:24,  5.73it/s]

Checkpoint saved at row 3540 — total saved: 3541


Processing grid:  71%|█████████████████████████████████████████▋                 | 3571/5056 [2:32:28<04:22,  5.66it/s]

Checkpoint saved at row 3570 — total saved: 3571


Processing grid:  71%|██████████████████████████████████████████                 | 3601/5056 [2:32:33<04:16,  5.66it/s]

Checkpoint saved at row 3600 — total saved: 3601


Processing grid:  72%|██████████████████████████████████████████▎                | 3631/5056 [2:32:38<04:07,  5.76it/s]

Checkpoint saved at row 3630 — total saved: 3631


Processing grid:  72%|██████████████████████████████████████████▋                | 3661/5056 [2:32:43<04:05,  5.68it/s]

Checkpoint saved at row 3660 — total saved: 3661


Processing grid:  73%|███████████████████████████████████████████                | 3691/5056 [2:32:48<03:56,  5.77it/s]

Checkpoint saved at row 3690 — total saved: 3691


Processing grid:  74%|███████████████████████████████████████████▍               | 3721/5056 [2:32:53<03:53,  5.72it/s]

Checkpoint saved at row 3720 — total saved: 3721


Processing grid:  74%|███████████████████████████████████████████▊               | 3751/5056 [2:32:58<03:52,  5.62it/s]

Checkpoint saved at row 3750 — total saved: 3751


Processing grid:  75%|████████████████████████████████████████████               | 3781/5056 [2:33:03<03:40,  5.78it/s]

Checkpoint saved at row 3780 — total saved: 3781


Processing grid:  75%|████████████████████████████████████████████▍              | 3811/5056 [2:33:08<03:36,  5.75it/s]

Checkpoint saved at row 3810 — total saved: 3811


Processing grid:  76%|████████████████████████████████████████████▊              | 3841/5056 [2:33:13<03:33,  5.68it/s]

Checkpoint saved at row 3840 — total saved: 3841


Processing grid:  77%|█████████████████████████████████████████████▏             | 3871/5056 [2:33:18<03:29,  5.65it/s]

Checkpoint saved at row 3870 — total saved: 3871


Processing grid:  77%|█████████████████████████████████████████████▌             | 3901/5056 [2:33:23<03:19,  5.80it/s]

Checkpoint saved at row 3900 — total saved: 3901


Processing grid:  78%|█████████████████████████████████████████████▊             | 3931/5056 [2:33:28<03:20,  5.62it/s]

Checkpoint saved at row 3930 — total saved: 3931


Processing grid:  78%|██████████████████████████████████████████████▏            | 3961/5056 [2:33:32<03:11,  5.73it/s]

Checkpoint saved at row 3960 — total saved: 3961


Processing grid:  79%|██████████████████████████████████████████████▌            | 3991/5056 [2:33:37<03:07,  5.69it/s]

Checkpoint saved at row 3990 — total saved: 3991


Processing grid:  80%|██████████████████████████████████████████████▉            | 4021/5056 [2:33:42<03:02,  5.68it/s]

Checkpoint saved at row 4020 — total saved: 4021


Processing grid:  80%|███████████████████████████████████████████████▎           | 4051/5056 [2:33:47<02:57,  5.66it/s]

Checkpoint saved at row 4050 — total saved: 4051


Processing grid:  81%|███████████████████████████████████████████████▌           | 4081/5056 [2:33:52<02:51,  5.67it/s]

Checkpoint saved at row 4080 — total saved: 4081


Processing grid:  81%|███████████████████████████████████████████████▉           | 4111/5056 [2:33:57<02:44,  5.74it/s]

Checkpoint saved at row 4110 — total saved: 4111


Processing grid:  82%|████████████████████████████████████████████████▎          | 4141/5056 [2:34:02<02:38,  5.78it/s]

Checkpoint saved at row 4140 — total saved: 4141


Processing grid:  82%|████████████████████████████████████████████████▋          | 4171/5056 [2:34:07<02:33,  5.78it/s]

Checkpoint saved at row 4170 — total saved: 4171


Processing grid:  83%|█████████████████████████████████████████████████          | 4201/5056 [2:34:12<02:29,  5.72it/s]

Checkpoint saved at row 4200 — total saved: 4201


Processing grid:  84%|█████████████████████████████████████████████████▎         | 4231/5056 [2:34:17<02:28,  5.57it/s]

Checkpoint saved at row 4230 — total saved: 4231


Processing grid:  84%|█████████████████████████████████████████████████▋         | 4261/5056 [2:34:22<02:18,  5.72it/s]

Checkpoint saved at row 4260 — total saved: 4261


Processing grid:  85%|██████████████████████████████████████████████████         | 4291/5056 [2:34:27<02:13,  5.72it/s]

Checkpoint saved at row 4290 — total saved: 4291


Processing grid:  85%|██████████████████████████████████████████████████▍        | 4321/5056 [2:34:32<02:09,  5.68it/s]

Checkpoint saved at row 4320 — total saved: 4321


Processing grid:  86%|██████████████████████████████████████████████████▊        | 4351/5056 [2:34:37<02:03,  5.69it/s]

Checkpoint saved at row 4350 — total saved: 4351


Processing grid:  87%|███████████████████████████████████████████████████        | 4381/5056 [2:34:42<02:00,  5.62it/s]

Checkpoint saved at row 4380 — total saved: 4381


Processing grid:  87%|███████████████████████████████████████████████████▍       | 4411/5056 [2:34:47<01:54,  5.62it/s]

Checkpoint saved at row 4410 — total saved: 4411


Processing grid:  88%|███████████████████████████████████████████████████▊       | 4441/5056 [2:34:52<01:46,  5.75it/s]

Checkpoint saved at row 4440 — total saved: 4441


Processing grid:  88%|████████████████████████████████████████████████████▏      | 4471/5056 [2:34:57<01:43,  5.65it/s]

Checkpoint saved at row 4470 — total saved: 4471


Processing grid:  89%|████████████████████████████████████████████████████▌      | 4501/5056 [2:35:02<01:38,  5.61it/s]

Checkpoint saved at row 4500 — total saved: 4501


Processing grid:  90%|████████████████████████████████████████████████████▊      | 4531/5056 [2:35:07<01:33,  5.61it/s]

Checkpoint saved at row 4530 — total saved: 4531


Processing grid:  90%|█████████████████████████████████████████████████████▏     | 4561/5056 [2:35:12<01:27,  5.67it/s]

Checkpoint saved at row 4560 — total saved: 4561


Processing grid:  91%|█████████████████████████████████████████████████████▌     | 4591/5056 [2:35:17<01:22,  5.64it/s]

Checkpoint saved at row 4590 — total saved: 4591


Processing grid:  91%|█████████████████████████████████████████████████████▉     | 4621/5056 [2:35:22<01:16,  5.72it/s]

Checkpoint saved at row 4620 — total saved: 4621


Processing grid:  92%|██████████████████████████████████████████████████████▎    | 4651/5056 [2:35:26<01:10,  5.72it/s]

Checkpoint saved at row 4650 — total saved: 4651


Processing grid:  93%|██████████████████████████████████████████████████████▌    | 4681/5056 [2:35:31<01:07,  5.59it/s]

Checkpoint saved at row 4680 — total saved: 4681


Processing grid:  93%|██████████████████████████████████████████████████████▉    | 4711/5056 [2:35:36<01:00,  5.66it/s]

Checkpoint saved at row 4710 — total saved: 4711


Processing grid:  94%|███████████████████████████████████████████████████████▎   | 4741/5056 [2:35:41<00:55,  5.69it/s]

Checkpoint saved at row 4740 — total saved: 4741


Processing grid:  94%|███████████████████████████████████████████████████████▋   | 4771/5056 [2:35:46<00:50,  5.62it/s]

Checkpoint saved at row 4770 — total saved: 4771


Processing grid:  95%|████████████████████████████████████████████████████████   | 4801/5056 [2:35:51<00:45,  5.65it/s]

Checkpoint saved at row 4800 — total saved: 4801


Processing grid:  96%|████████████████████████████████████████████████████████▎  | 4831/5056 [2:35:56<00:39,  5.64it/s]

Checkpoint saved at row 4830 — total saved: 4831


Processing grid:  96%|████████████████████████████████████████████████████████▋  | 4861/5056 [2:36:01<00:34,  5.66it/s]

Checkpoint saved at row 4860 — total saved: 4861


Processing grid:  97%|█████████████████████████████████████████████████████████  | 4891/5056 [2:36:06<00:29,  5.61it/s]

Checkpoint saved at row 4890 — total saved: 4891


Processing grid:  97%|█████████████████████████████████████████████████████████▍ | 4921/5056 [2:36:11<00:24,  5.59it/s]

Checkpoint saved at row 4920 — total saved: 4921


Processing grid:  98%|█████████████████████████████████████████████████████████▊ | 4951/5056 [2:36:16<00:18,  5.72it/s]

Checkpoint saved at row 4950 — total saved: 4951


Processing grid:  99%|██████████████████████████████████████████████████████████ | 4981/5056 [2:36:21<00:13,  5.62it/s]

Checkpoint saved at row 4980 — total saved: 4981


Processing grid:  99%|██████████████████████████████████████████████████████████▍| 5011/5056 [2:36:26<00:07,  5.69it/s]

Checkpoint saved at row 5010 — total saved: 5011


Processing grid: 100%|██████████████████████████████████████████████████████████▊| 5041/5056 [2:36:31<00:02,  5.66it/s]

Checkpoint saved at row 5040 — total saved: 5041


Processing grid: 100%|███████████████████████████████████████████████████████████| 5056/5056 [2:36:34<00:00,  1.86s/it]


Processing complete!
Saved final dataset: module1_waqi_weather_osm_india.csv


In [15]:
df_final.to_csv(FINAL_OUTPUT, index=False)
print("Saved dataset:", FINAL_OUTPUT)
df_final.head()

Saved dataset: module1_waqi_weather_osm_india.csv


Unnamed: 0,id,lat,lon,aqi,pm25,pm10,no2,so2,co,o3,temp_c,humidity,wind_speed,wind_deg,weather_desc,roads,industrial,farmland,dump
0,pt_00001,8.093018,77.443716,93.0,93.0,63.0,0.1,7.0,3.1,6.7,28.66,59.0,3.56,51.0,overcast clouds,3,0,0,0
1,pt_00002,8.317596,77.222009,93.0,93.0,63.0,0.1,7.0,3.1,6.7,32.26,43.0,2.06,210.0,overcast clouds,34,0,0,0
2,pt_00003,8.317596,77.448974,93.0,93.0,63.0,0.1,7.0,3.1,6.7,30.75,45.0,2.43,46.0,overcast clouds,6,0,0,0
3,pt_00004,8.317596,77.675939,66.0,,66.0,3.9,2.5,7.3,11.8,28.75,56.0,6.6,14.0,overcast clouds,5,0,0,0
4,pt_00005,8.317596,77.902904,66.0,,66.0,3.9,2.5,7.3,11.8,28.37,62.0,7.83,20.0,overcast clouds,3,0,0,0
