In [None]:
pip install requests python-metar


Collecting python-metar
  Downloading python-metar-1.4.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: python-metar
  Building wheel for python-metar (setup.py) ... [?25l[?25hdone
  Created wheel for python-metar: filename=python_metar-1.4.0-py3-none-any.whl size=16926 sha256=cfae9eb931d26599ef7d1f7cc63f17d4b5ec5945d4bfddfd2e2a270814f3d7cc
  Stored in directory: /root/.cache/pip/wheels/1c/c7/33/370bed0725fd1aab6f731fd77dadfc7b66bdb6998909b7d8d0
Successfully built python-metar
Installing collected packages: python-metar
Successfully installed python-metar-1.4.0


In [None]:
import requests
import csv
from metar import Metar  # pip install python-metar

# ICAO station identifier -> human-readable airport name.
# Insertion order is the order in which main() fetches and reports the stations.
AIRPORTS = dict([
    ('CYYZ', 'Toronto Pearson'),
    ('CYOW', 'Ottawa'),
    ('CYHM', 'Hamilton'),
    ('CYXU', 'London'),
    ('CYKF', 'Waterloo'),
])

# Prefix of the per-station text files; fetch_metar() appends "<ICAO>.TXT".
BASE_URL = "https://tgftp.nws.noaa.gov/data/observations/metar/stations/"

# CSV file that main() writes the decoded observations to.
OUTPUT_FILE = "ontario_airports_metar.csv"


def fetch_metar(icao):
    """
    Fetch latest METAR for a given airport from NOAA.

    Parameters
    ----------
    icao : str
        Station identifier; the file requested is ``{BASE_URL}{icao}.TXT``.

    Returns
    -------
    dict
        Always has the keys ``station_id``, ``observation_time`` and
        ``raw_text``. On success ``observation_time`` is the first non-blank
        line of the response and ``raw_text`` the second. On any failure
        (exception, non-200 status, fewer than two non-blank lines) both are
        ``None`` and a warning is printed, so a failed fetch is never silent.
    """
    missing = {"station_id": icao, "observation_time": None, "raw_text": None}
    url = f"{BASE_URL}{icao}.TXT"

    try:
        resp = requests.get(url, timeout=5)

        if resp.status_code != 200:
            # Previously this case returned the empty record without any message.
            print(f"⚠️ Error fetching {icao}: HTTP {resp.status_code}")
            return missing

        # Ignore blank lines so stray whitespace cannot shift the two fields.
        lines = [line.strip() for line in resp.text.splitlines() if line.strip()]
        if len(lines) < 2:
            print(f"⚠️ Error fetching {icao}: unexpected response format")
            return missing

        return {"station_id": icao, "observation_time": lines[0], "raw_text": lines[1]}
    except Exception as e:
        # Best effort: one unreachable station must not abort the whole run.
        print(f"⚠️ Error fetching {icao}: {e}")
    return missing


def decode_metar(raw_metar):
    """
    Decode METAR string into structured weather info.

    Parameters
    ----------
    raw_metar : str or None
        Raw METAR report text. A falsy value yields an all-``None`` result.

    Returns
    -------
    dict
        Keys ``temperature_C``, ``dewpoint_C``, ``wind_dir_deg``,
        ``wind_speed_kt``, ``visibility_m``, ``pressure_hPa``, ``clouds``,
        ``weather`` and ``remarks``. Fields the report does not carry are
        ``None``; if the report cannot be parsed at all every field is ``None``.

        * ``clouds``  - sky layers as compact codes (cover, height in hundreds
          of feet, optional cloud type), e.g. ``"BKN150; OVC190CB"``.
        * ``weather`` - present-weather groups rebuilt from their parts,
          e.g. ``"-SHRA BR"``.
        * ``remarks`` - remarks text as returned by python-metar.
    """
    columns = (
        "temperature_C", "dewpoint_C", "wind_dir_deg", "wind_speed_kt",
        "visibility_m", "pressure_hPa", "clouds", "weather", "remarks"
    )
    blank = dict.fromkeys(columns)
    if not raw_metar:
        return blank

    try:
        report = Metar.Metar(raw_metar)

        # python-metar stores each sky layer as a (cover, height, cloud) tuple;
        # str() on that tuple would embed an object repr, so format it by hand.
        layers = []
        for cover, height, cloud in (report.sky or []):
            text = cover or ""
            if height is not None:
                text += f"{int(round(height.value(units='FT') / 100)):03d}"
            if cloud:
                text += cloud
            layers.append(text)

        # Each present-weather entry is a tuple of optional code fragments
        # (intensity, descriptor, precipitation, obscuration, other). Joining
        # the tuples directly raises TypeError, which used to blank the whole
        # row for every observation that reported weather.
        groups = ["".join(part for part in group if part) for group in (report.weather or [])]
        groups = [group for group in groups if group]

        # python-metar exposes remarks as a method; tolerate a plain attribute too.
        remarks = report.remarks() if callable(report.remarks) else report.remarks

        return {
            "temperature_C": report.temp.value(units='C') if report.temp is not None else None,
            "dewpoint_C": report.dewpt.value(units='C') if report.dewpt is not None else None,
            "wind_dir_deg": report.wind_dir.value() if report.wind_dir is not None else None,
            "wind_speed_kt": report.wind_speed.value() if report.wind_speed is not None else None,
            "visibility_m": report.vis.value(units='m') if report.vis is not None else None,
            "pressure_hPa": report.press.value(units='hPa') if report.press is not None else None,
            "clouds": "; ".join(layers) if layers else None,
            "weather": " ".join(groups) if groups else None,
            "remarks": remarks if remarks else None
        }
    except Exception:
        # Best effort: an unparseable report yields an empty row, not a crash.
        return blank


def main():
    """
    Fetch, decode and save the latest METAR of every airport in AIRPORTS.

    For each station the raw report is fetched with fetch_metar(), decoded
    with decode_metar(), and the merged record is written as one row of
    OUTPUT_FILE. A one-line summary per airport is printed afterwards.
    """
    print("Fetching current METAR data for selected Ontario airports...\n")

    def build_record(station, airport):
        # Later updates win, so the fetched station_id overrides the seed value.
        observation = fetch_metar(station)
        record = {"station_id": station, "airport_name": airport}
        record.update(observation)
        record.update(decode_metar(observation["raw_text"]))
        return record

    records = [build_record(station, airport) for station, airport in AIRPORTS.items()]

    # Column order of the output CSV
    columns = [
        "station_id", "airport_name", "observation_time", "raw_text",
        "temperature_C", "dewpoint_C", "wind_dir_deg", "wind_speed_kt",
        "visibility_m", "pressure_hPa", "clouds", "weather", "remarks"
    ]

    with open(OUTPUT_FILE, "w", newline="", encoding="utf-8") as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        for record in records:
            out.writerow(record)

    print(f"✅ Saved decoded METAR data for {len(AIRPORTS)} airports to '{OUTPUT_FILE}'")
    for record in records:
        print(f" - {record['airport_name']} ({record['station_id']}): {record['raw_text']}")


if __name__ == "__main__":
    main()


Fetching current METAR data for selected Ontario airports...

✅ Saved decoded METAR data for 5 airports to 'ontario_airports_metar.csv'
 - Toronto Pearson (CYYZ): CYYZ 151300Z 36008KT 15SM BKN150 BKN190 10/03 A3023 RMK AC5AC2 SLP241
 - Ottawa (CYOW): CYOW 151300Z 32009KT 15SM FEW150 FEW180 SCT220 SCT250 07/00 A3020 RMK AC1AC1CI2CI1 AC TR CI TR SLP230
 - Hamilton (CYHM): CYHM 151300Z 35007KT 15SM BKN120 BKN230 10/04 A3021 RMK AC6CI1 SLP236
 - London (CYXU): CYXU 151300Z 35005KT 15SM BKN083 BKN110 10/08 A3025 RMK AC5AC2 SLP249
 - Waterloo (CYKF): CYKF 151300Z AUTO 36005KT 9SM BKN120 OVC150 09/04 A3023 RMK SLP245


In [None]:
import requests
import csv
from io import StringIO
from metar import Metar
from datetime import datetime

# ICAO station identifier -> airport name; main() iterates in this order.
AIRPORTS = dict([
    ('CYYZ', 'Toronto Pearson'),
    ('CYOW', 'Ottawa'),
    ('CYHM', 'Hamilton'),
    ('CYXU', 'London'),
    ('CYKF', 'Waterloo'),
])

# Requested date range as "YYYY-MM-DD"; fetch_metar_history() splits these on "-".
START_DATE = "2025-04-01"
END_DATE = "2025-10-01"

# CSV file that main() writes the combined records to.
OUTPUT_FILE = "ontario_airports_metar_6months.csv"

# Iowa State IEM endpoint queried by fetch_metar_history().
IEM_BASE = "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py"

def fetch_metar_history(station, start, end):
    """
    Fetch raw METAR history from Iowa State IEM.

    Parameters
    ----------
    station : str
        Station identifier sent as the ``station`` query parameter.
    start, end : str
        Dates as ``"YYYY-MM-DD"``; each is split on ``"-"`` into the
        year/month/day query parameters (``...1`` for start, ``...2`` for end).

    Returns
    -------
    str
        The response body as text.

    Raises
    ------
    ValueError
        If ``start`` or ``end`` does not split into exactly three parts.
    Whatever ``raise_for_status()`` raises for an unsuccessful HTTP status.
    """
    query = {"station": station, "data": "metar"}

    # Start date fills the "...1" parameters, end date the "...2" parameters.
    for suffix, iso_date in (("1", start), ("2", end)):
        year, month, day = iso_date.split("-")
        query["year" + suffix] = year
        query["month" + suffix] = month
        query["day" + suffix] = day

    query["tz"] = "Etc/UTC"
    query["format"] = "onlycomma"
    query["latlon"] = "no"

    response = requests.get(IEM_BASE, params=query, timeout=60)
    response.raise_for_status()
    return response.text

def decode_metar(raw_metar):
    """
    Decode a METAR string using python-metar.

    Parameters
    ----------
    raw_metar : str
        Raw METAR report text.

    Returns
    -------
    dict
        On success: ``temperature_C``, ``dewpoint_C``, ``wind_dir_deg``,
        ``wind_speed_kt``, ``visibility_m``, ``pressure_hPa``, ``clouds``,
        ``weather`` and ``remarks`` (``None`` for fields the report lacks).
        ``clouds`` holds compact layer codes such as ``"BKN150; OVC190CB"``
        and ``weather`` the rebuilt present-weather groups such as
        ``"-SHRA BR"``. If the report cannot be decoded, an empty dict.
    """
    try:
        report = Metar.Metar(raw_metar)

        # Sky layers are (cover, height, cloud) tuples; format them explicitly
        # because str() on the tuple embeds an object repr for the height.
        layers = []
        for cover, height, cloud in (report.sky or []):
            text = cover or ""
            if height is not None:
                text += f"{int(round(height.value(units='FT') / 100)):03d}"
            if cloud:
                text += cloud
            layers.append(text)

        # Present weather is a list of tuples of optional code fragments;
        # joining the tuples directly raises TypeError, which previously made
        # every observation that reported weather decode to {}.
        groups = ["".join(part for part in group if part) for group in (report.weather or [])]
        groups = [group for group in groups if group]

        # python-metar exposes remarks as a method; tolerate a plain attribute too.
        remarks = report.remarks() if callable(report.remarks) else report.remarks

        return {
            "temperature_C": report.temp.value(units='C') if report.temp is not None else None,
            "dewpoint_C": report.dewpt.value(units='C') if report.dewpt is not None else None,
            "wind_dir_deg": report.wind_dir.value() if report.wind_dir is not None else None,
            "wind_speed_kt": report.wind_speed.value() if report.wind_speed is not None else None,
            "visibility_m": report.vis.value(units='m') if report.vis is not None else None,
            "pressure_hPa": report.press.value(units='hPa') if report.press is not None else None,
            "clouds": "; ".join(layers) if layers else None,
            "weather": " ".join(groups) if groups else None,
            "remarks": remarks if remarks else None
        }
    except Exception:
        # Best effort: the caller still records the raw text for this report.
        return {}

def main():
    """
    Download the METAR history of every airport in AIRPORTS and save one CSV.

    Each station's history is fetched with fetch_metar_history(), parsed as
    CSV, decoded row by row with decode_metar(), and all rows are written
    together to OUTPUT_FILE. Rows that cannot be processed are skipped.
    """
    records = []
    for icao, name in AIRPORTS.items():
        print(f"Fetching 6 months of data for {icao} ({name})...")
        history = StringIO(fetch_metar_history(icao, START_DATE, END_DATE))

        for fields in csv.reader(history):
            # Ignore blank lines and the header line (first field is "station").
            if not fields:
                continue
            if fields[0] == "station":
                continue

            try:
                raw = fields[-1]
                record = {
                    "station_id": fields[0],
                    "airport_name": name,
                    "observation_time": fields[1],
                    "raw_text": raw,
                }
                record.update(decode_metar(raw))
            except Exception:
                # e.g. a line with too few fields: skip it and carry on
                continue
            records.append(record)

    # Column order of the combined CSV
    columns = [
        "station_id", "airport_name", "observation_time", "raw_text",
        "temperature_C", "dewpoint_C", "wind_dir_deg", "wind_speed_kt",
        "visibility_m", "pressure_hPa", "clouds", "weather", "remarks"
    ]

    with open(OUTPUT_FILE, "w", newline="", encoding="utf-8") as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        for record in records:
            out.writerow(record)

    print(f"\n✅ Saved {len(records)} decoded METAR records (6 months) to '{OUTPUT_FILE}'")


if __name__ == "__main__":
    main()


Fetching 6 months of data for CYYZ (Toronto Pearson)...
Fetching 6 months of data for CYOW (Ottawa)...
Fetching 6 months of data for CYHM (Hamilton)...
Fetching 6 months of data for CYXU (London)...
Fetching 6 months of data for CYKF (Waterloo)...

✅ Saved 28426 decoded METAR records (6 months) to 'ontario_airports_metar_6months.csv'


In [None]:
import requests
import pandas as pd
from io import StringIO
from metar import Metar
from datetime import datetime
import math

# ICAO station identifier -> airport name; main() iterates in this order.
AIRPORTS = dict([
    ('CYYZ', 'Toronto Pearson'),
    ('CYOW', 'Ottawa'),
    ('CYHM', 'Hamilton'),
    ('CYXU', 'London'),
    ('CYKF', 'Waterloo'),
])

# Requested date range as "YYYY-MM-DD"; fetch_metar_history() splits these on "-".
START_DATE = "2025-04-01"
END_DATE = "2025-10-01"

# CSV file that main() writes the cleaned samples to.
OUTPUT_FILE = "canadian_airports_metar_ml_ready.csv"

# Iowa State IEM endpoint queried by fetch_metar_history().
IEM_BASE = "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py"


def fetch_metar_history(station, start, end):
    """
    Request the raw METAR history of one station from IEM_BASE.

    Parameters
    ----------
    station : str
        Station identifier sent as the ``station`` query parameter.
    start, end : str
        Dates as ``"YYYY-MM-DD"``; each must split on ``"-"`` into exactly
        three parts (otherwise ``ValueError`` is raised).

    Returns
    -------
    str
        The response body as text. An unsuccessful HTTP status is raised by
        ``raise_for_status()`` instead of being returned.
    """
    start_year, start_month, start_day = start.split("-")
    end_year, end_month, end_day = end.split("-")

    # Keyword order below fixes the order of the query parameters.
    query = dict(
        station=station,
        data="metar",
        year1=start_year, month1=start_month, day1=start_day,
        year2=end_year, month2=end_month, day2=end_day,
        tz="Etc/UTC",
        format="onlycomma",
        latlon="no",
    )

    response = requests.get(IEM_BASE, params=query, timeout=60)
    response.raise_for_status()
    return response.text


def relative_humidity(temp_c, dew_c):
    """
    Approximate relative humidity from temperature and dew point.

    Returns 100 times the ratio of ``exp(17.625 * x / (243.04 + x))`` evaluated
    at the dew point and at the air temperature, or ``None`` when either
    input is ``None``.
    """
    if dew_c is None or temp_c is None:
        return None

    def exp_term(value):
        # Same exponential term for both inputs; only the argument differs.
        return math.exp((17.625 * value) / (243.04 + value))

    return 100 * (exp_term(dew_c) / exp_term(temp_c))


def decode_metar(raw_metar):
    """
    Decode METAR string into structured ML features.

    Parameters
    ----------
    raw_metar : str
        Raw METAR report text.

    Returns
    -------
    dict
        ``temperature_C``, ``dewpoint_C``, ``humidity_%``, ``pressure_hPa``,
        ``wind_speed_kt``, ``wind_dir_deg``, ``visibility_m``,
        ``cloud_coverage_okta`` and ``weather_code``; an empty dict if the
        report cannot be decoded.

        * ``cloud_coverage_okta`` - sky cover of the most extensive reported
          layer in oktas (0 when no layer or only a clear-sky code is
          reported). It used to be the *number* of layers, which made a
          clear-sky "CLR" report count as 1.
        * ``weather_code`` - present-weather groups rebuilt from their parts
          (e.g. ``"-RA BR"``), or ``"CLR"`` when none is reported.
    """
    # Upper bound of the conventional okta range of each METAR cover code
    # (FEW 1-2, SCT 3-4, BKN 5-7, OVC 8); "VV" (vertical visibility) is
    # treated as a fully obscured sky. Unknown codes contribute 0.
    okta_by_cover = {
        "SKC": 0, "CLR": 0, "NSC": 0, "NCD": 0,
        "FEW": 2, "SCT": 4, "BKN": 7, "OVC": 8, "VV": 8,
    }

    try:
        report = Metar.Metar(raw_metar)
        temp = report.temp.value(units='C') if report.temp is not None else None
        dew = report.dewpt.value(units='C') if report.dewpt is not None else None
        wind_speed = report.wind_speed.value() if report.wind_speed is not None else None
        wind_dir = report.wind_dir.value() if report.wind_dir is not None else None
        visibility = report.vis.value(units='m') if report.vis is not None else None
        pressure = report.press.value(units='hPa') if report.press is not None else None

        # Each sky layer is a tuple whose first item is the cover code.
        clouds = max(
            (okta_by_cover.get(layer[0], 0) for layer in (report.sky or [])),
            default=0,
        )

        # Present weather is a list of tuples of optional code fragments;
        # joining the tuples directly raises TypeError, which previously made
        # every observation with weather decode to {} and drop out of the data.
        groups = ["".join(part for part in group if part) for group in (report.weather or [])]
        groups = [group for group in groups if group]
        weather = " ".join(groups) if groups else "CLR"

        return {
            "temperature_C": temp,
            "dewpoint_C": dew,
            "humidity_%": relative_humidity(temp, dew),
            "pressure_hPa": pressure,
            "wind_speed_kt": wind_speed,
            "wind_dir_deg": wind_dir,
            "visibility_m": visibility,
            "cloud_coverage_okta": clouds,
            "weather_code": weather.strip()
        }
    except Exception:
        # Best effort: the caller skips reports that decode to an empty dict.
        return {}


def main():
    """
    Build the ML-ready METAR dataset for every airport in AIRPORTS.

    Each station's history is fetched with fetch_metar_history(), decoded
    with decode_metar(), and reports that fail to decode are skipped. Rows
    missing temperature, pressure or humidity are dropped, timestamps are
    converted to UTC datetimes, rows are sorted by station and time, and the
    result is written to OUTPUT_FILE.

    If no report could be decoded at all, a warning is printed and no file
    is written (previously this case crashed with a KeyError in dropna()).
    """
    all_records = []

    for icao, name in AIRPORTS.items():
        print(f"Fetching historical METAR data for {icao} ({name})...")
        csv_text = fetch_metar_history(icao, START_DATE, END_DATE)

        # pd.read_csv raises EmptyDataError on an empty body; treat that as
        # "no data for this station" instead of aborting the whole run.
        if not csv_text.strip():
            continue

        df = pd.read_csv(StringIO(csv_text))
        if df.empty:
            continue

        # Only two columns are needed, so iterate over them directly
        # rather than materialising a Series per row with iterrows().
        for valid_time, metar_value in zip(df["valid"], df["metar"]):
            raw_metar = str(metar_value)
            decoded = decode_metar(raw_metar)
            if decoded:
                all_records.append({
                    "station_id": icao,
                    "airport_name": name,
                    "timestamp": valid_time,
                    "raw_text": raw_metar,
                    **decoded
                })

    if not all_records:
        # An empty frame has no columns, so dropna(subset=...) would raise.
        print("\n⚠️ No METAR records could be decoded; nothing was written.")
        return

    samples = (
        pd.DataFrame(all_records)
        # Drop rows with missing essential data
        .dropna(subset=["temperature_C", "pressure_hPa", "humidity_%"])
        # Convert timestamp to datetime
        .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"], utc=True))
        # Sort chronologically within each station
        .sort_values(by=["station_id", "timestamp"])
    )

    # Save to CSV
    samples.to_csv(OUTPUT_FILE, index=False, encoding="utf-8")

    print(f"\n✅ Saved {len(samples)} ML-ready METAR samples to '{OUTPUT_FILE}'")


if __name__ == "__main__":
    main()


Fetching historical METAR data for CYYZ (Toronto Pearson)...
Fetching historical METAR data for CYOW (Ottawa)...
Fetching historical METAR data for CYHM (Hamilton)...
Fetching historical METAR data for CYXU (London)...
Fetching historical METAR data for CYKF (Waterloo)...

✅ Saved 20233 ML-ready METAR samples to 'canadian_airports_metar_ml_ready.csv'
