In [1]:
import requests

In [2]:
# NCEI Access Data Service API documentation:
# https://www.ncei.noaa.gov/support/access-data-service-api-user-documentation
# Example query: daily summaries for station AGE00147708, 2009-12-01 .. 2010-01-01.
sample_response = requests.get(
    'https://www.ncei.noaa.gov/access/services/data/v1',
    # Query fields are passed as `params` so requests builds and encodes the
    # query string; the resulting URL is the same as the hand-written one.
    params={
        'dataset': 'daily-summaries',
        'stations': 'AGE00147708',
        'startDate': '2009-12-01',
        'endDate': '2010-01-01',
        'includeAttributes': 'true',
        'format': 'json',
    },
    timeout=60,  # requests has no default timeout; without one a stalled server hangs the kernel
)
# Fail with the HTTP status instead of a confusing JSON decode error on an error page.
sample_response.raise_for_status()
sample_response.json()

[{'DATE': '2009-12-01',
  'STATION': 'AGE00147708',
  'TAVG_ATTRIBUTES': 'H,,S',
  'PRCP_ATTRIBUTES': ',,S',
  'TMAX': '  145',
  'TAVG': '   99',
  'TMAX_ATTRIBUTES': ',,S',
  'TMIN': '   70',
  'PRCP': '  269',
  'TMIN_ATTRIBUTES': ',,S'},
 {'DATE': '2009-12-02',
  'STATION': 'AGE00147708',
  'TAVG_ATTRIBUTES': 'H,,S',
  'PRCP_ATTRIBUTES': ',,S',
  'TMAX': '  130',
  'TAVG': '   96',
  'TMAX_ATTRIBUTES': ',,S',
  'TMIN': '   75',
  'PRCP': '   41',
  'TMIN_ATTRIBUTES': ',,S'},
 {'DATE': '2009-12-03',
  'STATION': 'AGE00147708',
  'TAVG_ATTRIBUTES': 'H,,S',
  'PRCP_ATTRIBUTES': ',,S',
  'TMAX': '  185',
  'TAVG': '  111',
  'TMAX_ATTRIBUTES': ',,S',
  'TMIN': '   73',
  'PRCP': '    5',
  'TMIN_ATTRIBUTES': ',,S'},
 {'DATE': '2009-12-04',
  'STATION': 'AGE00147708',
  'TAVG_ATTRIBUTES': 'H,,S',
  'PRCP_ATTRIBUTES': ',,S',
  'TMAX': '  162',
  'TAVG': '  124',
  'TMAX_ATTRIBUTES': ',,S',
  'PRCP': '  330'},
 {'DATE': '2009-12-05',
  'STATION': 'AGE00147708',
  'TAVG_ATTRIBUTES': 'H,,S'

In [6]:
import pandas as pd

# Station metadata references:
#   https://www.ncdc.noaa.gov/homr/
#   https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt
# URL of the GHCN-Daily station list that is downloaded and parsed below.
STATIONS_URL = 'https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt'
def parse_ghcnd_stations_line(l: str) -> dict:
    """
    Parse one fixed-width record of "ghcnd-stations.txt" into a dict.

    The column layout follows [IV. FORMAT OF "ghcnd-stations.txt"] of
    https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt

    A full record is 85 characters wide.  A trailing carriage return / newline
    is ignored, and a record whose trailing blank padding was stripped is
    padded back to full width, so both parse to exactly the same values as the
    untouched 85-character line.

    Parameters
    ----------
    l : str
        A single line of the stations file.

    Returns
    -------
    dict
        Keys 'ID', 'LATITUDE', 'LONGITUDE', 'ELEVATION', 'STATE', 'NAME',
        'GSN FLAG', 'HCN/CRN FLAG' and 'WMO ID'.  The three numeric fields are
        converted to float; every other field is the raw column slice,
        including its fixed-width blank padding.

    Raises
    ------
    ValueError
        If the line is longer than 85 characters, is too short to contain the
        numeric columns, or a numeric column cannot be converted to float.
    """
    record_width = 85
    # Columns up to the end of ELEVATION must be present; everything after it
    # is text that may legitimately be blank (and therefore stripped).
    min_width = 37

    record = l.rstrip('\r\n')
    if not min_width <= len(record) <= record_width:
        raise ValueError(l)
    # Restore stripped padding so the slices below always see full-width fields.
    record = record.ljust(record_width)

    return {
        'ID': record[0:11],
        'LATITUDE': float(record[12:20]),
        'LONGITUDE': float(record[21:30]),
        'ELEVATION': float(record[31:37]),
        'STATE': record[38:40],
        'NAME': record[41:71],
        'GSN FLAG': record[72:75],
        'HCN/CRN FLAG': record[76:79],
        'WMO ID': record[80:85],
    }

# Download the station list once and parse every non-empty line into a record.
stations_response = requests.get(STATIONS_URL, timeout=60)  # no default timeout in requests
# Stop on an HTTP error instead of feeding an error page to the fixed-width parser.
stations_response.raise_for_status()

stations_resl = [
    parse_ghcnd_stations_line(line)
    # Drop a trailing '\r' so CRLF-terminated files parse like LF-terminated ones.
    for line in (raw.rstrip('\r') for raw in stations_response.text.split('\n'))
    if len(line) > 0
]
# One row per station, columns named after the parsed record keys.
STATIONS_DF = pd.DataFrame(stations_resl)

In [7]:
# Preview the first rows of the parsed station table.
STATIONS_DF.head()

Unnamed: 0,ID,LATITUDE,LONGITUDE,ELEVATION,STATE,NAME,GSN FLAG,HCN/CRN FLAG,WMO ID
0,ACW00011604,17.1167,-61.7833,10.1,,ST JOHNS COOLIDGE FLD,,,
1,ACW00011647,17.1333,-61.7833,19.2,,ST JOHNS,,,
2,AE000041196,25.333,55.517,34.0,,SHARJAH INTER. AIRP,GSN,,41196.0
3,AEM00041194,25.255,55.364,10.4,,DUBAI INTL,,,41194.0
4,AEM00041217,24.433,54.651,26.8,,ABU DHABI INTL,,,41217.0


In [10]:
# Query point: Toronto, as latitude / longitude
# (presumably decimal degrees, same convention as the LATITUDE / LONGITUDE columns of STATIONS_DF).
lat=43.700111
lon=-79.416298

In [19]:
from typing import List, Optional
import numpy as np

def get_closest_station_id(
    lat: float,
    lon: float,
    top_n: Optional[int] = 10,
    stations_df: Optional[pd.DataFrame] = None,
) -> List[str]:
    """
    Return station IDs ordered from nearest to farthest from a query point.

    Stations are ranked by great-circle (haversine) distance.  Plain Euclidean
    distance on raw degrees is not used because a degree of longitude gets
    shorter towards the poles and because longitudes wrap around at +/-180,
    both of which distort the ranking.

    Parameters
    ----------
    lat, lon : float
        Query point in decimal degrees.
    top_n : int or None, default 10
        Number of IDs to return; ``None`` returns every station.
    stations_df : pandas.DataFrame, optional
        Table with 'ID', 'LATITUDE' and 'LONGITUDE' columns (degrees).
        Defaults to the module-level ``STATIONS_DF``.  The table is not
        modified.

    Returns
    -------
    List[str]
        Station IDs, nearest first.
    """
    if stations_df is None:
        stations_df = STATIONS_DF

    station_lat = np.radians(stations_df['LATITUDE'].to_numpy(dtype=float))
    station_lon = np.radians(stations_df['LONGITUDE'].to_numpy(dtype=float))
    query_lat = np.radians(lat)
    query_lon = np.radians(lon)

    # Haversine term, computed for all stations at once instead of row by row.
    hav = (
        np.sin((station_lat - query_lat) / 2.0) ** 2
        + np.cos(query_lat) * np.cos(station_lat)
        * np.sin((station_lon - query_lon) / 2.0) ** 2
    )
    # Clip guards against rounding pushing the value just outside [0, 1].
    # The central angle is proportional to distance, so it is enough for ranking.
    central_angle = 2.0 * np.arcsin(np.sqrt(np.clip(hav, 0.0, 1.0)))

    # Stable sort keeps table order among equidistant stations; NaNs sort last.
    nearest_first = np.argsort(central_angle, kind='stable')
    if top_n is not None:
        nearest_first = nearest_first[:top_n]
    return stations_df['ID'].iloc[nearest_first].tolist()


In [20]:
# IDs of the stations nearest to the Toronto coordinates defined above (default top_n=10).
get_closest_station_id(lat, lon)

['CA006158567',
 'CA006158779',
 'CA006158417',
 'CA006158350',
 'CA006158355',
 'CA006158846',
 'CA006158762',
 'CA1ON000564',
 'CA006158730',
 'CA006158550']

In [21]:
# Show the station-table row whose ID is 'CA006158567'.
STATIONS_DF.loc[STATIONS_DF.ID == 'CA006158567']

Unnamed: 0,ID,LATITUDE,LONGITUDE,ELEVATION,STATE,NAME,GSN FLAG,HCN/CRN FLAG,WMO ID
29691,CA006158567,43.7,-79.45,174.0,ON,TORONTO GLENVIEW,,,


In [23]:
# NCEI Access Data Service API documentation:
# https://www.ncei.noaa.gov/support/access-data-service-api-user-documentation
# Daily summaries for station CA006158567, 1950-12-01 .. 2010-01-01.
toronto_response = requests.get(
    'https://www.ncei.noaa.gov/access/services/data/v1',
    # Query fields are passed as `params` so requests builds and encodes the
    # query string; the resulting URL is the same as the hand-written one.
    params={
        'dataset': 'daily-summaries',
        'stations': 'CA006158567',
        'startDate': '1950-12-01',
        'endDate': '2010-01-01',
        'includeAttributes': 'true',
        'format': 'json',
    },
    timeout=120,  # requests has no default timeout; this multi-decade query may be slow
)
# Fail with the HTTP status instead of a confusing JSON decode error on an error page.
toronto_response.raise_for_status()
toronto_response.json()

': '1955-05-31',
  'STATION': 'CA006158567',
  'SNOW': '    0',
  'PRCP_ATTRIBUTES': 'T,,C',
  'SNOW_ATTRIBUTES': ',,C',
  'PRCP': '    0'},
 {'DATE': '1955-06-01',
  'STATION': 'CA006158567',
  'SNOW': '    0',
  'PRCP_ATTRIBUTES': ',,C',
  'SNOW_ATTRIBUTES': ',,C',
  'PRCP': '    0'},
 {'DATE': '1955-06-02',
  'STATION': 'CA006158567',
  'SNOW': '    0',
  'PRCP_ATTRIBUTES': ',,C',
  'SNOW_ATTRIBUTES': ',,C',
  'PRCP': '    0'},
 {'DATE': '1955-06-03',
  'STATION': 'CA006158567',
  'SNOW': '    0',
  'PRCP_ATTRIBUTES': ',,C',
  'SNOW_ATTRIBUTES': ',,C',
  'PRCP': '    0'},
 {'DATE': '1955-06-04',
  'STATION': 'CA006158567',
  'SNOW': '    0',
  'PRCP_ATTRIBUTES': ',,C',
  'SNOW_ATTRIBUTES': ',,C',
  'PRCP': '    0'},
 {'DATE': '1955-06-05',
  'STATION': 'CA006158567',
  'SNOW': '    0',
  'PRCP_ATTRIBUTES': ',,C',
  'SNOW_ATTRIBUTES': ',,C',
  'PRCP': '    0'},
 {'DATE': '1955-06-06',
  'STATION': 'CA006158567',
  'SNOW': '    0',
  'PRCP_ATTRIBUTES': ',,C',
  'SNOW_ATTRIBUTES': ',,C