In [4]:
import os

import requests

# Root URL of the web API; endpoint names are appended to it by make_req.
base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/"

# Credentials must not be committed with the notebook: the API token is read
# from the NCDC_API_TOKEN environment variable instead of being hard-coded.
token = os.environ.get("NCDC_API_TOKEN", "")
if not token:
    print("NCDC_API_TOKEN is not set; requests will be sent without an API token")

def make_req(endpoint, payload=None, timeout=30):
    """
    Send a GET request to an endpoint of the API rooted at `base_url`.

    Parameters:
        endpoint: Path appended to `base_url` (e.g. 'datasets').
        payload: Optional dictionary of query-string parameters.
        timeout: Seconds to wait for the server before giving up.
            `requests` applies no timeout by default, so without it a
            stalled connection would block the notebook indefinitely.

    Returns:
        The response object returned by `requests.get`.
    """
    return requests.get(
        f"{base_url}{endpoint}",
        headers={"token": token},  # the token is passed as a request header
        params=payload,
        timeout=timeout,
    )

# Query the 'datasets' endpoint, filtered with a start date of 2018-10-01,
# and display the HTTP status code of the reply.
response = make_req(
    'datasets',
    payload={'startdate': '2018-10-01'},
)
response.status_code

200

In [8]:
# Inspect the top-level keys of the JSON body of the last response
response.json().keys()

dict_keys(['metadata', 'results'])

In [9]:
# Inspect which fields the first entry of 'results' carries
response.json()['results'][0].keys()

dict_keys(['uid', 'mindate', 'maxdate', 'name', 'datacoverage', 'id'])

In [10]:
# Pair each returned dataset's id with its name
[
    (dataset['id'], dataset['name'])
    for dataset in response.json()['results']
]

[('GHCND', 'Daily Summaries'),
 ('GSOM', 'Global Summary of the Month'),
 ('GSOY', 'Global Summary of the Year'),
 ('NEXRAD2', 'Weather Radar (Level II)'),
 ('NEXRAD3', 'Weather Radar (Level III)'),
 ('NORMAL_ANN', 'Normals Annual/Seasonal'),
 ('NORMAL_DLY', 'Normals Daily'),
 ('NORMAL_HLY', 'Normals Hourly'),
 ('NORMAL_MLY', 'Normals Monthly'),
 ('PRECIP_15', 'Precipitation 15 Minute'),
 ('PRECIP_HLY', 'Precipitation Hourly')]

In [13]:
# Ask which data categories exist for the GHCND dataset and show the status code
response = make_req(
    'datacategories',
    {'datasetid': 'GHCND'},
)
response.status_code

200

In [14]:
# Display the 'results' list of the last response
response.json()['results']

[{'name': 'Evaporation', 'id': 'EVAP'},
 {'name': 'Land', 'id': 'LAND'},
 {'name': 'Precipitation', 'id': 'PRCP'},
 {'name': 'Sky cover & clouds', 'id': 'SKY'},
 {'name': 'Sunshine', 'id': 'SUN'},
 {'name': 'Air Temperature', 'id': 'TEMP'},
 {'name': 'Water', 'id': 'WATER'},
 {'name': 'Wind', 'id': 'WIND'},
 {'name': 'Weather Type', 'id': 'WXTYPE'}]

In [16]:
# Fetch up to 100 data types belonging to the TEMP data category
response = make_req(
    'datatypes',
    {'datacategoryid': 'TEMP', 'limit': 100},
)
response.status_code

200

In [17]:
# Build (id, name) pairs for every returned data type and keep the last five
[
    (entry['id'], entry['name'])
    for entry in response.json()['results']
][-5:]

[('MNTM', 'Monthly mean temperature'),
 ('TAVG', 'Average Temperature.'),
 ('TMAX', 'Maximum temperature'),
 ('TMIN', 'Minimum temperature'),
 ('TOBS', 'Temperature at the time of observation')]

In [19]:
# Ask which location categories are available for the GHCND dataset
response = make_req('locationcategories', payload={'datasetid': 'GHCND'})
response.status_code

200

In [20]:
import pprint
# Pretty-print the whole JSON body (metadata and results) of the last response
pprint.pprint(response.json())

{'metadata': {'resultset': {'count': 12, 'limit': 25, 'offset': 1}},
 'results': [{'id': 'CITY', 'name': 'City'},
             {'id': 'CLIM_DIV', 'name': 'Climate Division'},
             {'id': 'CLIM_REG', 'name': 'Climate Region'},
             {'id': 'CNTRY', 'name': 'Country'},
             {'id': 'CNTY', 'name': 'County'},
             {'id': 'HYD_ACC', 'name': 'Hydrologic Accounting Unit'},
             {'id': 'HYD_CAT', 'name': 'Hydrologic Cataloging Unit'},
             {'id': 'HYD_REG', 'name': 'Hydrologic Region'},
             {'id': 'HYD_SUB', 'name': 'Hydrologic Subregion'},
             {'id': 'ST', 'name': 'State'},
             {'id': 'US_TERR', 'name': 'US Territory'},
             {'id': 'ZIP', 'name': 'Zip Code'}]}


In [6]:
def get_item(name, what, endpoint, start=1, end=None):
    """
    Binary-search a name-sorted endpoint for an item whose name contains `name`.

    Every probe requests a single record (`limit` 1) at a given `offset` from
    the GHCND dataset, sorted by name. The record's lower-cased name is checked
    for `name` as a substring; otherwise a lexicographic comparison decides
    which half of the remaining offset range is searched next.

    Parameters:
        name: Text to look for (case-insensitive substring match).
        what: Dictionary of extra query parameters; merged last, so its keys
            override the defaults built here.
        endpoint: Endpoint to query (e.g. 'locations' or 'stations').
        start: First offset of the search range.
        end: Last offset of the search range. When None, it is taken from
            the record count reported by the first response.

    Returns:
        The matching item (a dictionary), an empty dictionary when nothing
        matches, or None when a request fails (the status code is printed).
    """
    name = name.lower()

    # `end is None` (not truthiness) marks "count not known yet": an end of 0 is
    # a legitimate empty range and must stop the search instead of restarting it.
    while end is None or start <= end:
        mid = (start + (1 if end is None else end)) // 2
        payload = {"datasetid": "GHCND", "sortfield": "name", "offset": mid, "limit": 1}

        response = make_req(endpoint, {**payload, **what})

        if not response.ok:
            print(f'Response not OK, status: {response.status_code}')
            return None

        body = response.json()  # parse the body once and reuse it
        results = body.get('results')
        if not results:
            # nothing at this offset (e.g. the query matched no records)
            return {}

        if end is None:
            end = body['metadata']['resultset']['count']

        current_name = results[0]['name'].lower()
        if name in current_name:
            return results[0]
        if start >= end:
            return {}
        if name < current_name:
            end = mid - 1
        else:
            start = mid + 1

    return {}

def get_location(name):
    """
    Look up a location in the CITY location category by name.

    Parameters:
        name: Text to search for in the location names.

    Returns:
        Whatever `get_item` returns for the 'locations' endpoint.
    """
    city_filter = {'locationcategoryid': 'CITY'}
    return get_item(name=name, what=city_filter, endpoint='locations')

# Look up the CITY-category location record matching 'New York' and show it
nyc = get_location('New York')
print(nyc)

{'mindate': '1843-05-01', 'maxdate': '2025-10-26', 'name': 'New York, NY US', 'datacoverage': 1, 'id': 'CITY:US360019'}


In [7]:
# Search the stations of the NYC location for the Central Park station
central_park = get_item(
    name='NY City Central Park',
    what={'locationid': nyc['id']},
    endpoint='stations',
)
print(central_park)

{'elevation': 42.7, 'mindate': '1869-01-01', 'maxdate': '2025-10-25', 'latitude': 40.77898, 'name': 'NY CITY CENTRAL PARK, NY US', 'datacoverage': 1, 'id': 'GHCND:USW00094728', 'elevationUnit': 'METERS', 'longitude': -73.96925}


In [9]:
# Request TMIN/TMAX/TAVG readings in metric units for 2018-10-01 through
# 2018-10-31 from the Central Park station, then show the status code.
response = make_req(
    'data',
    payload={
        'datasetid': 'GHCND',
        'stationid': central_park['id'],
        'locationid': nyc['id'],
        'startdate': '2018-10-01',
        'enddate': '2018-10-31',
        'datatypeid': ['TMIN', 'TMAX', 'TAVG'],
        'units': 'metric',
        'limit': 1000,
    },
)

response.status_code

200

In [10]:
import pandas as pd
# Load the 'results' records of the last response into a DataFrame and preview it
df = pd.DataFrame(response.json()['results'])
df.head()

Unnamed: 0,date,datatype,station,attributes,value
0,2018-10-01T00:00:00,TMAX,GHCND:USW00094728,",,W,2400",24.4
1,2018-10-01T00:00:00,TMIN,GHCND:USW00094728,",,W,2400",17.2
2,2018-10-02T00:00:00,TMAX,GHCND:USW00094728,",,W,2400",25.0
3,2018-10-02T00:00:00,TMIN,GHCND:USW00094728,",,W,2400",18.3
4,2018-10-03T00:00:00,TMAX,GHCND:USW00094728,",,W,2400",23.3


In [11]:
# Which data types actually came back in the response?
df['datatype'].unique()

array(['TMAX', 'TMIN'], dtype=object)

In [13]:
# Repeat the Central Park station lookup with an additional TOBS datatype filter.
# get_item gives back a falsy value when nothing matches or the request fails,
# so 'Found!' is printed only when a station is actually returned.
if get_item('NY City Central Park', {'locationid' : nyc['id'], 'datatypeid': 'TOBS'}, 'stations'):
    print('Found!')

Response not OK, status: 503


In [14]:
# Search the stations of the NYC location for one whose name contains 'LaGuardia'
laguardia = get_item(
    name='LaGuardia',
    what={'locationid': nyc['id']},
    endpoint='stations',
)
print(laguardia)

{'elevation': 3, 'mindate': '1939-10-07', 'maxdate': '2025-10-25', 'latitude': 40.77945, 'name': 'LAGUARDIA AIRPORT, NY US', 'datacoverage': 1, 'id': 'GHCND:USW00014732', 'elevationUnit': 'METERS', 'longitude': -73.88027}


In [15]:
# Request TMIN/TMAX/TAVG readings in metric units for 2018-10-01 through
# 2018-10-31 from the LaGuardia station, then show the status code.
response = make_req(
    'data',
    payload={
        'datasetid': 'GHCND',
        'stationid': laguardia['id'],
        'locationid': nyc['id'],
        'startdate': '2018-10-01',
        'enddate': '2018-10-31',
        'datatypeid': ['TMIN', 'TMAX', 'TAVG'],
        'units': 'metric',
        'limit': 1000,
    },
)

response.status_code

200

In [16]:
# Rebuild the DataFrame from the 'results' records of the latest response and preview it
df = pd.DataFrame(response.json()['results'])
df.head()

Unnamed: 0,date,datatype,station,attributes,value
0,2018-10-01T00:00:00,TAVG,GHCND:USW00014732,"H,,S,",21.2
1,2018-10-01T00:00:00,TMAX,GHCND:USW00014732,",,W,2400",25.6
2,2018-10-01T00:00:00,TMIN,GHCND:USW00014732,",,W,2400",18.3
3,2018-10-02T00:00:00,TAVG,GHCND:USW00014732,"H,,S,",22.7
4,2018-10-02T00:00:00,TMAX,GHCND:USW00014732,",,W,2400",26.1


In [17]:
# Count how many rows were returned for each data type
df['datatype'].value_counts()

datatype
TAVG    31
TMAX    31
TMIN    31
Name: count, dtype: int64

In [20]:
# Create the target directory first: `to_csv` raises an OSError when asked to
# write into a directory that does not exist (e.g. on a fresh checkout).
os.makedirs('data', exist_ok=True)
# Write the rows without the DataFrame index column
df.to_csv('data/nyc_temperatures.csv', index=False)