# Using the NCEI API
Paste your NCEI API token in place of the `PASTE_YOUR_TOKEN_HERE` placeholder in the `make_request` function below.

In [1]:
import requests

def make_request(endpoint, payload=None):
    """Make a GET request to a specific endpoint on the weather API.

    The API token is sent in the `token` request header. It is read from
    the `NCEI_TOKEN` environment variable when that is set and non-empty;
    otherwise the placeholder string below is used, so pasting a token
    over the placeholder keeps working.

    Parameters:
        endpoint: path appended to the API base URL (e.g. 'datasets').
        payload: optional dict passed to `requests.get` as query parameters.

    Returns:
        Whatever `requests.get` returns for the call (the response object).
    """
    import os  # local import keeps this cell self-contained

    # prefer the environment so a real token never has to be saved in the notebook
    token = os.environ.get('NCEI_TOKEN') or 'PASTE_YOUR_TOKEN_HERE'
    return requests.get(
        f'https://www.ncdc.noaa.gov/cdo-web/api/v2/{endpoint}',
        headers={
            'token': token
        },
        params=payload
    )

## Datasets available

In [2]:
# query the datasets endpoint, filtered with a start date of 2018-10-01
response = make_request(
    'datasets',
    payload={'startdate': '2018-10-01'}
)
response.status_code

200

### Get the keys of the result

In [3]:
# iterating a dict yields its keys, so this lists the top-level keys of the JSON
list(response.json())

['metadata', 'results']

In [4]:
# show the metadata section of the JSON response
response.json()['metadata']

{'resultset': {'offset': 1, 'count': 11, 'limit': 25}}

### Figure out what data is in the result

In [5]:
# field names carried by the first entry in the results list
list(response.json()['results'][0].keys())

['uid', 'mindate', 'maxdate', 'name', 'datacoverage', 'id']

### Parse the result

In [6]:
# pair every dataset's id with its name
[
    (entry['id'], entry['name'])
    for entry in response.json()['results']
]

[('GHCND', 'Daily Summaries'),
 ('GSOM', 'Global Summary of the Month'),
 ('GSOY', 'Global Summary of the Year'),
 ('NEXRAD2', 'Weather Radar (Level II)'),
 ('NEXRAD3', 'Weather Radar (Level III)'),
 ('NORMAL_ANN', 'Normals Annual/Seasonal'),
 ('NORMAL_DLY', 'Normals Daily'),
 ('NORMAL_HLY', 'Normals Hourly'),
 ('NORMAL_MLY', 'Normals Monthly'),
 ('PRECIP_15', 'Precipitation 15 Minute'),
 ('PRECIP_HLY', 'Precipitation Hourly')]

## Figure out which data category we want

In [7]:
# look up the data categories available for the GHCND dataset
response = make_request('datacategories', payload={'datasetid': 'GHCND'})
response.status_code

200

In [8]:
# show the results section of the data categories response
response.json()['results']

[{'name': 'Evaporation', 'id': 'EVAP'},
 {'name': 'Land', 'id': 'LAND'},
 {'name': 'Precipitation', 'id': 'PRCP'},
 {'name': 'Sky cover & clouds', 'id': 'SKY'},
 {'name': 'Sunshine', 'id': 'SUN'},
 {'name': 'Air Temperature', 'id': 'TEMP'},
 {'name': 'Water', 'id': 'WATER'},
 {'name': 'Wind', 'id': 'WIND'},
 {'name': 'Weather Type', 'id': 'WXTYPE'}]

## Grab the data type ID for the Temperature category

In [9]:
# look up the data types in the TEMP category (limit raised to 100)
response = make_request(
    'datatypes',
    payload={'datacategoryid': 'TEMP', 'limit': 100}
)
response.status_code

200

In [10]:
# (id, name) pairs for the data types; keep only the last 5
[
    (datatype['id'], datatype['name'])
    for datatype in response.json()['results']
][-5:]

[('MNTM', 'Monthly mean temperature'),
 ('TAVG', 'Average Temperature.'),
 ('TMAX', 'Maximum temperature'),
 ('TMIN', 'Minimum temperature'),
 ('TOBS', 'Temperature at the time of observation')]

## Determine which Location Category we want

In [11]:
# look up the location categories available for the dataset
dataset_id = 'GHCND'
response = make_request('locationcategories', payload={'datasetid': dataset_id})
response.status_code

200

In [12]:
# show the whole JSON response (metadata plus results)
response.json()

{'metadata': {'resultset': {'offset': 1, 'count': 12, 'limit': 25}},
 'results': [{'name': 'City', 'id': 'CITY'},
  {'name': 'Climate Division', 'id': 'CLIM_DIV'},
  {'name': 'Climate Region', 'id': 'CLIM_REG'},
  {'name': 'Country', 'id': 'CNTRY'},
  {'name': 'County', 'id': 'CNTY'},
  {'name': 'Hydrologic Accounting Unit', 'id': 'HYD_ACC'},
  {'name': 'Hydrologic Cataloging Unit', 'id': 'HYD_CAT'},
  {'name': 'Hydrologic Region', 'id': 'HYD_REG'},
  {'name': 'Hydrologic Subregion', 'id': 'HYD_SUB'},
  {'name': 'State', 'id': 'ST'},
  {'name': 'US Territory', 'id': 'US_TERR'},
  {'name': 'Zip Code', 'id': 'ZIP'}]}

## Get NYC Location ID

In [13]:
def get_location(name, start=1, end=None):
    """Grab the JSON for the location by name using binary search.

    Probes the 'locations' endpoint (GHCND dataset, CITY category, sorted
    by name) one entry at a time, narrowing the offset range [start, end]
    until an entry whose name contains `name` is found.

    Parameters:
        name: text to look for; a probed entry matches when `name` is a
            substring of its name. Ordering comparisons use plain string
            comparison against the probed entry's name.
        start: first offset of the search range (offsets start at 1).
        end: last offset of the search range. None (or 0) means unknown;
            it is then taken from the `count` reported by the first response.

    Returns:
        The matching result dict, the string 'Does not exist!' when the
        range is exhausted without a match, or None (after printing the
        status code) when a response is not OK.
    """
    if not end:
        end = None  # a falsy end has always meant "not known yet"

    # Iterating (rather than recursing) with an explicit start <= end check
    # guarantees termination when the name is absent from the listing.
    while end is None or start <= end:
        # until the total is known, probe the first offset of the range
        mid = start if end is None else (start + end) // 2
        payload = {
            'datasetid' : 'GHCND',
            'locationcategoryid' : 'CITY',
            'sortfield' : 'name',
            'offset' : mid,
            'limit' : 1
        }
        response = make_request('locations', payload)
        if not response.ok:
            print(f'Response not OK, status: {response.status_code}')
            return None

        data = response.json()  # parse the body once per probe
        if end is None:
            end = data.get('metadata', {}).get('resultset', {}).get('count', 0)

        results = data.get('results')
        if not results:
            # nothing at this offset, so there is nothing left to compare against
            return 'Does not exist!'

        current = results[0]
        current_name = current['name']
        if name in current_name:
            return current
        if name < current_name:
            end = mid - 1
        else:
            start = mid + 1

    return 'Does not exist!'

In [14]:
# look up the location entry whose name contains 'New York' to get its id
get_location('New York')

{'mindate': '1869-01-01',
 'maxdate': '2018-11-25',
 'name': 'New York, NY US',
 'datacoverage': 1,
 'id': 'CITY:US360019'}

## Request the temperature data

In [15]:
# request the October 2018 daily summaries for the NYC location
city_id = 'CITY:US360019'
dataset_id = 'GHCND'
response = make_request(
    'data',
    payload={
        'datasetid': dataset_id,
        'stationid': 'GHCND:USC00280907',
        'locationid': city_id,
        'startdate': '2018-10-01',
        'enddate': '2018-10-31',
        # TOBS = temperature at time of observation, TMIN = minimum, TMAX = maximum
        'datatypeid': ['TOBS', 'TMIN', 'TMAX'],
        'units': 'metric',
        'limit': 100
    }
)
response.status_code

200

## Create a DataFrame

In [16]:
import pandas as pd

# build a DataFrame from the list under 'results' in the response
df = pd.DataFrame(data=response.json()['results'])
df.head(5)

Unnamed: 0,attributes,datatype,date,station,value
0,",,H,0700",TMAX,2018-10-01T00:00:00,GHCND:USC00280907,21.1
1,",,H,0700",TMIN,2018-10-01T00:00:00,GHCND:USC00280907,8.9
2,",,H,0700",TOBS,2018-10-01T00:00:00,GHCND:USC00280907,13.9
3,",,H,0700",TMAX,2018-10-02T00:00:00,GHCND:USC00280907,23.9
4,",,H,0700",TMIN,2018-10-02T00:00:00,GHCND:USC00280907,13.9
