# ATMS 523
## Module 4 Lecture 2

### Using a Web API to obtain data

This example shows you how to use a web API to obtain data from NCEI.  It is the same dataset we obtained within Module 3, but here we are making small calls to get only the data we want.

In [1]:
#needed to make web requests
import requests

#store the data we get as a dataframe
import pandas as pd

#convert the response as a strcuctured json
import json

#mathematical operations on lists
import numpy as np

#parse the datetimes we get from NOAA
from datetime import datetime

#import time for sleeping
import time

#add the access token you got from NOAA # this is njm12-code's so don't mess with it!
Token = 'hKZMGKSdnaaJusDecxBPrPnEBPkyFAzJ'

#Enter data type and station ID from inventory - here is GHCND https://www.ncei.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt
station_id = 'GHCND:USC00118740'

In [3]:
#initialize lists to store data
dates_mintemp = []
dates_maxtemp = []
dates_precip = []
min_temps = []
max_temps = []
precip = []

#for each year from 1905-2022 where we know we have data inventory ...
for year in range(1905, 2023):
    year = str(year)
    print('working on year ' + year)
    
    url = (
        'https://www.ncdc.noaa.gov/cdo-web/api/v2/data?datasetid=GHCND'
        '&datatypeid=TMIN&datatypeid=TMAX&datatypeid=PRCP'
        '&limit=1000'
        '&stationid=' + station_id +
        '&startdate=' + year + '-01-01'
        '&enddate=' + year + '-12-31'
    )

    try:
        # Try up to 3 times in case of temporary API errors
        for attempt in range(3):
            r = requests.get(url, headers={'token': Token})
            if r.status_code == 200 and r.text.strip() != "":
                break
            else:
                print(f"  ⚠️ Attempt {attempt+1} failed (status {r.status_code}). Retrying...")
                time.sleep(2)
        else:
            print(f"  ❌ Failed to get data for {year} after 3 attempts. Skipping.")
            continue

        # Try to parse JSON response
        try:
            d = json.loads(r.text)
        except json.JSONDecodeError:
            print(f"  ❌ Invalid JSON for {year}. Skipping.")
            continue

        # Skip if there are no results
        if 'results' not in d:
            print(f"  ⚠️ No data found for {year}. Skipping.")
            continue

        #get all items in the response which are max&min temperature readings
        maxtemps = [item for item in d['results'] if item['datatype'] == 'TMAX']
        mintemps = [item for item in d['results'] if item['datatype'] == 'TMIN']
        precips = [item for item in d['results'] if item['datatype'] == 'PRCP']

        #get the date field from all readings
        dates_maxtemp += [item['date'] for item in maxtemps]
        dates_mintemp += [item['date'] for item in mintemps]
        dates_precip += [item['date'] for item in precips]

        #get the actual temperature from the returned data
        max_temps += [item['value'] for item in maxtemps]
        min_temps += [item['value'] for item in mintemps]
        precip += [item['value'] for item in precips]

        time.sleep(0.3)  # stay within API rate limits (max 5 req/sec)

    except Exception as e:
        print(f"  ❌ Error processing {year}: {e}")
        time.sleep(1)
        continue

print("✅ Finished all years!")


working on year 1905
working on year 1906
working on year 1907
working on year 1908
working on year 1909
working on year 1910
working on year 1911
working on year 1912
working on year 1913
working on year 1914
working on year 1915
working on year 1916
working on year 1917
working on year 1918
working on year 1919
working on year 1920
working on year 1921
working on year 1922
working on year 1923
working on year 1924
working on year 1925
working on year 1926
working on year 1927
working on year 1928
working on year 1929
working on year 1930
working on year 1931
working on year 1932
working on year 1933
  ⚠️ Attempt 1 failed (status 503). Retrying...
working on year 1934
working on year 1935
working on year 1936
working on year 1937
working on year 1938
working on year 1939
  ⚠️ Attempt 1 failed (status 503). Retrying...
working on year 1940
working on year 1941
working on year 1942
working on year 1943
working on year 1944
working on year 1945
  ⚠️ Attempt 1 failed (status 503). Retryin

In [7]:
r.text

'{"metadata":{"resultset":{"offset":1,"count":1094,"limit":1000}},"results":[{"date":"2022-01-01T00:00:00","datatype":"PRCP","station":"GHCND:USC00118740","attributes":",,7,0800","value":3},{"date":"2022-01-01T00:00:00","datatype":"TMAX","station":"GHCND:USC00118740","attributes":",,7,0800","value":111},{"date":"2022-01-01T00:00:00","datatype":"TMIN","station":"GHCND:USC00118740","attributes":",,7,0800","value":28},{"date":"2022-01-02T00:00:00","datatype":"PRCP","station":"GHCND:USC00118740","attributes":",,7,0800","value":91},{"date":"2022-01-02T00:00:00","datatype":"TMAX","station":"GHCND:USC00118740","attributes":",,7,0800","value":33},{"date":"2022-01-02T00:00:00","datatype":"TMIN","station":"GHCND:USC00118740","attributes":",,7,0800","value":-61},{"date":"2022-01-03T00:00:00","datatype":"PRCP","station":"GHCND:USC00118740","attributes":",,7,0800","value":0},{"date":"2022-01-03T00:00:00","datatype":"TMAX","station":"GHCND:USC00118740","attributes":",,7,0800","value":-44},{"date":"2

In [8]:
d

{'metadata': {'resultset': {'offset': 1, 'count': 1094, 'limit': 1000}},
 'results': [{'date': '2022-01-01T00:00:00',
   'datatype': 'PRCP',
   'station': 'GHCND:USC00118740',
   'attributes': ',,7,0800',
   'value': 3},
  {'date': '2022-01-01T00:00:00',
   'datatype': 'TMAX',
   'station': 'GHCND:USC00118740',
   'attributes': ',,7,0800',
   'value': 111},
  {'date': '2022-01-01T00:00:00',
   'datatype': 'TMIN',
   'station': 'GHCND:USC00118740',
   'attributes': ',,7,0800',
   'value': 28},
  {'date': '2022-01-02T00:00:00',
   'datatype': 'PRCP',
   'station': 'GHCND:USC00118740',
   'attributes': ',,7,0800',
   'value': 91},
  {'date': '2022-01-02T00:00:00',
   'datatype': 'TMAX',
   'station': 'GHCND:USC00118740',
   'attributes': ',,7,0800',
   'value': 33},
  {'date': '2022-01-02T00:00:00',
   'datatype': 'TMIN',
   'station': 'GHCND:USC00118740',
   'attributes': ',,7,0800',
   'value': -61},
  {'date': '2022-01-03T00:00:00',
   'datatype': 'PRCP',
   'station': 'GHCND:USC001187

In [None]:
#Original snesbitt code: 

#initialize lists to store data
dates_mintemp = []
dates_maxtemp = []
dates_precip = []
min_temps = []
max_temps = []
precip = []

#for each year from 1905-2022 where we know we have data inventory ...
for year in range(1905, 2023):
    year = str(year)
    print('working on year '+year)
    
    #make the api call
    r = requests.get('https://www.ncdc.noaa.gov/cdo-web/api/v2/data?datasetid=GHCND&datatypeid=TMIN&datatypeid=TMAX&datatypeid=PRCP&limit=1000&stationid='+station_id+'&startdate='+year+'-01-01&enddate='+year+'-12-31', headers={'token':Token})
    #load the api response as a json
    d = json.loads(r.text)
    

    #get all items in the response which are max&min temperature readings
    maxtemps = [item for item in d['results'] if item['datatype']=='TMAX']
    mintemps = [item for item in d['results'] if item['datatype']=='TMIN']
    precips = [item for item in d['results'] if item['datatype']=='PRCP']
    #get the date field from all average temperature readings
    dates_maxtemp += [item['date'] for item in maxtemps]
    dates_mintemp += [item['date'] for item in mintemps]
    dates_precip += [item['date'] for item in precips]
    #get the actual temperature from the returned data
    max_temps += [item['value'] for item in maxtemps]
    min_temps += [item['value'] for item in mintemps]
    precip += [item['value'] for item in precips]
    time.sleep(0.2) # API max 5 requests per second


In [4]:
#initialize dataframe
df_temp_min = pd.DataFrame()
df_temp_max = pd.DataFrame()
df_precip = pd.DataFrame()

#populate date and min and max temperature & precip fields (convert string date to datetime)
df_temp_min['date'] = [datetime.strptime(d, "%Y-%m-%dT%H:%M:%S") for d in dates_mintemp]
df_temp_min['minTemp'] = [float(v)/10.0 for v in min_temps]

df_temp_max['date'] = [datetime.strptime(d, "%Y-%m-%dT%H:%M:%S") for d in dates_maxtemp]
df_temp_max['maxTemp'] = [float(v)/10.0 for v in max_temps]

df_precip['date'] = [datetime.strptime(d, "%Y-%m-%dT%H:%M:%S") for d in dates_precip]
df_precip['precip'] = [float(v)/10.0 for v in precip]


In [5]:
#merge the dataframes
newdf_all = pd.merge(df_temp_max,df_temp_min, left_index=True, right_index=True)
newdf_all = pd.merge(newdf_all, df_precip, left_index=True, right_index=True)
newdf_all.drop(columns = ['date_x', 'date_y'], inplace=True)
newdf_all = newdf_all.set_index('date')

In [9]:
newdf_all

Unnamed: 0_level_0,maxTemp,minTemp,precip
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1905-01-01,14.4,1.7,0.0
1905-01-02,8.3,-5.0,4.8
1905-01-03,-3.9,-9.4,0.0
1905-01-04,-3.9,-14.4,0.0
1905-01-05,1.1,-14.4,0.0
...,...,...,...
2022-07-28,0.0,0.6,0.5
2022-07-29,2.8,-5.0,0.0
2022-07-30,11.1,-2.8,0.0
2022-07-31,14.4,3.9,0.0
