In [149]:
from datetime import datetime, timedelta
import os
import pickle
import time

import pandas as pd
import requests

In [223]:
def query_string(d):
    ## Input: datetime object (anything exposing .year, .month and .day)
    ## Output: string in the form 'YYYYMMDD'
    ##    Month and day are zero-padded to two digits; the year is
    ##    written as-is, without padding.
    return '{}{:02d}{:02d}'.format(d.year, d.month, d.day)

def query_dates(start_date = datetime(1993,1,1), end_date = None):
    ## Input:
    ##    start_date: datetime object; only its calendar day is used
    ##    end_date: datetime object; only its calendar day is used.
    ##        None (the default) means "today", resolved on every call.
    ## Output: list[[str,str]]
    ##    A list of pairs of 'YYYYMMDD' strings, oldest window first.
    ##    The two dates of a pair are at most 30 days apart, consecutive
    ##    pairs do not overlap, and no pair starts before start_date.
    ##    The list is empty when end_date falls before start_date.
    if end_date is None:
        # Resolve "today" per call: a default of datetime.today() in the
        # signature would be frozen at the moment the function is defined.
        end_date = datetime.today()

    # Drop the time of day. Comparing full timestamps while emitting
    # day-resolution strings could yield an inverted pair (left > right)
    # when the bounds fell on the same calendar day as start_date.
    first_day = datetime(start_date.year, start_date.month, start_date.day)
    last_day = datetime(end_date.year, end_date.month, end_date.day)

    month_skip = timedelta(days=30)
    day_skip = timedelta(days=1)

    query_times = []
    right_bound = last_day
    while right_bound >= first_day:
        # Clamp every window, including the newest one, to first_day.
        left_bound = max(right_bound - month_skip, first_day)
        query_times.append([query_string(left_bound), query_string(right_bound)])
        # The next (older) window ends the day before this one starts.
        right_bound = left_bound - day_skip

    # Windows were collected newest-first; return them oldest-first.
    query_times.reverse()
    return query_times

# The API key is a secret and must not be stored in the notebook. It is read
# from the WEATHER_API_KEY environment variable (a name chosen here; export it
# before starting the kernel). An unset variable yields an empty key, which
# the API is expected to reject.
api_key = os.environ.get('WEATHER_API_KEY', '')
# Station identifier inserted into the request URL by get_query_url.
station_code = 'KGEG'

def get_query_url(start_date, end_date):
    ## Input:
    ##    start_date: str, placed after 'startDate=' in the URL
    ##    end_date: str, placed after 'endDate=' in the URL
    ##    (presumably the 'YYYYMMDD' strings produced by query_dates)
    ## Output: str
    ##    The historical-observations URL for the module-level
    ##    station_code, authenticated with the module-level api_key.
    pieces = [
        'https://api.weather.com/v1/location/',
        station_code,
        ':9:US/observations/historical.json?apiKey=',
        api_key,
        '&units=e&startDate=',
        start_date,
        '&endDate=',
        end_date,
    ]
    return ''.join(pieces)

In [224]:
# Build the list of [start, end] date-string pairs to request, using
# query_dates' default range (starting 1993-01-01).
dates = query_dates()

In [225]:
# Column names matching the order of the values in each row built below.
# Defined before the loop so `header` exists even when `dates` is empty.
header = [
    "time", "temp", "dew_pt", "rel_humidity", "wind_dir",
    "wind_spd", "gust_spd", "pressure", "precipitation", "wx_phase",
]

# Seconds to wait on a single HTTP request before giving up; without a
# timeout, requests can block forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30
# Pause between consecutive requests.
REQUEST_PAUSE_SECONDS = 3

table = []
for date_pair in dates:
    url = get_query_url(date_pair[0], date_pair[1])

    reply = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Surface HTTP errors (bad key, rate limiting, ...) as an HTTPError here
    # instead of as a confusing KeyError / JSON decode error further down.
    reply.raise_for_status()
    response = reply.json()["observations"]

    # Order the observations of this window by their timestamp.
    weather_data = sorted(response, key=lambda k: k["valid_time_gmt"])

    for item in weather_data:
        row = [
            # NOTE: fromtimestamp() returns a naive datetime in the kernel's
            # local timezone, not in GMT.
            datetime.fromtimestamp(item["valid_time_gmt"]),
            item["temp"],
            item["dewPt"],
            item["rh"],
            item["wdir_cardinal"],
            item["wspd"],
            item["gust"],
            item["pressure"],
            item["precip_total"],
            item["wx_phrase"],
        ]

        table.append(row)

    time.sleep(REQUEST_PAUSE_SECONDS)

In [226]:
# Assemble the collected rows into a DataFrame, one column per header entry.
df = pd.DataFrame(data=table, columns=header)

In [227]:
# Serialize the DataFrame to 'wx-underground.pkl' in the working directory.
with open('wx-underground.pkl', 'wb') as pkl_file:
    pickle.dump(df, pkl_file)

In [233]:
# Preview the first 10 rows of the assembled table.
df.head(10)


Unnamed: 0,time,temp,dew_pt,rel_humidity,wind_dir,wind_spd,gust_spd,pressure,precipitation,wx_phase
0,1993-01-01 01:00:00,21.0,19.0,92.0,ENE,13.0,,27.47,,Cloudy
1,1993-01-01 02:00:00,21.0,19.0,92.0,E,5.0,,27.47,,Cloudy
2,1993-01-01 03:00:00,21.0,19.0,92.0,E,3.0,,27.48,,Cloudy
3,1993-01-01 04:00:00,22.0,19.0,89.0,CALM,,,27.48,,Cloudy
4,1993-01-01 05:00:00,21.0,18.0,88.0,ENE,6.0,,27.45,,Cloudy
5,1993-01-01 06:00:00,21.0,18.0,88.0,NE,7.0,,27.45,,Cloudy
6,1993-01-01 06:42:00,,,,NE,6.0,,27.37,,Cloudy
7,1993-01-01 07:00:00,21.0,18.0,88.0,NE,5.0,,27.45,,Cloudy
8,1993-01-01 08:00:00,20.0,19.0,96.0,NNE,7.0,,27.44,,Cloudy
9,1993-01-01 08:36:00,,,,NE,5.0,,27.37,,Cloudy
