#### api reference page :
https://www.wunderground.com/weather/api/d/docs?d=data/history

local weather
https://stackoverflow.com/questions/4770297/convert-utc-datetime-string-to-local-datetime-with-python

In [279]:
from datetime import timedelta, date, datetime
from urllib.request import urlopen
import pandas as pd
import credentials
import json
import os.path
import time

In [280]:
def daterange(start_date, end_date):
    for n in range(int ((end_date - start_date).days)):
        yield start_date + timedelta(n)

In [281]:
def call_api(url):
    f = urlopen(url)
    return f

In [282]:
def parse_url(api_result):
    json_string = api_result.read()
    parsed_json = json.loads(json_string)
    observations = parsed_json['history']['observations']
    return observations

In [283]:
def structure_dataframe(station_id, observations):
    df = pd.DataFrame(observations)
    
    # read date into datetime object and set as index
    df_date = df['date'].apply(pd.Series)
    
    # to_date time parses to utc
    df_datetime = pd.to_datetime(df_date['pretty'])
    df['utc_date'] = df_datetime
    df = df.set_index('utc_date')
    
    # drop redundancies and columns of no interest
    df = df.drop(['date', 'utcdate', 'UV', 'softwaretype', 'solarradiation'], axis=1)
    
    # add station id column
    df['station_id'] = station_id
    
    # reorder columns for legibility
    cols = df.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    df = df[cols]
       
    return df

In [284]:
def append_day(df_day, df_day2):
    df_append = df_day.append(df_day2)
    return df_append

In [285]:
def save_file(df_day, filepath):
    if os.path.isfile(filepath):
        with open(filepath, 'a') as f:
            df_day.to_csv(f, header=False)
    else:
        df_day.to_csv(filepath)

In [286]:
# retrieve list of identified stations from csv

filename = "../output/station_id-list.csv"
station_list = open(filename).read().split('\n')
station_list.remove('')

# NO HISTORY DATA
# [KMTCORVA23, KMTCORVA21, KMTFLORE1, KMTFLORE12, 
# KMTLOLO8, KMTMISSO62, KMTMISSO74, KMTSTEVE16] 

In [287]:
# set date range

start_date = date(2017, 7, 1)
end_date = date(2017, 11, 1)

In [288]:
dates = []

for single_date in daterange(start_date, end_date):
        dates.append(single_date.strftime("history_%Y%m%d"))

In [384]:
# import api key
#api_key = credentials.api['key']
api_key = 'x'

In [514]:
%%time

# Run full list
num_exec = 0

for station_id in station_list[41:]:
    for date in dates:
        # sleep to limit API call speed
        print(datetime.now().time())
        print(station_id)
        time.sleep(6)
        
        # url returns json of weather history of date and station
        url = "http://api.wunderground.com/api/" + api_key + "/" + date + "/q/pws:" + station_id + ".json"
        print(url)
        
        api_result = call_api(url)
        
        num_exec = num_exec + 1
        print(num_exec)
        
        try:
            observations = parse_url(api_result)
            df_day = structure_dataframe(station_id, observations) 
            save_file(df_day, "../output/180418-full_run.csv")
        except:
            pass
        

14:41:45.788908
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170701/q/pws:KMTVICTO9.json
1
14:41:53.210124
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170702/q/pws:KMTVICTO9.json
2
14:42:00.567892
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170703/q/pws:KMTVICTO9.json
3
14:42:07.859509
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170704/q/pws:KMTVICTO9.json
4
14:42:14.868249
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170705/q/pws:KMTVICTO9.json
5
14:42:21.943834
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170706/q/pws:KMTVICTO9.json
6
14:42:29.018691
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170707/q/pws:KMTVICTO9.json
7
14:42:36.269890
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170708/q/pws:KMTVICTO9.json
8
14:42:43.485532
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/histo

71
14:50:23.430841
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170910/q/pws:KMTVICTO9.json
72
14:50:30.963921
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170911/q/pws:KMTVICTO9.json
73
14:50:38.130955
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170912/q/pws:KMTVICTO9.json
74
14:50:45.500149
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170913/q/pws:KMTVICTO9.json
75
14:50:53.497237
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170914/q/pws:KMTVICTO9.json
76
14:51:00.574487
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170915/q/pws:KMTVICTO9.json
77
14:51:08.258485
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170916/q/pws:KMTVICTO9.json
78
14:51:15.249084
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a3719a/history_20170917/q/pws:KMTVICTO9.json
79
14:51:22.238544
KMTVICTO9
http://api.wunderground.com/api/4be8042f62a

In [358]:
df_day.tail()

Unnamed: 0_level_0,station_id,dewpti,dewptm,heatindexi,heatindexm,hum,precip_ratei,precip_ratem,precip_totali,precip_totalm,...,tempi,tempm,wdird,wdire,wgusti,wgustm,windchilli,windchillm,wspdi,wspdm
utc_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-07-16 05:34:00,KMTHAMIL13,58.0,14.4,-9999,-9999,90,0.0,0.0,0.04,1.0,...,61.7,16.5,203,SSW,-999.0,-1607.4,-999,-999,0.0,0.0
2017-07-16 05:40:00,KMTHAMIL13,58.0,14.4,-9999,-9999,90,0.0,0.0,0.04,1.0,...,61.5,16.4,248,WSW,-999.0,-1607.4,-999,-999,0.0,0.0
2017-07-16 05:46:00,KMTHAMIL13,58.0,14.4,-9999,-9999,90,0.0,0.0,0.04,1.0,...,61.5,16.4,135,SE,-999.0,-1607.4,-999,-999,0.0,0.0
2017-07-16 05:51:00,KMTHAMIL13,58.0,14.4,-9999,-9999,90,0.0,0.0,0.04,1.0,...,61.1,16.2,270,West,-999.0,-1607.4,-999,-999,0.0,0.0
2017-07-16 05:56:00,KMTHAMIL13,57.0,13.9,-9999,-9999,91,0.0,0.0,0.04,1.0,...,60.5,15.8,180,South,-999.0,-1607.4,-999,-999,0.0,0.0


In [253]:
station_len = len(station_list)
dates_len = len(dates)

print(station_len, dates_len)
print(station_len*dates_len)

42 123
5166


In [246]:
df_day.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 251 entries, 2017-07-01 06:00:00 to 2017-07-02 05:56:00
Data columns (total 22 columns):
station_id       251 non-null object
dewpti           251 non-null object
dewptm           251 non-null object
heatindexi       251 non-null object
heatindexm       251 non-null object
hum              251 non-null object
precip_ratei     251 non-null object
precip_ratem     251 non-null object
precip_totali    251 non-null object
precip_totalm    251 non-null object
pressurei        251 non-null object
pressurem        251 non-null object
tempi            251 non-null object
tempm            251 non-null object
wdird            251 non-null object
wdire            251 non-null object
wgusti           251 non-null object
wgustm           251 non-null object
windchilli       251 non-null object
windchillm       251 non-null object
wspdi            251 non-null object
wspdm            251 non-null object
dtypes: object(22)
memory usage: 45.1+ KB


In [247]:
df_day.head()

Unnamed: 0_level_0,station_id,dewpti,dewptm,heatindexi,heatindexm,hum,precip_ratei,precip_ratem,precip_totali,precip_totalm,...,tempi,tempm,wdird,wdire,wgusti,wgustm,windchilli,windchillm,wspdi,wspdm
utc_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-07-01 06:00:00,KMTCORVA12,47.0,8.3,-9999,-9999,57,0.0,0.0,0.0,0.0,...,62.3,16.8,0,North,-999.0,-1607.4,-999,-999,1.5,2.4
2017-07-01 06:06:00,KMTCORVA12,46.8,8.2,-9999,-9999,56,0.0,0.0,0.0,0.0,...,62.7,17.1,225,SW,-999.0,-1607.4,-999,-999,0.2,0.3
2017-07-01 06:11:00,KMTCORVA12,47.0,8.3,-9999,-9999,57,0.0,0.0,0.0,0.0,...,62.3,16.8,45,NE,-999.0,-1607.4,-999,-999,1.2,1.9
2017-07-01 06:16:00,KMTCORVA12,47.2,8.4,-9999,-9999,58,0.0,0.0,0.0,0.0,...,62.0,16.7,45,NE,-999.0,-1607.4,-999,-999,2.6,4.2
2017-07-01 06:21:00,KMTCORVA12,47.0,8.3,-9999,-9999,58,0.0,0.0,0.0,0.0,...,61.8,16.6,45,NE,-999.0,-1607.4,-999,-999,1.7,2.7


In [512]:
# http://api.wunderground.com/api/4be8042f62a3719a/history_20170828/q/pws:KMTSTEVE8.json

url = ""

for station_id in station_list[40:41]:
    for date in dates[58:]:
        url = "http://api.wunderground.com/api/" + api_key + "/" + date + "/q/pws:" + station_id + ".json"
        print(url)




http://api.wunderground.com/api/4be8042f62a3719a/history_20170828/q/pws:KMTSTEVE8.json


In [505]:
# http://api.wunderground.com/api/4be8042f62a3719a/history_20170828/q/pws:KMTSTEVE8.json
for station_id in station_list[:]:
       print(station_id)

KMTCORVA9
KMTCORVA10
KMTCORVA23
KMTCORVA12
KMTCORVA15
KMTCORVA21
KMTFLORE12
KMTFLORE4
KMTFLORE5
KMTFLORE7
KMTFLORE2
KMTFLORE1
KMTFLORE17
KMTFLORE18
KMTFLORE13
KMTFLORE11
KMTFLORE3
KMTHAMIL12
MK6S5
M6S59
KMTHAMIL16
KMTHAMIL1
KMTHAMIL13
KMTHAMIL7
KMTHAMIL4
KMTLEISU2
KMTLOLO3
KMTLOLO7
KMTLOLO8
KMTMISSO44
KMTMISSO62
KMTMISSO50
KMTMISSO59
KMTMISSO41
KMTMISSO60
KMTMISSO15
KMTMISSO74
KMTSTEVE12
KMTSTEVE16
KMTSTEVE13
KMTSTEVE8
KMTVICTO9


In [400]:
len(station_list)

42

In [None]:
# last line of full_run.csv
# 2017-07-16 05:56:00,KMTHAMIL13

url = "http://api.wunderground.com/api/" + api_key + "/" + date + "/q/pws:" + station_id + ".json"