#### *Collect sunrise and sunset times for the Wunderground stations*

### Import libraries

In [177]:
# data structure
import pandas as pd
# time series
from datetime import timedelta, date, datetime
# api calling
from urllib.request import urlopen
# read json
import json
# time execution
import time
# save results
import os.path

### Load station location data

In [178]:
# Station metadata file generated through Wunderground geolookup
# (columns include id, lat, lon, city, neighborhood and distance).
filename = '../data/geolookup-mso_stev_ham.csv'
geolookup = pd.read_csv(filename)

### View location data

In [179]:
geolookup.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42 entries, 0 to 41
Data columns (total 9 columns):
city            42 non-null object
country         42 non-null object
distance_km     42 non-null int64
distance_mi     42 non-null int64
id              42 non-null object
lat             42 non-null float64
lon             42 non-null float64
neighborhood    40 non-null object
state           42 non-null object
dtypes: float64(2), int64(2), object(5)
memory usage: 3.0+ KB


In [180]:
geolookup.head(3)

Unnamed: 0,city,country,distance_km,distance_mi,id,lat,lon,neighborhood,state
0,Corvallis,US,9,5,KMTCORVA9,46.277557,-114.045059,Hamilton Heights,MT
1,Corvallis,US,5,3,KMTCORVA10,46.284927,-114.106499,All View Lane,MT
2,Corvallis,US,6,3,KMTCORVA23,46.273411,-114.086159,corvallis orchard,MT


In [181]:
# Index the station table by station id so coordinates can be looked up with
# geolookup.loc[<station id>, 'lat'/'lon'].
# The guard keeps this cell re-runnable: once 'id' is the index, calling
# set_index('id') a second time would raise KeyError.
if geolookup.index.name != 'id':
    geolookup = geolookup.set_index('id')

### Show collected wunderground data

In [182]:
# Previously collected Wunderground observations; the notebook uses its
# station_id column to decide which stations need sunrise/sunset times.
filename = '../data/wunderground-170701_171101-day_night.csv'
wund_weather = pd.read_csv(filename)

In [183]:
collected = wund_weather.station_id.unique().tolist()

In [184]:
print(collected)

['KMTCORVA9', 'KMTCORVA10', 'KMTCORVA12', 'KMTCORVA15', 'KMTFLORE12', 'KMTFLORE4', 'KMTFLORE5', 'KMTFLORE7', 'KMTFLORE2', 'KMTFLORE17', 'KMTFLORE18', 'KMTFLORE13', 'KMTFLORE11', 'KMTFLORE3', 'KMTHAMIL12', 'KMTHAMIL16', 'KMTHAMIL1', 'KMTHAMIL13', 'KMTHAMIL7', 'KMTHAMIL4', 'KMTLEISU2', 'KMTLOLO3', 'KMTLOLO7', 'KMTMISSO44', 'KMTMISSO50', 'KMTMISSO59', 'KMTMISSO41', 'KMTMISSO60', 'KMTMISSO15', 'KMTSTEVE12', 'KMTSTEVE13', 'KMTSTEVE8', 'KMTVICTO9']


### Build date array

In [185]:
def daterange(start_date, end_date):
    """Yield consecutive days from start_date up to, but excluding, end_date.

    Nothing is yielded when end_date is on or before start_date.
    """
    span_days = (end_date - start_date).days
    offset = 0
    while offset < span_days:
        yield start_date + timedelta(days=offset)
        offset += 1

# Begin one day early (6/30) to compensate for UTC; end_date itself is excluded
start_date = date(2017, 6, 30)
end_date = date(2017, 11, 1)

# One 'YYYY-MM-DD' string per day in the range
dates = [day.strftime("%Y-%m-%d") for day in daterange(start_date, end_date)]

In [187]:
print('Start date:', dates[:4])
print('End date  :', dates[-4:])

Start date: ['2017-06-30', '2017-07-01', '2017-07-02', '2017-07-03']
End date  : ['2017-10-28', '2017-10-29', '2017-10-30', '2017-10-31']


### Prepare API query inputs

### Build station geolocation array

In [188]:
# Restructure the station locations: one {'station', 'lat', 'lon'} record per
# collected station, with coordinates read from geolookup (indexed by station
# id) and stored as strings.
stations_geocode = [
    {
        'station': station,
        'lat': str(geolookup.loc[station, 'lat']),
        'lon': str(geolookup.loc[station, 'lon']),
    }
    for station in collected
]

In [189]:
# Station location DataFrame, columns in a fixed order
column_order = ['station', 'lat', 'lon']
stations = pd.DataFrame(stations_geocode)[column_order]

In [190]:
stations.head(2)

Unnamed: 0,station,lat,lon
0,KMTCORVA9,46.277557,-114.045059
1,KMTCORVA10,46.284927,-114.106499


### Build API query array

In [191]:
def build_url_df(stations, dates):
    """Build one sunrise-sunset.org request per (station, date) pair.

    Parameters
    ----------
    stations : pandas.DataFrame
        Must provide 'station', 'lat' and 'lon' columns.
    dates : iterable of str
        Date strings placed verbatim in the `date` query parameter.

    Returns
    -------
    list of dict
        Dicts with keys 'station_id', 'url' and 'date', ordered station by
        station and, within each station, in the order of `dates`.
    """
    requests = []
    for _, station_row in stations.iterrows():
        # station fields are the same for every date, so read them once
        station_id = station_row['station']
        query_prefix = ('https://api.sunrise-sunset.org/json?lat='
                        + str(station_row['lat'])
                        + '&lng=' + str(station_row['lon'])
                        + '&date=')
        for day in dates:
            requests.append({
                'station_id': station_id,
                # formatted=0 is passed through to the API unchanged
                'url': query_prefix + day + '&formatted=0',
                'date': day,
            })
    return requests

In [192]:
# Generate url array for API calls
url_batch = build_url_df(stations, dates)

In [193]:
print('Start:')
print(url_batch[0:1])
print('End:')
print(url_batch[-1:])

Start:
[{'station_id': 'KMTCORVA9', 'url': 'https://api.sunrise-sunset.org/json?lat=46.277557&lng=-114.04505900000001&date=2017-06-30&formatted=0', 'date': '2017-06-30'}]
End:
[{'station_id': 'KMTVICTO9', 'url': 'https://api.sunrise-sunset.org/json?lat=46.364567&lng=-114.242287&date=2017-10-31&formatted=0', 'date': '2017-10-31'}]


In [194]:
len(url_batch)

4092

### Call API and store results

In [None]:
def api_call(url, timeout=30):
    """Fetch `url` and return the 'results' member of its JSON response.

    Parameters
    ----------
    url : str
        Fully built request URL.
    timeout : float, optional
        Seconds urlopen may block before raising, so one stalled request
        cannot hang the whole batch. Defaults to 30.

    Returns
    -------
    The value stored under 'results' in the decoded JSON document.

    Raises
    ------
    ValueError
        If the response carries a 'status' field other than 'OK'.
    KeyError
        If the response has no 'results' member.
    """
    # the context manager closes the HTTP response even if reading fails
    with urlopen(url, timeout=timeout) as response:
        # decode explicitly: json.loads only accepts bytes from Python 3.6 on
        payload = response.read().decode('utf-8')
    parsed_json = json.loads(payload)

    # Fail here with a clear message instead of handing a non-result value
    # back to the caller. A missing 'status' field is treated as success.
    status = parsed_json.get('status', 'OK')
    if status != 'OK':
        raise ValueError('API returned status ' + repr(status) + ' for ' + url)

    return parsed_json['results']

In [None]:
def save_results(df_day, filepath):
    """Append `df_day` to the CSV at `filepath`, creating the file if needed.

    Parameters
    ----------
    df_day : pandas.DataFrame
        Rows to write; the index is not written.
    filepath : str
        Destination CSV path.

    The header row is written only when the file does not exist yet or exists
    but is empty, so the finished file has exactly one header line.
    """
    needs_header = not (os.path.isfile(filepath) and os.path.getsize(filepath) > 0)
    # Pass the path (not an already-open text handle) so pandas opens the file
    # itself in append mode and controls the line endings.
    df_day.to_csv(filepath, mode='a', header=needs_header, index=False)

In [None]:
%%timeit
successfull_calls = 0

# Loop over url_batch to call api and store results
for url in url_batch:
    # sleep to limit API call speed
    #time.sleep(3)
    
    print(url['station_id'] + '\n' + url['url'])
    result = api_call(url['url'])
    
    # include station_id
    result['station_id'] = url['station_id']
    
    # include date 
    result['date'] = url['date']
    
    df_day = pd.DataFrame.from_dict([result])

    # uncomment to save
    #save_results(df_day, '../data/sunrise_sunset-wunderground-utc.csv')
    
    successfull_calls += 1
    print('successfull_calls: ' + str(successfull_calls))