In [161]:
import pandas as pd
import requests
import json
import progressbar
import time
from datetime import datetime
import pytz

In [193]:
def get_keys(path):
    """Read the JSON file at ``path`` and return its parsed contents."""
    with open(path) as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
# Load credentials from the JSON file at the relative path ../darksky.json
keys = get_keys("../darksky.json")
# Dark Sky API key; darksky_weather reads this module-level name when building request URLs
api_key = keys['api_key']

In [194]:
def darksky_weather(lat, lon, start_time, days):
    """Fetch hourly Dark Sky records for a run of consecutive days.

    One request is issued per day, beginning at ``start_time`` and stepping
    forward 24 hours at a time. The module-level ``api_key`` is interpolated
    into each request URL.

    Parameters
    ----------
    lat, lon : number
        Coordinates placed verbatim into the request URL.
    start_time : str
        First timestamp to query, in UTC, formatted ``'%d/%m/%Y %H:%M'``
        (for example ``'01/01/2015 12:00'``).
    days : int
        Number of daily requests to make.

    Returns
    -------
    list
        The entries of each response's ``hourly.data`` list, in request
        order. For a response with fewer than 8 top-level fields, the single
        string ``'No data. Status=<http status>'`` is appended instead.

    Raises
    ------
    ValueError
        If ``start_time`` does not match the expected format.
    """
    # Local import: calendar is not among the notebook's top-level imports.
    import calendar

    # Convert %d/%m/%Y %H:%M to unix time. calendar.timegm treats the parsed
    # struct_time as UTC (the documented input); time.mktime would treat it as
    # the machine's local time and shift every request by the local offset.
    start = calendar.timegm(time.strptime(start_time, '%d/%m/%Y %H:%M'))

    seconds_per_day = 60 * 60 * 24
    nyc_weather = []
    pbar = progressbar.ProgressBar()
    for date in pbar(range(start, start + days * seconds_per_day, seconds_per_day)):

        link = f'https://api.darksky.net/forecast/{api_key}/{lat},{lon},{date}'
        r = requests.get(link)
        skydata = r.json()
        # NOTE(review): a response with at least 8 top-level fields is taken to
        # be a full payload that includes 'hourly' -- confirm against the API.
        if len(skydata) >= 8:
            nyc_weather.extend(skydata['hourly']['data'])
        else:
            # Record the failure in the result list (the original appended to
            # an undefined name here and raised NameError).
            nyc_weather.append(f'No data. Status={r.status_code}')

    return nyc_weather

In [136]:
# Newark Airport coordinates
lat = 40.6895
lon = -74.1745
# darksky_weather documents its input as UTC in '%d/%m/%Y %H:%M' format
start_time = '01/01/2015 17:00' # noon in NYC (assuming EST = UTC-5 on this date)
days = 365

# Run API for 1 year: darksky_weather issues one request per day, so 365 requests
raw_weather = darksky_weather(lat, lon, start_time, days)
# Number of entries collected across all requests
len(raw_weather)

100% |########################################################################|


8760

In [186]:
# Build one row per hourly record returned by the API
weather_data = pd.DataFrame(raw_weather)

# Keep only the fields used below; .copy() yields an independent frame so the
# column assignments that follow never write into a slice of the full frame
keep_columns = ['time', 'icon', 'precipIntensity', 'temperature', 'windSpeed', 'visibility']
weather_data = weather_data[keep_columns].copy()

# Clean up icon labels: strip the '-day' / '-night' suffixes
replace_dict = {'-day':'', '-night':''}


def _simplify_icon(label):
    """Return ``str(label)`` with every key of ``replace_dict`` replaced in order."""
    label = str(label)
    for key, value in replace_dict.items():
        label = label.replace(key, value)
    return label


weather_data['icon'] = weather_data['icon'].apply(_simplify_icon)

# Convert F to Celsius (vectorized; same arithmetic as the per-row version)
weather_data['temperature'] = (weather_data['temperature'] - 32) * 5 / 9

# Convert to local NYC time. The timezone object is built once rather than
# once per row, and the date parts are read from the datetime objects instead
# of being sliced out of the formatted string.
nyc_tz = pytz.timezone('America/New_York')
local_times = [datetime.fromtimestamp(ts, tz=nyc_tz) for ts in weather_data['time']]
weather_data['nyc_time'] = [t.strftime('%d/%m/%Y %H:%M') for t in local_times]
weather_data = weather_data.drop(columns='time')
weather_data['year'] = [t.year for t in local_times]
weather_data['month'] = [t.month for t in local_times]
weather_data['day'] = [t.day for t in local_times]
weather_data['hour'] = [t.hour for t in local_times]

weather_data.head()


Unnamed: 0,icon,precipIntensity,temperature,windSpeed,visibility,nyc_time,year,month,day,hour
0,clear,0.0,-4.411111,1.66,9.997,01/01/2015 00:00,2015,1,1,0
1,clear,0.0,-4.411111,1.91,9.997,01/01/2015 01:00,2015,1,1,1
2,clear,0.0,-4.411111,2.38,9.997,01/01/2015 02:00,2015,1,1,2
3,clear,0.0,-4.988889,1.85,9.997,01/01/2015 03:00,2015,1,1,3
4,clear,0.0,-4.411111,2.77,9.997,01/01/2015 04:00,2015,1,1,4


In [187]:
# Count missing values in each column
weather_data.isna().sum()

icon               0
precipIntensity    0
temperature        0
windSpeed          0
visibility         0
nyc_time           0
year               0
month              0
day                0
hour               0
dtype: int64

In [191]:
# Share of rows per icon label (normalize=True returns fractions rather than counts)
weather_data['icon'].value_counts(normalize=True)

partly-cloudy    0.431279
clear            0.257991
cloudy           0.236301
rain             0.050571
fog              0.018950
snow             0.003767
sleet            0.001142
Name: icon, dtype: float64

In [195]:
# Export as CSV. The DataFrame index is written too (pandas default), as a
# first column with an empty header.
weather_data.to_csv('newark_weather.csv')