In [1]:
import requests
import datetime
import time
import json
import pandas as pd
import numpy as np
import os

In [2]:
from useful_functions import *

## Query daily weather data of Chicago
Data available at [API](https://darksky.net/dev/docs)

In [3]:
# Get API key (registration needed)
with open("api_key_darksky", 'r') as f:
    api_key = f.readline().strip()

# GPS coordinates of Chicago
lat = '41.836944'
long = '-87.684722'

def get_weather_from_darksky(year, month, day):
    """
    Input:
        :int year
        :int month
        :int day
    Output:
        :dict :dictionary of weather data from web api
    """
    # Get unixtime
    dt = datetime.datetime(year, month, day)
    unixtime = str(int(time.mktime(dt.timetuple())))
    
    # Request darksky.net for data
    r = requests.get('https://api.darksky.net/forecast/'+api_key+'/'+lat+','+long+','+unixtime)

    return json.loads(r.text)    

In [4]:
# Query data for a month and save to file
def save_weather_per_month(year, month):
    """
    Input:
        :int year
        :int month
    Output:
        :void
    """
    month_dict = {}
    
    days = days_in_month(year, month)
    
    for day in range(1, days+1):
        daily_dict = get_weather_from_darksky(year, month, day)
        month_dict[str(year)+'-'+str(month)+'-'+str(day)] = daily_dict
        
    with open("weather_data/"+str(year)+'-'+str(month)+'.json', 'w') as jf:
        json.dump(month_dict, jf)

In [5]:
## Global variable

YEAR = 2013

In [6]:
## Query and save yearly weather data from darksky

for m in range(1, 13):
    print(f'Dealing with month: {m}')
    if not os.path.exists("weather_data/"+str(YEAR)+'-'+str(m)+'.json'):
        print('Collecting data from darksky')
        save_weather_per_month(YEAR, m)
    else:
        print('Previous record found.')

Dealing with month: 1
Collecting data from darksky
Dealing with month: 2
Collecting data from darksky
Dealing with month: 3
Collecting data from darksky
Dealing with month: 4
Collecting data from darksky
Dealing with month: 5
Collecting data from darksky
Dealing with month: 6
Collecting data from darksky
Dealing with month: 7
Collecting data from darksky
Dealing with month: 8
Collecting data from darksky
Dealing with month: 9
Collecting data from darksky
Dealing with month: 10
Collecting data from darksky
Dealing with month: 11
Collecting data from darksky
Dealing with month: 12
Collecting data from darksky


## Read data from saved file

In [7]:
# drop info that won't be needed
labels_to_drop = ['time', 'summary', 'temperatureMin', 'temperatureMinTime', 'temperatureMax', 'temperatureMaxTime']

# dataframe for yearly weather data
weather_df = pd.DataFrame()

# save data into dataframe
for m in range(1, 13):
    with open("weather_data/"+str(YEAR)+"-"+str(m)+".json", "r") as jf:
        data = json.load(jf)
    for d in range(1, 32):
        try:
            data_dict = data[str(YEAR)+'-'+str(m)+'-'+str(d)]['daily']['data'][0]
            data_series = pd.Series(data_dict).drop(labels=labels_to_drop)
            data_series['day'] = d
            data_series['month'] = m
            data_series['year'] = YEAR
            weather_df = weather_df.append(data_series, ignore_index=True)
        except:
            pass

In [8]:
# Post processing: fill missing data
weather_df['precipType'] = weather_df['precipType'].fillna('None')
weather_df['precipAccumulation'] = weather_df['precipAccumulation'].fillna(0)
weather_df['precipIntensityMaxTime'] = weather_df['precipIntensityMaxTime'].fillna(-1)

In [9]:
# Convert unix time stamp
def convert_unix_timestamp(unixtime):
    """
    Input:
        :float unixtime
    Output:
        :float converted time
    """
    if unixtime == -1:
        return 0
    
    dt = datetime.datetime.fromtimestamp(unixtime)
    
    return dt.hour+dt.minute/60

for tc in ['apparentTemperatureHighTime', 'apparentTemperatureLowTime', 'apparentTemperatureMaxTime', 'apparentTemperatureMinTime',
           'temperatureHighTime', 'temperatureLowTime', 'uvIndexTime', 'windGustTime', 
           'sunsetTime', 'sunriseTime', 'precipIntensityMaxTime']:
    weather_df[tc] = weather_df[tc].apply(lambda x: convert_unix_timestamp(x))

In [10]:
# Add daylight_duration column
weather_df['daylight_duration'] = weather_df['sunsetTime'] - weather_df['sunriseTime']

In [11]:
# One hot encode categorical features
weather_df = pd.get_dummies(weather_df, columns=['icon', 'precipType'])

In [12]:
# Save to csv file
weather_df = weather_df.set_index(['month', 'day', 'year'])
weather_df.to_csv('weather_data/weather_'+str(YEAR)+'_chicago.csv')

In [13]:
weather_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,apparentTemperatureHigh,apparentTemperatureHighTime,apparentTemperatureLow,apparentTemperatureLowTime,apparentTemperatureMax,apparentTemperatureMaxTime,apparentTemperatureMin,apparentTemperatureMinTime,cloudCover,dewPoint,...,icon_clear-day,icon_cloudy,icon_partly-cloudy-day,icon_rain,icon_sleet,icon_snow,precipType_None,precipType_rain,precipType_sleet,precipType_snow
month,day,year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
1.0,1.0,2013.0,19.38,11.0,7.04,21.0,19.38,11.0,7.04,21.0,0.33,7.06,...,0,0,1,0,0,0,1,0,0,0
1.0,2.0,2013.0,22.44,13.0,13.76,19.0,22.44,13.0,9.48,7.0,0.24,8.56,...,0,0,1,0,0,0,1,0,0,0
1.0,3.0,2013.0,22.67,8.0,4.36,3.0,22.67,8.0,10.71,22.0,0.83,19.03,...,0,0,1,0,0,0,1,0,0,0
1.0,4.0,2013.0,29.0,13.0,17.13,5.0,29.0,13.0,4.36,3.0,0.0,16.85,...,1,0,0,0,0,0,1,0,0,0
1.0,5.0,2013.0,31.72,10.0,18.17,6.0,31.72,10.0,17.13,5.0,0.55,25.91,...,0,0,0,0,0,1,0,0,0,1


In [14]:
weather_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 365 entries, (1.0, 1.0, 2013.0) to (12.0, 31.0, 2013.0)
Data columns (total 42 columns):
apparentTemperatureHigh        365 non-null float64
apparentTemperatureHighTime    365 non-null float64
apparentTemperatureLow         365 non-null float64
apparentTemperatureLowTime     365 non-null float64
apparentTemperatureMax         365 non-null float64
apparentTemperatureMaxTime     365 non-null float64
apparentTemperatureMin         365 non-null float64
apparentTemperatureMinTime     365 non-null float64
cloudCover                     365 non-null float64
dewPoint                       365 non-null float64
humidity                       365 non-null float64
moonPhase                      365 non-null float64
precipIntensity                365 non-null float64
precipIntensityMax             365 non-null float64
precipProbability              365 non-null float64
pressure                       365 non-null float64
sunriseTime                  