# Weather Forecast - 5 day Minneapolis
---
- Makes an API call to OpenWeatherMap to retrieve the 5-day forecast
- Outputs a CSV with the data

In [13]:
import pandas as pd
import numpy as np
import requests

# API key that is interpolated into the forecast request URL as the APPID
# query parameter.
# NOTE(review): 'blocked' looks like a redacted placeholder - supply a valid
# OpenWeatherMap key before running the request cell.
api_key = 'blocked'

In [14]:
# names of the forecast fields kept for every time step
weather_cols = [
    'date_time',
    'clouds_all',
    'temp_f',
    'pressure',
    'humidity',
    'wind_speed',
    'wind_deg',
]

# start from an empty frame and give it the column labels listed above
forcast_df = pd.DataFrame().reindex(columns=weather_cols)
forcast_df

Unnamed: 0,date_time,clouds_all,temp_f,pressure,humidity,wind_speed,wind_deg


In [15]:
# base url for requests (HTTPS so the API key in the query string is not
# sent in clear text)
base_url = "https://api.openweathermap.org/data/2.5/forecast?q="
units = "imperial"
city = 'minneapolis'

# Create query url + use it to get weather json for city
query_url = f"{base_url}{city}&units={units}&APPID={api_key}"

# requests has no default timeout; without one a stalled connection would
# block this cell forever
response = requests.get(query_url, timeout=30)

# fail here with the HTTP status (e.g. a rejected API key) instead of with a
# confusing KeyError on 'list' in the next cell
response.raise_for_status()

forcast_json = response.json()

In [16]:
data = forcast_json['list']

# one accumulator list per output column
date_time, clouds_all, temp_f = [], [], []
pressure, humidity = [], []
wind_speed, wind_deg = [], []

# walk the forecast entries in order, pulling out the fields of interest
for entry in data:
    date_time.append(entry['dt_txt'])
    clouds_all.append(entry['clouds']['all'])
    temp_f.append(entry['main']['temp'])
    pressure.append(entry['main']['pressure'])
    humidity.append(entry['main']['humidity'])
    wind_speed.append(entry['wind']['speed'])
    wind_deg.append(entry['wind']['deg'])


In [17]:
# set columns in df to the data retrieved from json
forcast_df['date_time'] = date_time
forcast_df['clouds_all'] = clouds_all
forcast_df['temp_f'] = temp_f
forcast_df['pressure'] = pressure
forcast_df['humidity'] = humidity
forcast_df['wind_speed'] = wind_speed
forcast_df['wind_deg'] = wind_deg

# set date_time col to type datetime
forcast_df['date_time'] = pd.to_datetime(forcast_df['date_time'])

forcast_df.head()

Unnamed: 0,date_time,clouds_all,temp_f,pressure,humidity,wind_speed,wind_deg
0,2019-11-16 21:00:00,86,42.49,1017,68,9.93,175
1,2019-11-17 00:00:00,82,38.5,1017,78,10.49,169
2,2019-11-17 03:00:00,100,38.32,1016,80,11.1,184
3,2019-11-17 06:00:00,100,37.04,1015,84,8.41,198
4,2019-11-17 09:00:00,100,35.65,1013,95,7.4,215


In [18]:
def expand_timestamps(df, ts_column):
    '''Split a timestamp column into hour, day-of-year and month lists.

    Parameters:
        df: dataframe that holds the timestamp column.
        ts_column: name of the column to expand. Its values must expose the
            ``hour``, ``dayofyear`` and ``month`` attributes (as pandas
            Timestamps do).

    Returns:
        Tuple ``(hours, days, months)`` of three lists, one element per row,
        in row order. ``days`` holds the day of the year, not the day of the
        month.

    The rows are read in positional order, so the dataframe index does not
    need to be a continuous 0..n-1 range.'''

    # Iterate over the values themselves instead of looking each one up with
    # df[ts_column][i]: that lookup is label based and fails (or picks the
    # wrong row) whenever the index is not the default 0..n-1 range.
    timestamps = list(df[ts_column])

    hours = [ts.hour for ts in timestamps]
    days = [ts.dayofyear for ts in timestamps]
    months = [ts.month for ts in timestamps]

    return hours, days, months

In [19]:
# derive hour / day-of-year / month from date_time and store each as a column
hour_values, day_values, month_values = expand_timestamps(forcast_df, 'date_time')
forcast_df['hour'] = hour_values
forcast_df['day_of_year'] = day_values
forcast_df['month'] = month_values

In [20]:
def cyclical_encoding(time_data, time_type):
    '''Encode time elements as sine/cosine coordinates on a unit circle.

    Parameters:
        time_data: list, tuple, numpy array or pandas.Series of time elements
            (a single number also works).
        time_type: one of 'hour', 'day_of_year' or 'month'; selects the
            length of one full cycle (24, 365 or 12 respectively).

    Returns:
        Tuple ``(sin_time, cos_time)``. A pandas.Series input yields Series
        results; list and tuple inputs yield numpy arrays.

    Raises:
        KeyError: if time_type is not one of the supported names.'''

    # set max_times by time type
    max_time = {
        'hour': 24,
        'month': 12,
        'day_of_year': 365
    }
    period = max_time[time_type]

    # plain Python sequences do not support element-wise arithmetic
    # (float * list raises TypeError), so turn them into arrays first
    if isinstance(time_data, (list, tuple)):
        time_data = np.asarray(time_data)

    # angle of each element on the unit circle, computed once for both outputs
    angle = 2 * np.pi * time_data / period

    sin_time = np.sin(angle)
    cos_time = np.cos(angle)

    return sin_time, cos_time

In [21]:
# encode day, hour and month cyclically, storing a sin_* / cos_* pair for each
for source_col, suffix in (
    ('day_of_year', 'day'),
    ('hour', 'hour'),
    ('month', 'month'),
):
    sin_values, cos_values = cyclical_encoding(forcast_df[source_col], source_col)
    forcast_df[f'sin_{suffix}'] = sin_values
    forcast_df[f'cos_{suffix}'] = cos_values

forcast_df.head()

Unnamed: 0,date_time,clouds_all,temp_f,pressure,humidity,wind_speed,wind_deg,hour,day_of_year,month,sin_day,cos_day,sin_hour,cos_hour,sin_month,cos_month
0,2019-11-16 21:00:00,86,42.49,1017,68,9.93,175,21,320,11,-0.699458,0.714673,-0.707107,0.7071068,-0.5,0.866025
1,2019-11-17 00:00:00,82,38.5,1017,78,10.49,169,0,321,11,-0.687053,0.726608,0.0,1.0,-0.5,0.866025
2,2019-11-17 03:00:00,100,38.32,1016,80,11.1,184,3,321,11,-0.687053,0.726608,0.707107,0.7071068,-0.5,0.866025
3,2019-11-17 06:00:00,100,37.04,1015,84,8.41,198,6,321,11,-0.687053,0.726608,1.0,6.123234000000001e-17,-0.5,0.866025
4,2019-11-17 09:00:00,100,35.65,1013,95,7.4,215,9,321,11,-0.687053,0.726608,0.707107,-0.7071068,-0.5,0.866025


In [23]:
# keep the raw weather fields plus the hour/day encodings; the helper
# hour / day_of_year / month columns and the month encoding are dropped
cols = [
    'date_time',
    'clouds_all',
    'temp_f',
    'pressure',
    'humidity',
    'wind_speed',
    'wind_deg',
    'sin_hour',
    'cos_hour',
    'sin_day',
    'cos_day',
]
forcast_df = forcast_df[cols]
forcast_df.head()

Unnamed: 0,date_time,clouds_all,temp_f,pressure,humidity,wind_speed,wind_deg,sin_hour,cos_hour,sin_day,cos_day
0,2019-11-16 21:00:00,86,42.49,1017,68,9.93,175,-0.707107,0.7071068,-0.699458,0.714673
1,2019-11-17 00:00:00,82,38.5,1017,78,10.49,169,0.0,1.0,-0.687053,0.726608
2,2019-11-17 03:00:00,100,38.32,1016,80,11.1,184,0.707107,0.7071068,-0.687053,0.726608
3,2019-11-17 06:00:00,100,37.04,1015,84,8.41,198,1.0,6.123234000000001e-17,-0.687053,0.726608
4,2019-11-17 09:00:00,100,35.65,1013,95,7.4,215,0.707107,-0.7071068,-0.687053,0.726608


In [24]:
# write the prepared forecast table to disk (the index is written as well)
output_path = 'resources/weather_forcast_5day.csv'
forcast_df.to_csv(output_path)