# Weather Forecast - 5 day Minneapolis
---
- Makes an API call to OpenWeatherMap to retrieve the 5-day forecast
- Outputs a CSV with the data

In [1]:
import datetime as dt
import os
from pprint import pprint

import numpy as np
import pandas as pd
import requests

# Read the OpenWeatherMap key from the environment so the real credential is
# never saved with the notebook; 'blocked' stays as the redacted fallback value.
api_key = os.environ.get('OPENWEATHER_API_KEY', 'blocked')

In [2]:
# names of the forecast fields kept in the output table
weather_cols = [
    'date_time',
    'clouds_all',
    'temp_f',
    'pressure',
    'humidity',
    'wind_speed',
    'wind_deg',
]

# start from an empty frame that already carries those column labels
forcast_df = pd.DataFrame().reindex(columns=weather_cols)
forcast_df

Unnamed: 0,date_time,clouds_all,temp_f,pressure,humidity,wind_speed,wind_deg


In [3]:
# base url for requests -- HTTPS so the API key in the query string is not
# sent in clear text
base_url = "https://api.openweathermap.org/data/2.5/forecast?q="
units = "imperial"
city = 'minneapolis'

# Create query url + use it to get weather json for city
query_url = f"{base_url}{city}&units={units}&APPID={api_key}"
forcast_response = requests.get(query_url, timeout=30)

# Fail here on an HTTP error (bad key, unknown city, ...) rather than with a
# KeyError when a later cell reads forcast_json['list']
forcast_response.raise_for_status()
forcast_json = forcast_response.json()

In [4]:
# print(query_url)

In [5]:
# the 'list' entry of the response holds one record per forecast time step
data = forcast_json['list']

# build one list per output column by pulling the matching field out of
# every forecast record, in order
date_time = [step['dt_txt'] for step in data]
clouds_all = [step['clouds']['all'] for step in data]
temp_f = [step['main']['temp'] for step in data]
pressure = [step['main']['pressure'] for step in data]
humidity = [step['main']['humidity'] for step in data]
wind_speed = [step['wind']['speed'] for step in data]
wind_deg = [step['wind']['deg'] for step in data]


In [6]:
# set columns in df to the data retrieved from json
forcast_df['date_time'] = date_time
forcast_df['clouds_all'] = clouds_all
forcast_df['temp_f'] = temp_f
forcast_df['pressure'] = pressure
forcast_df['humidity'] = humidity
forcast_df['wind_speed'] = wind_speed
forcast_df['wind_deg'] = wind_deg

# Parse the timestamp strings and shift them to Minneapolis local time.
# The timestamps are treated as UTC (OpenWeatherMap documents dt_txt as UTC).
# A timezone-aware conversion is used instead of a fixed 6-hour offset so the
# result is also right while daylight saving time is in effect (UTC-5).
utc_times = pd.to_datetime(forcast_df['date_time']).dt.tz_localize('UTC')
local_times = utc_times.dt.tz_convert('America/Chicago')

# drop the tz info again so the column stays a plain (naive) datetime column
forcast_df['date_time'] = local_times.dt.tz_localize(None)
forcast_df.head()

Unnamed: 0,date_time,clouds_all,temp_f,pressure,humidity,wind_speed,wind_deg
0,2019-11-18 15:00:00,91,37.81,1004,73,6.31,137
1,2019-11-18 18:00:00,91,36.01,1004,86,4.52,151
2,2019-11-18 21:00:00,100,35.33,1003,97,3.87,204
3,2019-11-19 00:00:00,100,36.36,1002,98,5.99,286
4,2019-11-19 03:00:00,100,37.54,1003,97,7.92,313


In [7]:
# summary of the timestamp column (row count and time span covered)
forcast_df.date_time.describe()

count                      40
unique                     40
top       2019-11-22 00:00:00
freq                        1
first     2019-11-18 15:00:00
last      2019-11-23 12:00:00
Name: date_time, dtype: object

In [8]:
def expand_timestamps(df, ts_column):
    '''Split a timestamp column into hour, day-of-year and month components.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains the timestamp column.
    ts_column : str
        Name of the column holding the timestamps (datetime values or
        anything ``pandas.to_datetime`` can parse).

    Returns
    -------
    tuple of (list, list, list)
        ``hours``, ``days`` (day of year) and ``months``, each in row order.

    The values are read positionally, so the frame's index does not need to
    be a continuous 0..n-1 range.
    '''

    # Vectorised access via .dt avoids df[col][i], which looks rows up by
    # index *label* and fails when the index has gaps or a different start.
    timestamps = pd.to_datetime(df[ts_column])

    hours = timestamps.dt.hour.tolist()
    days = timestamps.dt.dayofyear.tolist()
    months = timestamps.dt.month.tolist()

    return hours, days, months

In [9]:
# split the timestamps into their parts, then attach each part as a column
hour_values, day_values, month_values = expand_timestamps(forcast_df, 'date_time')
forcast_df['hour'] = hour_values
forcast_df['day_of_year'] = day_values
forcast_df['month'] = month_values

In [10]:
def cyclical_encoding(time_data, time_type):
    '''Encode time elements as points on the unit circle.

    Parameters
    ----------
    time_data : list, tuple, numpy.ndarray or pandas.Series
        Numeric time elements (e.g. hours, months, days of year).
    time_type : str
        One of ``'hour'``, ``'day_of_year'`` or ``'month'``; selects the
        period used for the encoding (24, 365 or 12).

    Returns
    -------
    tuple
        ``(sin_time, cos_time)``. A pandas Series input yields Series that
        keep the input index; list, tuple and array inputs yield numpy arrays.

    Raises
    ------
    KeyError
        If ``time_type`` is not one of the supported names.
    '''

    # set max_times by time type
    max_time = {
        'hour': 24,
        'month': 12,
        'day_of_year': 365
    }
    period = max_time[time_type]

    # Plain Python sequences cannot be multiplied by a float, so turn them
    # into arrays first. Series/arrays are left untouched so that a Series
    # result keeps its original index.
    if isinstance(time_data, (list, tuple)):
        time_data = np.asarray(time_data, dtype=float)

    # angle of each element on the unit circle, then its sin/cos coordinates
    angle = 2 * np.pi * time_data / period
    sin_time = np.sin(angle)
    cos_time = np.cos(angle)

    return sin_time, cos_time

In [11]:
# cyclical encoding for day of year, hour and month: each source column gets
# a sine and a cosine companion column, added in this order
encoding_targets = (
    ('sin_day', 'cos_day', 'day_of_year'),
    ('sin_hour', 'cos_hour', 'hour'),
    ('sin_month', 'cos_month', 'month'),
)
for sin_col, cos_col, source_col in encoding_targets:
    forcast_df[sin_col], forcast_df[cos_col] = cyclical_encoding(forcast_df[source_col], source_col)

forcast_df.head()

Unnamed: 0,date_time,clouds_all,temp_f,pressure,humidity,wind_speed,wind_deg,hour,day_of_year,month,sin_day,cos_day,sin_hour,cos_hour,sin_month,cos_month
0,2019-11-18 15:00:00,91,37.81,1004,73,6.31,137,15,322,11,-0.674444,0.738326,-0.707107,-0.7071068,-0.5,0.866025
1,2019-11-18 18:00:00,91,36.01,1004,86,4.52,151,18,322,11,-0.674444,0.738326,-1.0,-1.83697e-16,-0.5,0.866025
2,2019-11-18 21:00:00,100,35.33,1003,97,3.87,204,21,322,11,-0.674444,0.738326,-0.707107,0.7071068,-0.5,0.866025
3,2019-11-19 00:00:00,100,36.36,1002,98,5.99,286,0,323,11,-0.661635,0.749826,0.0,1.0,-0.5,0.866025
4,2019-11-19 03:00:00,100,37.54,1003,97,7.92,313,3,323,11,-0.661635,0.749826,0.707107,0.7071068,-0.5,0.866025


In [12]:
def get_sec(time_str):
    """Convert an 'H:M:S' formatted string into a total number of seconds."""
    hours, minutes, seconds = (int(part) for part in time_str.split(':'))
    return hours * 3600 + minutes * 60 + seconds
# Sunrise-Sunset API endpoint with fixed lat/lng; the date is appended per request
url = "https://api.sunrise-sunset.org/json?lat=44.986656&lng=-93.258133&date="
list_daylength = []
mpls_second = []

# Day length is looked up per calendar date, so remember the answer for each
# date and call the API only once per date instead of once per forecast row.
daylength_by_date = {}

for i in forcast_df['date_time']:
    # send only the date part (YYYY-MM-DD); the full timestamp contains a
    # space and a time-of-day that the lookup does not need
    date = i.strftime('%Y-%m-%d')
    if date not in daylength_by_date:
        response = requests.get(f"{url}{date}", timeout=30)
        # surface HTTP errors here instead of a confusing KeyError below
        response.raise_for_status()
        daylength_by_date[date] = response.json()['results']['day_length']
    daylength = daylength_by_date[date]
    list_daylength.append(daylength)
    print(f"{i} daylength is {daylength}")
    # store the day length as seconds for use as a numeric column
    var = get_sec(daylength)
    mpls_second.append(var)
forcast_df['dl_sec'] = mpls_second

2019-11-18 15:00:00 daylength is 09:26:01
2019-11-18 18:00:00 daylength is 09:26:01
2019-11-18 21:00:00 daylength is 09:26:01
2019-11-19 00:00:00 daylength is 09:23:50
2019-11-19 03:00:00 daylength is 09:23:50
2019-11-19 06:00:00 daylength is 09:23:50
2019-11-19 09:00:00 daylength is 09:23:50
2019-11-19 12:00:00 daylength is 09:23:50
2019-11-19 15:00:00 daylength is 09:23:50
2019-11-19 18:00:00 daylength is 09:23:50
2019-11-19 21:00:00 daylength is 09:23:50
2019-11-20 00:00:00 daylength is 09:21:42
2019-11-20 03:00:00 daylength is 09:21:42
2019-11-20 06:00:00 daylength is 09:21:42
2019-11-20 09:00:00 daylength is 09:21:42
2019-11-20 12:00:00 daylength is 09:21:42
2019-11-20 15:00:00 daylength is 09:21:42
2019-11-20 18:00:00 daylength is 09:21:42
2019-11-20 21:00:00 daylength is 09:21:42
2019-11-21 00:00:00 daylength is 09:19:37
2019-11-21 03:00:00 daylength is 09:19:37
2019-11-21 06:00:00 daylength is 09:19:37
2019-11-21 09:00:00 daylength is 09:19:37
2019-11-21 12:00:00 daylength is 0

In [13]:
# write the forecast frame (with the added time and day-length columns) to CSV
output_path = 'resources/weather_forcast_5day.csv'
forcast_df.to_csv(output_path)

In [14]:
# preview the first five rows of the finished table
forcast_df.head(n=5)

Unnamed: 0,date_time,clouds_all,temp_f,pressure,humidity,wind_speed,wind_deg,hour,day_of_year,month,sin_day,cos_day,sin_hour,cos_hour,sin_month,cos_month,dl_sec
0,2019-11-18 15:00:00,91,37.81,1004,73,6.31,137,15,322,11,-0.674444,0.738326,-0.707107,-0.7071068,-0.5,0.866025,33961
1,2019-11-18 18:00:00,91,36.01,1004,86,4.52,151,18,322,11,-0.674444,0.738326,-1.0,-1.83697e-16,-0.5,0.866025,33961
2,2019-11-18 21:00:00,100,35.33,1003,97,3.87,204,21,322,11,-0.674444,0.738326,-0.707107,0.7071068,-0.5,0.866025,33961
3,2019-11-19 00:00:00,100,36.36,1002,98,5.99,286,0,323,11,-0.661635,0.749826,0.0,1.0,-0.5,0.866025,33830
4,2019-11-19 03:00:00,100,37.54,1003,97,7.92,313,3,323,11,-0.661635,0.749826,0.707107,0.7071068,-0.5,0.866025,33830
