In [82]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import requests
import datetime

In [83]:
def get_raw_forecasts(day):
    '''
    Get raw text from weather.com's 10 day forecast, for the day passed into the function.
    Day = 1 will be today.
    
    Input: int
    Day between 1 and 15, representing the day of the desired forecast.
    
    Output: list of strings
    Raw text for the day chosen, in the following order:
        [daymonth date, weather forecast, hi/lo temp, % chance precipitation, 
         wind speed and direction, humidity]
    '''
    if day > 15 or type(day) != int or day <= 0:
        raise ValueError('the variable day must be an integer between 1 and 15')
    page_link = 'https://weather.com/weather/tenday/l/USWA0413:1:US'
    page_response = requests.get(page_link, timeout=5)
    page_content = BeautifulSoup(page_response.content, "html.parser")
    
    web_predictions = []
    for i in range(0,105):
        predictions = page_content.find_all('td')[i].text
        web_predictions.append(predictions)
    return web_predictions[(day*7 - 6):(day*7)]

In [84]:
get_raw_forecasts(1)

['TodayNOV 28', 'Snow Showers', '35°29°', '80%', 'ESE 5 mph ', '95%']

In [85]:
get_raw_forecasts(15)

['WedDEC 12', 'Snow Showers', '30°26°', '50%', 'SSE 4 mph ', '93%']

In [86]:
get_raw_forecasts(10)

['FriDEC 7', 'AM Snow Showers', '25°20°', '40%', 'ESE 5 mph ', '74%']

In [87]:
forecasts = []
for day in range(1,16):
    forecasts.append(get_raw_forecasts(day))

In [88]:
forecasts

[['TodayNOV 28', 'Snow Showers', '35°29°', '80%', 'ESE 5 mph ', '95%'],
 ['Thu\nNOV 29', 'Mostly Cloudy', '36°28°', '10%', 'NE 2 mph ', '92%'],
 ['FriNOV 30', 'Snow Showers', '31°24°', '60%', 'WNW 3 mph ', '95%'],
 ['SatDEC 1', 'Snow Showers', '30°18°', '40%', 'N 3 mph ', '89%'],
 ['Sun\nDEC 2', 'Partly Cloudy', '29°14°', '20%', 'NNW 3 mph ', '89%'],
 ['Mon\nDEC 3', 'Partly Cloudy', '27°16°', '10%', 'E 5 mph ', '84%'],
 ['Tue\nDEC 4', 'Partly Cloudy', '25°16°', '10%', 'E 7 mph ', '77%'],
 ['WedDEC 5', 'Sunny', '25°15°', '0%', 'E 6 mph ', '71%'],
 ['ThuDEC 6', 'Sunny', '25°15°', '10%', 'ESE 4 mph ', '69%'],
 ['FriDEC 7', 'AM Snow Showers', '25°20°', '40%', 'ESE 5 mph ', '74%'],
 ['SatDEC 8', 'Snow Showers', '27°24°', '50%', 'ESE 6 mph ', '81%'],
 ['SunDEC 9', 'Snow Showers', '29°25°', '50%', 'SE 6 mph ', '87%'],
 ['MonDEC 10', 'Snow Showers', '30°25°', '50%', 'SE 5 mph ', '91%'],
 ['TueDEC 11', 'Snow Showers', '30°26°', '50%', 'SW 4 mph ', '93%'],
 ['WedDEC 12', 'Snow Showers', '30°26°'

In [89]:
todays_date = datetime.datetime.now().date()
index = pd.date_range(todays_date, periods=15, freq='D')

columns = ['date','weather','temp', 'precipitation', 'wind', 'humidity']

In [90]:
forecasts_df = pd.DataFrame(forecasts, index=index, columns=columns)

In [91]:
forecasts_df

Unnamed: 0,date,weather,temp,precipitation,wind,humidity
2018-11-28,TodayNOV 28,Snow Showers,35°29°,80%,ESE 5 mph,95%
2018-11-29,Thu\nNOV 29,Mostly Cloudy,36°28°,10%,NE 2 mph,92%
2018-11-30,FriNOV 30,Snow Showers,31°24°,60%,WNW 3 mph,95%
2018-12-01,SatDEC 1,Snow Showers,30°18°,40%,N 3 mph,89%
2018-12-02,Sun\nDEC 2,Partly Cloudy,29°14°,20%,NNW 3 mph,89%
2018-12-03,Mon\nDEC 3,Partly Cloudy,27°16°,10%,E 5 mph,84%
2018-12-04,Tue\nDEC 4,Partly Cloudy,25°16°,10%,E 7 mph,77%
2018-12-05,WedDEC 5,Sunny,25°15°,0%,E 6 mph,71%
2018-12-06,ThuDEC 6,Sunny,25°15°,10%,ESE 4 mph,69%
2018-12-07,FriDEC 7,AM Snow Showers,25°20°,40%,ESE 5 mph,74%


In [92]:
forecasts_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 15 entries, 2018-11-28 to 2018-12-12
Freq: D
Data columns (total 6 columns):
date             15 non-null object
weather          15 non-null object
temp             15 non-null object
precipitation    15 non-null object
wind             15 non-null object
humidity         15 non-null object
dtypes: object(6)
memory usage: 840.0+ bytes


In [93]:
def get_hi_temperature(temp_string):
    '''
    Take the hi/lo string from weather.com and convert to an integer of the hi.
    Input: string
    Output: int
    '''
    numbers = [str(num) for num in range(0,10)]
    temp = ''
    for char in temp_string:
        if char in numbers:
            temp += char
        else:
            break
    return int(temp)

In [94]:
forecasts_df['high'] = forecasts_df['temp'].map(get_hi_temperature)

In [95]:
forecasts_df

Unnamed: 0,date,weather,temp,precipitation,wind,humidity,high
2018-11-28,TodayNOV 28,Snow Showers,35°29°,80%,ESE 5 mph,95%,35
2018-11-29,Thu\nNOV 29,Mostly Cloudy,36°28°,10%,NE 2 mph,92%,36
2018-11-30,FriNOV 30,Snow Showers,31°24°,60%,WNW 3 mph,95%,31
2018-12-01,SatDEC 1,Snow Showers,30°18°,40%,N 3 mph,89%,30
2018-12-02,Sun\nDEC 2,Partly Cloudy,29°14°,20%,NNW 3 mph,89%,29
2018-12-03,Mon\nDEC 3,Partly Cloudy,27°16°,10%,E 5 mph,84%,27
2018-12-04,Tue\nDEC 4,Partly Cloudy,25°16°,10%,E 7 mph,77%,25
2018-12-05,WedDEC 5,Sunny,25°15°,0%,E 6 mph,71%,25
2018-12-06,ThuDEC 6,Sunny,25°15°,10%,ESE 4 mph,69%,25
2018-12-07,FriDEC 7,AM Snow Showers,25°20°,40%,ESE 5 mph,74%,25


In [96]:
def get_low_temperature(temp_string):
    '''
    Take the hi/lo string from weather.com and convert to an integer of the
    low temperature.
    Input: string
    Output: int
    '''
    numbers = [str(num) for num in range(0,10)]
    temp = ''
    hilo = temp_string
    for char in reversed(hilo[:-1]):
        if char in numbers:
            temp = char + temp
        else:
            break
    return int(temp)

In [97]:
forecasts_df['low'] = forecasts_df['temp'].map(get_low_temperature)

In [98]:
forecasts_df

Unnamed: 0,date,weather,temp,precipitation,wind,humidity,high,low
2018-11-28,TodayNOV 28,Snow Showers,35°29°,80%,ESE 5 mph,95%,35,29
2018-11-29,Thu\nNOV 29,Mostly Cloudy,36°28°,10%,NE 2 mph,92%,36,28
2018-11-30,FriNOV 30,Snow Showers,31°24°,60%,WNW 3 mph,95%,31,24
2018-12-01,SatDEC 1,Snow Showers,30°18°,40%,N 3 mph,89%,30,18
2018-12-02,Sun\nDEC 2,Partly Cloudy,29°14°,20%,NNW 3 mph,89%,29,14
2018-12-03,Mon\nDEC 3,Partly Cloudy,27°16°,10%,E 5 mph,84%,27,16
2018-12-04,Tue\nDEC 4,Partly Cloudy,25°16°,10%,E 7 mph,77%,25,16
2018-12-05,WedDEC 5,Sunny,25°15°,0%,E 6 mph,71%,25,15
2018-12-06,ThuDEC 6,Sunny,25°15°,10%,ESE 4 mph,69%,25,15
2018-12-07,FriDEC 7,AM Snow Showers,25°20°,40%,ESE 5 mph,74%,25,20


In [99]:
forecasts_df['average'] = ((forecasts_df['high'] + forecasts_df['low']) / 2)

In [100]:
forecasts_df

Unnamed: 0,date,weather,temp,precipitation,wind,humidity,high,low,average
2018-11-28,TodayNOV 28,Snow Showers,35°29°,80%,ESE 5 mph,95%,35,29,32.0
2018-11-29,Thu\nNOV 29,Mostly Cloudy,36°28°,10%,NE 2 mph,92%,36,28,32.0
2018-11-30,FriNOV 30,Snow Showers,31°24°,60%,WNW 3 mph,95%,31,24,27.5
2018-12-01,SatDEC 1,Snow Showers,30°18°,40%,N 3 mph,89%,30,18,24.0
2018-12-02,Sun\nDEC 2,Partly Cloudy,29°14°,20%,NNW 3 mph,89%,29,14,21.5
2018-12-03,Mon\nDEC 3,Partly Cloudy,27°16°,10%,E 5 mph,84%,27,16,21.5
2018-12-04,Tue\nDEC 4,Partly Cloudy,25°16°,10%,E 7 mph,77%,25,16,20.5
2018-12-05,WedDEC 5,Sunny,25°15°,0%,E 6 mph,71%,25,15,20.0
2018-12-06,ThuDEC 6,Sunny,25°15°,10%,ESE 4 mph,69%,25,15,20.0
2018-12-07,FriDEC 7,AM Snow Showers,25°20°,40%,ESE 5 mph,74%,25,20,22.5


In [102]:
forecasts_df = forecasts_df.drop(['temp', 'high', 'low'], axis=1)

In [103]:
forecasts_df

Unnamed: 0,date,weather,precipitation,wind,humidity,average
2018-11-28,TodayNOV 28,Snow Showers,80%,ESE 5 mph,95%,32.0
2018-11-29,Thu\nNOV 29,Mostly Cloudy,10%,NE 2 mph,92%,32.0
2018-11-30,FriNOV 30,Snow Showers,60%,WNW 3 mph,95%,27.5
2018-12-01,SatDEC 1,Snow Showers,40%,N 3 mph,89%,24.0
2018-12-02,Sun\nDEC 2,Partly Cloudy,20%,NNW 3 mph,89%,21.5
2018-12-03,Mon\nDEC 3,Partly Cloudy,10%,E 5 mph,84%,21.5
2018-12-04,Tue\nDEC 4,Partly Cloudy,10%,E 7 mph,77%,20.5
2018-12-05,WedDEC 5,Sunny,0%,E 6 mph,71%,20.0
2018-12-06,ThuDEC 6,Sunny,10%,ESE 4 mph,69%,20.0
2018-12-07,FriDEC 7,AM Snow Showers,40%,ESE 5 mph,74%,22.5


In [104]:
forecasts_df.rename(columns={'average':'temp'}, inplace=True)

In [105]:
forecasts_df

Unnamed: 0,date,weather,precipitation,wind,humidity,temp
2018-11-28,TodayNOV 28,Snow Showers,80%,ESE 5 mph,95%,32.0
2018-11-29,Thu\nNOV 29,Mostly Cloudy,10%,NE 2 mph,92%,32.0
2018-11-30,FriNOV 30,Snow Showers,60%,WNW 3 mph,95%,27.5
2018-12-01,SatDEC 1,Snow Showers,40%,N 3 mph,89%,24.0
2018-12-02,Sun\nDEC 2,Partly Cloudy,20%,NNW 3 mph,89%,21.5
2018-12-03,Mon\nDEC 3,Partly Cloudy,10%,E 5 mph,84%,21.5
2018-12-04,Tue\nDEC 4,Partly Cloudy,10%,E 7 mph,77%,20.5
2018-12-05,WedDEC 5,Sunny,0%,E 6 mph,71%,20.0
2018-12-06,ThuDEC 6,Sunny,10%,ESE 4 mph,69%,20.0
2018-12-07,FriDEC 7,AM Snow Showers,40%,ESE 5 mph,74%,22.5


Re-use get_hi_temperature function to get just the integer from precipitation

In [107]:
forecasts_df['precip'] = forecasts_df['precipitation'].map(get_hi_temperature)

In [108]:
forecasts_df

Unnamed: 0,date,weather,precipitation,wind,humidity,temp,precip
2018-11-28,TodayNOV 28,Snow Showers,80%,ESE 5 mph,95%,32.0,80
2018-11-29,Thu\nNOV 29,Mostly Cloudy,10%,NE 2 mph,92%,32.0,10
2018-11-30,FriNOV 30,Snow Showers,60%,WNW 3 mph,95%,27.5,60
2018-12-01,SatDEC 1,Snow Showers,40%,N 3 mph,89%,24.0,40
2018-12-02,Sun\nDEC 2,Partly Cloudy,20%,NNW 3 mph,89%,21.5,20
2018-12-03,Mon\nDEC 3,Partly Cloudy,10%,E 5 mph,84%,21.5,10
2018-12-04,Tue\nDEC 4,Partly Cloudy,10%,E 7 mph,77%,20.5,10
2018-12-05,WedDEC 5,Sunny,0%,E 6 mph,71%,20.0,0
2018-12-06,ThuDEC 6,Sunny,10%,ESE 4 mph,69%,20.0,10
2018-12-07,FriDEC 7,AM Snow Showers,40%,ESE 5 mph,74%,22.5,40


In [109]:
forecasts_df['is_precipitating'] = forecasts_df['precip'].apply(lambda x: 1 if (x >= 30) else 0)

In [110]:
forecasts_df

Unnamed: 0,date,weather,precipitation,wind,humidity,temp,precip,is_precipitating
2018-11-28,TodayNOV 28,Snow Showers,80%,ESE 5 mph,95%,32.0,80,1
2018-11-29,Thu\nNOV 29,Mostly Cloudy,10%,NE 2 mph,92%,32.0,10,0
2018-11-30,FriNOV 30,Snow Showers,60%,WNW 3 mph,95%,27.5,60,1
2018-12-01,SatDEC 1,Snow Showers,40%,N 3 mph,89%,24.0,40,1
2018-12-02,Sun\nDEC 2,Partly Cloudy,20%,NNW 3 mph,89%,21.5,20,0
2018-12-03,Mon\nDEC 3,Partly Cloudy,10%,E 5 mph,84%,21.5,10,0
2018-12-04,Tue\nDEC 4,Partly Cloudy,10%,E 7 mph,77%,20.5,10,0
2018-12-05,WedDEC 5,Sunny,0%,E 6 mph,71%,20.0,0,0
2018-12-06,ThuDEC 6,Sunny,10%,ESE 4 mph,69%,20.0,10,0
2018-12-07,FriDEC 7,AM Snow Showers,40%,ESE 5 mph,74%,22.5,40,1


In [111]:
forecasts_df = forecasts_df.drop(['precipitation', 'precip'], axis=1)

In [112]:
forecasts_df

Unnamed: 0,date,weather,wind,humidity,temp,is_precipitating
2018-11-28,TodayNOV 28,Snow Showers,ESE 5 mph,95%,32.0,1
2018-11-29,Thu\nNOV 29,Mostly Cloudy,NE 2 mph,92%,32.0,0
2018-11-30,FriNOV 30,Snow Showers,WNW 3 mph,95%,27.5,1
2018-12-01,SatDEC 1,Snow Showers,N 3 mph,89%,24.0,1
2018-12-02,Sun\nDEC 2,Partly Cloudy,NNW 3 mph,89%,21.5,0
2018-12-03,Mon\nDEC 3,Partly Cloudy,E 5 mph,84%,21.5,0
2018-12-04,Tue\nDEC 4,Partly Cloudy,E 7 mph,77%,20.5,0
2018-12-05,WedDEC 5,Sunny,E 6 mph,71%,20.0,0
2018-12-06,ThuDEC 6,Sunny,ESE 4 mph,69%,20.0,0
2018-12-07,FriDEC 7,AM Snow Showers,ESE 5 mph,74%,22.5,1


In [113]:
forecasts_df.rename(columns={'is_precipitating':'precipitation'}, inplace=True)

In [114]:
forecasts_df

Unnamed: 0,date,weather,wind,humidity,temp,precipitation
2018-11-28,TodayNOV 28,Snow Showers,ESE 5 mph,95%,32.0,1
2018-11-29,Thu\nNOV 29,Mostly Cloudy,NE 2 mph,92%,32.0,0
2018-11-30,FriNOV 30,Snow Showers,WNW 3 mph,95%,27.5,1
2018-12-01,SatDEC 1,Snow Showers,N 3 mph,89%,24.0,1
2018-12-02,Sun\nDEC 2,Partly Cloudy,NNW 3 mph,89%,21.5,0
2018-12-03,Mon\nDEC 3,Partly Cloudy,E 5 mph,84%,21.5,0
2018-12-04,Tue\nDEC 4,Partly Cloudy,E 7 mph,77%,20.5,0
2018-12-05,WedDEC 5,Sunny,E 6 mph,71%,20.0,0
2018-12-06,ThuDEC 6,Sunny,ESE 4 mph,69%,20.0,0
2018-12-07,FriDEC 7,AM Snow Showers,ESE 5 mph,74%,22.5,1


In [117]:
def get_wind_speed(temp_string):
    '''
    Take the wind string from weather.com and convert to an integer of the wind speed.
    Input: string
    Output: int
    '''
    numbers = [str(num) for num in range(0,10)]
    temp = ''
    for char in temp_string:
        if char in numbers:
            temp += char
    return int(temp)

In [118]:
forecasts_df['wind_int'] = forecasts_df['wind'].map(get_wind_speed)

In [120]:
forecasts_df['windy'] = forecasts_df['wind_int'].apply(lambda x: 1 if (x >= 10) else 0)

In [121]:
forecasts_df

Unnamed: 0,date,weather,wind,humidity,temp,precipitation,wind_int,windy
2018-11-28,TodayNOV 28,Snow Showers,ESE 5 mph,95%,32.0,1,5,0
2018-11-29,Thu\nNOV 29,Mostly Cloudy,NE 2 mph,92%,32.0,0,2,0
2018-11-30,FriNOV 30,Snow Showers,WNW 3 mph,95%,27.5,1,3,0
2018-12-01,SatDEC 1,Snow Showers,N 3 mph,89%,24.0,1,3,0
2018-12-02,Sun\nDEC 2,Partly Cloudy,NNW 3 mph,89%,21.5,0,3,0
2018-12-03,Mon\nDEC 3,Partly Cloudy,E 5 mph,84%,21.5,0,5,0
2018-12-04,Tue\nDEC 4,Partly Cloudy,E 7 mph,77%,20.5,0,7,0
2018-12-05,WedDEC 5,Sunny,E 6 mph,71%,20.0,0,6,0
2018-12-06,ThuDEC 6,Sunny,ESE 4 mph,69%,20.0,0,4,0
2018-12-07,FriDEC 7,AM Snow Showers,ESE 5 mph,74%,22.5,1,5,0


In [122]:
forecasts_df = forecasts_df.drop(['wind', 'wind_int'], axis=1)

In [123]:
forecasts_df

Unnamed: 0,date,weather,humidity,temp,precipitation,windy
2018-11-28,TodayNOV 28,Snow Showers,95%,32.0,1,0
2018-11-29,Thu\nNOV 29,Mostly Cloudy,92%,32.0,0,0
2018-11-30,FriNOV 30,Snow Showers,95%,27.5,1,0
2018-12-01,SatDEC 1,Snow Showers,89%,24.0,1,0
2018-12-02,Sun\nDEC 2,Partly Cloudy,89%,21.5,0,0
2018-12-03,Mon\nDEC 3,Partly Cloudy,84%,21.5,0,0
2018-12-04,Tue\nDEC 4,Partly Cloudy,77%,20.5,0,0
2018-12-05,WedDEC 5,Sunny,71%,20.0,0,0
2018-12-06,ThuDEC 6,Sunny,69%,20.0,0,0
2018-12-07,FriDEC 7,AM Snow Showers,74%,22.5,1,0


In [130]:
def get_overcast(weather_string):
    """
    Take the weather string from weather.com and convert to a 1 if string contains words in overcast_list
    otherwise return a 0
    Input: string
    output: int(0 or 1)
    """
    overcast_list = ['Cloudy', 'Snow', 'Rain', 'Showers', 'Thunderstorms']
    for word in weather_string.split():
        if word in overcast_list:
            return 1
        else:
            return 0

In [131]:
forecasts_df['overcast'] = forecasts_df['weather'].map(get_overcast)

In [132]:
forecasts_df

Unnamed: 0,date,weather,humidity,temp,precipitation,windy,overcast
2018-11-28,TodayNOV 28,Snow Showers,95%,32.0,1,0,1
2018-11-29,Thu\nNOV 29,Mostly Cloudy,92%,32.0,0,0,0
2018-11-30,FriNOV 30,Snow Showers,95%,27.5,1,0,1
2018-12-01,SatDEC 1,Snow Showers,89%,24.0,1,0,1
2018-12-02,Sun\nDEC 2,Partly Cloudy,89%,21.5,0,0,0
2018-12-03,Mon\nDEC 3,Partly Cloudy,84%,21.5,0,0,0
2018-12-04,Tue\nDEC 4,Partly Cloudy,77%,20.5,0,0,0
2018-12-05,WedDEC 5,Sunny,71%,20.0,0,0,0
2018-12-06,ThuDEC 6,Sunny,69%,20.0,0,0,0
2018-12-07,FriDEC 7,AM Snow Showers,74%,22.5,1,0,0


In [134]:
def get_poor_visibility(weather_string):
    """
    Take the weather string from weather.com and convert to a 1 if string contains words in poor_visibility_list
    otherwise return a 0
    Input: string
    output: int(0 or 1)
    """
    poor_visibility_list = ['Snow', 'Rain', 'Fog', 'Mist']
    for word in weather_string.split():
        if word in poor_visibility_list:
            return 1
        else:
            return 0

In [135]:
forecasts_df['poor_visibility'] = forecasts_df['weather'].map(get_poor_visibility)

In [136]:
forecasts_df

Unnamed: 0,date,weather,humidity,temp,precipitation,windy,overcast,poor_visibility
2018-11-28,TodayNOV 28,Snow Showers,95%,32.0,1,0,1,1
2018-11-29,Thu\nNOV 29,Mostly Cloudy,92%,32.0,0,0,0,0
2018-11-30,FriNOV 30,Snow Showers,95%,27.5,1,0,1,1
2018-12-01,SatDEC 1,Snow Showers,89%,24.0,1,0,1,1
2018-12-02,Sun\nDEC 2,Partly Cloudy,89%,21.5,0,0,0,0
2018-12-03,Mon\nDEC 3,Partly Cloudy,84%,21.5,0,0,0,0
2018-12-04,Tue\nDEC 4,Partly Cloudy,77%,20.5,0,0,0,0
2018-12-05,WedDEC 5,Sunny,71%,20.0,0,0,0,0
2018-12-06,ThuDEC 6,Sunny,69%,20.0,0,0,0,0
2018-12-07,FriDEC 7,AM Snow Showers,74%,22.5,1,0,0,0


In [137]:
forecasts_df = forecasts_df.drop(['weather', 'humidity'], axis=1)

In [138]:
forecasts_df

Unnamed: 0,date,temp,precipitation,windy,overcast,poor_visibility
2018-11-28,TodayNOV 28,32.0,1,0,1,1
2018-11-29,Thu\nNOV 29,32.0,0,0,0,0
2018-11-30,FriNOV 30,27.5,1,0,1,1
2018-12-01,SatDEC 1,24.0,1,0,1,1
2018-12-02,Sun\nDEC 2,21.5,0,0,0,0
2018-12-03,Mon\nDEC 3,21.5,0,0,0,0
2018-12-04,Tue\nDEC 4,20.5,0,0,0,0
2018-12-05,WedDEC 5,20.0,0,0,0,0
2018-12-06,ThuDEC 6,20.0,0,0,0,0
2018-12-07,FriDEC 7,22.5,1,0,0,0
