## Dependencies

In [1]:
from splinter import Browser
from selenium.common.exceptions import InvalidElementStateException
from selenium.common.exceptions import ElementNotInteractableException
from bs4 import BeautifulSoup
import shutil
import time


## Variables

In [2]:
# Zip codes to look up; a single one is hard coded for now
codes = ['27606']
code = codes[0]    # the zip code handed to zip_and_click
global browser     # no effect at module level; zip_and_click reads the module-level name

In [3]:
# Set the url of the website to be scraped (opened later with browser.visit)
url = 'https://weather.com/'

## Functions

In [4]:
def chrome():
    '''
    Create a splinter Chrome browser using the chromedriver located by
    shutil.which.

    Returns the new Browser. shutil.which gives None when no chromedriver is
    found; that value is passed through to Browser unchanged.
    '''
    driver_location = shutil.which('chromedriver')
    return Browser('chrome', executable_path=driver_location)

In [5]:
def _retry_until_ok(action, retry_on, retry_delay=1, max_attempts=None):
    ''' Call action() until it stops raising retry_on, then return its result. '''
    import time

    attempt = 0
    while True:
        attempt += 1
        try:
            return action()
        except retry_on:
            # Out of attempts: let the caller see the last failure.
            if max_attempts is not None and attempt >= max_attempts:
                raise
            time.sleep(retry_delay)


def zip_and_click(code, retry_delay=1, max_attempts=None):
    '''
    Enter a zip code into the first <input> on the page and click the first
    link whose href contains '/weather/today/l'.

    Uses the module-level `browser`, which must already be on the search page.

    code         -- zip code string typed into the search box
    retry_delay  -- seconds to wait between attempts (default 1)
    max_attempts -- give up after this many attempts per step; None (the
                    default) retries forever

    Filling is retried while it raises InvalidElementStateException and the
    click is retried while it raises ElementNotInteractableException. When
    max_attempts is exhausted the last such exception is re-raised.

    Returns the module-level browser, left on the page the link leads to.
    '''
    search_box = browser.find_by_tag('input')[0]   # first input is taken to be the search box

    _retry_until_ok(lambda: search_box.fill(code),
                    InvalidElementStateException,
                    retry_delay, max_attempts)
    _retry_until_ok(lambda: browser.click_link_by_partial_href('/weather/today/l'),
                    ElementNotInteractableException,
                    retry_delay, max_attempts)
    return browser

In [6]:
def scrape_now(browser):
    ''' Return the text of the current-weather card as a list of its lines '''
    first_div = browser.find_by_tag('div').first
    card_text = first_div.find_by_tag('section.today_nowcard-container').value
    return card_text.split('\n')

In [None]:
def wind_dict(wind_str):
    '''
    Split a wind reading into its speed and direction.

    A reading of "calm" (any letter case, surrounding whitespace ignored)
    gives speed 0 and direction None. Any other reading is split on
    whitespace: the first token is the direction and the second is the speed,
    returned as the string that appeared in the reading.

    Raises IndexError when a non-calm reading has fewer than two tokens.
    '''
    reading = wind_str.strip()
    if reading.lower() == 'calm':
        return {'speed': 0, 'direction': None}

    tokens = reading.split()
    return {'speed': tokens[1], 'direction': tokens[0]}

In [1]:
def scrape_next36(browser):
    '''
    Get the "at a glance" data from the next 36 hours.

    Clicks each of the five div#daypart-<i> sections in turn and records the
    values shown for it. Returns a dict keyed by 'now', 'at_start', 'at_12',
    'at_24' and 'at_36'. If clicking a section raises
    ElementNotInteractableException the loop stops, so that key and the ones
    after it are absent from the result.
    '''
    # One label per overview section, in the order the sections are numbered.
    labels = ('now', 'at_start', 'at_12', 'at_24', 'at_36')
    next_36 = {}

    for i, label in enumerate(labels):
        selector = f'div#daypart-{i}'
        print(f'looking for {label} snapshot')
        try:
            browser.find_by_tag(selector).click()
        except ElementNotInteractableException:
            print('ElementNotInteractableException occured: breaking loop.')
            break
        # NOTE(review): this second click is kept from the original and is not
        # guarded by the try above - confirm whether it is needed.
        browser.find_by_tag(selector).click()

        snap_lines = browser.find_by_tag(selector).value.split('\n')
        details = browser.find_by_tag('span.wx-detail-value')
        detaillist = [details[j].value for j in range(4)]
        # The original used detaillist[3] for both 'rise' and 'set', so the two
        # were always identical. Take the next detail value for 'set' when the
        # page provides one; otherwise fall back to the old value.
        # NOTE(review): assumes the value after sunrise is sunset - confirm
        # against the page.
        sunset = details[4].value if len(details) > 4 else detaillist[3]
        print(f'completed section {i} snapshot')

        # store the data
        next_36[label] = {
            'hour': 'CALCULATED HOUR OF PREDICTION',   # placeholder, not computed yet
            'condition': snap_lines[1],
            'high/low': snap_lines[2],
            'temp_f': snap_lines[3],
            'chance_precip': snap_lines[4],
            'description': browser.find_by_id(f'dp{i}-details-narrative').first.value,
            'wind': wind_dict(detaillist[0]),
            'humidity': detaillist[1],
            'uv_index': detaillist[2],
            'sun': {'rise': detaillist[3],
                    'set': sunset},
        }
    return next_36

In [None]:
def goto_hourly(browser, pause=.5):
    '''
    Take the browser to the hourly numbers.

    Clicks 'Hourly', then keeps clicking 'Next 8 Hours' for as long as that
    text is present on the page.

    browser -- browser to drive
    pause   -- seconds to sleep after every click (default .5)

    Returns an empty tuple, as the original `return()` did.
    '''
    browser.find_by_text('Hourly').click()
    time.sleep(pause)
    while browser.is_text_present('Next 8 Hours'):
        browser.find_by_text('Next 8 Hours').click()
        time.sleep(pause)
    return ()

## Run the functions

In [7]:
# Start Chrome; zip_and_click reads this module-level name instead of taking a parameter
browser = chrome()

In [8]:
# Open the site to be scraped
browser.visit(url)

In [None]:
# Type the zip code into the search box and follow the first '/weather/today/l' link
zip_and_click(code)

1



In [None]:
# Current-weather card text, one list item per line
scrape_now(browser)

In [None]:
# Snapshot data for the "next 36 hours" overview sections
scrape_next36(browser)

In [None]:
# Click the 'Hourly' element directly (goto_hourly is not called here)
browser.find_by_text('Hourly').click()

In [53]:
# Shut the browser down when finished
browser.quit()