In [28]:
from splinter import Browser
from selenium.common.exceptions import InvalidElementStateException
from selenium.common.exceptions import ElementNotInteractableException
from bs4 import BeautifulSoup
import pandas as pd
import shutil
import time

In [30]:
# Set the zip codes
codes = [i for i in range(27006, 28909)]
# codes.append('27606')   # just hard code one in
code = codes[0]    # set the variable for zip_and_click
len(codes)

1903

In [31]:
# inititial website to visit
url = 'https://weather.com'

In [32]:
def chrome():
    '''
    Finds the chromedriver in the system and creates a Chrome browser
    '''
    executable_path = {'executable_path': shutil.which('chromedriver')}
    browser = Browser('chrome', **executable_path)
    return(browser)

In [33]:
def zip_and_click(code):
    '''
    Enter zip codes into the search bar on weather.com and click the first result.
    No return, just leaves browser at the first data page.
    '''
    import time
    
    filled = False
    clicked = False
    
    inputs = browser.find_by_tag('input') # get the search box reference
    search_box = inputs[0]
    while not filled:
        try:
            search_box.fill(code)
            filled = True
        except InvalidElementStateException:
            time.sleep(1)
    while not clicked:
        try:
            browser.click_link_by_partial_href('/weather/today/l')
            print('no exception this time')
            clicked = True
        except ElementNotInteractableException:
            print('exception found....waiting...')
            time.sleep(1)
    return(browser)     

In [34]:
def scrape_now(browser):
    ''' Get observed weather data from the current weather page '''
    divs = browser.find_by_tag('div').first
    nowcard = divs.find_by_tag('section.today_nowcard-container').value
    nowlist = nowcard.split('\n')
    return(nowlist)

In [35]:
def wind_dict(wind_str):
    ''' handle the cases where the wind value is simply "calm" rather than direction and speed '''
    if wind_str == 'Calm':
        return {
            'speed': 0,
            'direction': None
        }
    else:
        return {
            'speed': wind_str.split()[1],
            'direction': wind_str.split()[0]
        }

In [36]:
def scrape_next36(browser):
    ''' Get the "at a glance" data from the next 36 hours '''

    next_36 = {}
    
    # Pulling data from the "next 36 hours" overview displays. There are 5 sections
    # that are pulled from. Each section gets a different label.
    hour_code = {'0':'now',
                '1':'at_start',
                '2':'at_12',
                '3':'at_24',
                '4':'at_36'
                }
    for i in range(5):
        print(f'looking for {hour_code[str(i)]} snapshot')
        browser.find_by_tag(f'div#daypart-{i}').click()
        snapshot = browser.find_by_tag(f'div#daypart-{i}')
        snap_values = snapshot.value
        details = browser.find_by_tag('span.wx-detail-value')
        detaillist = [details[j].value for j in range(4)]
        print(f'completed section {i} snapshot')

        #store the data
        next_36.update({hour_code[str(i)]: {'hour': 'CALCULATED HOUR OF PREDICTION',
                                            'condition': snap_values.split('\n')[1],
                                            'high/low': snap_values.split('\n')[2],
                                            'temp_f': snap_values.split('\n')[3],
                                            'chance_precip': snap_values.split('\n')[4],
                                            'description': browser.find_by_id(f'dp{i}-details-narrative').first.value,
                                            'wind': wind_dict(detaillist[0]),
#                                             {'speed_mph': detaillist[0].split(' ')[1],
#                                                            'direction': detaillist[0].split(' ')[0]
#                                                    },
                                            'humidity': detaillist[1],
                                            'uv_index': detaillist[2],
                                            'sun': {'rise': detaillist[3],
                                                    'set': detaillist[3]
                                                   }
                                              }
                      })
    return(next_36)

In [37]:
def goto_hourly(browser):
    ''' Take the browser to the hourly numbers '''
    browser.find_by_text('Hourly').click()
    time.sleep(.5)
    while browser.is_text_present('Next 8 Hours'):
        browser.find_by_text('Next 8 Hours').click()
        time.sleep(.5)
    return()

In [39]:
browser = chrome()

In [40]:
browser.visit(url)

In [None]:
zip_and_click(code)

In [13]:
now = scrape_now(browser)

In [14]:
now

['ADVANCE, NC',
 'as of 9:18 pm EST',
 '29°',
 'CLEAR',
 'feels like 21°',
 'H -- L 20°',
 'UV Index 0 of 10',
 'Forecast: Record Low Temps Ahead',
 'RIGHT NOW',
 'Wind',
 'N 8 mph',
 'Humidity',
 '50%',
 'Dew Point',
 '13°',
 'Pressure',
 '30.30 in',
 'Visibility',
 '10.0 mi']

In [15]:
next_36 = scrape_next36(browser)

looking for now snapshot
completed section 0 snapshot
looking for at_start snapshot
completed section 1 snapshot
looking for at_12 snapshot
completed section 2 snapshot
looking for at_24 snapshot
completed section 3 snapshot
looking for at_36 snapshot
completed section 4 snapshot


In [16]:
from pprint import pprint
pprint(next_36)
next_36

{'at_12': {'chance_precip': '0%',
           'condition': 'MOSTLY CLEAR',
           'description': 'Clear to partly cloudy. Low near 20F. Winds light '
                          'and variable.',
           'high/low': 'LOW',
           'hour': 'CALCULATED HOUR OF PREDICTION',
           'humidity': '80%',
           'sun': {'rise': '6:56 am5:15 pm', 'set': '6:56 am5:15 pm'},
           'temp_f': '20°',
           'uv_index': '0 of 10',
           'wind': {'direction': 'E', 'speed': '1'}},
 'at_24': {'chance_precip': '20%',
           'condition': 'CLOUDY',
           'description': 'Cloudy. Slight chance of a rain shower. High 43F. '
                          'Winds light and variable.',
           'high/low': 'HIGH',
           'hour': 'CALCULATED HOUR OF PREDICTION',
           'humidity': '60%',
           'sun': {'rise': '6:57 am5:14 pm', 'set': '6:57 am5:14 pm'},
           'temp_f': '43°',
           'uv_index': '2 of 10',
           'wind': {'direction': 'NE', 'speed': '2'}},
 

{'now': {'hour': 'CALCULATED HOUR OF PREDICTION',
  'condition': 'CLEAR',
  'high/low': 'LOW',
  'temp_f': '20°',
  'chance_precip': '0%',
  'description': 'Clear skies. Low near 20F. Winds N at 5 to 10 mph.',
  'wind': {'speed': '9', 'direction': 'N'},
  'humidity': '57%',
  'uv_index': '0 of 10',
  'sun': {'rise': '6:55 am5:16 pm', 'set': '6:55 am5:16 pm'}},
 'at_start': {'hour': 'CALCULATED HOUR OF PREDICTION',
  'condition': 'SUNNY',
  'high/low': 'HIGH',
  'temp_f': '42°',
  'chance_precip': '0%',
  'description': 'A mainly sunny sky. High 42F. Winds ENE at 5 to 10 mph.',
  'wind': {'speed': '6', 'direction': 'ENE'},
  'humidity': '43%',
  'uv_index': '3 of 10',
  'sun': {'rise': '6:56 am5:15 pm', 'set': '6:56 am5:15 pm'}},
 'at_12': {'hour': 'CALCULATED HOUR OF PREDICTION',
  'condition': 'MOSTLY CLEAR',
  'high/low': 'LOW',
  'temp_f': '20°',
  'chance_precip': '0%',
  'description': 'Clear to partly cloudy. Low near 20F. Winds light and variable.',
  'wind': {'speed': '1', 'dir

In [17]:
goto_hourly(browser)

()

In [None]:
def scrape_hourly(browser):
    

In [37]:
url = browser.url
response = requests.get(url)
soup = BeautifulSoup(response.text, 'lxml')
soup.find_all()

https://weather.com/weather/hourbyhour/l/15d7cae6a3bd8dea6586e2ea77ef72be81e7593b073dcd3ed31cf719e686f364


In [18]:
url = browser.url
print(url)
tables = pd.read_html(url)
tables

https://weather.com/weather/hourbyhour/l/15d7cae6a3bd8dea6586e2ea77ef72be81e7593b073dcd3ed31cf719e686f364


ImportError: html5lib not found, please install it

In [20]:
browser.find_by_tag('div#daypart-0').click()
snapshot = browser.find_by_tag('div#daypart-0').value
snaplist = snapshot.split('\n')
details = browser.find_by_tag('span.wx-detail-value')
detailslist = [details[i].value for i in range(4)]
print(snaplist)
detailslist

['TODAY', 'SUNNY', 'HIGH', '51°', '0%']


['ESE 5 mph', '46%', '4 of 10', '6:44 am5:12 pm']

In [49]:
print(snaplist)

['TONIGHT', 'RAIN', 'LOW', '42°', '90%']


In [43]:
details = exec("browser.find_by_tag('span#dp0-details-wind')")
# dp_details = {}
# for i in range(4):
#     dp_details[exec("browser.find_by_tag('span#dp{i}-details-wind'.format(i))").value]
print(details)

None


In [35]:
details = browser.find_by_tag('span.wx-detail-value')
print(details)

<splinter.element_list.ElementList object at 0x104ad09d0>


In [36]:
for i in range(4):
    print(details[i].value)

NNW 10 mph
86%
0 of 10
6:43 am5:13 pm


In [12]:
# browser.find_by_tag('div.today-daypart-content').first.value

'TODAY\nHIGH\n70°\n10%'

In [26]:
description = browser.find_by_id('dp1-details-narrative').first.value
description

'A clear sky. Scattered frost possible. Low around 30F. Winds light and variable.'

In [38]:
browser.quit()

1

'Today'

got input box


1

In [25]:
hour_code = {'0':'now',
            '1':'at_start',
            '2':'at_12',
            '3':'at_24',
            '4':'at_36'
}
print(hour_code['0'])
for i in range(5):
    print(hour_code[str(i)])
    print(f'looking for {hour_code[str(i)]} snapshot')

now
now
looking for now snapshot
at_start
looking for at_start snapshot
at_12
looking for at_12 snapshot
at_24
looking for at_24 snapshot
at_36
looking for at_36 snapshot


In [None]:
### SEARCH FOR THE TABLE THAT HOLDS THE SEARCH RESULTS ###