In [1]:
from splinter import Browser
from selenium.common.exceptions import InvalidElementStateException
from selenium.common.exceptions import ElementNotInteractableException
from bs4 import BeautifulSoup
import shutil
import time

In [2]:
# Set the zip codes
codes = [i for i in range(27006, 28909)]
# codes.append('27606')   # just hard code one in
code = codes[0]    # set the variable for zip_and_click
len(codes)

1903

In [3]:
# inititial website to visit
url = 'https://weather.com'

In [4]:
def chrome():
    '''
    Finds the chromedriver in the system and creates a Chrome browser
    '''
    executable_path = {'executable_path': shutil.which('chromedriver')}
    browser = Browser('chrome', **executable_path)
    return(browser)

In [5]:
def zip_and_click(code):
    '''
    Enter zip codes into the search bar on weather.com and click the first result.
    No return, just leaves browser at the first data page.
    '''
    import time
    
    filled = False
    clicked = False
    
    inputs = browser.find_by_tag('input') # get the search box reference
    search_box = inputs[0]
    while not filled:
        try:
            search_box.fill(code)
            filled = True
        except InvalidElementStateException:
            time.sleep(1)
    while not clicked:
        try:
            browser.click_link_by_partial_href('/weather/today/l')
            print('no exception this time')
            clicked = True
        except ElementNotInteractableException:
            print('exception found....waiting...')
            time.sleep(1)
    return(browser)     

In [6]:
def scrape_now(browser):
    ''' Get observed weather data from the current weather page '''
    divs = browser.find_by_tag('div').first
    nowcard = divs.find_by_tag('section.today_nowcard-container').value
    nowlist = nowcard.split('\n')
    return(nowlist)

In [7]:
def wind_dict(wind_str):
    ''' handle the cases where the wind value is simply "calm" rather than direction and speed '''
    if wind_str == 'Calm':
        return {
            'speed': 0,
            'direction': None
        }
    else:
        return {
            'speed': wind_str.split()[1],
            'direction': wind_str.split()[0]
        }

In [8]:
def scrape_next36(browser):
    ''' Get the "at a glance" data from the next 36 hours '''

    next_36 = {}
    
    # Pulling data from the "next 36 hours" overview displays. There are 5 sections
    # that are pulled from. Each section gets a different label.
    hour_code = {'0':'now',
                '1':'at_start',
                '2':'at_12',
                '3':'at_24',
                '4':'at_36'
                }
    for i in range(5):
        print(f'looking for {hour_code[str(i)]} snapshot')
        browser.find_by_tag(f'div#daypart-{i}').click()
        snapshot = browser.find_by_tag(f'div#daypart-{i}')
        snap_values = snapshot.value
        details = browser.find_by_tag('span.wx-detail-value')
        detaillist = [details[j].value for j in range(4)]
        print(f'completed section {i} snapshot')

        #store the data
        next_36.update({hour_code[str(i)]: {'hour': 'CALCULATED HOUR OF PREDICTION',
                                            'condition': snap_values.split('\n')[1],
                                            'high/low': snap_values.split('\n')[2],
                                            'temp_f': snap_values.split('\n')[3],
                                            'chance_precip': snap_values.split('\n')[4],
                                            'description': browser.find_by_id(f'dp{i}-details-narrative').first.value,
                                            'wind': wind_dict(detaillist[0]),
#                                             {'speed_mph': detaillist[0].split(' ')[1],
#                                                            'direction': detaillist[0].split(' ')[0]
#                                                    },
                                            'humidity': detaillist[1],
                                            'uv_index': detaillist[2],
                                            'sun': {'rise': detaillist[3],
                                                    'set': detaillist[3]
                                                   }
                                              }
                      })
    return(next_36)

In [9]:
browser = chrome()

In [10]:
browser.visit(url)

In [11]:
zip_and_click(code)

exception found....waiting...
no exception this time


<splinter.driver.webdriver.chrome.WebDriver at 0x108e34750>

In [12]:
now = scrape_now(browser)

In [13]:
now

['ADVANCE, NC',
 'as of 5:30 pm EST',
 '35°',
 'CLEAR',
 'feels like 26°',
 'H -- L 20°',
 'UV Index 0 of 10',
 'Forecast: Record Low Temps Ahead',
 'RIGHT NOW',
 'Wind',
 'NNW 11 mph',
 'Humidity',
 '49%',
 'Dew Point',
 '17°',
 'Pressure',
 '30.17 in',
 'Visibility',
 '10.0 mi']

In [14]:
next_36 = scrape_next36(browser)

looking for now snapshot
completed section 0 snapshot
looking for at_start snapshot
completed section 1 snapshot
looking for at_12 snapshot
completed section 2 snapshot
looking for at_24 snapshot
completed section 3 snapshot
looking for at_36 snapshot
completed section 4 snapshot


In [15]:
from pprint import pprint
pprint(next_36)
next_36

{'at_12': {'chance_precip': '0%',
           'condition': 'MOSTLY CLEAR',
           'description': 'Clear to partly cloudy. Low near 20F. Winds light '
                          'and variable.',
           'high/low': 'LOW',
           'hour': 'CALCULATED HOUR OF PREDICTION',
           'humidity': '80%',
           'sun': {'rise': '6:56 am5:15 pm', 'set': '6:56 am5:15 pm'},
           'temp_f': '20°',
           'uv_index': '0 of 10',
           'wind': {'direction': 'E', 'speed': '1'}},
 'at_24': {'chance_precip': '20%',
           'condition': 'CLOUDY',
           'description': 'Cloudy. Slight chance of a rain shower. High 43F. '
                          'Winds light and variable.',
           'high/low': 'HIGH',
           'hour': 'CALCULATED HOUR OF PREDICTION',
           'humidity': '60%',
           'sun': {'rise': '6:57 am5:14 pm', 'set': '6:57 am5:14 pm'},
           'temp_f': '43°',
           'uv_index': '2 of 10',
           'wind': {'direction': 'NNE', 'speed': '2'}},


{'now': {'hour': 'CALCULATED HOUR OF PREDICTION',
  'condition': 'CLEAR',
  'high/low': 'LOW',
  'temp_f': '20°',
  'chance_precip': '0%',
  'description': 'Clear skies. Low near 20F. Winds N at 10 to 15 mph.',
  'wind': {'speed': '11', 'direction': 'N'},
  'humidity': '57%',
  'uv_index': '0 of 10',
  'sun': {'rise': '6:55 am5:16 pm', 'set': '6:55 am5:16 pm'}},
 'at_start': {'hour': 'CALCULATED HOUR OF PREDICTION',
  'condition': 'SUNNY',
  'high/low': 'HIGH',
  'temp_f': '42°',
  'chance_precip': '0%',
  'description': 'Mainly sunny. High 42F. Winds ENE at 5 to 10 mph.',
  'wind': {'speed': '6', 'direction': 'ENE'},
  'humidity': '43%',
  'uv_index': '3 of 10',
  'sun': {'rise': '6:56 am5:15 pm', 'set': '6:56 am5:15 pm'}},
 'at_12': {'hour': 'CALCULATED HOUR OF PREDICTION',
  'condition': 'MOSTLY CLEAR',
  'high/low': 'LOW',
  'temp_f': '20°',
  'chance_precip': '0%',
  'description': 'Clear to partly cloudy. Low near 20F. Winds light and variable.',
  'wind': {'speed': '1', 'directi

In [16]:
def browser.find_by_text('Hourly').click()

In [20]:
browser.find_by_text('Next 8 Hours').click()

In [31]:
next_36 = {}
next_36['now'] = {}
next_36['at_start'] = {}
next_36['at_12'] = {}
next_36['at_24'] = {}
next_36['at_36'] = {}
next_36['now']['wind'] = {}
next_36['at_start']['wind'] = {}
next_36['at_12']['wind'] = {}
next_36['at_24']['wind'] = {}
next_36['at_36']['wind'] = {}
next_36['now']['sun'] = {}
next_36['at_start']['sun'] = {}
next_36['at_12']['sun'] = {}
next_36['at_24']['sun'] = {}
next_36['at_36']['sun'] = {}

# Pulling data from the "next 36 hours" overview displays. There are 5 sections
# that are pulled from. Each section gets a different label.
for i in range(5):
    print(f'looking for section {i} snapshot')
    browser.find_by_tag(f'div#daypart-{i}').click()
#     print('clicked first')
    snapshot = browser.find_by_tag(f'div#daypart-{i}')
#     print('found snapshot')
    snap_values = snapshot.value
#     print('stored snap values:', snap_values)
    details = browser.find_by_tag('span.wx-detail-value')
#     print('found details')
    detaillist = [details[j].value for j in range(4)]
#     print('created the details list: ', detaillist)
    print(f'completed section {i} snapshot')
    
    # getting the values labeled as is appropriate
    print(f'looking for section {i} details')
    if i==0:
        next_36['now']['hour'] = 'now'
#         print('entered "hour"')
        next_36['now']['condition'] = snap_values.split('\n')[1]
#         print('entered "conditions"')
        next_36['now']['high/low'] = snap_values.split('\n')[2]
#         print('entered "high/low"')
        next_36['now']['temp_f'] = snap_values.split('\n')[3]
#         print('entered "temp"')
        next_36['now']['chance_precip'] = snap_values.split('\n')[4]
#         print('entered "chance precip"')
        next_36['now']['description'] = browser.find_by_id(f'dp{i}-details-narrative').value
#         print('entered "desctiption"')
        next_36['now']['wind']['speed_mph'] = detaillist[0].split(' ')[0]
#         print('entered "windspeed"')
        next_36['now']['wind']['direction'] = detaillist[0].split(' ')[1]
#         print('entered "winddir"')
        next_36['now']['humidity'] = detaillist[1]
#         print('entered "hum"')
        next_36['now']['uv_index'] = detaillist[2]
#         print('entered "uv_index"')
        next_36['now']['sun']['rise'] = detaillist[3]
#         print('entered "sunrise"')
        next_36['now']['sun']['set'] = detaillist[3]
#         print('entered "sunset"')
        print(f'section {i} complete')
    elif i==1:
        next_36['at_start']['hour'] = 'at_start'
        next_36['at_start']['condition'] = snap_values.split('\n')[1]
        next_36['at_start']['high/low'] = snap_values.split('\n')[2]
        next_36['at_start']['temp_f'] = snap_values.split('\n')[3]
        next_36['at_start']['chance_precip'] = snap_values.split('\n')[4]
        next_36['now']['description'] = browser.find_by_id(f'dp{i}-details-narrative').first.value
        next_36['at_start']['wind']['speed_mph'] = detaillist[0].split(' ')[0]
        next_36['at_start']['wind']['direction'] = detaillist[0].split(' ')[1]
        next_36['at_start']['humidity'] = detaillist[1]
        next_36['at_start']['uv_index'] = detaillist[2]
        next_36['at_start']['sun']['rise'] = detaillist[3]
        next_36['at_start']['sun']['set'] = detaillist[3]
        print(f'section {i} complete')
    elif i==2:
        next_36['at_12']['hour'] = 'at_12'
        next_36['at_12']['condition'] = snap_values.split('\n')[1]
        next_36['at_12']['high/low'] = snap_values.split('\n')[2]
        next_36['at_12']['temp_f'] = snap_values.split('\n')[3]
        next_36['at_12']['chance_precip'] = snap_values.split('\n')[4]
        next_36['now']['description'] = browser.find_by_id(f'dp{i}-details-narrative').first.value
        next_36['at_12']['wind']['speed_mph'] = detaillist[0].split(' ')[0]
        next_36['at_12']['wind']['direction'] = detaillist[0].split(' ')[1]
        next_36['at_12']['humidity'] = detaillist[1]
        next_36['at_12']['uv_index'] = detaillist[2]
        next_36['at_12']['sun']['rise'] = detaillist[3]
        next_36['at_12']['sun']['set'] = detaillist[3]
        print(f'section {i} complete')
    elif i==3:
        next_36['at_24']['hour'] = 'at_24'
        next_36['at_24']['condition'] = snap_values.split('\n')[1]
        next_36['at_24']['high/low'] = snap_values.split('\n')[2]
        next_36['at_24']['temp_f'] = snap_values.split('\n')[3]
        next_36['at_24']['chance_precip'] = snap_values.split('\n')[4]
        next_36['now']['description'] = browser.find_by_id(f'dp{i}-details-narrative').first.value
        next_36['at_24']['wind']['speed_mph'] = detaillist[0].split(' ')[0]
        next_36['at_24']['wind']['direction'] = detaillist[0].split(' ')[1]
        next_36['at_24']['humidity'] = detaillist[1]
        next_36['at_24']['uv_index'] = detaillist[2]
        next_36['at_24']['sun']['rise'] = detaillist[3]
        next_36['at_24']['sun']['set'] = detaillist[3]
        print(f'section {i} complete')
    elif i ==4:
        next_36['at_36']['hour'] = 'at_36'
        next_36['at_36']['condition'] = snap_values.split('\n')[1]
        next_36['at_36']['high/low'] = snap_values.split('\n')[2]
        next_36['at_36']['temp_f'] = snap_values.split('\n')[3]
        next_36['at_36']['chance_precip'] = snap_values.split('\n')[4]
        next_36['now']['description'] = browser.find_by_id(f'dp{i}-details-narrative').first.value
        next_36['at_36']['wind']['speed_mph'] = detaillist[0].split(' ')[0]
        next_36['at_36']['wind']['direction'] = detaillist[0].split(' ')[1]
        next_36['at_36']['humidity'] = detaillist[1]
        next_36['at_36']['uv_index'] = detaillist[2]
        next_36['at_36']['sun']['rise'] = detaillist[3]
        next_36['at_36']['sun']['set'] = detaillist[3]
        print(f'section {i} complete')
    else:
        print('Everytihng checked and nothing done.')
        break


looking for section 0 snapshot
completed section 0 snapshot
looking for section 0 details
section 0 complete
looking for section 1 snapshot
completed section 1 snapshot
looking for section 1 details
section 1 complete
looking for section 2 snapshot
completed section 2 snapshot
looking for section 2 details
section 2 complete
looking for section 3 snapshot
completed section 3 snapshot
looking for section 3 details
section 3 complete
looking for section 4 snapshot
completed section 4 snapshot
looking for section 4 details
section 4 complete


In [20]:
browser.find_by_tag('div#daypart-0').click()
snapshot = browser.find_by_tag('div#daypart-0').value
snaplist = snapshot.split('\n')
details = browser.find_by_tag('span.wx-detail-value')
detailslist = [details[i].value for i in range(4)]
print(snaplist)
detailslist

['TODAY', 'SUNNY', 'HIGH', '51°', '0%']


['ESE 5 mph', '46%', '4 of 10', '6:44 am5:12 pm']

In [49]:
print(snaplist)

['TONIGHT', 'RAIN', 'LOW', '42°', '90%']


In [43]:
details = exec("browser.find_by_tag('span#dp0-details-wind')")
# dp_details = {}
# for i in range(4):
#     dp_details[exec("browser.find_by_tag('span#dp{i}-details-wind'.format(i))").value]
print(details)

None


In [35]:
details = browser.find_by_tag('span.wx-detail-value')
print(details)

<splinter.element_list.ElementList object at 0x104ad09d0>


In [36]:
for i in range(4):
    print(details[i].value)

NNW 10 mph
86%
0 of 10
6:43 am5:13 pm


In [12]:
# browser.find_by_tag('div.today-daypart-content').first.value

'TODAY\nHIGH\n70°\n10%'

In [26]:
description = browser.find_by_id('dp1-details-narrative').first.value
description

'A clear sky. Scattered frost possible. Low around 30F. Winds light and variable.'

In [32]:
browser.quit()

1

'Today'

got input box


1

In [25]:
hour_code = {'0':'now',
            '1':'at_start',
            '2':'at_12',
            '3':'at_24',
            '4':'at_36'
}
print(hour_code['0'])
for i in range(5):
    print(hour_code[str(i)])
    print(f'looking for {hour_code[str(i)]} snapshot')

now
now
looking for now snapshot
at_start
looking for at_start snapshot
at_12
looking for at_12 snapshot
at_24
looking for at_24 snapshot
at_36
looking for at_36 snapshot


In [None]:
### SEARCH FOR THE TABLE THAT HOLDS THE SEARCH RESULTS ###