In [40]:
from splinter import Browser
from selenium.common.exceptions import InvalidElementStateException
from selenium.common.exceptions import ElementNotInteractableException
from bs4 import BeautifulSoup
import pandas as pd
import html5lib
import lxml
import shutil
import time
import urllib3
import requests

In [41]:
# Set the zip codes
codes = [i for i in range(27006, 28909)]
# codes.append('27606')   # just hard code one in
code = codes[0]    # set the variable for zip_and_click
len(codes)

1903

In [42]:
# inititial website to visit
url = 'https://weather.com'

In [43]:
def chrome():
    '''
    Finds the chromedriver in the system and creates a Chrome browser
    '''
    executable_path = {'executable_path': shutil.which('chromedriver')}
    browser = Browser('chrome', **executable_path)
    return(browser)

In [44]:
def zip_and_click(code):
    '''
    Enter zip codes into the search bar on weather.com and click the first result.
    No return, just leaves browser at the first data page.
    '''
    import time
    
    filled = False
    clicked = False
    
    inputs = browser.find_by_tag('input') # get the search box reference
    search_box = inputs[0]
    while not filled:
        try:
            search_box.fill(code)
            filled = True
        except InvalidElementStateException:
            time.sleep(1)
    while not clicked:
        try:
            browser.click_link_by_partial_href('/weather/today/l')
            print('no exception this time')
            clicked = True
        except ElementNotInteractableException:
            print('exception found....waiting...')
            time.sleep(1)
    return(browser)     

In [45]:
def scrape_now(browser):
    ''' Get observed weather data from the current weather page '''
    divs = browser.find_by_tag('div').first
    nowcard = divs.find_by_tag('section.today_nowcard-container').value
    nowlist = nowcard.split('\n')
    return(nowlist)

In [46]:
def wind_dict(wind_str):
    ''' handle the cases where the wind value is simply "calm" rather than direction and speed '''
    if wind_str == 'Calm':
        return {
            'speed': 0,
            'direction': None
        }
    else:
        return {
            'speed': wind_str.split()[1],
            'direction': wind_str.split()[0]
        }

In [47]:
def scrape_next36(browser):
    ''' Get the "at a glance" data from the next 36 hours '''

    next_36 = {}
    
    # Pulling data from the "next 36 hours" overview displays. There are 5 sections
    # that are pulled from. Each section gets a different label.
    hour_code = {'0':'now',
                '1':'at_start',
                '2':'at_12',
                '3':'at_24',
                '4':'at_36'
                }
    for i in range(5):
        print(f'looking for {hour_code[str(i)]} snapshot')
        browser.find_by_tag(f'div#daypart-{i}').click()
        snapshot = browser.find_by_tag(f'div#daypart-{i}')
        snap_values = snapshot.value
        details = browser.find_by_tag('span.wx-detail-value')
        detaillist = [details[j].value for j in range(4)]
        print(f'completed section {i} snapshot')

        #store the data
        next_36.update({hour_code[str(i)]: {'hour': 'CALCULATED HOUR OF PREDICTION',
                                            'condition': snap_values.split('\n')[1],
                                            'high/low': snap_values.split('\n')[2],
                                            'temp_f': snap_values.split('\n')[3],
                                            'chance_precip': snap_values.split('\n')[4],
                                            'description': browser.find_by_id(f'dp{i}-details-narrative').first.value,
                                            'wind': wind_dict(detaillist[0]),
#                                             {'speed_mph': detaillist[0].split(' ')[1],
#                                                            'direction': detaillist[0].split(' ')[0]
#                                                    },
                                            'humidity': detaillist[1],
                                            'uv_index': detaillist[2],
                                            'sun': {'rise': detaillist[3],
                                                    'set': detaillist[3]
                                                   }
                                              }
                      })
    return(next_36)

In [48]:
def goto_hourly(browser):
    ''' Take the browser to the hourly numbers '''
    browser.find_by_text('Hourly').click()
    time.sleep(1)
    n=0
    while browser.is_text_present('Next 8 Hours', wait_time=.5):
        n+=1
        print(n)
        next8 = browser.find_by_text('Next 8 Hours')
        print('got next 8 hour spotted')
        next8.click()
        print('clicked and waiting')
        time.sleep(.5)
        print(f'end n={n} th pass')
    return

In [49]:
shutil.which('chromedriver')

'/Users/mastacow/data/forcast-forcast/env/bin/chromedriver'

In [50]:
browser = chrome()

In [20]:
browser.quit()

In [51]:
browser.visit(url)

In [52]:
zip_and_click(code)

exception found....waiting...
no exception this time


<splinter.driver.webdriver.chrome.WebDriver at 0x11c602160>

In [53]:
now = scrape_now(browser)

In [55]:
now

['ADVANCE, NC',
 'as of 11:27 pm EST',
 '43°',
 'CLEAR',
 'feels like 41°',
 'H -- L 35°',
 'UV Index 0 of 10',
 'Holiday Week Travel Forecast',
 'RIGHT NOW',
 'Wind',
 'NNW 4 mph',
 'Humidity',
 '57%',
 'Dew Point',
 '29°',
 'Pressure',
 '30.31 in',
 'Visibility',
 '10.0 mi']

In [56]:
next_36 = scrape_next36(browser)

looking for now snapshot
completed section 0 snapshot
looking for at_start snapshot
completed section 1 snapshot
looking for at_12 snapshot
completed section 2 snapshot
looking for at_24 snapshot
completed section 3 snapshot
looking for at_36 snapshot
completed section 4 snapshot


In [57]:
from pprint import pprint
pprint(next_36['now'])
# next_36

{'chance_precip': '0%',
 'condition': 'MOSTLY CLEAR',
 'description': 'Clear to partly cloudy. Low near 35F. Winds light and '
                'variable.',
 'high/low': 'LOW',
 'hour': 'CALCULATED HOUR OF PREDICTION',
 'humidity': '68%',
 'sun': {'rise': '7:10 am5:08 pm', 'set': '7:10 am5:08 pm'},
 'temp_f': '35°',
 'uv_index': '0 of 10',
 'wind': {'direction': 'NNE', 'speed': '4'}}


In [58]:
goto_hourly(browser)

1
got next 8 hour spotted
clicked and waiting
end n=1 th pass
2
got next 8 hour spotted
clicked and waiting
end n=2 th pass
3
got next 8 hour spotted
clicked and waiting
end n=3 th pass
4
got next 8 hour spotted
clicked and waiting
end n=4 th pass


In [59]:
# get the weather forcast table
url = browser.url
r = requests.get(url)
dfs = pd.read_html(r.text)
dfs[0]

Unnamed: 0,Time,Description,Temp,Feels,Precip,Humidity,Wind,Unnamed: 7
0,,11:45 pmThu,Clear,42°,40°,0%,59%,N 4 mph
1,,12:00 amFri,Clear,42°,40°,0%,59%,N 4 mph
2,,1:00 amFri,Clear,41°,39°,0%,61%,NNE 3 mph
3,,2:00 amFri,Clear,41°,41°,0%,63%,NNE 2 mph
4,,3:00 am Fri,Mostly Clear,39°,39°,0%,70%,NNE 1 mph
5,,4:00 am Fri,Mostly Clear,38°,38°,0%,72%,Calm
6,,5:00 am Fri,Mostly Clear,37°,37°,0%,77%,Calm
7,,6:00 am Fri,Mostly Clear,36°,36°,5%,80%,Calm
8,,7:00 am Fri,Mostly Clear,36°,36°,5%,79%,Calm
9,,8:00 am Fri,Mostly Sunny,37°,37°,5%,78%,Calm


In [36]:
browser.back()

In [35]:
len(dfs)

1

In [19]:
# A line that will click out of an advertisement
browser.find_by_tag('a#bx-close-inside-970188')[0].click()

# snapshot = browser.find_by_tag('div#daypart-0').value
# snaplist = snapshot.split('\n')
# details = browser.find_by_tag('span.wx-detail-value')
# detailslist = [details[i].value for i in range(4)]
# print(snaplist)
# detailslist

In [49]:
print(snaplist)

['TONIGHT', 'RAIN', 'LOW', '42°', '90%']


In [43]:
details = exec("browser.find_by_tag('span#dp0-details-wind')")
# dp_details = {}
# for i in range(4):
#     dp_details[exec("browser.find_by_tag('span#dp{i}-details-wind'.format(i))").value]
print(details)

None


In [35]:
details = browser.find_by_tag('span.wx-detail-value')
print(details)

<splinter.element_list.ElementList object at 0x104ad09d0>


In [36]:
for i in range(4):
    print(details[i].value)

NNW 10 mph
86%
0 of 10
6:43 am5:13 pm


In [12]:
# browser.find_by_tag('div.today-daypart-content').first.value

'TODAY\nHIGH\n70°\n10%'

In [26]:
description = browser.find_by_id('dp1-details-narrative').first.value
description

'A clear sky. Scattered frost possible. Low around 30F. Winds light and variable.'

In [40]:
browser.quit()

1

'Today'

got input box


1

In [25]:
hour_code = {'0':'now',
            '1':'at_start',
            '2':'at_12',
            '3':'at_24',
            '4':'at_36'
}
print(hour_code['0'])
for i in range(5):
    print(hour_code[str(i)])
    print(f'looking for {hour_code[str(i)]} snapshot')

now
now
looking for now snapshot
at_start
looking for at_start snapshot
at_12
looking for at_12 snapshot
at_24
looking for at_24 snapshot
at_36
looking for at_36 snapshot


In [None]:
### SEARCH FOR THE TABLE THAT HOLDS THE SEARCH RESULTS ###