In [1]:
import csv
import os
import time

from selenium import webdriver
from selenium.common.exceptions import ElementNotVisibleException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait

## Historical Daily Temperatures 

In [2]:
# from http://www.marinamele.com/selenium-tutorial-web-scraping-with-selenium-and-python
def init_driver(download_path):
    """Create a Firefox WebDriver whose profile is set up for CSV downloads.

    Args:
        download_path: Value assigned to the ``browser.download.dir``
            Firefox preference.

    Returns:
        The Firefox driver, with a 5-second ``WebDriverWait`` attached to it
        as the ad-hoc attribute ``driver.wait``.
    """
    preferences = (
        ("browser.download.dir", download_path),
        # 2 selects the custom directory given by browser.download.dir.
        ("browser.download.folderList", 2),
        # MIME types Firefox should save to disk without showing a dialog.
        ("browser.helperApps.neverAsk.saveToDisk",
         "application/force-download, text/csv"),
    )

    profile = webdriver.FirefoxProfile()
    for pref_name, pref_value in preferences:
        profile.set_preference(pref_name, pref_value)

    driver = webdriver.Firefox(firefox_profile=profile)
    driver.wait = WebDriverWait(driver, 5)
    return driver

def get_stn_id_from_url(url):
    """Return the value of the ``StationID`` query parameter of ``url``.

    Only a parameter whose key is exactly ``StationID`` is accepted, so a
    parameter that merely contains that text (for example in its value) is
    not mistaken for the station ID. A trailing ``#fragment`` is ignored.

    Args:
        url: A full URL, or a bare query string such as ``"StationID=1&x=2"``.

    Returns:
        The station ID as a string, or ``None`` when the URL has no
        ``StationID=...`` parameter.
    """
    without_fragment = url.partition('#')[0]
    head, has_query, query = without_fragment.partition('?')
    if not has_query:
        # No "?" present: treat the whole input as the query string.
        query = head

    for param in query.split('&'):
        key, has_value, value = param.partition('=')
        if has_value and key == 'StationID':
            return value
    return None

def get_almanac(driver, search_term, download_path):
    """Search the almanac page for a station and save its almanac CSV.

    Opens the almanac search page, submits ``search_term`` as the station
    name, clicks through the results page if one is shown, and triggers the
    CSV download. The downloaded ``eng-almanac-0101-1231.csv`` is then renamed
    to ``create_filename(search_term)`` inside ``download_path``.

    Args:
        driver: Driver returned by ``init_driver`` (must expose ``driver.wait``).
        search_term: Station name typed into the search box.
        download_path: Directory the browser downloads into. A trailing
            separator is optional.

    Raises:
        OSError: From ``os.rename`` if the downloaded file is not present
            after the fixed 5-second wait.
    """
    almanac_search_url = ('http://climate.weather.gc.ca/climateData/almanacselection_e.html'
                          '?Month=9&Day=23&Year=2015&timeframe=4&txtStationName=')
    downloaded_file = os.path.join(download_path, 'eng-almanac-0101-1231.csv')
    renamed_file = os.path.join(download_path, create_filename(search_term))

    driver.get(almanac_search_url)
    box = driver.wait.until(EC.presence_of_element_located((By.ID, 'stationName')))
    button = driver.wait.until(EC.element_to_be_clickable((By.NAME, "stnSubmit")))
    box.send_keys(search_term)
    button.click()

    # If the search landed on a results screen there are many stnSubmit
    # buttons on the page; click the first one.
    if 'almanac_results' in driver.current_url:
        first_button = driver.wait.until(EC.element_to_be_clickable((By.NAME, "stnSubmit")))
        first_button.click()

    download_data_button = driver.wait.until(EC.element_to_be_clickable((By.NAME, "submit")))

    # Delete any old data file so the rename below picks up the fresh download.
    if os.path.exists(downloaded_file):
        os.remove(downloaded_file)
    download_data_button.click()

    # Fixed pause to let the download finish before renaming.
    time.sleep(5)
    os.rename(downloaded_file, renamed_file)
    
    
def get_metadata(file_path):
    """Read the leading ``"property","value"`` rows of a CSV into a dict.

    Rows are consumed from the top of the file until the first blank line or
    the end of the file, whichever comes first. Rows with fewer than two
    fields are skipped.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A dict mapping the first field of each header row to its second field.
    """
    metadata = {}
    # `with` guarantees the file is closed even if parsing raises.
    with open(file_path, 'r') as csv_file:
        for row in csv.reader(csv_file):
            if not row:
                # csv.reader yields an empty list for a blank line, which
                # marks the end of the metadata header.
                break
            if len(row) < 2:
                continue
            metadata[row[0]] = row[1]
    return metadata

def create_filename(name):
    """Return ``name`` with every space replaced by ``_`` and ``.csv`` appended."""
    underscored = name.replace(' ', '_')
    return underscored + '.csv'

In [3]:
# Directory the almanac CSVs are downloaded into.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
almanac_path = "/Users/stephenmcmurtry/work/weather_chart/data/almanac/"
# Start a Firefox session whose download directory is almanac_path.
driver = init_driver(almanac_path)
# Left commented out so the session stays open for the cells below:
# driver.quit()

In [5]:
# Station names used as search terms on the almanac page, one per line.
list_of_stations = [
    'TORONTO INTL A',
    'OTTAWA INTL A',
    'MONTREAL INTL A',
    'VANCOUVER INTL A',
    'CALGARY INTL A',
    'YELLOWKNIFE A',
    'WHITEHORSE A',
    'SASKATOON INTL A',
    'REGINA INTL A',
    'EDMONTON INTL A',
    'PETERBOROUGH',
    'HALIFAX INTL A',
    'FREDERICTON',
    'IQALUIT A',
    "ST. JOHN'S INTL A",
    'VICTORIA INTL A',
    'WINNIPEG INTL A',
    'CHARLOTTETOWN A',
]

In [4]:
# One-off run for a single station; the file is saved as
# create_filename('peterborough') inside almanac_path.
get_almanac(driver, 'peterborough', almanac_path)

In [31]:
# Download the almanac CSV for every station in list_of_stations. A failure
# for one station is reported and skipped so the remaining stations still run.
for station in list_of_stations:
    try:
        get_almanac(driver, station, almanac_path)
    except Exception as err:
        # Catch Exception rather than using a bare except so a keyboard
        # interrupt still stops the loop, and say which station failed.
        print('Skipping %s: %r' % (station, err))

## Historical Monthly Precipitation 

In [57]:
def get_climage_url(stn_id):
    """Build the 1981-2010 climate-normals results URL for ``stn_id``.

    ``stn_id`` is converted with ``str`` and inserted as the ``stnID`` query
    parameter, followed by ``autofwd=1``.
    """
    url_parts = [
        'http://climate.weather.gc.ca/climate_normals/results_1981_2010_e.html?stnID=',
        str(stn_id),
        '&autofwd=1',
    ]
    return ''.join(url_parts)

In [74]:
def get_precip_data(driver, stn_id, download_path):
    """Download the climate-normals CSV for a station and rename it.

    Opens the normals page for ``stn_id``, follows the "Normals Data" link,
    clicks the download button, then renames the first file in
    ``download_path`` whose name contains both ``.csv`` and ``normals`` to
    ``<stn_id>.csv``.

    Args:
        driver: Driver returned by ``init_driver`` (must expose ``driver.wait``).
        stn_id: Station ID placed in the page URL and used for the file name.
        download_path: Directory the browser downloads into. A trailing
            separator is optional.

    Raises:
        IOError: If no matching file is found after the fixed 5-second wait.
    """
    driver.get(get_climage_url(stn_id))
    button = driver.wait.until(EC.element_to_be_clickable((By.LINK_TEXT, "Normals Data")))
    button.click()
    download_button = driver.wait.until(EC.element_to_be_clickable((By.NAME, "submit")))
    download_button.click()

    # Fixed pause to let the download finish before looking for the file.
    time.sleep(5)

    # Look in the directory passed by the caller, not a notebook-level global.
    filename = next(
        (f for f in os.listdir(download_path) if '.csv' in f and 'normals' in f),
        None,
    )
    if filename is None:
        raise IOError('no normals CSV found in %s for station %s'
                      % (download_path, stn_id))

    os.rename(os.path.join(download_path, filename),
              os.path.join(download_path, str(stn_id) + '.csv'))

In [67]:
# Directory the precipitation-normals CSVs are downloaded into.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
precip_path = "/Users/stephenmcmurtry/work/weather_chart/data/precipitation/"
# Rebinds the global `driver` to a new Firefox session that downloads into
# precip_path; this cell does not quit any driver created earlier.
driver = init_driver(precip_path)

In [77]:
# Fetch the climate normals for every CSV found in almanac_path, using the
# file name without its extension as the station ID.
# NOTE(review): get_almanac names its files with create_filename(search_term),
# i.e. after the station *name*, so these stems may not be valid station IDs --
# confirm how the files in almanac_path were named.
for almanac_file in os.listdir(almanac_path):
    # endswith() also skips partial downloads such as "x.csv.part".
    if not almanac_file.endswith('.csv'):
        continue
    # splitext keeps dots inside the name (e.g. "ST._JOHN'S_INTL_A.csv").
    stn_id = os.path.splitext(almanac_file)[0]
    try:
        get_precip_data(driver, stn_id, precip_path)
    except Exception as err:
        # Catch Exception rather than using a bare except so a keyboard
        # interrupt still stops the loop, and say which station failed.
        print('Skipping %s: %r' % (stn_id, err))

## Last year's daily high and low temperatures and precipitation 

In [113]:
def get_daily_weather_data(driver, stn_name, download_path, year='2014'):
    """Download one year of daily weather data for a station and rename it.

    Opens the climate site, switches to the daily search, selects ``year``,
    searches for ``stn_name`` and clicks the download button. The first file
    in ``download_path`` whose name contains both ``.csv`` and ``daily`` is
    then renamed to ``<StationID>.csv``, where the ID is read from the
    ``StationID`` parameter of the current page URL.

    Args:
        driver: Driver returned by ``init_driver`` (must expose ``driver.wait``).
        stn_name: Station name typed into the search box.
        download_path: Directory the browser downloads into. A trailing
            separator is optional.
        year: Value selected in the ``year`` drop-down. Defaults to ``'2014'``,
            the year that was previously hard-coded.

    Raises:
        IOError: If no matching file is found after the fixed 5-second wait.
    """
    driver.get('http://climate.weather.gc.ca/')
    daily_button = driver.wait.until(EC.element_to_be_clickable((By.ID, "daily")))
    daily_button.click()

    year_selector = Select(driver.wait.until(EC.presence_of_element_located((By.ID, 'year'))))
    year_selector.select_by_value(str(year))

    box = driver.wait.until(EC.presence_of_element_located((By.ID, 'stationName')))
    box.send_keys(stn_name)
    submit_button = driver.wait.until(EC.element_to_be_clickable((By.NAME, "stnSubmit")))
    submit_button.click()

    download_button = driver.wait.until(EC.element_to_be_clickable((By.NAME, "submit")))
    download_button.click()
    stn_id = get_stn_id_from_url(driver.current_url)

    # Fixed pause to let the download finish before looking for the file.
    time.sleep(5)

    filename = next(
        (f for f in os.listdir(download_path) if '.csv' in f and 'daily' in f),
        None,
    )
    if filename is None:
        raise IOError('no daily CSV found in %s for station %s'
                      % (download_path, stn_name))

    os.rename(os.path.join(download_path, filename),
              os.path.join(download_path, str(stn_id) + '.csv'))

In [111]:
# Directory the daily-weather CSVs are downloaded into.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
daily_weather_path = "/Users/stephenmcmurtry/work/weather_chart/data/daily_weather/"
# Rebinds the global `driver` to a new Firefox session that downloads into
# daily_weather_path; this cell does not quit any driver created earlier.
driver = init_driver(daily_weather_path)

In [114]:
# One-off run for a single station before looping over all of them.
get_daily_weather_data(driver, 'toronto intl a', daily_weather_path)

In [124]:
# For every almanac CSV, read the station name from the file's first line and
# download that station's daily weather data.
for almanac_file in os.listdir(almanac_path):
    # endswith() also skips partial downloads such as "x.csv.part".
    if not almanac_file.endswith('.csv'):
        continue
    # `with` closes the file even if reading it raises.
    with open(os.path.join(almanac_path, almanac_file), 'r') as alm_file:
        first_line = alm_file.readline()
    try:
        # The station name is the last double-quoted field on the first line.
        stn_name = first_line.split('"')[-2]
        get_daily_weather_data(driver, stn_name, daily_weather_path)
    except Exception as err:
        # Catch Exception rather than using a bare except so a keyboard
        # interrupt still stops the loop, and say which file failed.
        print('Skipping %s: %r' % (almanac_file, err))

In [None]:
# Almanac search URL, kept for reference:
# http://climate.weather.gc.ca/climateData/almanacselection_e.html?Month=9&Day=23&Year=2015&timeframe=4&txtStationName=

In [14]:
# Quit whichever session the global `driver` currently refers to. Each
# init_driver cell rebinds `driver`, so sessions created earlier are not
# reached by this call.
driver.quit()

In [6]:
import test as t

### 

In [7]:
# NOTE(review): `t` is bound by `import test as t`, so it is a module, and
# calling it raises "TypeError: 'module' object is not callable" (see the
# recorded output). Presumably a function inside the module was intended --
# confirm, or remove this scratch cell.
t('abc')

TypeError: 'module' object is not callable

In [8]:
import argparse

# Scratch parser that accepts one or more integer positional arguments,
# collected under the attribute name `integers`.
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument(
    'integers',
    metavar='N',
    type=int,
    nargs='+',
    help='an integer for the accumulator',
)

_StoreAction(option_strings=[], dest='integers', nargs='+', const=None, default=None, type=<type 'int'>, choices=None, help='an integer for the accumulator', metavar='N')

In [None]:
# NOTE(review): incomplete scratch call -- no argument name or flags are
# given, so this is expected to fail if run. TODO: fill in or remove.
parser.add_argument()