In [1]:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
import csv

In [2]:
# Page on statcan.gc.ca that hosts the table to scrape.
URL = 'https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1710000901'

# Reference period to request. The values are strings because they are passed
# to Select.select_by_value() on the page's month/year dropdowns.
START_MONTH, START_YEAR = '01', '1946'
END_MONTH, END_YEAR = '10', '2018'

In [3]:
# Start a headless Firefox session and load the table page.
options = Options()
# Pass the flag explicitly instead of assigning `options.headless = True`:
# the setter only appends this same argument, and newer Selenium releases
# deprecate it, whereas add_argument() works on both Selenium 3 and 4.
options.add_argument('-headless')

# NOTE(review): `executable_path` is the Selenium 3 calling convention; Selenium 4
# expects a `Service` object instead. Left unchanged to match the API level used
# by the rest of this notebook -- confirm the installed Selenium version.
driver = webdriver.Firefox(options=options, executable_path=r'/usr/bin/geckodriver')

driver.get(URL)

In [4]:
# Navigate to the table customization.
# Wait (up to 10 s) for the element with id "downloadButton" to become clickable.
button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'downloadButton')))
# find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector, which
# Selenium 4 removed; this form is accepted by Selenium 3 as well.
# The lookup is scoped to `button`, exactly as before.
button.find_element(By.CSS_SELECTOR, 'a[title="Add/Remove reference period"]').click()

# Block (up to 10 s) until the start-month dropdown is present in the DOM, so
# the next cell can fill in the period.
element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "startMonth")))

In [5]:
# Select period.
# Each dropdown is located by CSS selector and set by option value, in the
# same order as before: start month, start year, end month, end year.
# find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
# which Selenium 4 removed; this form also works on Selenium 3.
for selector, option_value in (
    ('#startMonth', START_MONTH),
    ('#startYear', START_YEAR),
    ('#endMonth', END_MONTH),
    ('#endYear', END_YEAR),
):
    Select(driver.find_element(By.CSS_SELECTOR, selector)).select_by_value(option_value)

driver.find_element(By.CSS_SELECTOR, '#cvApplyButton').click()
# NOTE(review): this waits for the button that was just clicked, so it is
# probably already visible and the wait may return immediately without
# guaranteeing the table has been refreshed for the new period. Consider
# waiting on a condition tied to the refreshed table -- TODO confirm how the
# page reloads before changing it.
element = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.ID, "cvApplyButton")))

In [6]:
# Scraping the header: collect the visible text of every <th> in the table head.
# find_elements(By.CSS_SELECTOR, ...) replaces find_elements_by_css_selector,
# which Selenium 4 removed; this form also works on Selenium 3.
headerlist = [elem.text for elem in driver.find_elements(By.CSS_SELECTOR, '.pub-table thead tr th')]
# Preview the first three column labels.
headerlist[:3]

['Geography', 'Q1 1946', 'Q2 1946']

In [7]:
## Description and Populations
# Build one record per table-body row: [row title, value, value, ...].
# find_element(s)(By.CSS_SELECTOR, ...) replaces the find_element(s)_by_css_selector
# helpers, which Selenium 4 removed; this form also works on Selenium 3.
rows = []
body_rows = driver.find_elements(By.CSS_SELECTOR, '.pub-table tbody tr')
# The first <tr> of the body is skipped, as before (presumably it is not a
# data row -- verify against the page markup).
for tr in body_rows[1:]:
    # The row title comes from the row's first <th>, minus the ' (map)' link text.
    title = tr.find_element(By.CSS_SELECTOR, 'th').text.replace(' (map)', '')
    vals = []
    for td in tr.find_elements(By.CSS_SELECTOR, 'td'):
        # Read the text once: every `.text` access is a round-trip to the browser.
        text = td.text
        # NOTE(review): '..' cells are stored as 0, which is indistinguishable
        # from a real zero in the output -- confirm this is the intended encoding.
        vals.append(0 if text == '..' else int(text.replace(',', '')))

    rows.append([title] + vals)

rows[:2]

[['Canada',
  12188000,
  12241000,
  12316000,
  12393000,
  12450000,
  12507000,
  12576000,
  12646000,
  12710000,
  12773000,
  12852000,
  12930000,
  12998000,
  13399000,
  13475000,
  13548000,
  13607000,
  13663000,
  13737000,
  13807000,
  13870000,
  13937000,
  14050000,
  14163000,
  14277000,
  14376000,
  14496000,
  14598000,
  14682000,
  14763000,
  14886000,
  15001000,
  15105000,
  15199000,
  15330000,
  15444000,
  15535000,
  15620000,
  15736000,
  15834000,
  15919000,
  16004000,
  16123000,
  16235000,
  16352000,
  16479000,
  16677000,
  16810000,
  16907000,
  16997000,
  17120000,
  17225000,
  17318000,
  17406000,
  17522000,
  17624000,
  17710000,
  17793000,
  17909000,
  18009000,
  18092000,
  18172000,
  18271000,
  18363000,
  18442000,
  18519000,
  18614000,
  18708000,
  18787000,
  18864000,
  18964000,
  19061000,
  19142000,
  19222000,
  19325000,
  19420000,
  19501000,
  19578000,
  19678000,
  19777000,
  19857000,
  19939000,
  20

In [8]:
## Final table: the header row comes first, followed by every scraped data row.
tabledata = [headerlist, *rows]

In [9]:
# Write the table to output.csv in the current working directory.
# newline='' hands line-ending control to the csv module, as its documentation
# requires; without it, extra blank rows appear on platforms that translate
# '\n' to '\r\n'. The explicit encoding makes the file independent of the
# platform's default locale.
with open("output.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerows(tabledata)

In [10]:
!cat output.csv | head -n2

Geography,Q1 1946,Q2 1946,Q3 1946,Q4 1946,Q1 1947,Q2 1947,Q3 1947,Q4 1947,Q1 1948,Q2 1948,Q3 1948,Q4 1948,Q1 1949,Q2 1949,Q3 1949,Q4 1949,Q1 1950,Q2 1950,Q3 1950,Q4 1950,Q1 1951,Q2 1951,Q3 1951,Q4 1951,Q1 1952,Q2 1952,Q3 1952,Q4 1952,Q1 1953,Q2 1953,Q3 1953,Q4 1953,Q1 1954,Q2 1954,Q3 1954,Q4 1954,Q1 1955,Q2 1955,Q3 1955,Q4 1955,Q1 1956,Q2 1956,Q3 1956,Q4 1956,Q1 1957,Q2 1957,Q3 1957,Q4 1957,Q1 1958,Q2 1958,Q3 1958,Q4 1958,Q1 1959,Q2 1959,Q3 1959,Q4 1959,Q1 1960,Q2 1960,Q3 1960,Q4 1960,Q1 1961,Q2 1961,Q3 1961,Q4 1961,Q1 1962,Q2 1962,Q3 1962,Q4 1962,Q1 1963,Q2 1963,Q3 1963,Q4 1963,Q1 1964,Q2 1964,Q3 1964,Q4 1964,Q1 1965,Q2 1965,Q3 1965,Q4 1965,Q1 1966,Q2 1966,Q3 1966,Q4 1966,Q1 1967,Q2 1967,Q3 1967,Q4 1967,Q1 1968,Q2 1968,Q3 1968,Q4 1968,Q1 1969,Q2 1969,Q3 1969,Q4 1969,Q1 1970,Q2 1970,Q3 1970,Q4 1970,Q1 1971,Q2 1971,Q3 1971,Q4 1971,Q1 1972,Q2 1972,Q3 1972,Q4 1972,Q1 1973,Q2 1973,Q3 1973,Q4 1973,Q1 1974,Q2 1974,Q3 1974,Q4 1974,Q1 1975,Q2 1975,Q3 1975,Q4 1975,Q1 1976,Q2 1976,Q3 1976,Q4 197