<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports" data-toc-modified-id="Imports-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports</a></span></li><li><span><a href="#Helper-functions" data-toc-modified-id="Helper-functions-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Helper functions</a></span></li><li><span><a href="#Extract-wheater-data" data-toc-modified-id="Extract-wheater-data-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Extract wheater data</a></span></li><li><span><a href="#Export-to-.csv" data-toc-modified-id="Export-to-.csv-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Export to .csv</a></span></li></ul></div>

## Imports

In [14]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
import pandas as pd
from datetime import datetime, timedelta
import time
import calendar

## Helper functions

In [46]:
def extract_content(table, list_day, date):
    '''
    This method extracts the weather data for a speficfic date and save it in a list
    
    param table:   table which contains the wheater data
    '''
    
    # get columns of the table
    thead = table.find_element(By.XPATH, "./thead")
    # get the column name
    names = thead.find_elements(By.TAG_NAME, "th")
    #get the body of the table
    tbody = table.find_element(By.XPATH, "./tbody")
    # Find all rows in the table
    rows = tbody.find_elements(By.TAG_NAME, "tr")

    # Iterate over each row
    for row in rows:
        
        # get all cells of the row
        cells = row.find_elements(By.TAG_NAME, "td")
        # create dictionary to save the content of the row
        dict_hour = {
             'date' : date   
        }
        # Iterate over each cell in the row
        for i in range(0, len(cells)):
            
            # Save content of the cell in the dictionary
            dict_hour[names[i].text] = cells[i].text
            
        # add row to the list    
        list_day.append(dict_hour)


def create_date_list():
    '''
    This method creates a list of dates 
    
    returns:  list of dates 
    '''
    # create a startdate
    start = datetime(2018, 1, 1)
    # create the enddate
    end = datetime(2018, 12, 31)
    # create timedelta to increase days
    next_day = timedelta(days=1)
    list_dates=[]
    
    while start <= end:
        
        # add date to list
        list_dates.append(start)
        # increase date by one day
        start = start + next_day
    
    return list_dates
        
    
def change_date(date):
    '''
    This method takes a date and submits the date on the website to get new
    wheater data
    
    param date:  the date to be displayed
    '''
    
    print(date)
    
    # find the year dropdown
    input_year = driver.find_element(By.ID, 'yearSelection')
    # select the new year
    dropdown_year = Select(input_year)
    dropdown_year.select_by_visible_text(str(date.year))
    
    # find the month dropdown
    input_month = driver.find_element(By.ID, 'monthSelection')
    # select the new month
    dropdown_month = Select(input_month)
    dropdown_month.select_by_visible_text(calendar.month_name[date.month])
    
    # find the day dropdown
    input_day = driver.find_element(By.ID, 'daySelection')
    # select the new day
    dropdown_day = Select(input_day)
    dropdown_day.select_by_visible_text(str(date.day))
    
    # Submit the new date
    button = driver.find_element(By.ID, 'dateSubmit')
    button.click()
    

## Extract wheater data

In [35]:
# set up the ChromeDriver
# insert the path to your chromedriver executable -> need to be compatible to your chrome installation
# for further information inspect the readme file
service = Service(r'C:\Users\lukas\ChromeDriver\chromedriver.exe') 
driver = webdriver.Chrome(service= service)

# load the webpage
driver.get('https://www.wunderground.com/history/daily/us/il/chicago/KMDW/date/2018-5-17')

# wait some seconds for the content to load
driver.implicitly_wait(3) 

In [47]:
list_day=[]
dates = create_date_list()
for i in range(0, len(dates)):
    change_date(dates[i])
    table = driver.find_element(By.CSS_SELECTOR, "table.mat-table")
    extract_content(table, list_day, dates[i])
    time.sleep(1)

2018-01-01 00:00:00
2018-01-02 00:00:00
2018-01-03 00:00:00
2018-01-04 00:00:00
2018-01-05 00:00:00
2018-01-06 00:00:00
2018-01-07 00:00:00
2018-01-08 00:00:00
2018-01-09 00:00:00
2018-01-10 00:00:00
2018-01-11 00:00:00
2018-01-12 00:00:00
2018-01-13 00:00:00
2018-01-14 00:00:00
2018-01-15 00:00:00
2018-01-16 00:00:00
2018-01-17 00:00:00
2018-01-18 00:00:00
2018-01-19 00:00:00
2018-01-20 00:00:00
2018-01-21 00:00:00
2018-01-22 00:00:00
2018-01-23 00:00:00
2018-01-24 00:00:00
2018-01-25 00:00:00
2018-01-26 00:00:00
2018-01-27 00:00:00
2018-01-28 00:00:00
2018-01-29 00:00:00
2018-01-30 00:00:00
2018-01-31 00:00:00
2018-02-01 00:00:00
2018-02-02 00:00:00
2018-02-03 00:00:00
2018-02-04 00:00:00
2018-02-05 00:00:00
2018-02-06 00:00:00
2018-02-07 00:00:00
2018-02-08 00:00:00
2018-02-09 00:00:00
2018-02-10 00:00:00
2018-02-11 00:00:00
2018-02-12 00:00:00
2018-02-13 00:00:00
2018-02-14 00:00:00
2018-02-15 00:00:00
2018-02-16 00:00:00
2018-02-17 00:00:00
2018-02-18 00:00:00
2018-02-19 00:00:00


In [52]:
print(list_day)

[{'date': datetime.datetime(2018, 1, 1, 0, 0), 'Time': '1:53 AM', 'Temperature': '-1 °F', 'Dew Point': '-12 °F', 'Humidity': '60 %', 'Wind': 'WNW', 'Wind Speed': '8 mph', 'Wind Gust': '0 mph', 'Pressure': '29.96 in', 'Precip.': '0.0 in', 'Condition': 'Fair'}, {'date': datetime.datetime(2018, 1, 1, 0, 0), 'Time': '2:53 AM', 'Temperature': '-2 °F', 'Dew Point': '-13 °F', 'Humidity': '60 %', 'Wind': 'WNW', 'Wind Speed': '10 mph', 'Wind Gust': '0 mph', 'Pressure': '29.98 in', 'Precip.': '0.0 in', 'Condition': 'Fair'}, {'date': datetime.datetime(2018, 1, 1, 0, 0), 'Time': '3:53 AM', 'Temperature': '-3 °F', 'Dew Point': '-14 °F', 'Humidity': '59 %', 'Wind': 'NW', 'Wind Speed': '12 mph', 'Wind Gust': '0 mph', 'Pressure': '29.98 in', 'Precip.': '0.0 in', 'Condition': 'Fair'}, {'date': datetime.datetime(2018, 1, 1, 0, 0), 'Time': '4:53 AM', 'Temperature': '-4 °F', 'Dew Point': '-15 °F', 'Humidity': '59 %', 'Wind': 'NW', 'Wind Speed': '12 mph', 'Wind Gust': '0 mph', 'Pressure': '29.98 in', 'Prec

## Export to .csv

In [53]:
weather_data_raw = pd.DataFrame(list_day)

weather_data_raw.head(5)

Unnamed: 0,date,Time,Temperature,Dew Point,Humidity,Wind,Wind Speed,Wind Gust,Pressure,Precip.,Condition
0,2018-01-01,1:53 AM,-1 °F,-12 °F,60 %,WNW,8 mph,0 mph,29.96 in,0.0 in,Fair
1,2018-01-01,2:53 AM,-2 °F,-13 °F,60 %,WNW,10 mph,0 mph,29.98 in,0.0 in,Fair
2,2018-01-01,3:53 AM,-3 °F,-14 °F,59 %,NW,12 mph,0 mph,29.98 in,0.0 in,Fair
3,2018-01-01,4:53 AM,-4 °F,-15 °F,59 %,NW,12 mph,0 mph,29.98 in,0.0 in,Fair
4,2018-01-01,5:53 AM,-5 °F,-15 °F,62 %,WNW,9 mph,0 mph,29.98 in,0.0 in,Partly Cloudy


In [55]:
weather_data_raw.to_csv('weather_raw.csv', index=False)