In [1]:
from selenium import webdriver 
from selenium.webdriver.common.by import By 
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC 
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import StaleElementReferenceException

import pandas as pd
import datetime
from datedelta import datedelta
import time

In [2]:
def addMonths(date, numMonths):
    """Return `date` shifted by `numMonths` months.

    The shift is expressed as a `datedelta(months=...)` offset that is added
    to `date`; a negative `numMonths` produces a negative offset.
    """
    month_offset = datedelta(months=numMonths)
    return date + month_offset

In [15]:
def addDays(date, numDays):
    """Return `date` shifted by `numDays` days.

    The shift is expressed as a `datedelta(days=...)` offset that is added
    to `date`; a negative `numDays` produces a negative offset.
    """
    day_offset = datedelta(days=numDays)
    return date + day_offset

In [16]:
def goToDate(browser, datetime_object):
    """Point the page's date picker at midnight of a given day and run the search.

    Parameters
    ----------
    browser : selenium WebDriver
        Driver with the water-level table page already loaded.
    datetime_object : datetime.datetime
        Day to display; only its year, month and day are used.

    Raises
    ------
    IndexError
        If the search button image cannot be found on the page.
    """
    # Year/month/day are written without zero padding ("2016/9/2 00:00"),
    # exactly as the picker has been fed so far.
    date_text = "{}/{}/{} 00:00".format(
        datetime_object.year, datetime_object.month, datetime_object.day
    )

    # Set the picker field directly through the page's `$` helper
    # (presumably jQuery) rather than typing into it.
    script_change_time = "$('#dateTimePickerMinute').val('" + date_text + "')"
    browser.execute_script(script_change_time)

    # Trigger the search. `find_elements(By.XPATH, ...)` is used because the
    # `find_elements_by_xpath` shortcut no longer exists in Selenium 4.
    search_buttons = browser.find_elements(By.XPATH, '//*[@id="searchNow"]/img')
    search_buttons[0].click()

In [17]:
def _readTableRows(browser):
    """Return the text of every `li` cell, grouped per `ul` row, of the results table."""
    table = browser.find_element(By.ID, "RightContentsBody")
    return [
        [cell.text for cell in row.find_elements(By.TAG_NAME, "li")]
        for row in table.find_elements(By.TAG_NAME, "ul")
    ]


def getData(browser, datetime_object, maxRetries=3):
    """Scrape the currently displayed water-level table into a DataFrame.

    Each `ul` under the element with id "RightContentsBody" becomes one row
    and each of its `li` children one cell. If the page re-renders while it
    is being read (Selenium raises StaleElementReferenceException), the date
    is requested again through `goToDate` and the whole table is re-read
    from scratch, so partially read rows are never kept.

    Parameters
    ----------
    browser : selenium WebDriver
        Driver showing the table page.
    datetime_object : datetime.datetime
        Day being scraped; only used to re-issue the search on a retry.
    maxRetries : int, optional
        How many times to re-issue the search after a stale read before
        giving up (default 3).

    Returns
    -------
    pandas.DataFrame
        One row per `ul`, with columns 'Time', 'WL [El.m]', 'Change [+/-]'.
        Rows are expected to have three cells.

    Raises
    ------
    StaleElementReferenceException
        If the table is still changing after `maxRetries` retries.
    """
    header = ['Time', 'WL [El.m]', 'Change [+/-]']

    retries_done = 0
    while True:
        try:
            rows = _readTableRows(browser)
            break
        except StaleElementReferenceException:
            if retries_done >= maxRetries:
                raise
            retries_done += 1
            # Start over: ask for the same day again, then re-read everything.
            goToDate(browser, datetime_object)

    return pd.DataFrame(rows, columns=header)

In [18]:
# Scrape the water-level table day by day and write one CSV per one-month
# window (windows start on the day of month of the start date).
URL = 'http://121.58.193.173/html/wl/wl_table.html#'
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
options.add_argument("--incognito")

# NOTE(review): `executable_path` is the Selenium 3 style of locating the
# driver and is rejected by recent Selenium 4 releases; switch to a Service
# object when upgrading.
browser = webdriver.Chrome(executable_path='C:/drivers/chromedriver', options=options)

try:
    browser.get(URL)

    # Wait up to 10 seconds for the page logo to become visible
    timeout = 10
    try:
        WebDriverWait(browser, timeout).until(EC.visibility_of_element_located((By.XPATH, "//img[@src='/html/images/logo.jpg']")))
    except TimeoutException:
        print('Timed out waiting for page to load')
        # Nothing below can work without the page, so stop here; the
        # `finally` clause closes the browser.
        raise

    # click fort santiago
    browser.find_elements(By.XPATH, '//*[@id="CenterContentsBody"]/ul[11]/li[1]/a/font')[0].click()

    data_list = []

    datetime_object = datetime.datetime.strptime('2016/09/02 00:00', '%Y/%m/%d %H:%M')
    endtime_object = datetime.datetime.strptime('2018/01/02 00:00', '%Y/%m/%d %H:%M')

    # End (exclusive) of the one-month window currently being collected
    first_day_of_next_month = addMonths(datetime_object, 1)

    while datetime_object < endtime_object:

        # go to date
        goToDate(browser, datetime_object)

        # get data from html
        data = getData(browser, datetime_object)

        # remove the excess first row and reverse the row order; slicing
        # (unlike `data.index[0]`) also tolerates an empty table
        data = data.iloc[1:].iloc[::-1]

        # add to list of df
        data_list.append(data)

        # change date
        datetime_object = addDays(datetime_object, 1)

        # save to csv once a full window has been collected
        if datetime_object == first_day_of_next_month:
            main_data = pd.concat(data_list, ignore_index=True)

            # The file is named after the year/month in which the window
            # started, i.e. one month before the date just reached.
            temp_date = addMonths(datetime_object, -1)
            main_data.to_csv('flood_' + str(temp_date.year) + '-' + str(temp_date.month) + '.csv')

            data_list = []
            first_day_of_next_month = addMonths(datetime_object, 1)

        # wait for 3 seconds
        time.sleep(3)

    # If the end date does not fall on a window boundary, the last partial
    # window has not been written yet; save it under its start month.
    if data_list:
        main_data = pd.concat(data_list, ignore_index=True)
        temp_date = addMonths(first_day_of_next_month, -1)
        main_data.to_csv('flood_' + str(temp_date.year) + '-' + str(temp_date.month) + '.csv')
        data_list = []
finally:
    # Always release the browser and its driver process, on success or error.
    browser.quit()


In [4]:
# Stitch the monthly CSV exports together into a single file.

datasets = []
startdate = datetime.datetime.strptime('2015/01/01 00:00', '%Y/%m/%d %H:%M')

# 36 consecutive monthly files, beginning with the month of `startdate`
for i in range(36):
    monthly_csv = ''.join(['flood_', str(startdate.year), '-', str(startdate.month), '.csv'])
    dataset = pd.read_csv(monthly_csv)
    datasets.append(dataset)

    # advance to the next month's file
    startdate = addMonths(startdate, 1)

# One continuous frame with a fresh 0..n-1 index, written out with that index
main_dataset = pd.concat(datasets, ignore_index=True)
main_dataset.to_csv('flood_pagasa_2015_2016_2017.csv')