In [1]:
from selenium import webdriver  # Automate web browser control
from selenium.webdriver.edge.options import Options  # Options for Edge browser
from selenium.webdriver.common.by import By  # Search for elements on the web page
from selenium.webdriver.support.ui import WebDriverWait  # Wait for a certain condition before the next operation
from selenium.webdriver.support import expected_conditions as EC  # Expected conditions in Selenium
import pandas as pd  
from datetime import datetime  
import time  # Library to create delay during execution

**Code to crawl every data series except the Fed funds rate**

In [2]:
# Inputs: url, start_date, end_date (the scraped data is returned as a DataFrame, not written to a file)

# Step 1: define the crawler as a function of its parameters
def _make_price_record(date_text, close_text):
    """Build one output record from the date and close texts of a table row.

    Returns a dict ``{'time': 'dd/mm/YYYY', 'close': close_text}``, or ``None``
    when ``date_text`` is not written like ``'Jan 01, 2020'`` (so one odd row
    cannot abort the whole crawl).
    """
    try:
        row_date = datetime.strptime(date_text, '%b %d, %Y')
    except ValueError:
        return None
    return {'time': row_date.strftime('%d/%m/%Y'), 'close': close_text}


def get_data(url, start_date_input, end_date_input, pause=5):
    """Scrape the date and close columns of a historical-data table into a DataFrame.

    The XPaths target a page whose table lives under the element with id
    ``Col1-1-HistoricalDataTable-Proxy`` (presumably a Yahoo Finance
    "Historical Data" page -- TODO confirm).

    Parameters
    ----------
    url : str
        Page to open in Edge.
    start_date_input, end_date_input : str
        Text typed into the first and second date inputs of the date-range
        dropdown. NOTE(review): the accepted format depends on the page's
        date inputs -- confirm against the caller.
    pause : int or float, default 5
        Seconds to wait after loading the page, after applying the date range
        and after every scroll. The default reproduces the original fixed
        5-second delays.

    Returns
    -------
    pandas.DataFrame
        One row per table row that has at least 5 cells and a parseable date,
        with columns ``time`` (``dd/mm/YYYY`` string) and ``close`` (the raw
        text of the 5th cell). Empty when no such row is found.

    Raises
    ------
    selenium.common.exceptions.NoSuchElementException
        If one of the expected page elements cannot be located.

    Notes
    -----
    The browser is always closed before returning, including when an error
    occurs, so repeated calls do not leave Edge windows/driver processes behind.
    """
    table_section = '//*[@id="Col1-1-HistoricalDataTable-Proxy"]/section'
    row_xpath = table_section + '/div[2]/table/tbody/tr'

    driver = webdriver.Edge(options=Options())
    try:
        driver.get(url)

        # Step 2: open the date-range dropdown, type both dates, confirm and apply.
        time.sleep(pause)  # give the page time to render before the first click
        driver.find_element(By.XPATH, table_section + '/div[1]/div[1]/div[1]/div/div/div[1]').click()
        driver.find_element(By.XPATH, '//*[@id="dropdown-menu"]/div/div[1]/input').send_keys(start_date_input)
        driver.find_element(By.XPATH, '//*[@id="dropdown-menu"]/div/div[2]/input').send_keys(end_date_input)
        driver.find_element(By.XPATH, '//*[@id="dropdown-menu"]/div/div[3]/button[1]').click()
        driver.find_element(By.XPATH, table_section + '/div[1]/div[1]/button').click()

        # Step 3: scroll to the table footer until no new rows appear
        # (without scrolling, the table only loads up to 100 rows).
        time.sleep(pause)
        previous_length = 0
        while True:
            # Re-locate the footer each round: the table may be re-rendered.
            bottom = driver.find_element(By.XPATH, table_section + '/div[2]/table/tfoot/tr')
            driver.execute_script("arguments[0].scrollIntoView(true);", bottom)
            time.sleep(pause)  # wait for the page to append rows after the scroll

            current_length = len(driver.find_elements(By.XPATH, row_xpath))
            if current_length == previous_length:
                break
            previous_length = current_length

        # Step 4: read the rows. Rows with fewer than 5 cells are skipped so
        # that cells[4] can never be out of range.
        data = []
        for row in driver.find_elements(By.XPATH, row_xpath):
            cells = row.find_elements(By.TAG_NAME, 'td')
            if len(cells) < 5:
                continue
            # The date is in cells[0]; the close value is in cells[4].
            record = _make_price_record(cells[0].text, cells[4].text)
            if record is not None:
                data.append(record)

        return pd.DataFrame(data)
    finally:
        # Release the browser and the driver process even if scraping failed.
        driver.quit()


**Code to crawl the Fed funds rate**

In [8]:
def _parse_event_date(text):
    """Parse a date written like 'Nov 01, 2023'; return None if the text does not match."""
    try:
        return datetime.strptime(text, '%b %d, %Y')
    except ValueError:
        return None


def _dismiss_signup_popup(driver):
    """Click the close icon of the sign-up pop-up.

    Returns True when the icon was found and clicked, False when the pop-up
    is absent or not clickable.
    """
    from selenium.common.exceptions import (
        ElementClickInterceptedException,
        ElementNotInteractableException,
        NoSuchElementException,
        StaleElementReferenceException,
    )

    try:
        driver.find_element(By.XPATH, '//*[@id="PromoteSignUpPopUp"]/div[2]/i').click()
    except (ElementClickInterceptedException, ElementNotInteractableException,
            NoSuchElementException, StaleElementReferenceException):
        return False
    time.sleep(2)  # let the overlay disappear before the next click
    return True


def crawl_fed(link, start, end):
    """Scrape the history table of an economic-calendar event page into a DataFrame.

    The page is opened in Edge and the "show more" control
    (``showMoreHistory168``) is clicked until the oldest loaded row is dated on
    or before ``start``. Then the 1st and 3rd cells of every ``historicEvent_*``
    row are read.

    Parameters
    ----------
    link : str
        Page to open.
    start : datetime.datetime
        History keeps being loaded until a row dated on or before this value
        is present. Rows older than ``start`` that were loaded along the way
        are NOT removed (unchanged from the original behaviour).
    end : datetime.datetime
        Rows dated after this value are dropped.

    Returns
    -------
    pandas.DataFrame
        Columns ``time`` (``YYYY-MM-DD`` string) and ``rate`` (raw text of the
        row's 3rd cell), in page order. Rows whose date text cannot be parsed
        are skipped.

    Warns
    -----
    UserWarning
        If loading stopped before ``start`` was reached (the "show more"
        control is unavailable, or repeated clicks add no rows), so the result
        may not cover the whole requested period.

    Notes
    -----
    The browser is always closed before returning, including when an error
    occurs.
    """
    import warnings
    from selenium.common.exceptions import (
        ElementClickInterceptedException,
        ElementNotInteractableException,
        NoSuchElementException,
        StaleElementReferenceException,
    )

    click_errors = (ElementClickInterceptedException, ElementNotInteractableException,
                    NoSuchElementException, StaleElementReferenceException)
    date_xpath = ' //*[starts-with(@id, "historicEvent_")]/td[1]'
    rate_xpath = ' //*[starts-with(@id, "historicEvent_")]/td[3]'
    max_idle_attempts = 5  # consecutive attempts without new rows before giving up

    driver = webdriver.Edge(options=Options())
    try:
        driver.get(link)
        time.sleep(5)  # give the page time to render

        # Phase 1: load older history until `start` is covered.
        reached_start = False
        idle_attempts = 0
        while idle_attempts < max_idle_attempts:
            date_cells = driver.find_elements(By.XPATH, date_xpath)
            oldest = _parse_event_date(date_cells[-1].text) if date_cells else None
            if oldest is not None and oldest <= start:
                reached_start = True
                break

            progressed = False
            try:
                # Re-locate the control each time so a re-rendered page cannot
                # leave us holding a stale element.
                driver.find_element(By.XPATH, '//*[@id="showMoreHistory168"]').click()
                time.sleep(2)  # wait for the extra rows to be appended
                progressed = len(driver.find_elements(By.XPATH, date_xpath)) > len(date_cells)
            except click_errors:
                # The click usually fails because the sign-up pop-up covers the
                # page; if there is no pop-up to close, nothing more can be loaded.
                if not _dismiss_signup_popup(driver):
                    break

            idle_attempts = 0 if progressed else idle_attempts + 1

        if not reached_start:
            warnings.warn(
                'crawl_fed: stopped loading history before reaching the start date; '
                'the returned data may be incomplete'
            )

        # Phase 2: read the loaded rows, pairing each date cell with its rate cell.
        date_cells = driver.find_elements(By.XPATH, date_xpath)
        rate_cells = driver.find_elements(By.XPATH, rate_xpath)
        save = {
            'time': [],
            'rate': [],
        }
        for date_cell, rate_cell in zip(date_cells, rate_cells):
            curr_day = _parse_event_date(date_cell.text)
            if curr_day is None or curr_day > end:
                continue
            save['time'].append(curr_day.strftime('%Y-%m-%d'))
            save['rate'].append(rate_cell.text)

        return pd.DataFrame(save)
    finally:
        # Release the browser and the driver process even if scraping failed.
        driver.quit()

In [9]:
# Fed funds rate: page to crawl and the date window handed to crawl_fed
link = 'https://www.investing.com/economic-calendar/interest-rate-decision-168/'
start = datetime(year=2010, month=1, day=4)
end = datetime(year=2023, month=11, day=3)
df = crawl_fed(link, start, end)