In [1]:
# Import packages for scraping
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import WebDriverWait

In [2]:
# Currency pairs to scrape, plus the historical-data page URL for each one
pairs = [
    'EURUSD', 'USDJPY', 'GBPUSD', 'USDCHF', 'USDCAD',
    'AUDUSD', 'NZDUSD', 'EURJPY', 'AUDJPY', 'XAUUSD',
]
url = 'https://www.myfxbook.com/forex-market/currencies'
pair_urls = []
for pair in pairs:
    # Page address pattern: <base url>/<PAIR>-historical-data/
    pair_urls.append(url + '/' + str(pair) + '-historical-data/')

In [3]:
# Lookup table: historical-data URL -> currency pair symbol
fx_dict = {page_url: symbol for page_url, symbol in zip(pair_urls, pairs)}

In [4]:
# Initialize webdriver and launch browser.
# NOTE(review): the chromedriver location below is a machine-specific absolute
# path; it has to be edited before this cell can run on another machine.
path = Service("C:\\Users\\HP\\WebDriver\\chromedriver.exe")
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(service=path, options=options)
wait = WebDriverWait(driver, 5) # explicit-wait helper bound to `driver`, with a 5-second timeout

In [5]:
# Define function to extract data from urls
def get_fx(curr, start_date="010120190000"):
    """Scrape the historical-data table for one currency pair into a DataFrame.

    Looks up the page URL registered for ``curr`` in the module-level
    ``fx_dict``, opens it with the module-level ``driver``, types
    ``start_date`` into the start-date box, applies the filter and reads the
    table's header and cell text.

    Parameters
    ----------
    curr : str
        Currency pair symbol; matched against the values of ``fx_dict``.
    start_date : str, optional
        Keystrokes typed into the start-date input once the cursor has been
        moved to the beginning of the box. Defaults to ``"010120190000"``.

    Returns
    -------
    pandas.DataFrame
        Table cells grouped seven per row under the scraped headers, with the
        'Change (%)' column dropped and the first row discarded.

    Raises
    ------
    ValueError
        If no URL in ``fx_dict`` maps to ``curr``.
    """
    # Only the first URL mapped to this pair is used.
    url = next((key for key, symbol in fx_dict.items() if symbol == curr), None)
    if url is None:
        raise ValueError("no URL registered in fx_dict for currency pair {!r}".format(curr))

    try:
        driver.get(url) # access url
    except TimeoutException:
        # Single retry after a page-load timeout
        driver.refresh()
        driver.get(url)
    time.sleep(4)

    dates = wait.until(ec.presence_of_element_located(
        (By.XPATH, "//div/input[@id='startDate' and @class='form-control input-sm']"))) # get start date elements
    dates.click()
    for _ in range(16): # press left arrow key 16 times to move cursor to the start of text box
        dates.send_keys(Keys.ARROW_LEFT)
    dates.send_keys(start_date) # input start date
    driver.find_element(By.XPATH, '//a[contains(@id, "historicalFilterBtn")]').click() # filter results
    time.sleep(10)

    # Header order: date column, then the non-sortable columns, then the numeric "change" columns
    date = driver.find_element(By.XPATH, "//tr//th[contains(@data-sort-type, 'sortDate')]//span")
    headers = [date.text]
    headers += _element_texts("//tr//th[contains(@data-disable-sort, 'true')]")
    headers += _element_texts("//tr//th[contains(@data-sort-type, 'sortNumber')]//span")

    val = _element_texts("//tbody//tr//td//span") # get text of the trading data cells
    values = [x for x in val if "%" not in x] # remove values containing '%'
    values = [values[x:x+7] for x in range(0, len(values), 7)] # split list into multiple lists of daily trades

    # The browser is deliberately left open here so the same driver can serve the next pair.
    return pd.DataFrame(values, columns=headers).drop('Change (%)', axis=1)[1:] # save as dataframe


def _element_texts(xpath, attempts=3):
    """Return the text of every element matching ``xpath`` on the current page.

    If an element goes stale while being read, the elements are located again
    and the read starts over; the exception is re-raised after ``attempts``
    failed tries.
    """
    for attempt in range(attempts):
        try:
            return [element.text for element in driver.find_elements(By.XPATH, xpath)]
        except StaleElementReferenceException:
            if attempt == attempts - 1:
                raise
            time.sleep(1) # give the page a moment to finish re-rendering before re-locating
    return []

In [6]:
# Get data for all currency pairs
try:
    eurusd = get_fx('EURUSD')
    usdjpy = get_fx('USDJPY')
    gbpusd = get_fx('GBPUSD')
    usdchf = get_fx('USDCHF')
    usdcad = get_fx('USDCAD')
    audusd = get_fx('AUDUSD')
    nzdusd = get_fx('NZDUSD')
    eurjpy = get_fx('EURJPY')
    audjpy = get_fx('AUDJPY')
    xauusd = get_fx('XAUUSD')
finally:
    # quit() ends the whole WebDriver session, whereas close() only closes the
    # current window; the finally clause runs this even when a scrape raises.
    driver.quit() # close browser

In [7]:
# Show all dataframes in memory
%who DataFrame

audjpy	 audusd	 eurjpy	 eurusd	 gbpusd	 nzdusd	 usdcad	 usdchf	 usdjpy	 
xauusd	 


In [8]:
# Preview the first 10 rows of the EURUSD dataframe as a quick sanity check of the scrape
eurusd.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Change (Pips)
1,"Sep 29, 2022 00:00",0.97306,0.98147,0.96355,0.9813,82.4
2,"Sep 28, 2022 00:00",0.95928,0.97508,0.95357,0.97361,143.3
3,"Sep 27, 2022 00:00",0.96089,0.96709,0.95692,0.95932,-15.7
4,"Sep 26, 2022 00:00",0.96699,0.97097,0.95502,0.96082,-61.7
5,"Sep 23, 2022 00:00",0.98315,0.9852,0.96683,0.96887,-142.8
6,"Sep 22, 2022 00:00",0.98357,0.99073,0.98071,0.98338,-1.9
7,"Sep 21, 2022 00:00",0.99676,0.99753,0.98126,0.98368,-130.8
8,"Sep 20, 2022 00:00",1.0021,1.00508,0.99553,0.99698,-51.2
9,"Sep 19, 2022 00:00",1.00113,1.00292,0.99659,1.00249,13.6
10,"Sep 16, 2022 00:00",0.99979,1.00365,0.99451,1.0013,15.1


In [9]:
# Store dataframes as csv files
# The order here must mirror `pairs`: each file name is taken from `pairs` by position.
dfs = [eurusd, usdjpy, gbpusd, usdchf, usdcad, audusd, nzdusd, eurjpy, audjpy, xauusd]
if len(dfs) != len(pairs):
    # zip() would silently truncate and pair frames with the wrong file names
    raise ValueError('expected {} dataframes (one per pair), got {}'.format(len(pairs), len(dfs)))
for pair, df in zip(pairs, dfs):
    df.to_csv('{}.csv'.format(pair), sep=',', index=False, encoding='utf-8')