In [8]:
#Selenium imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

# Rest
from bs4 import BeautifulSoup
import time
import pandas as pd
import datetime
import h5py

# Global settings for the driver
# An Options object only influences a browser session when it is handed to the
# webdriver constructor (options=...).
# NOTE(review): "detach" is understood to keep the Chrome window open after the
# driver process ends - confirm against the ChromeDriver capabilities docs.
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)

In [41]:
def launch_chrome(url, options=None):
    '''Start a Chrome session with a freshly resolved driver and open url
    ---------
    Parameters:
    url = url defined by user
    options = optional selenium Options instance for this session; when
              omitted, the module-level chrome_options is used
    ---------
    Returns:
    the started driver; it is also stored in the global `driver`, which
    yahoo_header() and yahoo_body() read from module scope'''

    global driver
    # Fall back to the notebook-wide settings. They used to be built but never
    # handed to the driver, so the "detach" option had no effect.
    session_options = chrome_options if options is None else options
    # installing driver making sure that new driver is up to date
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=session_options,
    )
    driver.get(url)
    return driver

def yahoo_header():
    '''Collect the wanted column headers from finance.yahoo.com/cryptocurrencies

    Reads the table header cells through the global `driver`, keeps those
    whose text is one of the wanted names (in page order) and puts 'Date'
    in front.
    ---------
    Returns:
    the resulting list, which is also stored in the global `yahoo_cols`'''

    global yahoo_cols
    wanted = ['Name', 'Price (Intraday)', 'Market Cap', 'Circulating Supply']

    # Every <th> of the screener table header
    header_cells = driver.find_elements(
        By.XPATH, '//*[@id="scr-res-table"]/div[1]/table/thead/tr/th'
    )

    # 'Date' is not a page column; it leads the list for the scrape timestamp
    selected = ['Date']
    for cell in header_cells:
        if cell.text in wanted:
            selected.append(cell.text)

    yahoo_cols = selected
    return yahoo_cols

def yahoo_body():
    '''A method that returns the body of the table from finance.yahoo.com/cryptocurrencies

    The page is read through the global `driver`.
    ---------
    Returns:
    list of (date, name, price, market cap, circulating supply) tuples, one
    per table row, every value as text; the same list is also stored in the
    global `cryptocurrencies_list`'''

    global cryptocurrencies_list
    cryptocurrencies_list = []

    rows_xpath = '//*[@id="scr-res-table"]/div[1]/table/tbody/tr'

    # Checking length of table on URL
    row_count = len(driver.find_elements(By.XPATH, rows_xpath))

    # One date for the whole scrape, so rows cannot disagree if the loop runs
    # across midnight. '%m/%d/%y' is the portable spelling of the C '%D'
    # directive, which Python does not guarantee on every platform.
    date = datetime.datetime.now().strftime('%m/%d/%y')

    # XPath positions are 1-based, so the last row is tr[row_count]; stopping
    # at row_count - 1 silently dropped it.
    for row_n in range(1, row_count + 1):
        row_xpath = f'{rows_xpath}[{row_n}]'

        # Finding elements matching to headers using relative XPATH
        name = driver.find_element(By.XPATH, f'{row_xpath}/td[2]')  # Name
        price = driver.find_element(By.XPATH, f'{row_xpath}/td[3]')  # Price (Intraday)
        marketcap = driver.find_element(By.XPATH, f'{row_xpath}/td[6]')  # MarketCap
        circ_supply = driver.find_element(By.XPATH, f'{row_xpath}/td[10]')  # Circulating Supply

        # Creating tuple as preparation for HDF5 file
        cryptocurrencies_list.append(
            (date, name.text, price.text, marketcap.text, circ_supply.text)
        )

    return cryptocurrencies_list

In [42]:
# Open the Yahoo Finance cryptocurrency screener, then read the column
# headers of interest from the loaded page (displayed as the cell output).
launch_chrome('https://finance.yahoo.com/cryptocurrencies/')
yahoo_header()

['Date', 'Name', 'Price (Intraday)', 'Market Cap', 'Circulating Supply']

In [43]:
# Scrape the table rows of the open page into the global cryptocurrencies_list
yahoo_body()

In [44]:
# Inspect the scraped (date, name, price, market cap, circulating supply) rows
cryptocurrencies_list

[('10/10/22', 'Bitcoin USD', '19,039.50', '365.096B', '19.176M'),
 ('10/10/22', 'Ethereum USD', '1,279.51', '157.047B', '122.74M'),
 ('10/10/22', 'Tether USD', '1.0000', '68.423B', '68.422B'),
 ('10/10/22', 'USD Coin USD', '1.0000', '45.994B', '45.995B'),
 ('10/10/22', 'BNB USD', '269.34', '43.454B', '161.337M'),
 ('10/10/22', 'XRP USD', '0.4969', '24.827B', '49.964B'),
 ('10/10/22', 'Binance USD USD', '1.0002', '21.632B', '21.628B'),
 ('10/10/22', 'Cardano USD', '0.3944', '13.517B', '34.271B'),
 ('10/10/22', 'Solana USD', '31.60', '11.294B', '357.457M'),
 ('10/10/22', 'Dogecoin USD', '0.0587', '7.789B', '132.671B'),
 ('10/10/22', 'Polygon USD', '0.7993', '6.981B', '8.734B'),
 ('10/10/22', 'Polkadot USD', '6.19', '6.969B', '1.125B'),
 ('10/10/22', 'Dai USD', '0.9994', '6.734B', '6.738B'),
 ('10/10/22', 'Wrapped TRON USD', '0.0621', '6.309B', '101.676B'),
 ('10/10/22', 'TRON USD', '0.0619', '5.713B', '92.328B'),
 ('10/10/22', 'HEX USD', '0.0326', '5.659B', '173.411B'),
 ('10/10/22', 'Sh

In [46]:
# Persist the scraped rows as an HDF5 dataset under the 'crypto_prices' group.
# A list of str tuples converts to a NumPy fixed-width unicode ('<U') array by
# default, which h5py refuses to store, so the dataset is created with an
# explicit variable-length UTF-8 string dtype.
# NOTE(review): mode 'w' recreates the file on every run, so earlier scrapes
# are not kept - confirm that is intended.
with h5py.File('/Users/emre/Documents/GitHub/crypto_scraping_project/hdf5_data.h5', 'w') as hdf:
    yahoo_group = hdf.create_group('crypto_prices')
    yahoo_prices = yahoo_group.create_dataset(
        'yahoo_prices',
        data=cryptocurrencies_list,
        dtype=h5py.string_dtype(encoding='utf-8'),
    )
    # Record who produced the dataset as an HDF5 attribute
    yahoo_prices.attrs['USER'] = 'Emre Erturk'
