In [8]:
#Selenium imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

# Rest
from bs4 import BeautifulSoup
import time
import pandas as pd
import datetime
import h5py

# Global settings for the driver
# Options object meant to be shared by every Chrome session started from this notebook.
chrome_options = Options()
# "detach" is a ChromeDriver experimental option; presumably set so the browser
# window stays open after the launching code finishes — confirm against the ChromeDriver docs.
chrome_options.add_experimental_option("detach", True)

In [28]:
def launch_chrome(url):
    '''Install the latest Chrome driver, start a Chrome session and open ``url``.

    The module-level ``chrome_options`` are applied to the new session.
    ---------
    Parameters:
    url = url defined by user

    Returns:
    The Chrome WebDriver instance. It is also stored in the global ``driver``
    so that the scraper functions in this notebook can use it.'''

    global driver
    # installing driver making sure that new driver is up to date
    service = Service(ChromeDriverManager().install())
    # Apply the module-level options (e.g. "detach") to the session
    driver = webdriver.Chrome(service=service, options=chrome_options)
    driver.get(url)
    return driver

def yahoo_header():
    '''Returns headers from finance.yahoo.com/cryptocurrencies

    Reads the header cells of the results table through the global ``driver``
    and keeps the ones whose text is a target column, in page order, with
    'Date' inserted at the front. The list is also stored in the global
    ``yahoo_cols``.'''

    global yahoo_cols
    target_columns = ['Name', 'Price (Intraday)', 'Market Cap', 'Circulating Supply']
    # Getting all header cells on URL using XPATH
    header_cells = driver.find_elements(By.XPATH, '//*[@id="scr-res-table"]/div[1]/table/thead/tr/th')
    # Read each cell's text exactly once, so the filter and the stored value
    # come from the same read of the element
    header_texts = [cell.text for cell in header_cells]
    yahoo_cols = ['Date'] + [text for text in header_texts if text in target_columns]
    # Return final list
    return yahoo_cols

def yahoo_body(hdf_path='/Users/emre/Documents/GitHub/crypto_scraping_project/hdf5_data.h5'):
    '''A method that returns the body of the table from finance.yahoo.com/cryptocurrencies

    Previously stored records are loaded from ``hdf_path`` (when that file can
    be read) and the rows currently shown on the page are appended to them.
    The page is read through the global ``driver``.
    ---------
    Parameters:
    hdf_path = HDF5 file holding earlier results; defaults to the project file

    Returns:
    A list with the stored records followed by one
    (date, name, price, market cap, circulating supply) tuple of strings per
    table row. The list is also stored in the global ``cryptocurrencies_list``.'''

    global cryptocurrencies_list
    try:
        stored = pd.read_hdf(hdf_path)
        cryptocurrencies_list = list(stored.to_records(index=False))
    except Exception:
        # Best effort: a missing or unreadable history file just means starting
        # from an empty list. `Exception` (rather than a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        cryptocurrencies_list = []

    row_xpath = '//*[@id="scr-res-table"]/div[1]/table/tbody/tr'

    # Checking length of table on URL
    n_rows = len(driver.find_elements(By.XPATH, row_xpath))

    # One date stamp for the whole scrape, so every row of a run carries the same date
    date = datetime.datetime.now().strftime('%D')

    # XPath positions are 1-based, so the last row is index n_rows (inclusive)
    for row_n in range(1, n_rows + 1):
        # td[2] = Name, td[3] = Price (Intraday), td[6] = Market Cap, td[10] = Circulating Supply
        name, price, marketcap, circ_supply = (
            driver.find_element(By.XPATH, f'{row_xpath}[{row_n}]/td[{col_n}]').text
            for col_n in (2, 3, 6, 10)
        )

        # Creating tuple as preparation for HDF5 file
        cryptocurrencies_list.append((date, name, price, marketcap, circ_supply))

    return cryptocurrencies_list

def save_to_hdf(filename='hdf5', groupname=None, data=None):
    '''A method saving the results from any scraper to hdf5 file

    Writes ``data`` to ``<filename>.h5`` as a dataset named ``filename`` inside
    the group ``groupname``. The file is opened in 'w' mode, so an existing
    file with the same name is overwritten. The first axis of the dataset is
    left unlimited so rows can be appended later; the remaining axes are fixed
    to the shape of ``data``.
    ---------
    Parameters:
    filename = file name without the '.h5' extension; also used as dataset name
    groupname = name of the HDF5 group to create (required)
    data = array-like data to store (required)

    Raises:
    ValueError if ``groupname`` or ``data`` is not given.'''

    # groupname/data carry defaults only so that filename can keep its default;
    # both are mandatory in practice
    if groupname is None or data is None:
        raise ValueError('save_to_hdf requires both groupname and data')

    import numpy as np  # local import: numpy is not imported at the top of this notebook

    # Unlimited first axis (rows); every other axis keeps the size found in data.
    # Scalar data has no axes, so no maxshape is requested for it.
    shape = np.shape(data)
    maxshape = (None,) + tuple(shape[1:]) if shape else None

    with h5py.File(f'{filename}.h5', 'w') as hdf:
        group = hdf.create_group(groupname)
        dataset = group.create_dataset(
            filename,
            data=data,
            maxshape=maxshape,
            )
        dataset.attrs['USER'] = 'Emre Erturk'

    

In [9]:
# Open the Yahoo Finance cryptocurrencies page, then read the table headers from it.
launch_chrome('https://finance.yahoo.com/cryptocurrencies/')
yahoo_header()

['Date', 'Name', 'Price (Intraday)', 'Market Cap', 'Circulating Supply']

In [30]:
# Scrape the table rows from the page held by the global driver.
yahoo_body()

[('10/11/22', 'Bitcoin USD', '19,048.12', '365.278B', '19.177M'),
 ('10/11/22', 'Ethereum USD', '1,281.19', '157.272B', '122.755M'),
 ('10/11/22', 'Tether USD', '0.999943', '68.418B', '68.422B'),
 ('10/11/22', 'USD Coin USD', '1.0000', '46.032B', '46.032B'),
 ('10/11/22', 'BNB USD', '270.97', '43.718B', '161.337M'),
 ('10/11/22', 'XRP USD', '0.490298', '24.497B', '49.964B'),
 ('10/11/22', 'Binance USD USD', '1.0002', '21.632B', '21.628B'),
 ('10/11/22', 'Cardano USD', '0.392674', '13.46B', '34.279B'),
 ('10/11/22', 'Solana USD', '31.34', '11.204B', '357.459M'),
 ('10/11/22', 'Dogecoin USD', '0.060024', '7.963B', '132.671B'),
 ('10/11/22', 'Polkadot USD', '6.1956', '6.973B', '1.126B'),
 ('10/11/22', 'Polygon USD', '0.793203', '6.928B', '8.734B'),
 ('10/11/22', 'Dai USD', '0.999732', '6.673B', '6.675B'),
 ('10/11/22', 'Wrapped TRON USD', '0.061556', '6.259B', '101.676B'),
 ('10/11/22', 'TRON USD', '0.061745', '5.701B', '92.325B'),
 ('10/11/22', 'Shiba Inu USD', '0.000010', '5.637B', '549

In [33]:
# Context Manager to create HDF5 file for yahoo
# NOTE: mode 'w' overwrites any existing file at this path.

with h5py.File('/Users/emre/Documents/GitHub/crypto_scraping_project/hdf5_data3.h5', 'w') as hdf:
    yahoo_group = hdf.create_group('crypto_prices')
    # Rows may grow without limit (None); the second axis is the number of
    # fields per record, not the number of records.
    n_fields = len(cryptocurrencies_list[0])
    yahoo_prices = yahoo_group.create_dataset(
        'yahoo_prices',
        data=cryptocurrencies_list,
        maxshape=(None, n_fields),
    )
    yahoo_prices.attrs['USER'] = 'Emre Erturk'


In [20]:
with h5py.File('/Users/emre/Documents/GitHub/crypto_scraping_project/hdf5_data.h5', 'a') as hdf:
    # TODO: the body of this cell is missing from the saved notebook.
    # `pass` keeps the cell syntactically valid until the logic is restored.
    pass


'10/11/22'