In [47]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options

import pandas as pd

import time
from datetime import date

# Scrape a single day's snapshot of every coin listed on the CoinCodex
# historical-data page and save it through scrape_page().
opts = Options()
#opts.add_argument("--headless")
driver = webdriver.Firefox(options=opts)
try:
    driver.maximize_window()

    driver.get("https://coincodex.com/historical-data/crypto/")

    # NOTE(review): load_date is declared as (year, month, day) but receives
    # what look like (day, month, year) values here, so it returns
    # '1-10-2024' -- confirm which order the site's date field expects.
    # The result is bound to `scrape_date` rather than `date` so the `date`
    # class imported from datetime is not overwritten.
    scrape_date = load_date(1, 10, 2024)

    show_all_coins()

    # Add gather metadata function here
    # The first 'coin' element is skipped; scrape_page() applies the same
    # offset and reads the module-level number_of_coins set here.
    elements = driver.find_elements(By.CLASS_NAME, 'coin')[1:]
    number_of_coins = len(elements)
    print(number_of_coins)

    scrape_page(scrape_date)
finally:
    # quit() ends the whole WebDriver session (close() only closes the
    # current window), and running it in `finally` releases the browser even
    # when one of the steps above raises.
    driver.quit()

9742


In [43]:
def show_all_coins():
    '''
    Changes the filter on the page from the top 100 coins to all coins

    Uses the module-level `driver`. Clicks the filter control, then the entry
    inside it, pausing for a fixed time after each click.
    '''
    # (XPath to click, seconds to sleep afterwards), performed in order
    click_sequence = (
        ('/html/body/app-root/app-historical-data/div/div/div[3]/section[2]/div/div/div/div[2]/app-select-item', 15),
        ('/html/body/app-root/app-historical-data/div/div/div[3]/section[2]/div/div/div/div[2]/app-select-item/div/div[2]/div[3]', 60),
    )

    for xpath, pause_seconds in click_sequence:
        target = driver.find_element(By.XPATH, xpath)
        target.click()
        time.sleep(pause_seconds)

In [44]:
def load_date(year, month, day):
    '''
    Types a date into the page's date field and submits the form to load it

    Uses the module-level `driver`. The three values are joined with '-' in
    argument order; month and day get a leading '0' when they are a single
    character long, year is used as given.

    Args:
        year (int): year
        month (int): month
        day (int): day

    Returns:
        str: the text that was typed into the date field
    '''
    # Only the month and day components are padded
    padded = [text if len(text) != 1 else '0' + text for text in (str(month), str(day))]
    desired_date = '-'.join([str(year)] + padded)

    date_field = driver.find_element(By.ID, 'select-date')
    date_field.clear()
    date_field.send_keys(desired_date)

    # Fixed pauses before and after submitting the form
    time.sleep(1)
    submit_xpath = '/html/body/app-root/app-historical-data/div/div/div[2]/form/div[2]/button'
    driver.find_element(By.XPATH, submit_xpath).click()
    time.sleep(3)

    return desired_date

In [45]:
def scrape_page(date):
    '''
    Scrapes all the coins from the current page and saves them into a CSV file named for the date.
    The CSV file does not have an index or header
    The columns of the CSV file, in order:
        ticker: ticker of the coin
        full_name: full name of the coin
        price (USD): price of 1 coin
        market_cap (USD): total value of all coins
        day_volume (USD): total volume within the past 24 hours
        c_supply: number of coins in circulation

    Relies on two module-level names set by the caller: `driver` (the browser
    showing the page to scrape) and `number_of_coins` (how many rows to read).
    Values are stored as the text shown on the page, without conversion.

    Args:
        date (str): Date used verbatim as the file name (intended format YYYY-MM-DD)

    Output:
        CSV file saved in directory, day_csvs, with name <date>.csv
    '''
    # Output column -> CSS class of the cell holding that value.
    # Insertion order fixes the column order of the header-less CSV.
    field_classes = {
        'ticker': 'ticker',
        'full_name': 'full-name',
        'price': 'price',
        'market_cap': 'market-cap',
        'day_volume': 'volume',
        'c_supply': 'circulating-supply',
    }
    columns = {name: [] for name in field_classes}

    for i in range(number_of_coins):
        # The element list is looked up again on every iteration instead of
        # being cached; the original author's note says this keeps the
        # reference fresh in the DOM while the page is scrolled.
        # Index i + 1 skips the first 'coin' element, matching the [1:] slice
        # callers use when counting rows.
        element = driver.find_elements(By.CLASS_NAME, 'coin')[i + 1]
        driver.execute_script("arguments[0].scrollIntoView();", element)

        for name, css_class in field_classes.items():
            columns[name].append(element.find_element(By.CLASS_NAME, css_class).text)

    # Save 1 day of data to CSV
    df = pd.DataFrame(columns)
    # Raw f-string: the backslashes are Windows path separators, not escape
    # sequences (the non-raw form triggers an invalid-escape SyntaxWarning).
    path = rf'E:\Projects\pinksheetcrypto\day_csvs\{date}.csv'
    df.to_csv(path, header=False, index=False)

  path = f'E:\Projects\pinksheetcrypto\day_csvs\{date}.csv'
  path = f'E:\Projects\pinksheetcrypto\day_csvs\{date}.csv'


In [46]:
# Scrape one snapshot per value of i in 1..30, each in a fresh browser session.
for i in range(1, 31):
    print(i)
    opts = Options()
    #opts.add_argument("--headless")
    driver = webdriver.Firefox(options=opts)
    try:
        driver.maximize_window()

        driver.get("https://coincodex.com/historical-data/crypto/")

        # NOTE(review): load_date is declared as (year, month, day) but
        # receives what look like (day, month, year) values here, giving
        # strings such as '1-09-2024' -- confirm which order the site's date
        # field expects.
        # The result is bound to `scrape_date` rather than `date` so the
        # `date` class imported from datetime is not overwritten.
        scrape_date = load_date(i, 9, 2024)

        show_all_coins()

        # Add gather metadata function here
        # The first 'coin' element is skipped; scrape_page() applies the same
        # offset and reads the module-level number_of_coins set here.
        elements = driver.find_elements(By.CLASS_NAME, 'coin')[1:]
        number_of_coins = len(elements)
        print(number_of_coins)

        scrape_page(scrape_date)
    finally:
        # quit() ends the whole WebDriver session (close() only closes the
        # current window); `finally` guarantees a failed day does not leave
        # a browser behind before the next iteration starts another one.
        driver.quit()

1
9724
2
9721
3
9721
4
9724
5
9725
6
9725
7
9724
8
9723
9
9723
10
9725
11
9724
12
9724
13
9726
14
9730
15
9731
16
9731
17
9732
18
9734
19
9736
20
9736
21
9736
22
9736
23
9736
24
9737
25
9737
26
9739
27
9739
28
9739
29
9740
30
9740


In [None]:
def scrape_by_date(year, month, day):
    '''
    Opens a browser, loads the given date on the historical-data page, and scrapes every coin into a CSV

    Runs the same steps as the notebook's top-level scrape cells, for the one
    date passed in: load_date(), show_all_coins(), count the rows, scrape_page().

    Args:
        year (int): year
        month (int): month
        day (int): day

    Output:
        CSV file written by scrape_page() for the loaded date
    '''
    # load_date(), show_all_coins() and scrape_page() read these module-level
    # names, so they must be rebound globally rather than as locals.
    global driver, number_of_coins

    opts = Options()
    #opts.add_argument("--headless")
    driver = webdriver.Firefox(options=opts)
    try:
        driver.maximize_window()

        driver.get("https://coincodex.com/historical-data/crypto/")

        # NOTE(review): the values are passed day-first, the same positional
        # order the existing calls use (e.g. load_date(i, 9, 2024)), even
        # though load_date names its parameters (year, month, day) -- confirm
        # which order the site's date field expects.
        # Bound to `loaded_date` so the imported datetime `date` class is not
        # shadowed inside this function.
        loaded_date = load_date(day, month, year)

        show_all_coins()

        # Add gather metadata function here
        # The first 'coin' element is skipped; scrape_page() applies the same offset.
        elements = driver.find_elements(By.CLASS_NAME, 'coin')[1:]
        number_of_coins = len(elements)
        print(number_of_coins)

        scrape_page(loaded_date)
    finally:
        # quit() ends the whole WebDriver session (close() only closes the
        # current window) and runs even if a step above raises.
        driver.quit()