In [1]:
#Import Libraries
import json
import os
import re
from multiprocessing import Pool
from multiprocessing.pool import ThreadPool

import polars as pl
import requests
from dotenv import load_dotenv, find_dotenv
from selenium import webdriver
from selenium.webdriver.chrome.options import Options as chromeoptions
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Build the Chrome options (fixed 1920x1080 window) and start a Chrome driver.
# The chromedriver path handed to Service comes from ChromeDriverManager().install().
# NOTE(review): no driver.quit() is visible in this section -- confirm the
# browser is closed somewhere later in the notebook.
options = chromeoptions()
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(
    options=options,
    service=Service(ChromeDriverManager().install()),
)

In [3]:
# Load the .env file located by find_dotenv(), then read the API key from the
# process environment. hidden_key is None when "hidden_api_key" is not set.
load_dotenv(find_dotenv())
hidden_key = os.environ.get("hidden_api_key")

In [4]:
#create function to extract data from cryptocompare api
def get_data(symbol, api_key, limit=365, timeout=30):
    """Fetch daily price history for one symbol from the CryptoCompare API.

    Calls the ``histoday`` endpoint with ``tsym=USD`` and returns the entries
    of the response's ``Data.Data`` list, each tagged with the requested symbol.

    Args:
        symbol: Ticker to query, sent as the ``fsym`` parameter (e.g. "BTC").
        api_key: API key, sent as the ``api_key`` parameter. When it is None,
            requests leaves the parameter out of the query string.
        limit: Value of the ``limit`` query parameter. Defaults to 365.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts, one per entry in ``Data.Data``, each with a
        ``"symbol"`` key added in front of the entry's own keys.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        RuntimeError: If the payload reports an error or does not contain
            the expected ``Data.Data`` list.
    """
    url = "https://min-api.cryptocompare.com/data/v2/histoday"
    # Let requests build and URL-encode the query string.
    query = {"fsym": symbol, "tsym": "USD", "limit": limit, "api_key": api_key}
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, params=query, timeout=timeout)
    response.raise_for_status()
    payload = response.json()
    # The API can report failures inside the JSON body; surface its message
    # rather than failing later with an opaque KeyError.
    if payload.get("Response") == "Error":
        raise RuntimeError(
            f"CryptoCompare request for {symbol!r} failed: {payload.get('Message')}"
        )
    try:
        entries = payload["Data"]["Data"]
    except (KeyError, TypeError) as exc:
        raise RuntimeError(
            f"Unexpected CryptoCompare payload for {symbol!r}: missing Data.Data"
        ) from exc
    # "symbol" goes first; keys from the entry itself take precedence on a clash.
    return [{"symbol": symbol, **entry} for entry in entries]

In [5]:
# Inputs for the download step: the tickers to fetch and the API key to send.
symbols = [
    "BTC",
    "ETH",
    "LTC",
]
api_key = hidden_key

In [6]:
# Download every ticker's history and keep one DataFrame per symbol.
# The calls are network-bound, so they are fanned out over a thread pool;
# threads also avoid having to pickle the notebook-defined get_data for
# worker processes. starmap returns results in the same order as `symbols`.
with ThreadPool(4) as pool:
    histories = pool.starmap(get_data, [(symbol, api_key) for symbol in symbols])
alls_df = [pl.DataFrame(history) for history in histories]

In [7]:
# Stack the per-ticker frames on top of each other into a single frame.
df_final = pl.concat(alls_df, how="vertical")

In [8]:
# Reinterpret the "time" column, read as seconds since the Unix epoch, as a datetime.
df_fixed = df_final.with_columns(
    pl.from_epoch(pl.col("time"), time_unit="s"),
)

In [9]:
# Display the combined frame after the timestamp conversion.
df_fixed

symbol,time,high,low,open,volumefrom,volumeto,close,conversionType,conversionSymbol
str,datetime[μs],f64,f64,f64,f64,f64,f64,str,str
"""BTC""",2023-03-21 00:00:00,28494.87,27416.8,27807.05,45696.4,1.2822e9,28188.11,"""direct""",""""""
"""BTC""",2023-03-22 00:00:00,28902.98,26676.69,28188.11,81267.64,2.2779e9,27316.18,"""direct""",""""""
"""BTC""",2023-03-23 00:00:00,28807.82,27190.13,27316.18,63194.55,1.7692e9,28345.7,"""direct""",""""""
"""BTC""",2023-03-24 00:00:00,28421.94,27054.76,28345.7,52231.16,1.4570e9,27493.01,"""direct""",""""""
"""BTC""",2023-03-25 00:00:00,27819.55,27186.95,27493.01,22474.29,6.1810e8,27492.83,"""direct""",""""""
…,…,…,…,…,…,…,…,…,…
"""LTC""",2024-03-16 00:00:00,90.71,82.26,89.76,427439.26,3.7080e7,84.07,"""direct""",""""""
"""LTC""",2024-03-17 00:00:00,86.74,80.67,84.07,306301.37,2.6016e7,85.85,"""direct""",""""""
"""LTC""",2024-03-18 00:00:00,88.06,80.77,85.85,444501.1,3.7344e7,86.78,"""direct""",""""""
"""LTC""",2024-03-19 00:00:00,87.8,77.1,86.78,960656.03,7.7679e7,78.46,"""direct""",""""""


In [10]:
#drop unnecessary columns
# Keep every column that is not in the unneeded set. Unlike an unconditional
# drop, this is safe to re-run after df_fixed has already been trimmed.
unneeded_columns = {"volumefrom", "volumeto", "conversionType", "conversionSymbol"}
df_fixed = df_fixed.select(
    [name for name in df_fixed.columns if name not in unneeded_columns]
)

In [11]:
# Display the frame after the unneeded columns were removed.
df_fixed

symbol,time,high,low,open,close
str,datetime[μs],f64,f64,f64,f64
"""BTC""",2023-03-21 00:00:00,28494.87,27416.8,27807.05,28188.11
"""BTC""",2023-03-22 00:00:00,28902.98,26676.69,28188.11,27316.18
"""BTC""",2023-03-23 00:00:00,28807.82,27190.13,27316.18,28345.7
"""BTC""",2023-03-24 00:00:00,28421.94,27054.76,28345.7,27493.01
"""BTC""",2023-03-25 00:00:00,27819.55,27186.95,27493.01,27492.83
…,…,…,…,…,…
"""LTC""",2024-03-16 00:00:00,90.71,82.26,89.76,84.07
"""LTC""",2024-03-17 00:00:00,86.74,80.67,84.07,85.85
"""LTC""",2024-03-18 00:00:00,88.06,80.77,85.85,86.78
"""LTC""",2024-03-19 00:00:00,87.8,77.1,86.78,78.46


In [14]:
# Persist the trimmed frame as a Parquet file (a more efficient storage format)
# in the current working directory.
df_fixed.write_parquet(file="collated_crypto.parquet")