In [1]:
#Import Libraries
import json
import os
import re
from concurrent.futures import ThreadPoolExecutor
from multiprocessing import Pool

import polars as pl
import requests
from dotenv import load_dotenv,find_dotenv
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options as chromeoptions
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
#Configure Chrome with a fixed 1920x1080 window, then start the browser session
options = chromeoptions()
options.add_argument("--window-size=1920,1080")
#ChromeDriverManager().install() supplies the chromedriver location handed to Service
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=options)

In [3]:
#load the .env file located by find_dotenv(), then read the API key from the environment
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)
#hidden_key is None when the "hidden_api_key" variable is not set
hidden_key = os.environ.get("hidden_api_key")

In [4]:
#create function to extract data from cryptocompare api
def get_data(symbol, api_key, limit=365, currency="USD", timeout=30, session=None):
    """Download daily price history for one ticker from the CryptoCompare API.

    Args:
        symbol: Ticker to fetch, sent as the ``fsym`` query parameter (e.g. "BTC").
        api_key: CryptoCompare API key, sent as the ``api_key`` query parameter.
        limit: Value sent as the ``limit`` query parameter (365 by default).
        currency: Quote currency sent as the ``tsym`` query parameter.
        timeout: Seconds to wait on the server before the request is abandoned.
        session: Optional object with a requests-style ``get`` method (for
            example a ``requests.Session``). The module-level ``requests``
            functions are used when it is omitted.

    Returns:
        A list of dicts, one per entry of the response's ``Data.Data`` array,
        each with a ``"symbol"`` key holding ``symbol`` placed first.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        ValueError: The JSON body itself reports ``"Response": "Error"``.
    """
    http = session if session is not None else requests
    #let the HTTP client build and encode the query string instead of an f-string
    response = http.get(
        "https://min-api.cryptocompare.com/data/v2/histoday",
        params={"fsym": symbol, "tsym": currency, "limit": limit, "api_key": api_key},
        #without a timeout a stalled connection would block the notebook forever
        timeout=timeout,
    )
    response.raise_for_status()
    payload = response.json()
    #failures can be reported inside the JSON body; surface the API's own message
    #rather than an opaque KeyError from the lookup below
    if payload.get("Response") == "Error":
        message = payload.get("Message", "unknown error")
        raise ValueError(f"CryptoCompare request for {symbol} failed: {message}")
    rows = payload["Data"]["Data"]
    #tag every row with its ticker so frames for several tickers can be stacked
    return [{'symbol': symbol, **row} for row in rows]

In [5]:
#inputs for the download: the key used on every request and the tickers to fetch
api_key = hidden_key
symbols = ["BTC", "ETH", "LTC"]

In [6]:
#download every ticker's history and build one DataFrame per ticker
#the calls only wait on the network, so a thread pool runs them concurrently;
#executor.map hands results back in the same order as symbols
with ThreadPoolExecutor(max_workers=4) as executor:
    histories = executor.map(lambda ticker: get_data(ticker, api_key), symbols)
    alls_df = [pl.DataFrame(history) for history in histories]

In [7]:
#stack the per-ticker frames on top of each other into a single frame
df_final = pl.concat(
    alls_df,
    how="vertical",
)

In [8]:
#turn the "time" column from epoch seconds into datetimes
epoch_seconds = pl.col("time")
df_fixed = df_final.with_columns(
    pl.from_epoch(epoch_seconds, time_unit="s")
)

In [9]:
#preview the combined frame after the timestamp conversion
df_fixed

symbol,time,high,low,open,volumefrom,volumeto,close,conversionType,conversionSymbol
str,datetime[μs],f64,f64,f64,f64,f64,f64,str,str
"""BTC""",2023-04-17 00:00:00,30331.32,29265.91,30322.37,34966.91,1.0375e9,29446.34,"""direct""",""""""
"""BTC""",2023-04-18 00:00:00,30485.26,29135.27,29446.34,36469.48,1.0952e9,30395.53,"""direct""",""""""
"""BTC""",2023-04-19 00:00:00,30419.74,28615.79,30395.53,54525.22,1.6005e9,28821.91,"""direct""",""""""
"""BTC""",2023-04-20 00:00:00,29094.98,28007.09,28821.91,41297.76,1.1806e9,28248.11,"""direct""",""""""
"""BTC""",2023-04-21 00:00:00,28365.2,27173.44,28248.11,43368.57,1.2077e9,27261.17,"""direct""",""""""
…,…,…,…,…,…,…,…,…,…
"""LTC""",2024-04-12 00:00:00,99.51,80.31,98.75,922658.84,8.2456e7,86.16,"""direct""",""""""
"""LTC""",2024-04-13 00:00:00,86.6,70.87,86.16,1.1034e6,8.8090e7,77.5,"""direct""",""""""
"""LTC""",2024-04-14 00:00:00,80.44,73.93,77.5,668565.82,5.1936e7,79.82,"""direct""",""""""
"""LTC""",2024-04-15 00:00:00,82.87,75.7,79.82,546475.85,4.3354e7,78.11,"""direct""",""""""


In [10]:
#keep the ticker, timestamp and open/high/low/close columns; this discards
#volumefrom, volumeto, conversionType and conversionSymbol
#select() names the surviving columns explicitly, so re-running this cell is
#harmless, whereas drop() fails once the columns are already gone
df_fixed = df_fixed.select("symbol", "time", "high", "low", "open", "close")

In [11]:
#preview the frame with the volume and conversion columns removed
df_fixed

symbol,time,high,low,open,close
str,datetime[μs],f64,f64,f64,f64
"""BTC""",2023-04-17 00:00:00,30331.32,29265.91,30322.37,29446.34
"""BTC""",2023-04-18 00:00:00,30485.26,29135.27,29446.34,30395.53
"""BTC""",2023-04-19 00:00:00,30419.74,28615.79,30395.53,28821.91
"""BTC""",2023-04-20 00:00:00,29094.98,28007.09,28821.91,28248.11
"""BTC""",2023-04-21 00:00:00,28365.2,27173.44,28248.11,27261.17
…,…,…,…,…,…
"""LTC""",2024-04-12 00:00:00,99.51,80.31,98.75,86.16
"""LTC""",2024-04-13 00:00:00,86.6,70.87,86.16,77.5
"""LTC""",2024-04-14 00:00:00,80.44,73.93,77.5,79.82
"""LTC""",2024-04-15 00:00:00,82.87,75.7,79.82,78.11


In [12]:
#persist the cleaned frame to disk in parquet format
output_path = "collated_crypto.parquet"
df_fixed.write_parquet(output_path)