In [1]:
from pathlib import Path
from typing import Optional, Any, Union, Dict

import pandas as pd

from cryptodatapy.extract.data_vendors.datavendor import DataVendor
from cryptodatapy.extract.datarequest import DataRequest
from cryptodatapy.transform.convertparams import ConvertParams
from cryptodatapy.transform.wrangle import WrangleData, WrangleInfo
from cryptodatapy.util.datacredentials import DataCredentials


# credentials object; supplies the default Glassnode base url and api key
data_cred = DataCredentials()

# url path suffixes for the info endpoints, appended to the base url
urls = dict(assets_info='assets', fields_info='endpoints')


In [2]:

class Glassnode(DataVendor):
    """
    Retrieves data from Glassnode API.

    Info requests (assets, fields) and data requests are sent with DataRequest().get_req,
    and responses are wrangled with WrangleInfo / WrangleData.
    """

    def __init__(
            self,
            categories: Optional[Union[str, list[str]]] = None,
            exchanges: Optional[list[str]] = None,
            indexes: Optional[list[str]] = None,
            assets: Optional[list[str]] = None,
            markets: Optional[list[str]] = None,
            market_types: Optional[list[str]] = None,
            fields: Optional[list[str]] = None,
            frequencies: Optional[list[str]] = None,
            base_url: str = data_cred.glassnode_base_url,
            api_key: str = data_cred.glassnode_api_key,
            max_obs_per_call: Optional[int] = None,
            rate_limit: Optional[Any] = None
    ):
        """
        Constructor

        Parameters
        ----------
        categories: list or str, {'crypto', 'fx', 'rates', 'eqty', 'commodities', 'credit', 'macro', 'alt'}
            List or string of available categories, e.g. ['crypto', 'fx', 'alt'].
            If not provided, set to ['crypto'].
        exchanges: list, optional, default None
            List of available exchanges, e.g. ['Binance', 'Coinbase', 'Kraken', 'FTX', ...].
        indexes: list, optional, default None
            List of available indexes, e.g. ['mvda', 'bvin'].
        assets: list, optional, default None
            List of available assets, e.g. ['btc', 'eth']. If not provided, retrieved with get_assets_info.
        markets: list, optional, default None
            List of available markets as asset/quote currency pairs, e.g. ['btcusdt', 'ethbtc'].
        market_types: list
            List of available market types, e.g. ['spot', 'perpetual_future', 'future', 'option'].
            If not provided, set to those four market types.
        fields: list, optional, default None
            List of available fields, e.g. ['open', 'high', 'low', 'close', 'volume'].
            If not provided, retrieved with get_fields_info.
        frequencies: list
            List of available frequencies. If not provided, set to ['10min', '15min', '30min', '1h', '2h',
            '4h', '8h', 'd', 'w', 'm', 'q', 'y'].
        base_url: str
            Base url used for GET requests. If not provided, default is set to base_url stored in DataCredentials.
        api_key: str
            Api key, e.g. 'dcf13983adf7dfa79a0dfa35adf'. If not provided, default is set to
            api_key stored in DataCredentials.
        max_obs_per_call: int, optional, default None
            Maximum number of observations returned per API call.
        rate_limit: Any, optional, Default None
            Number of API calls made and left, by time frequency.

        Raises
        ------
        TypeError
            If api_key is None.
        """
        DataVendor.__init__(self, categories, exchanges, indexes, assets, markets, market_types, fields,
                            frequencies, base_url, api_key, max_obs_per_call, rate_limit)

        # fill in Glassnode defaults for anything not supplied by the caller
        if frequencies is None:
            self.frequencies = ['10min', '15min', '30min', '1h', '2h', '4h', '8h', 'd', 'w', 'm', 'q', 'y']
        if market_types is None:
            self.market_types = ['spot', 'perpetual_future', 'future', 'option']
        if categories is None:
            self.categories = ['crypto']
        # the api key is sent with every request below, so fail before making any call without one
        if api_key is None:
            raise TypeError("Set your api key. We recommend setting your api key in environment variables as "
                            "'GLASSNODE_API_KEY', which will allow DataCredentials to automatically load it.")
        # these two defaults each trigger an API request
        if assets is None:
            self.assets = self.get_assets_info(as_list=True)
        if fields is None:
            self.fields = self.get_fields_info(data_type=None, as_list=True)

    def get_exchanges_info(self) -> None:
        """
        Gets exchanges info. Not implemented for this vendor; always returns None.
        """
        return None

    def get_indexes_info(self) -> None:
        """
        Gets indexes info. Not implemented for this vendor; always returns None.
        """
        return None

    def req_assets(self) -> Dict[str, Any]:
        """
        Get request for assets info.

        Returns
        -------
        dict: dictionary
            Data response with asset info in json format.
        """
        return DataRequest().get_req(url=self.base_url + urls['assets_info'], params={'api_key': self.api_key})

    def get_assets_info(self, as_list: bool = False) -> Union[list[str], pd.DataFrame]:
        """
        Get assets info.

        Parameters
        ----------
        as_list: bool, default False
            Returns assets info as list.

        Returns
        -------
        assets: list or pd.DataFrame
            List or dataframe with info on available assets.
        """
        # request the assets info, then wrangle the raw response
        data_resp = self.req_assets()
        return WrangleInfo(data_resp).gn_assets_info(as_list=as_list)

    def get_markets_info(self) -> None:
        """
        Get markets info. Not implemented for this vendor; always returns None.
        """
        return None

    def req_fields(self) -> Dict[str, Any]:
        """
        Get request for fields info.

        Returns
        -------
        dict: dictionary
            Data response with fields info in json format.
        """
        # the fields info url is built from the base url with 'v1' swapped for 'v2'
        url = self.base_url.replace('v1', 'v2') + urls['fields_info']
        return DataRequest().get_req(url=url, params={'api_key': self.api_key})

    def get_fields_info(self, data_type: Optional[str] = None, as_list: bool = False) -> Union[list[str], pd.DataFrame]:
        """
        Get fields info.

        Parameters
        ----------
        data_type: str, {'market', 'on-chain', 'off-chain'}, default None
            Type of data.
        as_list: bool, default False
            Returns available fields info as list.

        Returns
        -------
        fields: list or pd.DataFrame
            List or dataframe with info on available fields.
        """
        # request the fields info, then wrangle the raw response
        data_resp = self.req_fields()
        return WrangleInfo(data_resp).gn_fields_info(data_type=data_type, as_list=as_list)

    def get_rate_limit_info(self) -> None:
        """
        Get rate limit info. Not implemented for this vendor; always returns None.
        """
        return None

    def req_data(self, data_req: DataRequest, ticker: str, field: str) -> Dict[str, Any]:
        """
        Submits data request to API.

        Parameters
        ----------
        data_req: DataRequest
            Data request parameters in CryptoDataPy format.
        ticker: str
            Requested ticker symbol.
        field: str
            Requested field, appended to the base url to form the request url.

        Returns
        -------
        data_resp: dict
            Data response in json format.
        """
        # convert data request parameters to Glassnode format
        gn_data_req = ConvertParams(data_req).to_glassnode()

        # set url, params
        url = self.base_url + field
        params = {
            'api_key': self.api_key,
            'a': ticker,
            's': gn_data_req['start_date'],
            'u': gn_data_req['end_date'],
            'i': gn_data_req['freq'],
            'c': gn_data_req['quote_ccy']
        }
        # progress output: one line per request
        print(ticker)

        return DataRequest().get_req(url=url, params=params)

    @staticmethod
    def wrangle_data_resp(data_req: DataRequest, data_resp: Dict[str, Any], field: str) -> Optional[pd.DataFrame]:
        """
        Wrangle data response into tidy data format.

        Parameters
        ----------
        data_req: DataRequest
            Data request parameters in CryptoDataPy format.
        data_resp: dictionary
            Data response in JSON format.
        field: str
            Requested field.

        Returns
        -------
        df: pd.DataFrame or None
            Wrangled dataframe with DatetimeIndex and selected field values (cols), in tidy format.
            None when the data response is None or empty.
        """
        # nothing to wrangle for a missing or empty response
        if data_resp is None or len(data_resp) == 0:
            return None

        return WrangleData(data_req, data_resp).glassnode(field)

    def get_tidy_data(self, data_req: DataRequest, ticker: str, field: str) -> Optional[pd.DataFrame]:
        """
        Submits data request and wrangles the data response into tidy data format.

        Parameters
        ----------
        data_req: DataRequest
            Data request parameters in CryptoDataPy format.
        ticker: str
            Requested ticker symbol.
        field: str
            Requested field.

        Returns
        -------
        df: pd.DataFrame or None
            Dataframe with DatetimeIndex and field values (col) wrangled into tidy data format.
            None when the data response is None or empty.
        """
        # request the data, then wrangle the raw response
        data_resp = self.req_data(data_req, ticker=ticker, field=field)
        return self.wrangle_data_resp(data_req, data_resp, field)

    def get_all_fields(self, data_req: DataRequest, ticker: str) -> pd.DataFrame:
        """
        Loops through the requested fields, retrieves data in tidy format for each field and stores it
        in a dataframe.

        Parameters
        ----------
        data_req: DataRequest
            Data request parameters in CryptoDataPy format.
        ticker: str
            Requested ticker symbol.

        Returns
        -------
        df: pd.DataFrame
            Dataframe with DatetimeIndex and values for fields (cols), in tidy data format.
            Empty dataframe when no field returned data.
        """
        # convert data request parameters to Glassnode format
        gn_data_req = ConvertParams(data_req).to_glassnode()

        ohlc_field = 'market/price_usd_ohlc'
        ohlc_requested = False  # the OHLC endpoint can appear several times in the converted field list
        field_frames = []

        for field in gn_data_req['fields']:  # loop through fields
            if field == ohlc_field:
                # request OHLC data only once; skipping here also prevents the previous
                # field's frame from being appended a second time
                if ohlc_requested:
                    continue
                ohlc_requested = True

            # progress output: one line per requested field
            print(field)

            df0 = self.get_tidy_data(data_req, ticker, field)
            if df0 is not None:
                field_frames.append(df0)

        if not field_frames:
            return pd.DataFrame()

        # join all fields column-wise in a single concat
        return pd.concat(field_frames, axis=1)

    def check_params(self, data_req: DataRequest) -> None:
        """
        Check data request parameters before calling API to improve efficiency.

        Parameters
        ----------
        data_req: DataRequest
            Data request parameters in CryptoDataPy format.

        Raises
        ------
        ValueError
            If a requested ticker is not in the assets attribute, a requested field is not in the
            fields attribute, or the requested frequency is not in the frequencies attribute.
        """
        # convert data request parameters to Glassnode format
        gn_data_req = ConvertParams(data_req).to_glassnode()

        # check tickers (compared in upper case against the assets attribute)
        if not all(ticker.upper() in self.assets for ticker in gn_data_req['tickers']):
            raise ValueError("Some of the selected assets are not available."
                             " See assets attribute for a list of available assets.")

        # check fields
        if not all(field in self.fields for field in gn_data_req['fields']):
            raise ValueError("Some of the selected fields are not available."
                             " See fields attribute for a list of available fields.")

        # check freq
        if data_req.freq not in self.frequencies:
            raise ValueError(f"On-chain data is only available for {self.frequencies} frequencies."
                             f" Change data request frequency and try again.")

        return None

    def get_data(self, data_req: DataRequest) -> pd.DataFrame:
        """
        Get market, on-chain or off-chain data.

        Parameters
        ----------
        data_req: DataRequest
            Data request parameters in CryptoDataPy format.

        Returns
        -------
        df: pd.DataFrame - MultiIndex
            DataFrame with DatetimeIndex (level 0), ticker (level 1), and values for market, on-chain and/or
            off-chain fields (cols), in tidy data format. Empty dataframe when no ticker returned data.

        Raises
        ------
        ValueError
            If the data request parameters fail check_params.
        """
        # convert data request parameters to Glassnode format
        gn_data_req = ConvertParams(data_req).to_glassnode()

        # check params
        self.check_params(data_req)

        ticker_frames = []
        for ticker in gn_data_req['tickers']:  # loop tickers
            # get all fields for ticker
            df0 = self.get_all_fields(data_req, ticker)
            # a ticker with no data contributes no rows, so leave it out of the stack
            if df0.empty:
                continue
            # add ticker (upper case) as an extra index level
            df0 = df0.assign(ticker=ticker.upper()).set_index('ticker', append=True)
            ticker_frames.append(df0)

        if not ticker_frames:
            return pd.DataFrame()

        # stack ticker dfs in a single concat
        df = pd.concat(ticker_frames)

        # keep only the requested fields that are present in the data, in request order
        fields = [field for field in data_req.fields if field in df.columns]

        return df.loc[:, fields].sort_index()


In [3]:
# instantiate the Glassnode client with defaults; with assets and fields left as None
# the constructor retrieves both lists from the API
gn = Glassnode()

In [4]:
# first batch of tickers (20 symbols)
gn_tickers1 = [
    'BTC', 'ETH', 'LTC', 'APE', 'SAND',
    'CRV', 'SUSHI', 'UNI', 'AAVE', 'RSR',
    'YFI', 'FTT', 'OCEAN', 'BAL', 'BAND',
    'MKR', 'BAT', 'OMG', 'LINK', 'HOT',
]

In [5]:
# second batch of tickers (17 symbols)
gn_tickers2 = [
    'ZRX', 'ENJ', 'MANA', 'DENT', 'RLC',
    'LRC', 'STORJ', 'CVC', 'QNT', 'ANT',
    'CELR', 'MTL', 'REN', 'MATIC', 'SNX',
    'COMP', 'LDO',
]

In [6]:
# combine both batches into one ticker list
gn_tickers = [*gn_tickers1, *gn_tickers2]

In [7]:
# number of tickers in the combined list
len(gn_tickers)

37

In [8]:
# split the combined ticker list into chunks of 5; the last chunk takes whatever remains after index 30
# NOTE(review): this rebinds gn_tickers1 and gn_tickers2, which the cell building gn_tickers
# reads as the two original batches — re-running that cell after this one changes gn_tickers
(gn_tickers1, gn_tickers2, gn_tickers3,
 gn_tickers4, gn_tickers5, gn_tickers6) = (gn_tickers[start:start + 5] for start in range(0, 30, 5))
gn_tickers7 = gn_tickers[30:]

In [28]:
# field list holding only the transfer value field
fields = ['tfr_val']

In [25]:
# data request for BTC: transfer value (total and mean), transfer count and transaction count
data_req = DataRequest(
    tickers='btc',
    fields=['tfr_val', 'tfr_val_mean', 'tfr_count', 'tx_count'],
)

In [29]:
# fetch the requested fields for the test data request
test_df = gn.get_data(data_req)

transactions/transfers_volume_sum
btc
transactions/transfers_volume_mean
btc
transactions/transfers_count
btc
transactions/count
btc


In [30]:
# display the result without rows that contain missing values (test_df itself is not modified)
test_df.dropna()

Unnamed: 0_level_0,Unnamed: 1_level_0,tfr_val,tfr_val_mean,tx_count
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-07-17,BTC,1193.083563,5.076951,235
2010-07-18,BTC,2328.528604,9.389228,248
2010-07-19,BTC,4280.779131,12.092596,354
2010-07-20,BTC,2379.357954,5.761157,413
2010-07-21,BTC,1945.07564,7.597952,256
...,...,...,...,...
2022-11-30,BTC,14649299971.105022,54001.061535,271278
2022-12-01,BTC,15193157248.129272,53250.654535,285314
2022-12-02,BTC,13812900011.593798,50857.511088,271600
2022-12-03,BTC,11651779739.822557,46454.747388,250820


In [53]:
# data request for ticker chunk 2
# NOTE(review): gn_add_fields is not defined in any cell visible here — confirm it exists before running
data_req = DataRequest(
    tickers=gn_tickers2,
    fields=gn_add_fields,
)

In [54]:
# fetch data for ticker chunk 2
add_df2 = gn.get_data(data_req)

addresses/active_count
CRV
addresses/new_non_zero_count
CRV
addresses/count
CRV
addresses/non_zero_count
CRV
addresses/active_count
SUSHI
addresses/new_non_zero_count
SUSHI
addresses/count
SUSHI
addresses/non_zero_count
SUSHI
addresses/active_count
UNI
addresses/new_non_zero_count
UNI
addresses/count
UNI
addresses/non_zero_count
UNI
addresses/active_count
AAVE
addresses/new_non_zero_count
AAVE
addresses/count
AAVE
addresses/non_zero_count
AAVE
addresses/active_count
RSR
addresses/new_non_zero_count
RSR
addresses/count
RSR
addresses/non_zero_count
RSR


In [55]:
# data request for ticker chunk 3
# NOTE(review): gn_add_fields is not defined in any cell visible here — confirm it exists before running
data_req = DataRequest(
    tickers=gn_tickers3,
    fields=gn_add_fields,
)

In [56]:
# fetch data for ticker chunk 3
add_df3 = gn.get_data(data_req)

addresses/active_count
YFI
addresses/new_non_zero_count
YFI
addresses/count
YFI
addresses/non_zero_count
YFI
addresses/active_count
FTT
addresses/new_non_zero_count
FTT
addresses/count
FTT
addresses/non_zero_count
FTT
addresses/active_count
OCEAN
addresses/new_non_zero_count
OCEAN
addresses/count
OCEAN
addresses/non_zero_count
OCEAN
addresses/active_count
BAL
addresses/new_non_zero_count
BAL
addresses/count
BAL
addresses/non_zero_count
BAL
addresses/active_count
BAND
addresses/new_non_zero_count
BAND
addresses/count
BAND
addresses/non_zero_count
BAND


In [57]:
# data request for ticker chunk 4
# NOTE(review): gn_add_fields is not defined in any cell visible here — confirm it exists before running
data_req = DataRequest(
    tickers=gn_tickers4,
    fields=gn_add_fields,
)

In [58]:
# fetch data for ticker chunk 4
add_df4 = gn.get_data(data_req)

addresses/active_count
MKR
addresses/new_non_zero_count
MKR
addresses/count
MKR
addresses/non_zero_count
MKR
addresses/active_count
BAT
addresses/new_non_zero_count
BAT
addresses/count
BAT
addresses/non_zero_count
BAT
addresses/active_count
OMG
addresses/new_non_zero_count
OMG
addresses/count
OMG
addresses/non_zero_count
OMG
addresses/active_count
LINK
addresses/new_non_zero_count
LINK
addresses/count
LINK
addresses/non_zero_count
LINK
addresses/active_count
HOT
addresses/new_non_zero_count
HOT
addresses/count
HOT
addresses/non_zero_count
HOT


In [60]:
# data request for ticker chunk 5
# NOTE(review): gn_add_fields is not defined in any cell visible here — confirm it exists before running
data_req = DataRequest(
    tickers=gn_tickers5,
    fields=gn_add_fields,
)

In [61]:
# fetch data for ticker chunk 5
add_df5 = gn.get_data(data_req)

addresses/active_count
ZRX
addresses/new_non_zero_count
ZRX
addresses/count
ZRX
addresses/non_zero_count
ZRX
addresses/active_count
ENJ
addresses/new_non_zero_count
ENJ
addresses/count
ENJ
addresses/non_zero_count
ENJ
addresses/active_count
MANA
addresses/new_non_zero_count
MANA
addresses/count
MANA
addresses/non_zero_count
MANA
addresses/active_count
DENT
addresses/new_non_zero_count
DENT
addresses/count
DENT
addresses/non_zero_count
DENT
addresses/active_count
RLC
addresses/new_non_zero_count
RLC
addresses/count
RLC
addresses/non_zero_count
RLC


In [66]:
# data request for ticker chunk 6
# NOTE(review): gn_add_fields is not defined in any cell visible here — confirm it exists before running
data_req = DataRequest(
    tickers=gn_tickers6,
    fields=gn_add_fields,
)

In [67]:
# fetch data for ticker chunk 6
add_df6 = gn.get_data(data_req)

addresses/active_count
LRC
addresses/new_non_zero_count
LRC
addresses/count
LRC
addresses/non_zero_count
LRC
addresses/active_count
STORJ
addresses/new_non_zero_count
STORJ
addresses/count
STORJ
addresses/non_zero_count
STORJ
addresses/active_count
CVC
addresses/new_non_zero_count
CVC
addresses/count
CVC
addresses/non_zero_count
CVC
addresses/active_count
QNT
addresses/new_non_zero_count
QNT
addresses/count
QNT
addresses/non_zero_count
QNT
addresses/active_count
ANT
addresses/new_non_zero_count
ANT
addresses/count
ANT
addresses/non_zero_count
ANT


In [62]:
# data request for ticker chunk 7
# NOTE(review): gn_add_fields is not defined in any cell visible here — confirm it exists before running
data_req = DataRequest(
    tickers=gn_tickers7,
    fields=gn_add_fields,
)

In [63]:
# fetch data for ticker chunk 7
add_df7 = gn.get_data(data_req)

addresses/active_count
CELR
addresses/new_non_zero_count
CELR
addresses/count
CELR
addresses/non_zero_count
CELR
addresses/active_count
MTL
addresses/new_non_zero_count
MTL
addresses/count
MTL
addresses/non_zero_count
MTL
addresses/active_count
REN
addresses/new_non_zero_count
REN
addresses/count
REN
addresses/non_zero_count
REN
addresses/active_count
MATIC
addresses/new_non_zero_count
MATIC
addresses/count
MATIC
addresses/non_zero_count
MATIC
addresses/active_count
SNX
addresses/new_non_zero_count
SNX
addresses/count
SNX
addresses/non_zero_count
SNX
addresses/active_count
COMP
addresses/new_non_zero_count
COMP
addresses/count
COMP
addresses/non_zero_count
COMP
addresses/active_count
LDO
addresses/new_non_zero_count
LDO
addresses/count
LDO
addresses/non_zero_count
LDO


In [70]:
# stack the per-chunk frames into one frame and sort by index
# NOTE(review): add_df1 is not created by any cell visible here — confirm it exists before running
gn_add_df = (
    pd.concat([add_df1, add_df2, add_df3, add_df4, add_df5, add_df6, add_df7])
    .sort_index()
)

In [71]:
# write the combined frame to csv; the path is built from the current user's home directory
# instead of a hard-coded '/Users/nickl' prefix, so it resolves to the same file for the
# original author and stays valid for other users with the same project layout
gn_add_data_path = (
    Path.home() / 'projects' / 'systamental' / 'factorlab' / 'src' / 'factorlab'
    / 'datasets' / 'data' / 'gn_add_data.csv'
)
gn_add_df.to_csv(gn_add_data_path)