In [1]:
import pandas as pd
import numpy as np
import logging
import requests
from datetime import datetime, timedelta
from time import sleep
from importlib import resources
from typing import Optional, Union, Any
from cryptodatapy.util.datacredentials import DataCredentials, set_credential
from cryptodatapy.util.convertparams import ConvertParams
from cryptodatapy.data_vendors.datavendor import DataVendor
from cryptodatapy.data_requests.datarequest import DataRequest
import nasdaqdatalink as nasdl

In [3]:
# data credentials: module-level instance read by NasdaqDataLink for its default
# api key and rate limit info, and for the search url printed by the info helpers
data_cred = DataCredentials()

In [12]:
class NasdaqDataLink(DataVendor):
    """
    Retrieves data from Nasdaq Data Link (formerly Quandl) API.
    """

    def __init__(
            self,
            source_type: str = 'data_vendor',
            categories: list[str] = ['rates', 'credit', 'macro'],
            vendors: dict[str, str] = None,
            assets: list[str] = None,
            indexes: list[str] = None,
            markets: list[str] = None,
            market_types: list[str] = ['spot'],
            fields: dict[str, list[str]] = None,
            frequencies: dict[str, list[str]] = {'rates': ['d', 'w', 'm', 'q', 'y'],
                                                 'credit': ['d', 'w', 'm', 'q', 'y'],
                                                'macro': ['m', 'q', 'y']},
            exchanges: list[str] = None,
            base_url: str = None,
            api_key: str = data_cred.quandl_api_key,
            max_obs_per_call: int = None,
            rate_limit: str = data_cred.ndl_api_rate_limit
    ):
        """
        Constructor

        Parameters
        ----------
        source_type: string, {'data_vendor', 'exchange', 'library', 'on-chain', 'web'}
            Type of data source, e.g. 'data_vendor', 'exchange', etc.
        categories: list, {'crypto', 'fx', 'rates', 'eqty', 'commodities', 'credit', 'macro', 'alt'}
            List of available categories, default ['rates', 'credit', 'macro'].
        vendors: dict, optional
            Available data vendors as name-code pairs,
            e.g. {'IMF Cross Country Macroeconomic Statistics': 'ODA'}.
            If not provided, set to the mapping returned by get_vendors_info().
        assets: list, optional
            List of available assets.
        indexes: list, optional
            List of available indexes.
        markets: list, optional
            List of available markets.
        market_types: list
            List of available market types/contracts, default ['spot'].
        fields: dict, optional
            Available fields by category, e.g. {'rates': ['close'], 'macro': ['actual']}.
            If not provided, set to the mapping returned by get_fields_info().
        frequencies: dict
            Available frequencies by category, e.g. {'macro': ['m', 'q', 'y']}.
        exchanges: list, optional
            List of available exchanges.
        base_url: str, optional
            Base url, forwarded to DataVendor. Default None.
        api_key: str
            Nasdaq Data Link api key.
            If not provided, default is set to quandl_api_key stored in DataCredentials.
        max_obs_per_call: int, optional
            Maximum number of observations returned per API call. Default None.
        rate_limit: str
            Rate limit info.
            If not provided, default is set to ndl_api_rate_limit stored in DataCredentials.
        """
        # the list/dict defaults above are created once at definition time and would be
        # shared by every instance: give each instance its own copy
        if isinstance(categories, list):
            categories = list(categories)
        if isinstance(market_types, list):
            market_types = list(market_types)
        if isinstance(frequencies, dict):
            frequencies = {cat: list(freqs) if isinstance(freqs, list) else freqs
                           for cat, freqs in frequencies.items()}

        DataVendor.__init__(self, source_type, categories, assets, indexes, markets, market_types, fields, frequencies,
                            exchanges, base_url, api_key, max_obs_per_call, rate_limit)

        # data vendors: fall back to the built-in name-code mapping (no request is made)
        self.vendors = self.get_vendors_info(vendor=None) if vendors is None else vendors
        # fields: fall back to the built-in fields by category
        if fields is None:
            self.fields = self.get_fields_info()

    def get_vendors_info(self, vendor=None) -> Union[pd.DataFrame, dict[str, str]]:
        """
        Gets info on available data vendors.

        Parameters
        ----------
        vendor: str, default None
            Name of data vendor. Must be one of the names in the built-in mapping.

        Returns
        -------
        vendors: Dataframe or dict
            Name-code mapping of available vendors when vendor is None,
            otherwise a dataframe built from the database metadata response for that vendor.

        Raises
        ------
        KeyError
            If vendor is not one of the available vendor names.
        requests.HTTPError
            If the metadata request returns an error status.
        """
        # TODO: add additional vendors which can be useful for analysis of digital assets

        # vendor-code pairs for databases
        vendors = {
            'Federal Reserve Economic Data': 'FRED',
            'US Federal Reserve Data Releases': 'FED',
            'US Treasury': 'USTREASURY',
            'European Central Bank': 'ECB',
            'Bank of England Official Statistics': 'BOE',
            'Corporate Bond Yield Rates': 'ML',
            'IMF Cross Country Macroeconomic Statistics': 'ODA',
            'Organisation for Economic Co-operation and Development': 'OECD'
        }

        if vendor is None:
            return vendors

        if vendor not in vendors:
            raise KeyError(f"Unknown vendor '{vendor}'. Available vendors are: {list(vendors)}.")

        # get vendor metadata; the key is sent as a query param so a missing key does not
        # break url construction, and a timeout keeps the call from hanging indefinitely
        url = f"https://data.nasdaq.com/api/v3/databases/{vendors[vendor]}.json"
        resp = requests.get(url, params={'api_key': self.api_key}, timeout=30)
        # surface HTTP errors instead of turning the error payload into a dataframe
        resp.raise_for_status()

        return pd.DataFrame(resp.json())

    @staticmethod
    def get_assets_info():
        """
        Prints the search page url where available assets can be found.
        """
        print(f"See search page to find available assets: {data_cred.ndl_search_url} ")

    @staticmethod
    def get_indexes_info():
        """
        Prints the search page url where available indexes can be found.
        """
        print(f"See search page to find available indexes: {data_cred.ndl_search_url} ")

    @staticmethod
    def get_markets_info():
        """
        Prints the search page url where available markets can be found.
        """
        print(f"See search page to find available markets: {data_cred.ndl_search_url} ")

    def get_fields_info(self, data_type: Optional[str] = 'off-chain',
                        cat=None) -> Union[dict[str, list[str]], list[str]]:
        """
        Gets fields info.

        Parameters
        ----------
        data_type: str, {'market', 'on-chain', 'off-chain'}, default 'off-chain'
            Type of data.
        cat: str, {'rates', 'credit', 'macro'}, default None
            Asset class or time series category.

        Returns
        -------
        fields: dict or list
            Available fields by category, or the list of fields for cat when cat is provided.

        Raises
        ------
        Exception
            If data_type is 'on-chain'.
        KeyError
            If cat is not one of the available categories.
        """
        if data_type == 'on-chain':
            raise Exception("No on-chain data available. Nasdaq data link only provides market and macro data.")

        # fields by category
        fields = {
            'rates': ['close'],
            'credit': ['close'],
            'macro': ['actual'],
        }

        return fields if cat is None else fields[cat]

    @staticmethod
    def get_exchanges_info():
        """
        Prints the search page url where available exchanges can be found.
        """
        print(f"See search page to find available exchanges: {data_cred.ndl_search_url} ")

    def get_rate_limit_info(self):
        """
        Prints the rate limit info stored on the instance.
        """
        print(f"See rate limit info: {self.rate_limit}")

    def get_data(self, data_req: DataRequest) -> pd.DataFrame:
        """
        Submits data request to Nasdaq Data Link API for data.

        Parameters
        ----------
        data_req: DataRequest
            Parameters of data request in CryptoDataPy format.

        Returns
        -------
        df: pd.DataFrame
            Dataframe with the requested fields, with 'ticker' appended to the index.

        Raises
        ------
        ValueError
            If the category, frequency or fields of the data request are not available.
        Exception
            If no data is returned for any of the requested tickers.
        """
        # check cat
        if data_req.cat not in self.categories:
            raise ValueError(f"Invalid category. Valid categories are: {self.categories}.")

        # check freq
        if data_req.freq not in self.frequencies[data_req.cat]:
            raise ValueError(f"Invalid data frequency. Valid data frequencies are: {self.frequencies}.")

        # check fields
        if not any(field in self.fields[data_req.cat] for field in data_req.fields):
            raise ValueError("Invalid fields. See '.fields' property for available fields.")

        # convert data request parameters to Nasdaq Data Link format
        ndl_data_req = ConvertParams(data_source='ndl').convert_to_source(data_req)

        # loop through tickers, pairing each requested ticker with its converted counterpart
        frames = []
        for dr_ticker, ndl_ticker in zip(data_req.tickers, ndl_data_req['tickers']):

            # get data from ndl
            df0 = nasdl.get(ndl_ticker)
            if df0.empty:
                continue

            # wrangle data resp and add ticker to index
            df1 = self.wrangle_data_resp(data_req, df0)
            frames.append(df1.assign(ticker=dr_ticker).set_index('ticker', append=True))

        # stack ticker dfs once, rather than growing a dataframe inside the loop
        df = pd.concat(frames) if frames else pd.DataFrame()

        # check if df empty
        if df.empty:
            raise Exception('No data returned. Check data request parameters and try again.')

        # filter df for desired fields and typecast
        fields = [field for field in data_req.fields if field in df.columns]
        df = df.loc[:, fields]
        # type conversion
        df = ConvertParams().convert_dtypes(df)

        return df

    @staticmethod
    def wrangle_data_resp(data_req: DataRequest, data_resp: pd.DataFrame) -> pd.DataFrame:
        """
        Wrangles data response.

        Parameters
        ----------
        data_req: DataRequest
            Parameters of data request in CryptoDataPy format.
        data_resp: pd.DataFrame
            Data response from the data source.

        Returns
        -------
        df: pd.DataFrame
            Wrangled dataframe in tidy format.
        """
        if data_req.cat != 'macro':
            # reset index
            data_resp = data_resp.reset_index()
        else:
            # work on a copy so the caller's dataframe is not modified
            data_resp = data_resp.copy()
            # parse date and time to create datetime
            data_resp['date'] = pd.to_datetime(data_resp.date + data_resp.time, format="%d/%m/%Y%H:%M")
            # replace missing forecasts with the previous value; the mask must be computed per
            # row, a bare np.nan condition is always truthy and would overwrite every forecast
            data_resp['forecast'] = np.where(data_resp.forecast.isna(), data_resp.previous, data_resp.forecast)

        # convert cols to cryptodatapy format
        # NOTE(review): data_source is 'investpy' here while get_data converts the request with
        # 'ndl' - confirm which field mapping applies to Nasdaq Data Link responses
        df = ConvertParams(data_source='investpy').convert_fields_to_lib(data_req, data_resp)
        # set index
        df = df.set_index('date')
        # str and resample to 5min freq for econ releases
        if data_req.cat == 'macro':
            df = df.replace('%', '', regex=True)  # remove % str
            # merge with empty datetimeindex df and ffill missing vals
            idx_df = pd.DataFrame(index=pd.date_range(start=df.index[0], end=datetime.utcnow(), freq='5min'))
            idx_df.index.name = 'date'
            df = idx_df.merge(df, how='outer', left_index=True, right_index=True).astype(float).ffill()
            # compute surprise val
            df['surprise'] = df.actual - df.expected

        # resample freq
        df = df.resample(data_req.freq).last()

        # filter bad data: 0 values become NaN, except in 'surprise' where 0 is a valid value
        if 'surprise' in df.columns:
            others = df.drop(columns='surprise')
            df = pd.concat([others[others != 0], df.loc[:, ['surprise']]], axis=1)
        else:
            df = df[df != 0]  # 0 values
        df = df[~df.index.duplicated()]  # duplicate rows
        df = df.dropna(how='all').dropna(how='all', axis=1)  # entire row or col NaNs

        return df

In [13]:
# instantiate the vendor with its default arguments
ndl = NasdaqDataLink()

In [14]:
# print the rate limit info stored on the instance
ndl.get_rate_limit_info()

See rate limit info: https://help.data.nasdaq.com/article/490-is-there-a-rate-limit-or-speed-limit-for-api-usage


In [None]:
# print the search page url where available assets can be looked up
ndl.get_assets_info()

In [None]:
# data_req = DataRequest()

In [None]:
# ndl_data_req = ConvertParams(data_source='ndl').convert_to_source(data_req)

In [None]:
# ndl_data_req

In [None]:
# vendor name -> database code mapping set by the constructor
ndl.vendors

In [16]:
# fetch a single series directly with the nasdaqdatalink client
# ('ODA' is the code listed for IMF Cross Country Macroeconomic Statistics in the vendors mapping)
df = nasdl.get("ODA/USA_PPPSH")

In [20]:
# inspect the index of the returned dataframe
df.index

DatetimeIndex(['1980-12-31', '1981-12-31', '1982-12-31', '1983-12-31',
               '1984-12-31', '1985-12-31', '1986-12-31', '1987-12-31',
               '1988-12-31', '1989-12-31', '1990-12-31', '1991-12-31',
               '1992-12-31', '1993-12-31', '1994-12-31', '1995-12-31',
               '1996-12-31', '1997-12-31', '1998-12-31', '1999-12-31',
               '2000-12-31', '2001-12-31', '2002-12-31', '2003-12-31',
               '2004-12-31', '2005-12-31', '2006-12-31', '2007-12-31',
               '2008-12-31', '2009-12-31', '2010-12-31', '2011-12-31',
               '2012-12-31', '2013-12-31', '2014-12-31', '2015-12-31',
               '2016-12-31', '2017-12-31', '2018-12-31', '2019-12-31',
               '2020-12-31', '2021-12-31', '2022-12-31', '2023-12-31',
               '2024-12-31'],
              dtype='datetime64[ns]', name='Date', freq=None)