# Imports

In [None]:
import logging
import os

import pandas as pd
import seaborn as sns

import core.config.config_ as ccocon
import helpers.dbg as hdbg
import helpers.env as henv
import helpers.printing as hprintin
import helpers.s3 as hs3
import im.data.universe as imdauni
import research.cc.statistics as rccsta
import research.cc.volume as rccvol

import core.plotting as cplot


In [None]:
# Initialize logging at INFO verbosity before any message is emitted.
hdbg.init_logger(verbosity=logging.INFO)

# Logger used by the rest of the notebook.
_LOG = logging.getLogger(__name__)

# Log the first element returned by `henv.get_system_signature()`.
_LOG.info("%s", henv.get_system_signature()[0])

# Apply the notebook settings provided by `helpers.printing`.
hprintin.config_notebook()

# Config

In [None]:
def get_cmtask260_config() -> ccocon.Config:
    """
    Build the config for the task260 volume analysis.

    The config holds three sub-configs:
    - `load`: AWS profile and data directory
    - `data`: data type and universe version
    - `column_names`: names of the columns used by the analysis

    :return: the populated config
    """
    config = ccocon.Config()
    # Each entry is (sub-config name, key/value pairs), in insertion order.
    sections = (
        (
            "load",
            (
                ("aws_profile", "am"),
                ("data_dir", os.path.join(hs3.get_path(), "data")),
            ),
        ),
        (
            "data",
            (
                ("data_type", "OHLCV"),
                ("universe_version", "v0_3"),
            ),
        ),
        (
            "column_names",
            (
                ("volume", "volume"),
                ("currency_pair", "currency_pair"),
                ("exchange", "exchange_id"),
                ("close", "close"),
            ),
        ),
    )
    for section_name, entries in sections:
        config.add_subconfig(section_name)
        for key, value in entries:
            config[section_name][key] = value
    return config


# Build the notebook-wide config and print it for inspection.
config = get_cmtask260_config()
print(config)

In [None]:
# loader = imccdaloloa.CcxtLoader(
#    root_dir="s3://alphamatic-data/data", aws_profile="am"
# )

# Load the data

In [None]:
def compute_cumul_volume_(data):
    """
    Compute the cumulative volume of `data` with the notebook config.

    The volume is requested with `is_notional_volume=False`.
    """
    return rccvol.compute_cumul_volume(data, config, is_notional_volume=False)


# Apply the stat function through `compute_stats_for_universe()`.
cumul_volume = rccsta.compute_stats_for_universe(config, compute_cumul_volume_)

In [None]:
# Log the number of rows of the result and display its first 3 rows.
_LOG.info("The number of (exchanges, currency pairs) =%s", cumul_volume.shape[0])
cumul_volume.head(3)

# Compute total volume per exchange

In [None]:
# Aggregate the cumulative volume by exchange (`avg_daily=False`) and print it.
total_volume_by_exchange = rccvol.get_total_volume_by_exchange(
    cumul_volume, config, avg_daily=False
)
print(total_volume_by_exchange)

# Compute total volume per currency

In [None]:
# Aggregate the cumulative volume by coin (`avg_daily=False`) and print it.
total_volume_by_coins = rccvol.get_total_volume_by_coins(
    cumul_volume, config, avg_daily=False
)
print(total_volume_by_coins)

# Issue with compute_stats_for_universe()

As one can see, __compute_stats_for_universe()__ returns a DataFrame without the timestamp values, which are needed to plot the rolling volume.

What do you think we should do in this case?
- We can either add a parameter to your initial function so that it does not drop the timestamp values
- Or write a new function that takes the timestamp values into account

In [None]:
def get_daily_volume(data, is_nominal_value):
    """
    Sum the volume per exchange, currency pair and calendar day.

    The input frame is not modified, so the function can safely be applied
    several times to the same data (the original implementation overwrote
    the "volume" column in place, compounding the price multiplication on
    repeated calls).

    :param data: DataFrame whose index exposes `.date` (e.g. a
        `DatetimeIndex`) with columns "exchange_id", "currency_pair",
        "volume" and, when `is_nominal_value` is True, "close"
    :param is_nominal_value: if True, multiply "volume" by "close" before
        aggregating
    :return: DataFrame with columns "exchange_id", "currency_pair", "date",
        "volume", one row per group
    """
    # Copy only the grouping columns so the caller's frame stays untouched.
    daily = data[["exchange_id", "currency_pair"]].copy()
    volume = data["volume"]
    if is_nominal_value:
        volume = volume * data["close"]
    daily["volume"] = volume
    daily["date"] = data.index.date
    group_keys = ["exchange_id", "currency_pair", "date"]
    cumul_daily_volume = daily.groupby(group_keys, as_index=False)["volume"].sum()
    return cumul_daily_volume

def compute_daily_volume(data):
    """
    Compute the daily volume of `data` with `is_nominal_value=True`.
    """
    return get_daily_volume(data, is_nominal_value=True)


# Apply the stat function through `compute_stats_for_universe()`.
cumul_daily_volume = rccsta.compute_stats_for_universe(
    config, compute_daily_volume
)

In [None]:
# Display the daily volume table.
cumul_daily_volume

# Rolling Plots

In [None]:
def get_rolling_volume_per_group(data, group, window, display_plot):
    """
    Compute the rolling mean of the daily volume for each value of `group`.

    :param data: DataFrame with columns `group`, "date" and "volume"
    :param group: name of the column to aggregate by
    :param window: number of rows (one row per date within a group) in the
        rolling window
    :param display_plot: if True, draw the rolling volume as a line plot
        with one line per group value
    :return: DataFrame with columns `group`, "date", "volume" (summed per
        group and date) and "rolling_volume"
    """
    # Total volume per (group, date).
    daily_by_group = data.groupby([group, "date"], as_index=False)["volume"].sum()
    # Rolling mean computed independently inside each group; `transform`
    # keeps the row index aligned with `daily_by_group`.
    daily_by_group["rolling_volume"] = daily_by_group.groupby([group])[
        "volume"
    ].transform(lambda volumes: volumes.rolling(window).mean())
    if display_plot:
        sns.lineplot(data=daily_by_group, x='date', y='rolling_volume', hue=group)
    return daily_by_group

In [None]:
# Rolling volume per exchange with a window of 90 rows; plot and print it.
rolling_vol_exchange = get_rolling_volume_per_group(cumul_daily_volume, group="exchange_id", window=90, display_plot=True)
print(rolling_vol_exchange)

In [None]:
# Rolling volume per currency pair with a window of 90 rows; plot and print it.
rolling_vol_coins = get_rolling_volume_per_group(cumul_daily_volume, group="currency_pair", window=90, display_plot=True)
print(rolling_vol_coins)

# Compare weekday volumes

In [None]:
def compare_weekdays_volumes(df, plot_total_volumes, plot_distr_by_weekdays):
    """
    Compute the total volume per day of the week and optionally plot it.

    The input frame is not modified (the original implementation added a
    "weekday" column to the caller's DataFrame). Day names are computed with
    pandas `day_name()`, which returns English names regardless of the
    process locale, so the comparisons against "Saturday"/"Sunday" below
    always hold.

    :param df: DataFrame with columns "date" (date-like values) and "volume"
    :param plot_total_volumes: if True, draw a bar plot of the total volume
        per day of the week
    :param plot_distr_by_weekdays: if True, draw the distribution of the
        per-date volume separately for weekends and working days
    :return: Series of total volume indexed by day name, sorted in
        descending order
    """
    # `assign` returns a new frame, leaving the caller's data untouched.
    df = df.assign(weekday=pd.to_datetime(df["date"]).dt.day_name())
    total_volume_by_weekdays = (
        df.groupby("weekday")["volume"].sum().sort_values(ascending=False)
    )
    if plot_total_volumes:
        cplot.plot_barplot(
            total_volume_by_weekdays,
            title="Total volume per weekdays",
            figsize=[15, 7],
        )
    if plot_distr_by_weekdays:
        is_weekend = df["weekday"].isin(["Saturday", "Sunday"])
        weekends = df[is_weekend]
        weekdays = df[~is_weekend]
        weekends_volume = weekends.groupby(["date", "weekday"])["volume"].sum()
        weekdays_volume = weekdays.groupby(["date", "weekday"])["volume"].sum()
        sns.displot(weekends_volume).set(title='Volume Distribution by weekends')
        sns.displot(weekdays_volume).set(title='Volume Distribution by working days')
    return total_volume_by_weekdays

In [None]:
# Compute the total volume per day of the week, draw both plots and print
# the resulting table.
total_volume_by_weekdays = compare_weekdays_volumes(cumul_daily_volume,
                                                    plot_total_volumes=True,
                                                    plot_distr_by_weekdays=True)
print(total_volume_by_weekdays)

# Compare ATH volumes

In [None]:
def plot_ath_volumes_comparison(df_list):
    """
    Plot the average total volume per row in ATH vs. non-ATH.

    ATH rows are those whose index time is between 09:30 and 16:00. For each
    frame the total volume over all columns is divided by the number of rows
    of the corresponding subset (an average per minute, assuming minute-level
    data - TODO confirm), and the results are drawn as a log-scale bar plot.

    :param df_list: DataFrames of volumes indexed by timestamp, each with a
        `name` attribute identifying the exchange
    """
    per_exchange_stats = []
    for exchange_df in df_list:
        name = exchange_df.name
        ath_positions = exchange_df.index.indexer_between_time("09:30", "16:00")
        ath_rows = exchange_df.iloc[ath_positions]
        # Everything that is not in the ATH subset.
        non_ath_rows = exchange_df.loc[~exchange_df.index.isin(ath_rows.index)]
        ath_avg = ath_rows.sum().sum() / ath_rows.shape[0]
        non_ath_avg = non_ath_rows.sum().sum() / non_ath_rows.shape[0]
        stats = pd.DataFrame()
        stats.loc[f"{name}", f"minute_avg_total_volume_ath_{name}"] = ath_avg
        stats.loc[f"{name}", f"minute_avg_total_volume_not_ath_{name}"] = non_ath_avg
        per_exchange_stats.append(stats)
    combined_stats = pd.concat(per_exchange_stats)
    combined_stats.plot.bar(figsize=(15, 7), logy=True)

In [None]:
def get_ath_volume(data, is_nominal_value):
    """
    Compute the average volume per row inside and outside ATH.

    ATH rows are those whose index time is between 09:30 and 16:00. The
    average is the mean of "volume" over the rows of each subset (an average
    per minute, assuming minute-level data - TODO confirm).

    The previous implementation could not run: it referenced the undefined
    names `df` and `plot_df`, mutated its input, ended with a tail copied
    from `get_daily_volume()` and contained unreachable statements after
    `return`. Those statements showed the intended usage, i.e.:

        compute_ath_volume = lambda data: get_ath_volume(data, is_nominal_value=True)
        ath_volume = rccsta.compute_stats_for_universe(config, compute_ath_volume)

    NOTE(review): the returned layout (one row per exchange / currency pair)
    is a reconstruction of the intent - confirm it matches what
    `compute_stats_for_universe()` expects.

    :param data: DataFrame with a `DatetimeIndex` and columns "exchange_id",
        "currency_pair", "volume" and, when `is_nominal_value` is True,
        "close"
    :param is_nominal_value: if True, multiply "volume" by "close" before
        averaging
    :return: DataFrame with columns "exchange_id", "currency_pair",
        "minute_avg_total_volume_ath" and "minute_avg_total_volume_not_ath"
    """
    group_keys = ["exchange_id", "currency_pair"]
    # Copy only the grouping columns so the caller's frame stays untouched.
    frame = data[group_keys].copy()
    volume = data["volume"]
    if is_nominal_value:
        volume = volume * data["close"]
    frame["volume"] = volume
    # Boolean mask of the rows whose timestamp falls inside ATH.
    ath_positions = data.index.indexer_between_time("09:30", "16:00")
    in_ath = data.index.isin(data.index[ath_positions])
    ath_avg = frame[in_ath].groupby(group_keys)["volume"].mean()
    not_ath_avg = frame[~in_ath].groupby(group_keys)["volume"].mean()
    ath_stat = pd.concat(
        [ath_avg, not_ath_avg],
        axis=1,
        keys=["minute_avg_total_volume_ath", "minute_avg_total_volume_not_ath"],
    )
    return ath_stat.reset_index()

# OLD CODE

# Functions

Note: by "volume" I mean the standard output, which is denominated in the number of coins

In [None]:
def get_volume_df_for_exch(coins, exchange):
    """
    Build a DataFrame of daily volumes for all the coins of an exchange.

    For every coin the "volume" column is summed per calendar date and the
    resulting column is renamed to `volume_<suffix>_<exchange>`, where the
    suffix comes from the module-level `suffixes` mapping.

    NOTE(review): data is read through `config.read_data_from_filesystem()`;
    confirm that the `config` object in scope provides this method.

    :param coins: currency pairs available on the exchange
    :param exchange: exchange name
    :return: DataFrame indexed by date with one volume column per coin
    """

    def _daily_volume(coin):
        # Load the OHLCV data of one coin and collapse it to one row per date.
        ohlcv = config.read_data_from_filesystem(
            exchange_id=exchange, currency_pair=coin, data_type="OHLCV"
        )
        daily = pd.DataFrame(ohlcv.groupby(by=ohlcv.index.date)["volume"].sum())
        tag = f"_{suffixes[coin]}_{exchange}"
        daily.columns = [col_name + tag for col_name in daily.columns]
        return daily

    per_coin_frames = [_daily_volume(coin) for coin in coins]
    return pd.concat(per_coin_frames, axis=1)


def get_total_trading_volume_by_coins(coin_list, exch_list):
    """
    Compute the total and the daily-average trading volume of each coin.

    The volume of a coin is the sum of all the columns whose name contains
    the coin name, across all the given exchange frames. The daily average
    divides the total by the number of rows with a non-zero volume and is
    NaN when there is no such row.

    Compared to the previous implementation, nothing is assigned to a slice
    of the concatenated frame (which triggered `SettingWithCopyWarning`) and
    the total is computed only once.

    :param coin_list: coin names to look for in the column names
    :param exch_list: volume DataFrames, one per exchange, indexed by date
    :return: DataFrame indexed by coin with columns
        "total_trading_volume_in_coins" and "daily_avg_coin_volume", sorted
        by total volume in descending order
    """
    df = pd.concat(exch_list, axis=1)
    columns = ["total_trading_volume_in_coins", "daily_avg_coin_volume"]
    rows = {}
    for coin in coin_list:
        coin_cols = [col for col in df.columns if coin in col]
        # Volume of the coin per row, summed across all exchanges.
        daily_total = df[coin_cols].sum(axis=1)
        total_volume = daily_total.sum()
        active_days = int((daily_total != 0).sum())
        # Avoid dividing by zero when the coin never traded.
        if active_days:
            norm_volume = total_volume / active_days
        else:
            norm_volume = float("nan")
        rows[f"{coin}"] = [total_volume, norm_volume]
    volume_df = pd.DataFrame.from_dict(rows, orient="index", columns=columns)
    return volume_df.sort_values(
        by="total_trading_volume_in_coins", ascending=False
    )


def get_total_trading_volume_by_exchange(df_list):
    """
    Compute the total and the per-row average trading volume per exchange.

    The total is the sum of all the cells of a frame; the average divides
    it by the number of rows of the frame.

    :param df_list: volume DataFrames, each with a `name` attribute
        identifying the exchange
    :return: DataFrame indexed by exchange name with columns
        "total_trading_volume_in_coins" and "daily_avg_coin_volume", sorted
        by total volume in descending order
    """
    rows = {}
    for exchange_df in df_list:
        grand_total = exchange_df.sum().sum()
        rows[f"{exchange_df.name}"] = {
            "total_trading_volume_in_coins": grand_total,
            "daily_avg_coin_volume": grand_total / exchange_df.shape[0],
        }
    exch_volume = pd.DataFrame.from_dict(rows, orient="index")
    return exch_volume.sort_values(
        by="total_trading_volume_in_coins", ascending=False
    )


def plot_rolling_volume_by_coins(coin_list, exch_list, window=90):
    """
    Plot the rolling mean of the total volume of each coin.

    The volume of a coin is the sum of all the columns whose name contains
    the coin name, across all the given exchange frames. The rolling mean is
    taken over `window` rows (days, given daily frames).

    Compared to the previous implementation, nothing is assigned to a slice
    of the concatenated frame (which triggered `SettingWithCopyWarning`) and
    the window length is configurable, defaulting to the former 90.

    :param coin_list: coin names to look for in the column names
    :param exch_list: volume DataFrames, one per exchange, indexed by date
    :param window: number of rows in the rolling window
    """
    df = pd.concat(exch_list, axis=1)
    rolling_series = []
    for coin in coin_list:
        coin_cols = [col for col in df.columns if coin in col]
        total_volume = df[coin_cols].sum(axis=1)
        rolling_volume = total_volume.rolling(window).mean()
        rolling_series.append(
            rolling_volume.rename(f"rolling_{window}_volume_{coin}")
        )
    rolling_df = pd.concat(rolling_series, axis=1)
    rolling_df.plot(figsize=(12, 7))


def plot_rolling_volume_by_exchange(exch_list, exch_names, window=90):
    """
    Plot the rolling mean of the total volume of each exchange.

    The volume of an exchange is the sum of all the columns whose name
    contains the exchange name. The rolling mean is taken over `window`
    rows (days, given daily frames).

    Compared to the previous implementation, nothing is assigned to a slice
    of the concatenated frame (which triggered `SettingWithCopyWarning`) and
    the window length is configurable, defaulting to the former 90.

    :param exch_list: volume DataFrames, one per exchange, indexed by date
    :param exch_names: exchange names to look for in the column names
    :param window: number of rows in the rolling window
    """
    df = pd.concat(exch_list, axis=1)
    rolling_series = []
    for exch in exch_names:
        exch_cols = [col for col in df.columns if exch in col]
        total_volume = df[exch_cols].sum(axis=1)
        rolling_volume = total_volume.rolling(window).mean()
        rolling_series.append(
            rolling_volume.rename(f"rolling_{window}_volume_{exch}")
        )
    rolling_df = pd.concat(rolling_series, axis=1)
    rolling_df.plot(figsize=(12, 7))


def compare_weekdays_volumes(exch_list):
    """
    Print statistics and draw plots comparing working days and weekends.

    Three plots are drawn, in order: total volume per weekday id, volume
    distribution over weekends, volume distribution over working days. The
    descriptive statistics of both subsets are printed as a table.

    :param exch_list: volume DataFrames, one per exchange, indexed by date
    """
    combined = pd.concat(exch_list, axis=1)
    # Keep only the raw volume columns, dropping previously derived ones.
    raw_cols = [
        col
        for col in combined.columns
        if "total_volume" not in col and "rolling_volume" not in col
    ]
    df = combined[raw_cols]
    # Total volume over all coins and exchanges.
    df["total_volume"] = df.sum(axis=1)
    # Weekday id of each row: 0 is Monday, 5 and 6 are the weekend.
    df["weekday"] = df.index.map(lambda ts: ts.weekday())
    volume_by_weekday = df.groupby("weekday").total_volume.sum()
    volume_by_weekday.plot.bar(figsize=(12, 7))
    is_weekend = df["weekday"].isin([5, 6])
    weekends = df[is_weekend]
    sns.displot(weekends, x="total_volume")
    weekdays = df[~is_weekend]
    sns.displot(weekdays, x="total_volume")
    print("Descriptive statistics:")
    stats = pd.concat(
        [weekends["total_volume"].describe(), weekdays["total_volume"].describe()],
        axis=1,
        keys=["weekends", "working_days"],
    )
    print(stats)
    print(
        "The graph labels in respective order: Total Volume by weekdays, Distribution of Volume over weekends, Distribution of Volume over working days"
    )

def get_initial_df_with_volumes(coins, exchange):
    """
    Build a DataFrame with the volume of each coin at the original timestamps.

    NOTE(review): data is read through `config.read_data_from_filesystem()`;
    confirm that the `config` object in scope provides this method.

    :param coins: currency pairs available on the exchange
    :param exchange: exchange name
    :return: DataFrame with one "volume" column per coin, without resampling
    """
    volume_series = [
        config.read_data_from_filesystem(
            exchange_id=exchange, currency_pair=coin, data_type="OHLCV"
        )["volume"]
        for coin in coins
    ]
    return pd.concat(volume_series, axis=1)


def plot_ath_volumes_comparison(df_list):
    """
    Plot the average total volume per row in ATH vs. non-ATH.

    ATH rows are those whose index time is between 09:30 and 16:00. For each
    frame the total volume over all columns is divided by the number of rows
    of the corresponding subset (an average per minute, assuming minute-level
    data - TODO confirm), and the results are drawn as a log-scale bar plot.

    :param df_list: DataFrames of volumes indexed by timestamp, each with a
        `name` attribute identifying the exchange
    """
    per_exchange_stats = []
    for exchange_df in df_list:
        name = exchange_df.name
        ath_positions = exchange_df.index.indexer_between_time("09:30", "16:00")
        ath_rows = exchange_df.iloc[ath_positions]
        # Everything that is not in the ATH subset.
        non_ath_rows = exchange_df.loc[~exchange_df.index.isin(ath_rows.index)]
        ath_avg = ath_rows.sum().sum() / ath_rows.shape[0]
        non_ath_avg = non_ath_rows.sum().sum() / non_ath_rows.shape[0]
        stats = pd.DataFrame()
        stats.loc[f"{name}", f"minute_avg_total_volume_ath_{name}"] = ath_avg
        stats.loc[f"{name}", f"minute_avg_total_volume_not_ath_{name}"] = non_ath_avg
        per_exchange_stats.append(stats)
    combined_stats = pd.concat(per_exchange_stats)
    combined_stats.plot.bar(figsize=(15, 7), logy=True)

### Supporting variables

In [None]:
# Get the list of all coin pairs for each exchange from the "v0_1" universe.
binance_coins = imdauni.get_trade_universe("v0_1")["CCXT"]["binance"]
bitfinex_coins = imdauni.get_trade_universe("v0_1")["CCXT"]["bitfinex"]
ftx_coins = imdauni.get_trade_universe("v0_1")["CCXT"]["ftx"]
gateio_coins = imdauni.get_trade_universe("v0_1")["CCXT"]["gateio"]
kucoin_coins = imdauni.get_trade_universe("v0_1")["CCXT"]["kucoin"]

# Map each currency pair to the short coin suffix used in column names.
suffixes = {
    "ADA/USDT": "ada",
    "AVAX/USDT": "avax",
    "BNB/USDT": "bnb",
    "BTC/USDT": "btc",
    "DOGE/USDT": "doge",
    "EOS/USDT": "eos",
    "ETH/USDT": "eth",
    "LINK/USDT": "link",
    "SOL/USDT": "sol",
    "FIL/USDT": "fil",
    "XRP/USDT": "xrp",
}

# Get the list of all unique coin names: deduplicate the pairs, then keep
# the lower-cased part before the "/".
coins = set(
    binance_coins + bitfinex_coins + ftx_coins + gateio_coins + kucoin_coins
)
coins = [i.split("/")[0].lower() for i in coins]

# Exchange names, in the order used for the exchange frames below.
exch_names = ["binance", "bitfinex", "ftx", "gateio", "kucoin"]

## Load the volumes dataframes

In [None]:
# Load the volume frame of each exchange.
# NOTE(review): `get_volume_df_for_exch_notional` is not defined in the
# visible part of this notebook - confirm where it comes from. The next cell
# rebinds the same variables using `get_volume_df_for_exch`.
binance = get_volume_df_for_exch_notional(binance_coins, "binance")
bitfinex = get_volume_df_for_exch_notional(bitfinex_coins, "bitfinex")
ftx = get_volume_df_for_exch_notional(ftx_coins, "ftx")
gateio = get_volume_df_for_exch_notional(gateio_coins, "gateio")
kucoin = get_volume_df_for_exch_notional(kucoin_coins, "kucoin")

In [None]:
# Load the daily volume frame (in number of coins) of each exchange.
binance = get_volume_df_for_exch(binance_coins, "binance")
bitfinex = get_volume_df_for_exch(bitfinex_coins, "bitfinex")
ftx = get_volume_df_for_exch(ftx_coins, "ftx")
gateio = get_volume_df_for_exch(gateio_coins, "gateio")
kucoin = get_volume_df_for_exch(kucoin_coins, "kucoin")

In [None]:
# Tag each frame with its exchange name: the functions that aggregate per
# exchange read it through `df.name`.
binance.name = "binance"
bitfinex.name = "bitfinex"
ftx.name = "ftx"
gateio.name = "gateio"
kucoin.name = "kucoin"

# Frames of all the exchanges, in the same order as `exch_names`.
exch_list = [binance, bitfinex, ftx, gateio, kucoin]

# Compute total trading volume for each currency


In [None]:
# Total and daily-average trading volume per coin across all the exchanges.
total_trading_vol = get_total_trading_volume_by_coins(coins, exch_list)

In [None]:
# Display the per-coin volume table.
total_trading_vol

In [None]:
# Bar plot of the total volume per coin, on a log scale.
total_trading_vol["total_trading_volume_in_coins"].plot.bar(
    figsize=(15, 7), logy=True
)

In [None]:
# Bar plot of the daily-average volume per coin, sorted in descending order,
# on a log scale.
total_trading_vol["daily_avg_coin_volume"].sort_values(ascending=False).plot.bar(
    figsize=(15, 7), logy=True
)

# Rolling volume for each currency

In [None]:
# Plot the rolling volume of each coin across all the exchanges.
plot_rolling_volume_by_coins(coins, exch_list)

# Compute total volume per exchange

In [None]:
# Total and average trading volume per exchange.
exchange_trading_volume = get_total_trading_volume_by_exchange(exch_list)

In [None]:
# Display the per-exchange volume table.
exchange_trading_volume

In [None]:
# Bar plot of the total volume per exchange, on a log scale.
exchange_trading_volume["total_trading_volume_in_coins"].plot.bar(
    figsize=(15, 7), logy=True
)

In [None]:
# Bar plot of the normalised (daily-average) volume per exchange, on a log
# scale.
exchange_trading_volume["daily_avg_coin_volume"].plot.bar(
    figsize=(15, 7), logy=True
)

# Rolling volume for each exchange

In [None]:
# Plot the rolling volume of each exchange across all its coins.
plot_rolling_volume_by_exchange(exch_list, exch_names)

# Is volume constant over different days? E.g., weekend vs workdays?

In [None]:
def compare_weekdays_volumes(exch_list):
    """
    Print statistics and draw plots comparing working days and weekends.

    Three plots are drawn, in order: total volume per day name (sorted in
    descending order), volume distribution over weekends, volume
    distribution over working days. The descriptive statistics of both
    subsets are printed as a table.

    :param exch_list: volume DataFrames, one per exchange, indexed by date
    """
    combined = pd.concat(exch_list, axis=1)
    # Keep only the raw volume columns, dropping previously derived ones.
    raw_cols = [
        col
        for col in combined.columns
        if "total_volume" not in col and "rolling_volume" not in col
    ]
    df = combined[raw_cols]
    # Total volume over all coins and exchanges.
    df["total_volume"] = df.sum(axis=1)
    # Day name of each row, as produced by `strftime("%A")`.
    df["weekday"] = df.index.map(lambda ts: ts.strftime("%A"))
    volume_by_weekday = df.groupby("weekday").total_volume.sum()
    volume_by_weekday.sort_values(ascending=False).plot.bar(figsize=(12, 7))
    is_weekend = df["weekday"].isin(["Saturday", "Sunday"])
    weekends = df[is_weekend]
    sns.displot(weekends, x="total_volume")
    weekdays = df[~is_weekend]
    sns.displot(weekdays, x="total_volume")
    print("Descriptive statistics:")
    stats = pd.concat(
        [weekends["total_volume"].describe(), weekdays["total_volume"].describe()],
        axis=1,
        keys=["weekends", "working_days"],
    )
    print(stats)
    print(
        "The graph labels in respective order: Total Volume by weekdays, Distribution of Volume over weekends, Distribution of Volume over working days"
    )

In [None]:
# Print the statistics and draw the working days vs. weekends plots.
compare_weekdays_volumes(exch_list)

# How does it vary over hours? E.g., US stock times 9:30-16 vs other time

## Binance example

In [None]:
# Load the volumes of each exchange at the original timestamps (no daily
# aggregation), as needed for the intraday comparison.
binance_1 = get_initial_df_with_volumes(binance_coins, "binance")
bitfinex_1 = get_initial_df_with_volumes(bitfinex_coins, "bitfinex")
ftx_1 = get_initial_df_with_volumes(ftx_coins, "ftx")
gateio_1 = get_initial_df_with_volumes(gateio_coins, "gateio")
kucoin_1 = get_initial_df_with_volumes(kucoin_coins, "kucoin")

exchange_list = [binance_1, bitfinex_1, ftx_1, gateio_1, kucoin_1]
# Tag each frame with its exchange name, read later through `df.name`.
binance_1.name = "binance"
bitfinex_1.name = "bitfinex"
ftx_1.name = "ftx"
gateio_1.name = "gateio"
kucoin_1.name = "kucoin"

In [None]:
# Plot the ATH vs. non-ATH average volume for every exchange.
plot_ath_volumes_comparison(exchange_list)