## Import

In [None]:
%load_ext autoreload
%autoreload 2
import collections
import logging
import os

import pandas as pd

import core.config as cfg
import helpers.dbg as dbg
import helpers.env as env
import helpers.printing as pri
import vendors.kibot.utils as kut

In [None]:
# Print the system signature reported by `helpers.env`, so the environment used
# for this run is captured in the notebook output.
print(env.get_system_signature())

# Apply the project's notebook configuration (presumably display / printing
# options -- see `helpers.printing.config_notebook` to confirm).
pri.config_notebook()

# TODO(gp): Changing level during the notebook execution doesn't work. Fix it.
# dbg.init_logger(verb=logging.DEBUG)
# Initialize logging at INFO verbosity; the commented-out line above is the
# DEBUG alternative.
dbg.init_logger(verb=logging.INFO)
# dbg.test_logger()

# Logger for this notebook.
_LOG = logging.getLogger(__name__)

# Metadata

In [None]:
# Load the first Kibot metadata table through `kut.read_metadata1()` and
# display its first three rows.
df1 = kut.read_metadata1()
df1.head(3)

In [None]:
# Load the second Kibot metadata table through `kut.read_metadata2()` and
# display its first three rows.
df2 = kut.read_metadata2()
df2.head(3)

In [None]:
# Load the third Kibot metadata table through `kut.read_metadata3()` and
# display its first three rows.
df3 = kut.read_metadata3()
df3.head(3)

In [None]:
# Load the fourth Kibot metadata table through `kut.read_metadata4()`. This is
# the table the rest of the notebook uses to select symbols (it is read here
# for its "Exchange", "Description" and "Symbol" columns).
df4 = kut.read_metadata4()
print(df4.head(3))

# List the distinct values found in the "Exchange" column.
print(df4["Exchange"].unique())

## Explore metadata

In [None]:
# Boolean selector for the contracts whose description mentions "GAS" or "OIL"
# (case-sensitive substring match). `na=False` treats a missing description as
# "no match"; a plain `"GAS" in d` test raises TypeError on a NaN value.
mask = df4["Description"].str.contains("GAS|OIL", regex=True, na=False)
# Number of matching contracts.
print(mask.sum())
# Show the matching rows without the "SymbolBase" and "Size(MB)" columns.
print(df4[mask].drop(columns=["SymbolBase", "Size(MB)"]))

In [None]:
# Display the symbols of the rows selected by `mask`, picking rows and column
# in a single `.loc` selection.
df4.loc[mask, "Symbol"].values

# Read data

In [None]:
# Notebook configuration.
#
# If the `__CONFIG__` environment variable is set, its content is evaluated as
# a Python expression and the result is used as the config. Otherwise the
# defaults defined below are used.
config_expr = os.environ.get("__CONFIG__")
if config_expr is not None:
    print("__CONFIG__=", config_expr)
    # SECURITY(review): `eval` executes arbitrary code taken from the
    # environment. Only run this notebook with a trusted `__CONFIG__`.
    # `ast.literal_eval` would be a safer replacement if the value is always a
    # plain literal -- confirm the format produced by the caller before
    # switching.
    config = eval(config_expr)
else:
    config = collections.OrderedDict()
    # Passed as `nrows` to the data reader further down; set it to e.g. 100000
    # to work on a sample (None presumably means "read everything" -- confirm
    # in `kut.read_multiple_symbol_data`).
    # config["nrows"] = 100000
    config["nrows"] = None
    # NOTE(review): not used in the visible part of the notebook; the name
    # suggests a z-scoring parameter -- confirm where it is consumed.
    config["zscore_com"] = 28

# Print the resulting config so it is recorded in the notebook output.
print(cfg.config_to_string(config))

# Prices

## Read daily prices

In [None]:
# Names of all the entries found in the daily continuous-contracts directory,
# with the ".csv.gz" extension removed. The order is whatever `os.listdir`
# returns.
all_symbols = [
    entry.replace(".csv.gz", "")
    for entry in os.listdir("/data/kibot/All_Futures_Continuous_Contracts_daily")
]

In [None]:
# Symbols of the metadata rows selected by `mask`; these are the symbols whose
# daily prices are read next.
symbols = df4.loc[mask, "Symbol"].values
symbols

In [None]:
# Path template of the daily continuous-contract files. The "%s" placeholder
# is presumably filled in with each symbol by the reader below -- confirm in
# `kut.read_multiple_symbol_data`.
file_name = "/data/kibot/All_Futures_Continuous_Contracts_daily/%s.csv.gz"

# Read the daily data for all the selected symbols. The result is used below
# as a mapping from symbol to that symbol's data. `nrows` comes from the
# notebook config.
daily_price_dict_df = kut.read_multiple_symbol_data(
    symbols, file_name, nrows=config["nrows"]
)

# Display the last two rows for "CL" (assumes "CL" is among `symbols`).
daily_price_dict_df["CL"].tail(2)

# Top futures by volume

## Sum volume

In [None]:
# Total volume per symbol: sum of the "vol" column of each symbol's daily data.
daily_volume_sum_dict = {
    sym: prices["vol"].sum() for sym, prices in daily_price_dict_df.items()
}

In [None]:
# Turn the {symbol: total volume} mapping into a one-column frame ("sum_vol")
# whose index holds the symbols and is named "symbol".
daily_volume_sum_df = pd.DataFrame.from_dict(
    daily_volume_sum_dict, orient="index", columns=["sum_vol"]
).rename_axis("symbol")

In [None]:
# Display the symbols ranked by total volume, largest first.
daily_volume_sum_df.sort_values(by="sum_vol", ascending=False)

## Mean volume

In [None]:
# Average volume per symbol: mean of the "vol" column of each symbol's daily
# data.
daily_volume_mean_dict = {
    sym: prices["vol"].mean() for sym, prices in daily_price_dict_df.items()
}

In [None]:
# Turn the {symbol: mean volume} mapping into a one-column frame ("mean_vol")
# whose index holds the symbols and is named "symbol".
daily_volume_mean_df = pd.DataFrame.from_dict(
    daily_volume_mean_dict, orient="index", columns=["mean_vol"]
).rename_axis("symbol")

In [None]:
# Display the symbols ranked by average volume, largest first.
daily_volume_mean_df.sort_values(by="mean_vol", ascending=False)