# Imports

In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

In [None]:
from pylab import rcParams
from tqdm.autonotebook import tqdm

import core.signal_processing as sp
import vendors.kibot.utils as kut

# import vendors.particle_one.PartTask269_liquidity_analysis_utils as lau

sns.set()

rcParams["figure.figsize"] = (20, 5)

In [None]:
# Default `tau` forwarded as the second argument of `sp.rolling_zscore`
# (presumably the smoothing/window parameter — confirm in core.signal_processing).
TAU = 2

In [None]:
def get_zscored_prices_diff(price_dict_df, symbol, tau=TAU):
    """
    Compute the absolute rolling z-score of the open-to-close price move.

    :param price_dict_df: mapping from symbol to a price dataframe that has
        "open" and "close" columns
    :param symbol: key of `price_dict_df` to process
    :param tau: second argument forwarded to `sp.rolling_zscore`
    :return: absolute value of the z-scored `close - open` series
    """
    prices_symbol = price_dict_df[symbol]
    prices_diff = prices_symbol["close"] - prices_symbol["open"]
    zscored_prices_diff = sp.rolling_zscore(prices_diff, tau)
    # Only the magnitude of the move matters for ranking, not its direction.
    return zscored_prices_diff.abs()


def get_top_movements_by_group(
    price_dict_df, commodity_symbols_kibot, group, n_movements=100, tau=TAU
):
    """
    Rank timestamps by the mean absolute z-scored price move across a group.

    :param price_dict_df: mapping from symbol to a price dataframe that has
        "open" and "close" columns
    :param commodity_symbols_kibot: mapping from group name to an iterable
        of symbols belonging to that group
    :param group: key of `commodity_symbols_kibot` to process
    :param n_movements: number of largest movements to return
    :param tau: forwarded to `get_zscored_prices_diff` for every symbol
    :return: the `n_movements` largest row-wise means, sorted in descending
        order
    """
    zscored_diffs = [
        get_zscored_prices_diff(price_dict_df, symbol, tau=tau)
        for symbol in commodity_symbols_kibot[group]
    ]
    # Align the per-symbol series on their index, one column per symbol.
    zscored_diffs = pd.concat(zscored_diffs, axis=1)
    # Symbols without a value at a given timestamp are ignored in the mean.
    mean_zscored_diffs = zscored_diffs.mean(axis=1, skipna=True)
    return mean_zscored_diffs.sort_values(ascending=False).head(n_movements)


def get_top_movements_for_symbol(price_dict_df, symbol, tau=TAU, n_movements=100):
    """
    Return the largest absolute z-scored price moves of a single symbol.

    :param price_dict_df: mapping from symbol to a price dataframe
    :param symbol: key of `price_dict_df` to process
    :param tau: forwarded to `get_zscored_prices_diff`
    :param n_movements: number of largest movements to return
    :return: the `n_movements` largest values, sorted in descending order
    """
    abs_zscores = get_zscored_prices_diff(price_dict_df, symbol, tau=tau)
    ranked_moves = abs_zscores.sort_values(ascending=False)
    return ranked_moves.head(n_movements)

# Load CME metadata

In [None]:
# Change this to library code from #269 once it is merged into master

In [None]:
_PRODUCT_SPECS_PATH = (
    "/data/prices/product_slate_export_with_contract_specs_20190905.csv"
)
product_list = pd.read_csv(_PRODUCT_SPECS_PATH)

In [None]:
product_list.head()

In [None]:
product_list["Product Group"].value_counts()

In [None]:
product_list.set_index("Product Group", inplace=True)

In [None]:
commodity_groups = ["Energy", "Agriculture", "Metals"]

In [None]:
commodity_symbols = {
    group: product_list.loc[group]["Globex"].values for group in commodity_groups
}

In [None]:
commodity_symbols

# Daily price movements

## Load kibot commodity daily prices

In [None]:
daily_metadata = kut.read_metadata2()
daily_metadata.head(3)

In [None]:
len(daily_metadata["Symbol"])

In [None]:
daily_metadata["Symbol"].nunique()

In [None]:
len(commodity_symbols["Energy"])

In [None]:
energy_symbols_kibot = np.intersect1d(
    daily_metadata["Symbol"].values, commodity_symbols["Energy"]
)
energy_symbols_kibot

In [None]:
len(energy_symbols_kibot)

In [None]:
# For each commodity group keep only the CME symbols that also appear in the
# kibot daily metadata.
commodity_symbols_kibot = {
    group: np.intersect1d(daily_metadata["Symbol"].values, group_symbols)
    for group, group_symbols in commodity_symbols.items()
}

In [None]:
commodity_symbols_kibot

In [None]:
# Number of symbols kept for each commodity group.
{
    group: len(group_symbols)
    for group, group_symbols in commodity_symbols_kibot.items()
}

In [None]:
# Flatten the per-group symbol arrays into one list covering all groups.
comm_list = []
for comm_group in commodity_symbols_kibot.values():
    comm_list.extend(list(comm_group))
# Preview the first few symbols.
comm_list[:5]

In [None]:
file_name = "/data/kibot/All_Futures_Continuous_Contracts_daily/%s.csv.gz"

daily_price_dict_df = kut.read_multiple_symbol_data(
    comm_list, file_name, nrows=None
)

daily_price_dict_df["CL"].tail(2)

## Largest movements for a specific symbol

In [None]:
# The get_top_movements_for_symbol() function implements the steps shown
# in this section. It is not used here so that each step of the algorithm
# is visible on its own.

In [None]:
symbol = "CL"

In [None]:
cl_prices = daily_price_dict_df[symbol]

In [None]:
cl_prices_diff = cl_prices["close"] - cl_prices["open"]

In [None]:
zscored_cl_prices_diff = sp.rolling_zscore(cl_prices_diff, TAU)
zscored_cl_prices_diff.head()

In [None]:
abs_zscored_cl_prices_diff = zscored_cl_prices_diff.abs()

In [None]:
abs_zscored_cl_prices_diff.max()

In [None]:
top_100_movements_cl = abs_zscored_cl_prices_diff.sort_values(
    ascending=False
).head(100)

In [None]:
# Bar chart of the 100 largest absolute z-scored daily moves.
top_100_movements_cl.plot(kind="bar")
ax = plt.gca()
# Keep only the first 10 characters of every tick label (presumably the
# YYYY-MM-DD part of a timestamp label — confirm against the index format).
xlabels = [item.get_text()[:10] for item in ax.get_xticklabels()]
ax.set_xticklabels(xlabels)
plt.title(
    f"Largest price movements in a single day (in z-score space) for {symbol} symbol"
)
plt.show()

In [None]:
# Count the top movements per calendar year. `sort=False` leaves the counts
# in no particular year order, so sort by year to get a chronological x-axis.
(
    top_100_movements_cl.index.year.value_counts(sort=False)
    .sort_index()
    .plot(kind="bar")
)
plt.title("How many of the top-100 price movements occurred during each year")
plt.show()

## Largest movements for energy group

In [None]:
group = "Energy"

In [None]:
commodity_symbols_kibot[group]

In [None]:
# Collect the absolute z-scored price moves of every symbol in the group.
# A comprehension keeps the loop variable local, so the notebook-level
# `symbol` used by the plot titles is not overwritten.
zscored_diffs = [
    get_zscored_prices_diff(daily_price_dict_df, group_symbol)
    for group_symbol in commodity_symbols_kibot[group]
]

In [None]:
zscored_diffs = pd.concat(zscored_diffs, axis=1)
zscored_diffs.head()

In [None]:
mean_zscored_diffs = zscored_diffs.mean(axis=1, skipna=True)

In [None]:
mean_zscored_diffs.head()

In [None]:
mean_zscored_diffs.tail()

In [None]:
mean_zscored_diffs.sort_values(ascending=False).head(100)

## Largest movements for each group

In [None]:
top_100_movements_by_group = {
    group: get_top_movements_by_group(
        daily_price_dict_df, commodity_symbols_kibot, group
    )
    for group in commodity_symbols_kibot.keys()
}

In [None]:
top_100_movements_by_group.keys()

In [None]:
top_100_movements_by_group["Energy"].head()

In [None]:
top_100_movements_by_group["Agriculture"].head()

In [None]:
top_100_movements_by_group["Metals"].head()

# 5-minute price movements

## Load 1-minute prices

In [None]:
minutely_metadata = kut.read_metadata1()

In [None]:
minutely_metadata.head()

In [None]:
# Check whether the daily and the 1-minute metadata list the same symbols in
# the same order (comparing a column with itself would always be True).
np.array_equal(
    daily_metadata["Symbol"].values, minutely_metadata["Symbol"].values
)

In [None]:
file_name = "/data/kibot/All_Futures_Continuous_Contracts_1min/%s.csv.gz"

minutely_price_dict_df = kut.read_multiple_symbol_data(
    comm_list, file_name, nrows=None
)

minutely_price_dict_df["CL"].tail(2)

In [None]:
minutely_price_dict_df["CL"].head()

In [None]:
# Build 5-minute bars from the 1-minute bars. Summing price columns does not
# yield a price: the 5-minute open is the first 1-minute open, the close is
# the last 1-minute close and high/low are the extremes of the interval.
# Columns not listed below keep the previous "sum" aggregation.
# NOTE(review): with "first"/"last" an interval without any 1-minute bar gives
# NaN instead of 0 — confirm `sp.rolling_zscore` treats NaN as desired.
_OHLC_AGGREGATION = {
    "open": "first",
    "high": "max",
    "low": "min",
    "close": "last",
}
five_min_price_dict_df = {
    symbol: prices.resample("5Min").agg(
        {
            column: _OHLC_AGGREGATION.get(column, "sum")
            for column in prices.columns
        }
    )
    for symbol, prices in minutely_price_dict_df.items()
}

In [None]:
# Preview the resampled 5-minute prices for CL.
five_min_price_dict_df["CL"].head()

## Top movements for a symbol

In [None]:
symbol = "CL"

In [None]:
top_100_movements_cl_5_min = get_top_movements_for_symbol(
    five_min_price_dict_df, symbol
)

In [None]:
# Bar chart of the 100 largest absolute z-scored 5-minute moves.
top_100_movements_cl_5_min.plot(kind="bar")
plt.title(
    f"Largest price movements in a 5 min interval (in z-score space) for {symbol} symbol"
)
plt.show()

In [None]:
print(f"Top 100 of the price movements for {symbol} occur at the following time:")
print(pd.Series(top_100_movements_cl_5_min.index).dt.time.value_counts())

## Largest movements for energy group

In [None]:
group = "Energy"

In [None]:
commodity_symbols_kibot[group]

In [None]:
get_top_movements_by_group(five_min_price_dict_df, commodity_symbols_kibot, group)

## Largest movements for each group

In [None]:
top_100_5_min_movements_by_group = {
    group: get_top_movements_by_group(
        five_min_price_dict_df, commodity_symbols_kibot, group
    )
    for group in tqdm(commodity_symbols_kibot.keys())
}

In [None]:
{
    group: head_prices_group.head()
    for group, head_prices_group in top_100_5_min_movements_by_group.items()
}