# Imports

In [None]:
%load_ext autoreload
%autoreload 2

import logging
import os

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
from tqdm.autonotebook import tqdm

import amp_research.price_movement_analysis as pma
import helpers.dbg as dbg
import helpers.env as env
import helpers.printing as pri
import helpers.s3 as hs3
import vendors.cme.read as cmer
import vendors.kibot.utils as kut

In [None]:
def get_top_100(series):
    """
    Return the 100 largest values of `series`, ordered from largest to smallest.

    Ranking is done on the signed values, so only the largest positive entries
    are kept; large negative entries are not selected.

    :param series: object exposing `sort_values(ascending=...)`, e.g. a
        `pd.Series`
    :return: the first 100 entries of the descending-sorted input (fewer if
        the input has fewer than 100 entries), keeping the original index
        labels
    """
    descending = series.sort_values(ascending=False)
    return descending.iloc[:100]

In [None]:
# Print the system signature reported by `helpers.env`.
print(env.get_system_signature())

# Apply the project's notebook configuration from `helpers.printing`.
pri.config_notebook()

# Initialise logging through the project helper at INFO verbosity; this runs
# before the logger below is created.
dbg.init_logger(verb=logging.INFO)

# Logger for this notebook.
_LOG = logging.getLogger(__name__)

In [None]:
# Symbol analysed throughout the notebook; it is substituted into the Kibot
# file names and shown in plot titles / printed messages.
SYMBOL = 'CL'

# Load daily and minutely data

In [None]:
# Build the path of the daily continuous-contract file for SYMBOL under the
# root returned by `hs3.get_path()`.
s3_path = hs3.get_path()
kibot_path = os.path.join(
    s3_path, "kibot/All_Futures_Continuous_Contracts_daily/%s.csv.gz"
)
# Fill the `%s` placeholder with the symbol.
file_name = kibot_path % SYMBOL

# Read the daily data.
# NOTE(review): `nrows=None` presumably means "read every row" - confirm in
# `kut.read_data`.
daily_prices = kut.read_data(
    file_name, nrows=None
)

# Display the last two rows as a quick check of the loaded data.
daily_prices.tail(2)

In [None]:
# Build the path of the 1-minute continuous-contract file for SYMBOL.
# `s3_path`, `kibot_path` and `file_name` are reassigned here, overwriting any
# values set for the daily file.
s3_path = hs3.get_path()
kibot_path = os.path.join(
    s3_path, "kibot/All_Futures_Continuous_Contracts_1min/%s.csv.gz"
)
# Fill the `%s` placeholder with the symbol.
file_name = kibot_path % SYMBOL
# Read the 1-minute data.
# NOTE(review): `nrows=None` presumably means "read every row" - confirm in
# `kut.read_data`.
minutely_prices = kut.read_data(
    file_name, nrows=None
)

# Display the last two rows as a quick check of the loaded data.
minutely_prices.tail(2)

In [None]:
# Downsample the 1-minute data to 5-minute bins, keeping the last observation
# of each bin.
# NOTE(review): `.last()` is applied to every column; if the data carries
# columns such as high/low/volume, "last" may not be the right aggregate for
# them - confirm against the columns returned by `kut.read_data`.
five_min_prices = minutely_prices.resample("5Min").last()

In [None]:
# Preview the first rows of the 5-minute resampled data.
five_min_prices.head()

# Daily price movements

In [None]:
# Compute z-scored returns from the daily data via the project helper.
# NOTE(review): the exact z-scoring procedure and the meaning of the "daily"
# argument live in `pma.get_zscored_returns` - confirm there.
zscored_rets = pma.get_zscored_returns(
    daily_prices, "daily"
)
# Keep the 100 largest (signed) z-scored returns, largest first.
top_daily_movements = get_top_100(zscored_rets)

In [None]:
# Count how many of the top-100 daily movements fall in each calendar year and
# show the counts as a bar chart.
# `value_counts(sort=False)` only disables sorting by count; it does not order
# the result by year. `top_daily_movements` is ordered by z-score, not by date,
# so the counts are sorted by the year label to get a chronological x-axis.
(
    top_daily_movements.index.year.value_counts(sort=False)
    .sort_index()
    .plot(kind="bar")
)
plt.title("How many of the top-100 price movements occurred during each year")
plt.show()

In [None]:
# Group the daily z-scored returns into yearly ('Y') bins and keep the top 100
# values within each bin.
# NOTE(review): recent pandas releases deprecate the 'Y' alias in favour of
# 'YE' - confirm against the pandas version this notebook is run with.
top_daily_movements_by_year = zscored_rets.resample('Y').apply(get_top_100)
# Preview the first rows of the per-year result.
top_daily_movements_by_year.head()

In [None]:
# Preview the last rows of the per-year daily result.
top_daily_movements_by_year.tail()

# 1-min movements

In [None]:
# Compute z-scored returns from the 1-minute data via the project helper.
# NOTE(review): the exact z-scoring procedure and the meaning of the
# "minutely" argument live in `pma.get_zscored_returns` - confirm there.
zscored_1min_rets = pma.get_zscored_returns(
    minutely_prices, "minutely"
)
# Keep the 100 largest (signed) z-scored returns, largest first.
top_1min_movements = get_top_100(zscored_1min_rets)

In [None]:
# Preview the largest 1-minute movements.
top_1min_movements.head()

In [None]:
# Bar chart of the top-100 1-minute z-scored returns, one bar per entry, in the
# descending order produced by `get_top_100`.
top_1min_movements.plot(kind="bar")
plt.title(
    f"Largest price movements in a 1 min interval (in z-score space) for the {SYMBOL} symbol"
)
plt.show()

In [None]:
# Group the 1-minute z-scored returns into yearly ('Y') bins and keep the top
# 100 values within each bin.
# NOTE(review): recent pandas releases deprecate the 'Y' alias in favour of
# 'YE' - confirm against the pandas version this notebook is run with.
top_1min_movements_by_year = zscored_1min_rets.resample('Y').apply(get_top_100)
# Preview the first rows of the per-year result.
top_1min_movements_by_year.head()

# 5-min movements

In [None]:
# Compute z-scored returns from the 5-minute resampled data.
# NOTE(review): the 5-minute data is passed with the same "minutely" argument
# used for the 1-minute data - confirm `pma.get_zscored_returns` treats this
# as intended.
zscored_5min_rets = pma.get_zscored_returns(
    five_min_prices, "minutely"
)
# Keep the 100 largest (signed) z-scored returns, largest first.
top_5min_movements = get_top_100(zscored_5min_rets)

In [None]:
# Preview the largest 5-minute movements.
top_5min_movements.head()

In [None]:
# Report how often each time of day appears among the top-100 5-minute
# movements: a header line followed by the per-time counts.
print(
    f"Top 100 of the 5-min price movements for {SYMBOL} occur at the following time:",
    pd.Series(top_5min_movements.index).dt.time.value_counts(),
    sep="\n",
)

In [None]:
# Group the 5-minute z-scored returns into yearly ('Y') bins and keep the top
# 100 values within each bin.
# NOTE(review): recent pandas releases deprecate the 'Y' alias in favour of
# 'YE' - confirm against the pandas version this notebook is run with.
top_5min_movements_by_year = zscored_5min_rets.resample('Y').apply(get_top_100)
# Preview the first rows of the per-year result.
top_5min_movements_by_year.head()

In [None]:
# Preview the last rows of the per-year 5-minute result.
top_5min_movements_by_year.tail()