In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import seaborn as sns
import pickle

import matplotlib.pyplot as plt
plt.style.use("ggplot")
%config InlineBackend.figure_format='retina'

import QuantTrading.ImpactFitting as IF



## Data Acquisition
Data initialised in this section cover all dates and all stocks.

In [2]:
def load_from_pickle(filename, directory='../pkl_dump/'):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    filename : str
        Name of the pickle file, e.g. ``'px_df.pkl'``.
    directory : str, optional
        Directory containing the file. Defaults to ``'../pkl_dump/'``
        (relative to the current working directory), which is the location
        the notebook has always read from. A trailing separator is optional.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Raises
    ------
    FileNotFoundError
        If the file does not exist in ``directory``.
    """
    import os  # local import so this cell does not depend on the import cell

    with open(os.path.join(directory, filename), 'rb') as f:
        # NOTE: pickle.load can execute arbitrary code while deserialising;
        # only use this on files produced by this project.
        return pickle.load(f)

# Read each cached dataset from the pickle dump, in a fixed order
traded_volume_df, px_df, daily_stock_info_df, monthly_scaling_factor = (
    load_from_pickle(pkl_name)
    for pkl_name in (
        'traded_volume_df.pkl',
        'px_df.pkl',
        'daily_stock_info_df.pkl',
        'monthly_scaling_factor.pkl',
    )
)

# Distinct values of the "stock" field of the traded-volume frame
stock_labels = traded_volume_df.reset_index()["stock"]
stocks = stock_labels.unique()

## Price Impact

There are several impact models to choose from: 1) naive OW, 2) AFS, 3) Ridge-style OW.

**FIXME (DODGY!):** $R^2$ is maximised at the largest half-life tested. Is this a bug, or is there a genuine reason for it?

### 1) OW - `model_type = "linear"`

In [3]:
# Sweep the candidate half-lives for the linear (OW) impact model and collect
# one regression summary per half-life.
in_sample_month = 5
# half_life_list = np.arange(10800, 14401, 1800)
half_life_list = np.array([3600])
rsq_data = np.zeros((len(stocks), len(half_life_list)))

model_type = "linear"
summary_columns = ["beta_estimate", "alpha_estimate", "is_rsq", "oos_rsq", "half_life"]

reg_summary_list = []
for candidate_half_life in half_life_list:
    # Impact state for this half-life, then the regression against prices
    impact_px_df = IF.get_impact_state(
        traded_volume_df, monthly_scaling_factor, candidate_half_life, model_type
    )
    reg_summary_temp = IF.get_regression_results(
        impact_px_df, px_df, in_sample_month, explanation_horizon_periods=6
    )

    # Tag the rows with the half-life that produced them
    reg_summary_temp["half_life"] = candidate_half_life
    reg_summary_list.append(reg_summary_temp[summary_columns])

# Stack the per-half-life summaries; the index is moved into ordinary columns
rsq_table = pd.concat(reg_summary_list).reset_index()

  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns


In [22]:
# For each stock keep the single row with the highest in-sample R^2, so that
# beta_estimate, alpha_estimate and half_life all come from the same fit.
# (`groupby("stock").max("is_rsq")` does not do this: the positional argument
# is interpreted as `numeric_only`, and every column is maximised
# independently of the others.)
# NaN R^2 values sort last, so a stock is only represented by a NaN row when
# it has no valid fit at all; the stable sort keeps the first row on ties.
ow_summary = (
    rsq_table
    .sort_values("is_rsq", ascending=False, kind="stable")
    .drop_duplicates(subset="stock", keep="first")
    .set_index("stock")
    .sort_index()
)

### 2) AFS - `model_type = "sqrt"`

In [82]:
# Sweep the candidate half-lives for the square-root (AFS) impact model and
# collect one regression summary per half-life.
in_sample_month = 3
# half_life_list = np.arange(10800, 14401, 1800)
half_life_list = [900, 1800, 3600, 7200, 14400]
rsq_data = np.zeros((len(stocks), len(half_life_list)))

model_type = "sqrt"
summary_columns = ["beta_estimate", "alpha_estimate", "is_rsq", "oos_rsq", "half_life"]

reg_summary_list = []
for candidate_half_life in half_life_list:
    # Impact state for this half-life, then the regression against prices
    impact_px_df = IF.get_impact_state(
        traded_volume_df, monthly_scaling_factor, candidate_half_life, model_type
    )
    reg_summary_temp = IF.get_regression_results(
        impact_px_df, px_df, in_sample_month, explanation_horizon_periods=6
    )

    # Tag the rows with the half-life that produced them
    reg_summary_temp["half_life"] = candidate_half_life
    reg_summary_list.append(reg_summary_temp[summary_columns])

# Stack the per-half-life summaries; the index is moved into ordinary columns
rsq_table = pd.concat(reg_summary_list).reset_index()

  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns


### 3) Extended: Ridge-style

## Synthetic Alpha (Done)

In [8]:
import QuantTrading.SyntheticAlpha as SA
# SA.get_synthetic_alpha(0.6, px_df, "AAPL")

## Optimal Trading Strategy (Not started, should be very short)

In [16]:
import QuantTrading.SyntheticAlpha as SA
# Synthetic-alpha settings - change these for sensitivity analysis.
# Both are forwarded to SA.get_synthetic_alpha in the next cell: `corr` as its
# first positional argument, `alpha_horizon` as the keyword of the same name.
corr = 0.6
alpha_horizon = 6

# Fix a single stock/date: the next cell selects the alpha series with
# .loc[date] and the scaling factors with monthly_scaling_factor.loc[stock, date].
stock = "AAPL"
date = "2019-05-01"

In [34]:
# Synthetic alpha for the chosen stock, restricted to the chosen date
smooth_synthetic_alphas_series = SA.get_synthetic_alpha(corr, px_df, stock, alpha_horizon=alpha_horizon)\
                                .loc[date]

# Scaling factors for this stock/date
# (presumably price volatility and average daily volume - TODO confirm)
px_vol, ADV = monthly_scaling_factor.loc[stock, date]

impact_summary = ow_summary # change this with various model summaries; just an example here
impact_coef = impact_summary.loc[stock]['beta_estimate']
half_life = impact_summary.loc[stock]['half_life']
time_unit = 10  # presumably the bar length in the same units as half_life - TODO confirm

# Decay rate implied by the half-life. It must be defined before decay_factor
# and intended_impacts, which both use it; previously it was assigned further
# down, so the cell raised NameError on a fresh kernel.
beta = np.log(2) / half_life
decay_factor = np.exp(-beta * time_unit)

# Scaled impact coefficient ("lambda" in the comments below); every conversion
# from impact to trade size divides by this quantity.
impact_scale = px_vol / ADV * impact_coef


# optimal trade
# Intended impact: half of (alpha minus its forward difference / (beta * time_unit))
intended_impacts = 1/2 * (smooth_synthetic_alphas_series - smooth_synthetic_alphas_series.diff(1).shift(-1).fillna(0) / beta / time_unit)
intended_impacts.iloc[-1] = smooth_synthetic_alphas_series.iloc[-1] # I_T^* = \alpha_T

# Interior trades use the second difference of the alpha series
optimal_trades = (beta * (smooth_synthetic_alphas_series
                - smooth_synthetic_alphas_series.diff(1).diff(1).shift(-1).fillna(0) / (beta ** 2 * time_unit ** 2)
                ) / impact_scale / 2 * time_unit)

# Boundary trades: the first jumps straight to the initial intended impact,
# the last closes the gap between the decayed penultimate impact and alpha_T.
optimal_trades.iloc[0] = intended_impacts.iloc[0] / impact_scale # I_0^* / lambda
optimal_trades.iloc[-1] += (smooth_synthetic_alphas_series.iloc[-1] - intended_impacts.iloc[-2] * decay_factor) / impact_scale

# Trades as a fraction of ADV, and the total absolute size traded
pct_synthetic_alpha_optimal_trades = optimal_trades / ADV
total_trade_sizes = pct_synthetic_alpha_optimal_trades.abs().sum()

(0.602489275982515, 3.2902960222108816e-07, 1.1906048425404063e-07)


  return synthetic_alphas.ewm(halflife=200, axis="columns").mean()


## Backtesting (Barely Started)

In future versions, use `cum_impacts` as the input.

In [168]:
# def impact_adjusted_prices(pre_ewm, px_df, scaling_df, half_life, impact_coef_df, model_type):
#     cum_impacts = impact_state(pre_ewm, scaling_df, half_life, model_type).T
#     cum_returns = px_df.T / px_df.T.iloc[0, :] - 1
#     stock_date_df = cum_returns.T.iloc[:, 0].reset_index()
#     # stock_date_df["month"] = pd.to_datetime(stock_date_df["date"]).dt.month
#     stock_date_df.drop(["date"], axis="columns", inplace=True)
#     impact_coefficients = pd.merge(stock_date_df, impact_coef_df, on=["stock"], how="left")["beta_estimate"].values
#     cum_returns -= cum_impacts * impact_coefficients
#     adjusted_px_df = (px_df.T.iloc[0, :] * (cum_returns + 1)).T.reset_index()
#     return adjusted_px_df


## Performance Analysis (Need plots!)