In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import seaborn as sns
import pickle

import matplotlib.pyplot as plt
plt.style.use("ggplot")
%config InlineBackend.figure_format='retina'

import QuantTrading.ImpactFitting as IF
import QuantTrading.SyntheticAlpha as SA
import QuantTrading.BackTest as BT

## Data Acquisition
Data initialised in this section cover all dates and all stocks.

In [2]:
def load_from_pickle(filename, path='../pkl_dump/'):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    filename : str
        Name of the pickle file inside ``path``.
    path : str, optional
        Directory holding the pickle dumps. Defaults to ``'../pkl_dump/'``,
        the location that was previously hard-coded.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while deserialising, so only
    point this at files you produced yourself.
    """
    import os  # local import so this cell does not depend on another cell's imports

    # os.path.join works whether or not ``path`` ends with a separator.
    with open(os.path.join(path, filename), 'rb') as f:
        return pickle.load(f)

# Read the cached inputs (all stocks, all dates) in one pass; the order of the
# file names matches the order of the names being bound.
traded_volume_df, px_df, daily_stock_info_df, monthly_scaling_factor = map(
    load_from_pickle,
    (
        'traded_volume_df.pkl',
        'px_df.pkl',
        'daily_stock_info_df.pkl',
        'monthly_scaling_factor.pkl',
    ),
)

# Distinct values of the "stock" field found in the traded-volume data.
stocks = traded_volume_df.reset_index()["stock"].unique()

## Price Impact

There are several impact models to choose from: 1) naive OW, 2) AFS, 3) Ridge-style OW.

**DODGY!!!** $R^2$ is maximised at the largest half-life tested!! Is this a bug, or is there a genuine reason?

### 1) OW - `model_type = "linear"`

In [7]:
# Grid search over impact half-lives for the OW model (model_type "linear").
in_sample_month = 5
# Alternative (longer) grid: half_life_list = np.arange(10800, 14401, 1800)
half_life_list = np.array([60, 300, 600, 900, 1800, 2700, 3600, 4500, 5400])
rsq_data = np.zeros((len(stocks), len(half_life_list)))

# Loop-invariant settings, set once up front.
model_type = "linear"
summary_columns = ["beta_estimate", "alpha_estimate", "is_rsq", "oos_rsq", "half_life"]

reg_summary_list = []
for half_life in half_life_list:
    # Impact state for this half-life, then the regression summary built from it.
    impact_px_df = IF.get_impact_state(
        traded_volume_df, monthly_scaling_factor, half_life, model_type
    )
    reg_summary_temp = IF.get_regression_results(
        impact_px_df, px_df, in_sample_month, explanation_horizon_periods=6
    )
    # Tag every row with the half-life that produced it before stacking.
    reg_summary_temp["half_life"] = half_life
    reg_summary_list.append(reg_summary_temp[summary_columns])

# One long table: a block of rows per half-life, index moved into columns.
ow_rsq_table = pd.concat(reg_summary_list).reset_index()

  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns


In [8]:
# For every stock keep the single grid row with the highest in-sample R^2, so
# that beta_estimate, alpha_estimate, oos_rsq and half_life all come from the
# same regression.
# The earlier `groupby("stock").max("is_rsq")` passed "is_rsq" as the
# `numeric_only` flag and therefore took independent per-column maxima: the
# reported half_life was always the largest one in the grid, and the
# coefficients did not belong to the best fit.
ow_scored = ow_rsq_table.dropna(subset=["is_rsq"])  # idxmax needs a valid R^2 in each group
ow_best_rows = ow_scored.groupby("stock")["is_rsq"].idxmax()
ow_summary = ow_scored.loc[ow_best_rows].set_index("stock")  # indexed by stock, as before

# Persist both the per-stock best fit and the full grid of results.
with open("./impact_model_summary/ow_summary.pkl", 'wb') as f:
    pickle.dump(ow_summary, f)
with open("./impact_model_summary/ow_rsq_table.pkl", 'wb') as f:
    pickle.dump(ow_rsq_table, f)

### 2) AFS - `model_type = "sqrt"`

In [9]:
# Grid search over impact half-lives for the AFS model (model_type "sqrt").
in_sample_month = 5
# Alternative (longer) grid: half_life_list = np.arange(10800, 14401, 1800)
half_life_list = np.array([60, 300, 600, 900, 1800, 2700, 3600, 4500, 5400])
rsq_data = np.zeros((len(stocks), len(half_life_list)))

# Loop-invariant settings, set once up front.
model_type = "sqrt"
summary_columns = ["beta_estimate", "alpha_estimate", "is_rsq", "oos_rsq", "half_life"]

reg_summary_list = []
for half_life in half_life_list:
    # Impact state for this half-life, then the regression summary built from it.
    impact_px_df = IF.get_impact_state(
        traded_volume_df, monthly_scaling_factor, half_life, model_type
    )
    reg_summary_temp = IF.get_regression_results(
        impact_px_df, px_df, in_sample_month, explanation_horizon_periods=6
    )
    # Tag every row with the half-life that produced it before stacking.
    reg_summary_temp["half_life"] = half_life
    reg_summary_list.append(reg_summary_temp[summary_columns])

# One long table: a block of rows per half-life, index moved into columns.
afs_rsq_table = pd.concat(reg_summary_list).reset_index()

  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns


In [10]:
# For every stock keep the single grid row with the highest in-sample R^2, so
# that beta_estimate, alpha_estimate, oos_rsq and half_life all come from the
# same regression.
# The earlier `groupby("stock").max("is_rsq")` passed "is_rsq" as the
# `numeric_only` flag and therefore took independent per-column maxima: the
# reported half_life was always the largest one in the grid, and the
# coefficients did not belong to the best fit.
afs_scored = afs_rsq_table.dropna(subset=["is_rsq"])  # idxmax needs a valid R^2 in each group
afs_best_rows = afs_scored.groupby("stock")["is_rsq"].idxmax()
afs_summary = afs_scored.loc[afs_best_rows].set_index("stock")  # indexed by stock, as before

# Persist both the per-stock best fit and the full grid of results.
with open("./impact_model_summary/afs_summary.pkl", 'wb') as f:
    pickle.dump(afs_summary, f)
with open("./impact_model_summary/afs_rsq_table.pkl", 'wb') as f:
    pickle.dump(afs_rsq_table, f)

### 3) Extended: Ridge-style

See `Ridge_toy.ipynb`

## Synthetic Alpha (Done)

In [8]:
import QuantTrading.SyntheticAlpha as SA
# SA.get_synthetic_alpha(0.6, px_df, "AAPL")

## Optimal Trading Strategy

In [3]:
# Alpha signal settings - vary these for the sensitivity analysis.
corr = 0.6
alpha_horizon = 6

# Single stock / trading day under study.
stock = "AAPL"
date = "2019-05-01"

# Synthetic alpha for `stock`, restricted to the chosen day.
alphas_series = SA.get_synthetic_alpha(
    corr, px_df, stock, alpha_horizon=alpha_horizon
).loc[date]

# Fitted impact-model summary; point this at afs_summary.pkl to use AFS instead.
with open('./impact_model_summary/ow_summary.pkl', 'rb') as summary_file:
    impact_summary = pickle.load(summary_file)
model_type = 'linear'  # keep in step with the summary loaded above: 'sqrt' for AFS

  return synthetic_alphas.ewm(halflife=200, axis="columns").mean()


In [4]:
# Optimal trades for the chosen stock and day. `model_type` is passed instead
# of a literal 'linear' so that switching the impact model in the setup cell
# (summary pickle + model_type) actually changes the model used here.
optimal_trades = BT.get_optimal_trades(monthly_scaling_factor,
                                       alphas_series, impact_summary,
                                       stock, date, model_type)

# Second entry of the (stock, date) scaling row, taken explicitly by position.
# The previous `[1]` relied on pandas' deprecated integer-as-position fallback.
# NOTE(review): assumes the row is a Series labelled by column name - confirm.
ADV = monthly_scaling_factor.loc[stock, date].iloc[1]

# Express the trades as a fraction of ADV and total up the absolute sizes.
pct_synthetic_alpha_optimal_trades = optimal_trades / ADV
total_trade_sizes = pct_synthetic_alpha_optimal_trades.abs().sum()

  ADV = monthly_scaling_factor.loc[stock, date][1]


## Backtesting (Barely Started)

In [168]:
# def impact_adjusted_prices(pre_ewm, px_df, scaling_df, half_life, impact_coef_df, model_type):
#     cum_impacts = impact_state(pre_ewm, scaling_df, half_life, model_type).T
#     cum_returns = px_df.T / px_df.T.iloc[0, :] - 1
#     stock_date_df = cum_returns.T.iloc[:, 0].reset_index()
#     # stock_date_df["month"] = pd.to_datetime(stock_date_df["date"]).dt.month
#     stock_date_df.drop(["date"], axis="columns", inplace=True)
#     impact_coefficients = pd.merge(stock_date_df, impact_coef_df, on=["stock"], how="left")["beta_estimate"].values
#     cum_returns -= cum_impacts * impact_coefficients
#     adjusted_px_df = (px_df.T.iloc[0, :] * (cum_returns + 1)).T.reset_index()
#     return adjusted_px_df


## Performance Analysis (Need plots!)