In [37]:
import os
import pickle

import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm

import matplotlib.pyplot as plt
plt.style.use("ggplot")
%config InlineBackend.figure_format='retina'

import QuantTrading.ImpactFitting as IF
from scipy.optimize import minimize

def load_from_pickle(filename):
    """Unpickle and return the object stored under ``../pkl_dump/``.

    Parameters
    ----------
    filename : str
        File name (including extension) appended directly to the
        ``'../pkl_dump/'`` prefix; the prefix is relative to the current
        working directory.

    Returns
    -------
    object
        Whatever object ``pickle.load`` reconstructs from the file.
    """
    # NOTE: pickle.load can execute arbitrary code; only use on trusted files.
    pickle_dir = '../pkl_dump/'
    with open(pickle_dir + filename, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

# Load the pickled inputs from ../pkl_dump/ (see load_from_pickle).
traded_volume_df = load_from_pickle('traded_volume_df.pkl')
px_df = load_from_pickle('px_df.pkl')
daily_stock_info_df = load_from_pickle('daily_stock_info_df.pkl')
monthly_scaling_factor = load_from_pickle('monthly_scaling_factor.pkl')

# Distinct values of the "stock" level/column, in order of first appearance.
stocks = traded_volume_df.reset_index()["stock"].unique()

# Empty results table; one row per fitted stock is appended to it further down.
ridge_summary = pd.DataFrame(
    columns=[
        'stock',
        'half_life',
        'model_type',
        'beta_estimate',
        'alpha_estimate',
        'valid_loss',
        'best_eta',
    ]
)

In [112]:
# Model configuration used by the cells below.
half_life = 3600
model_type = 'sqrt'
explanation_horizon_periods = 6

impact_px_df = IF.get_impact_state(
    traded_volume_df, monthly_scaling_factor, half_life, model_type
)

# Build the regression statistics, keep only rows with y >= 1e-4, and make
# sure the "date" column holds pandas datetimes (needed for `.dt` access later).
req_stat_df = (
    IF.impact_regression_statistics(impact_px_df, explanation_horizon_periods, px_df)
    .loc[lambda stats: stats["y"] >= 1e-4]
    .assign(date=lambda stats: pd.to_datetime(stats["date"]))
)


  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns


In [113]:
# `in_sample_month` must exist before this call. It was previously assigned
# only in the ridge-fitting cell further down, so a fresh Restart & Run All
# failed here with NameError. The value matches that later assignment.
in_sample_month = 5

# Per-stock regression summary; its "beta_estimate"/"alpha_estimate" columns
# are used below as the optimiser's starting point for each stock.
reg_summary_naive = IF.regression_result_by_stock(req_stat_df, in_sample_month)


### Ridge model 1: global coef $\bar \lambda$ as the impact coef of the "stock index"

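$$
\text{Loss} = \sum_i \left(y^{\text{real}}_i - \beta x_i - \alpha\right)^2 + \eta \left(\beta - \bar\lambda^{\text{index}}\right)^2,
$$

minimized separately for each stock $s_j$, where $\beta$ is that stock's impact coefficient (its $\lambda_j$), $\alpha$ is the intercept, and $\eta \ge 0$ controls how strongly $\beta$ is shrunk towards the index coefficient $\bar\lambda^{\text{index}}$.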

In [108]:
def loss_function(params, x, y, global_coef, eta):
    """Penalised least-squares objective for a one-variable linear fit.

    Parameters
    ----------
    params : sequence of two numbers
        ``(coef, intercept)`` of the line ``coef * x + intercept``.
    x, y : array-like
        Regressor and target values; combined element-wise.
    global_coef : float
        Reference coefficient that ``coef`` is shrunk towards.
    eta : float
        Weight of the shrinkage penalty.

    Returns
    -------
    float
        ``sum((y - (coef * x + intercept)) ** 2) + eta * (global_coef - coef) ** 2``.
    """
    slope, offset = params
    fit_error = y - (slope * x + offset)
    squared_error_total = np.sum(fit_error ** 2)
    shrinkage = eta * (global_coef - slope) ** 2
    return squared_error_total + shrinkage

In [114]:
in_sample_month = 5
# Candidate penalty weights tried for every stock.
eta_list = [0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100]

# None of the arguments depend on the stock, so compute the index-level
# coefficient once instead of once per loop iteration.
# NOTE(review): assumes IF.get_index_impact_coef has no per-call side effects — confirm.
global_coef = IF.get_index_impact_coef(traded_volume_df, px_df, monthly_scaling_factor,
                                       half_life, model_type, in_sample_month)

# Month filter is also loop-invariant.
# NOTE(review): this matches on calendar month only; if the data spans more
# than one year, the same month of every year is included — confirm intent.
req_stat_df_in_sample = req_stat_df.loc[req_stat_df["date"].dt.month == in_sample_month]

# Collect one record per stock and build the frame once after the loop,
# rather than growing `ridge_summary` with pd.concat on every iteration.
ridge_rows = []
for stock in stocks:
    # Start the optimiser from the naive per-stock regression estimates.
    initial_param = reg_summary_naive.loc[stock][["beta_estimate", "alpha_estimate"]].values

    # Build the stock mask on the already-filtered frame so mask and frame
    # share the same index (no reliance on implicit index alignment).
    req_stat_df_in_sample_month = req_stat_df_in_sample.loc[
        req_stat_df_in_sample["stock"] == stock
    ].copy()
    x, y, x_valid, y_valid = IF.train_validation_split(req_stat_df_in_sample_month)

    eta_info = IF.eta_info(x, y, x_valid, y_valid, eta_list, initial_param, global_coef, loss_function)

    # eta_info maps each eta to a sequence whose item 0 is the fitted
    # parameters and item 1 the validation loss; pick the eta with the
    # smallest validation loss.
    best_eta = min(eta_info, key=lambda eta: eta_info[eta][1])
    best_param, valid_loss = eta_info[best_eta][0], eta_info[best_eta][1]

    ridge_rows.append([stock, half_life, model_type, best_param[0], best_param[1], valid_loss, best_eta])

new_rows = pd.DataFrame(ridge_rows, columns=ridge_summary.columns)
# Results are appended to whatever `ridge_summary` already holds (as before),
# so re-running this cell adds another batch of rows. Skip the concat when the
# table is still empty to avoid concatenating with an all-empty frame.
if ridge_summary.empty:
    ridge_summary = new_rows
else:
    ridge_summary = pd.concat([ridge_summary, new_rows], ignore_index=True)

  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  result = minimize(fun=loss_function, x0=initial_param,
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  result = minimize(fun=loss_function, x0=initial_param,
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  result = minimize(fun=loss_function, x0=initial_param,
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  result = minimize(fun=loss_function, x0=initial_param,
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  result = minimize(fun=loss_function, x0=initial_param,
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  result = minimize(fun=loss_function, x0=initial_param,
  cum_impact = pre_ewm.ewm(alpha=1

In [122]:
# Show only the rows where the selected penalty weight (best_eta) is non-zero.
ridge_summary.loc[lambda summary: summary['best_eta'] != 0]

Unnamed: 0,stock,half_life,model_type,beta_estimate,alpha_estimate,valid_loss,best_eta
93,AOS,3600,sqrt,11.714021,0.00051,0.001374,0.1


In [119]:
# Pickle the ridge summary table to ./impact_model_summary/ridge1_summary.pkl.
filename = "ridge1_summary.pkl"
path = "./impact_model_summary/"
# Create the output directory if needed; open(..., 'wb') raises
# FileNotFoundError when the target directory does not exist.
os.makedirs(path, exist_ok=True)
with open(path + filename, 'wb') as f:
    pickle.dump(ridge_summary, f)

### Ridge model 2: global coef $\bar \lambda$ as average stock-wise $\lambda_j$

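$$
\text{Loss} = \sum_{i, j} \left(y^{j, \text{real}}_i - \beta^j x^j_i - \alpha^j\right)^2 + \eta \sum_j (\lambda_j - \bar\lambda)^2
$$

where the sums run over observations $i$ and stocks $s_j$, $\lambda_j \equiv \beta^j$ is the impact coefficient of stock $s_j$, and $\bar\lambda$ is the average of the $\lambda_j$.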