In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import seaborn as sns
import pickle

import matplotlib.pyplot as plt
plt.style.use("ggplot")
%config InlineBackend.figure_format='retina'

import QuantTrading.ImpactFitting as IF
from scipy.optimize import minimize

def load_from_pickle(filename, path='../pkl_dump/'):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    filename : str
        Name of the pickle file inside ``path``.
    path : str, optional
        Directory containing the file. Defaults to ``'../pkl_dump/'``,
        the location this notebook has always read from.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Notes
    -----
    SECURITY: ``pickle.load`` can execute arbitrary code while
    deserializing. Only use this on files you produced yourself.
    """
    import os  # local import: the notebook's import cell does not bring in `os`

    # os.path.join handles a directory given with or without a trailing separator.
    with open(os.path.join(path, filename), 'rb') as f:
        return pickle.load(f)

# Load data: each frame comes from its own pickle, read in the order listed.
(traded_volume_df,
 px_df,
 daily_stock_info_df,
 monthly_scaling_factor) = map(load_from_pickle, (
    'traded_volume_df.pkl',
    'px_df.pkl',
    'daily_stock_info_df.pkl',
    'monthly_scaling_factor.pkl',
))

# Distinct values of the "stock" field; reset_index() exposes it as a column
# in case it is stored as an index level.
stocks = traded_volume_df.reset_index()["stock"].unique()

In [2]:
# Impact-model configuration.
half_life = 3600                 # decay half-life passed to the impact model (presumably seconds; TODO confirm)
model_type = 'linear'            # impact model flavour understood by IF.get_impact_state
explanation_horizon_periods = 6  # horizon (in periods) handed to the regression statistics

# Impact state built from traded volume and the monthly scaling factors.
impact_px_df = IF.get_impact_state(traded_volume_df, monthly_scaling_factor, half_life, model_type)

# Regression inputs: keep rows with y >= 1e-4 and make "date" a real datetime
# so the .dt accessor can be used on it later.
req_stat_df = (
    IF.impact_regression_statistics(impact_px_df, explanation_horizon_periods, px_df)
    .loc[lambda stats: stats["y"] >= 1e-4]
    .assign(date=lambda stats: pd.to_datetime(stats["date"]))
)


  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns


### Ridge model 1: global coef $\bar \lambda$, impact coef of the "stock index"

In [3]:
# In-sample configuration: the ticker under study and the calendar month used for fitting.
stock = 'A'
in_sample_month = 5

# Impact coefficient of the "stock index" for the in-sample month; the ridge
# penalty further down shrinks the per-stock coefficient toward this value.
global_coef = IF.get_index_impact_coef(
    traded_volume_df,
    px_df,
    monthly_scaling_factor,
    half_life,
    model_type,
    in_sample_month,
)

  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns


In [7]:
# Select the rows of the chosen stock in the in-sample month.
# Both conditions are evaluated on `req_stat_df` and combined into one mask, so
# the mask's index always matches the frame being filtered. (Chaining a second
# .loc with a mask built on the full frame relies on implicit index alignment
# and breaks when the index has duplicate labels.)
# NOTE(review): the month filter ignores the year — confirm the data cover a single year.
req_stat_df_in_sample_month = req_stat_df.loc[
    (req_stat_df["date"].dt.month == in_sample_month)
    & (req_stat_df["stock"] == stock)
].copy()

# Split the in-sample rows into training and validation inputs/targets.
x, y, x_valid, y_valid = IF.train_validation_split(req_stat_df_in_sample_month)

In [10]:
def loss_function(params, x, y, global_coef, eta):
    """Penalised least-squares objective for a one-regressor linear fit.

    Parameters
    ----------
    params : sequence of two numbers
        ``(coef, intercept)`` of the line ``coef * x + intercept``.
    x, y : array-like
        Regressor and target values.
    global_coef : float
        Value the slope is shrunk toward.
    eta : float
        Weight of the shrinkage term.

    Returns
    -------
    float
        ``sum((y - coef * x - intercept) ** 2) + eta * (global_coef - coef) ** 2``.
    """
    slope, offset = params
    fit_error = y - (slope * x + offset)
    squared_error = np.sum(fit_error ** 2)
    # Only the slope is penalised; the intercept is left free.
    shrinkage = eta * (global_coef - slope) ** 2
    return squared_error + shrinkage

In [25]:
def optimize_ridge(x, y, eta, initial_param, global_coef):
    """Fit ``(coef, intercept)`` by minimising ``loss_function`` with SLSQP.

    Parameters
    ----------
    x, y : array-like
        Training regressor and target.
    eta : float
        Weight of the shrinkage penalty in ``loss_function``.
    initial_param : array-like of length 2
        Starting guess ``(coef, intercept)`` for the optimiser.
    global_coef : pandas.DataFrame
        Frame whose ``"beta_estimate"`` column holds the shrinkage target;
        only its first row is used.

    Returns
    -------
    numpy.ndarray
        Optimised ``[coef, intercept]``.
    """
    shrink_target = global_coef["beta_estimate"].iloc[0]
    result = minimize(fun=loss_function, x0=initial_param,
                      args=(x, y, shrink_target, eta),
                      # SLSQP's iteration cap is spelled 'maxiter'. The previous
                      # 'max_iter' key was an unknown option, so scipy ignored it
                      # and the default cap applied.
                      method='SLSQP', options={'maxiter': 10000, 'disp': False})
    return result.x

def validation_ridge(x_valid, y_valid, ridge_results):
    """Sum of squared errors of a fitted line on validation data.

    Parameters
    ----------
    x_valid, y_valid : array-like
        Validation regressor and target.
    ridge_results : sequence of two numbers
        Fitted ``(coef, intercept)``.

    Returns
    -------
    float
        ``sum((y_valid - (x_valid * coef + intercept)) ** 2)``.

    Notes
    -----
    The second parameter was previously misspelled ``y_alid``, so the body
    read a notebook-level ``y_valid`` instead of the argument passed in.
    """
    coef, intercept = ridge_results
    predictions = x_valid * coef + intercept
    return np.sum((y_valid - predictions) ** 2)

def eta_info(x, y, x_valid, y_valid, eta_list, initial_params, global_coef):
    """Fit the ridge model for every penalty weight and score it on validation data.

    Parameters
    ----------
    x, y : array-like
        Training regressor and target.
    x_valid, y_valid : array-like
        Validation regressor and target.
    eta_list : iterable of float
        Penalty weights to try.
    initial_params : array-like of length 2
        Starting guess ``(coef, intercept)`` used for every fit.
    global_coef : pandas.DataFrame
        Shrinkage target, passed through to ``optimize_ridge``.

    Returns
    -------
    dict
        Maps each ``eta`` to ``(fitted_params, validation_loss)``, in the
        order the etas appear in ``eta_list``.
    """
    # The dict gets its own name so it does not shadow this function inside its body.
    results_by_eta = {}
    for eta in eta_list:
        ridge_results = optimize_ridge(x, y, eta, initial_params, global_coef)
        valid_loss = validation_ridge(x_valid, y_valid, ridge_results)
        results_by_eta[eta] = (ridge_results, valid_loss)

    return results_by_eta

In [24]:
# Penalty weights to sweep, from no shrinkage (0) up to 100.
eta_list = [0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100]

# The result is bound to a new name. Assigning it back to `eta_info` would
# replace the function with a dict, and re-running this cell would then fail
# with "TypeError: 'dict' object is not callable".
# np.array([85, 0]) is the optimiser's starting guess for (coef, intercept).
eta_results = eta_info(x, y, x_valid, y_valid, eta_list, np.array([85, 0]), global_coef)
eta_results

Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.0033375411682390306
            Iterations: 2
            Function evaluations: 11
            Gradient evaluations: 2
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.004014741227860608
            Iterations: 9
            Function evaluations: 32
            Gradient evaluations: 9
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.00401481356190527
            Iterations: 7
            Function evaluations: 26
            Gradient evaluations: 7
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.004014819312094298
            Iterations: 6
            Function evaluations: 23
            Gradient evaluations: 6
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.004014819935936592
            Iterations: 4
            Function ev

  result = minimize(fun=loss_function, x0=initial_param,


{0: (array([8.50000000e+01, 5.57587002e-04]), 0.0008658035092350782),
 0.0001: (array([3.27726403e+02, 4.84688392e-04]), 0.0010332223467620296),
 0.001: (array([3.27753030e+02, 4.84680427e-04]), 0.0010332582595641397),
 0.01: (array([3.27754200e+02, 4.84680075e-04]), 0.001033259838448934),
 0.1: (array([3.27754611e+02, 4.84679879e-04]), 0.0010332603953541247),
 1: (array([3.27754731e+02, 4.84680650e-04]), 0.0010332605218454226),
 10: (array([3.27754609e+02, 4.84681295e-04]), 0.0010332603314520974),
 100: (array([3.27754610e+02, 4.84684708e-04]), 0.0010332601840083429)}