# Backtest VWAP performance

## Imports

In [1]:
import os
import pandas as pd
import numpy as np
import warnings
import math
from best_model_selection import *

from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, TimeSeriesSplit, GridSearchCV
from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
from sklearn.metrics import silhouette_score
from hmmlearn.hmm import GaussianHMM, GMMHMM, MultinomialHMM

from dateutil.relativedelta import relativedelta
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
## GLOBAL VARS
## GLOBAL VARS
R_DF_COL_NAMES = ['mean_static', 'sd_static', 
                  'mean_dynamic', 'sd_dynamic',
                  'mean_static_gmm', 'sd_static_gmm', 
                  'mean_dynamic_gmm', 'sd_dynamic_gmm', 
                  'mean_static_bgm', 'sd_static_bgm', 
                  'mean_dynamic_bgm', 'sd_dynamic_bgm', 
                  'mean_static_ghmm', 'sd_static_ghmm', 
                  'mean_dynamic_ghmm', 'sd_dynamic_ghmm', 
                  'from_year']
DATE_TIME = 'date_time'
DATE = 'date'
TIME = 'time'
OPEN_PRICE = 'open'
HIGH_PRICE = 'high'
LOW_PRICE = 'low'
CLOSE_PRICE = 'close'
VOLUME = 'volume'
TURNOVER = 'turnover'
VWAP = 'vwap'
FEATURES = ['high_low_spread', "open_close_rets", "log_total_traded_vol", "daily_log_return", "short_term_vol", "long_term_vol", "money_flow_index"]

## Functions

In [3]:
def load_data(file):
    filepath = os.path.join(DATAPATH, file)
    df = pd.read_csv("file:///" + filepath, parse_dates=[['<DTYYYYMMDD>', '<TIME>']])

    return df

def format_data(df):
    returned_df = df.copy()
    returned_df.drop(labels=["<PER>", "<OPENINT>"], axis="columns", inplace=True)
    returned_df.columns = ['date_time', 'ticker', 'open', 'high', 'low', 'close', 'volume']
    returned_df.set_index('date_time', drop=True, inplace=True)
    returned_df = add_datetime(returned_df)

    return returned_df

def get_open_close_time(df):
    open_time = df.time.min()
    close_time = df.time.max()
    
    return open_time, close_time

def get_open_close_auction_time(df):
    open_time = df.time.min()
    auction_time = df.time.max()
    close_time = df[df[TIME] != df.time.max()].time.max()
    
    return open_time, close_time, auction_time

def get_intraday_data(df, start_time, end_time):
    df = df.between_time(open_time, end_time)
    df[TURNOVER] = df[VOLUME]*df[CLOSE_PRICE]
    df_resampled = df.resample('5T').agg({OPEN_PRICE: "first", 
                                          LOW_PRICE: "min", 
                                          HIGH_PRICE: "max", 
                                          CLOSE_PRICE: "last", 
                                          VOLUME: "sum", 
                                          TURNOVER: "sum"})
    df_resampled[VWAP] = df_resampled[TURNOVER]/df_resampled[VOLUME]
    df_resampled = df_resampled[(df_resampled.index.weekday != 5) | (df_resampled.index.weekday != 6)]
#     open_resampled = df_resampled.index.time.min()
#     close_resmapled = df_resampled.index.time.max()
    
#     print(open_resampled)
#     print(close_resampled)
    
#     df_resampled = df_resampled.between_time(open_resampled, close_resampled)
#     df_resampled.fillna(method='ffill', inplace=True)
    df_resampled.dropna(inplace=True)

    return df_resampled

def get_daily_data(df):
    daily_data = df.groupby(df.index.date).agg({OPEN_PRICE: "first", 
                                                LOW_PRICE: "min",
                                                HIGH_PRICE: "max", 
                                                CLOSE_PRICE: "last", 
                                                VOLUME: "sum", 
                                                TURNOVER: "sum"})
    daily_data[VWAP] = daily_data.turnover/daily_data.volume
    daily_data = daily_data[daily_data[VOLUME] > 0.1]
    daily_data = daily_data[(daily_data.index != 6) & (daily_data.index != 7)]
    daily_data.index = pd.to_datetime(daily_data.index)
    daily_data = daily_data.resample('B').first()
    daily_data.dropna(inplace=True)
    
    
    return daily_data

def split_train_test_data(df, size_in_years):
    training_data = df[df.index[0]:df.index[-1] - relativedelta(years=size_in_years, hours=-9, minutes = -5)]
    test_data = df[df.index[-1] - relativedelta(years=size_in_years, hours=-9):]

    return training_data, test_data

def get_weekdays_data_dict(df):
    weekdays_data_dict = {}
    weekdays_data_dict[0] = df[df.index.weekday == 0]
    weekdays_data_dict[1] = df[df.index.weekday == 1]
    weekdays_data_dict[2] = df[df.index.weekday == 2]
    weekdays_data_dict[3] = df[df.index.weekday == 3]
    weekdays_data_dict[4] = df[df.index.weekday == 4]
    weekdays_data_dict['else'] = df

    return weekdays_data_dict

def get_static_vol_predictor_by_weekday(data, weekdays_data_dict):
    static_vol_predictor = {}
    static_vol_predictor[0] = get_norm_static_vol_predictor(weekdays_data_dict.get(0))
    static_vol_predictor[1] = get_norm_static_vol_predictor(weekdays_data_dict.get(1))
    static_vol_predictor[2] = get_norm_static_vol_predictor(weekdays_data_dict.get(2))
    static_vol_predictor[3] = get_norm_static_vol_predictor(weekdays_data_dict.get(3))
    static_vol_predictor[4] = get_norm_static_vol_predictor(weekdays_data_dict.get(4))
    static_vol_predictor['else'] = get_norm_static_vol_predictor(data)

    return static_vol_predictor

def get_adv_median_by_weekday(data, weekdays_data_dict):
    adv_median = {}
    adv_median[0] = get_adv_median(weekdays_data_dict.get(0))
    adv_median[1] = get_adv_median(weekdays_data_dict.get(1))
    adv_median[2] = get_adv_median(weekdays_data_dict.get(2))
    adv_median[3] = get_adv_median(weekdays_data_dict.get(3))
    adv_median[4] = get_adv_median(weekdays_data_dict.get(4))
    adv_median['else'] = get_adv_median(data)

    return adv_median
  
def get_data_by_weekday(df, weekday):
    df_weekday = df[df.index.weekday == weekday]

    return df_weekday

def add_datetime(df):
    returned_df = df.copy()
    returned_df['date'] = pd.to_datetime(returned_df.index.date)
    returned_df['time'] = pd.to_datetime(returned_df.index, format = "%m-%d-%Y %H:%M:%S")
    returned_df['time'] = returned_df['time'].apply(lambda x: x.strftime('%H:%M:%S'))

    return returned_df

def get_norm_static_vol_predictor(df):
    static_volume_predictor = df.groupby(by=df.index.time)[VOLUME].median()
    norm_static_volume_predictor = static_volume_predictor/sum(static_volume_predictor)

    return norm_static_volume_predictor

def get_reversed_cum_vol(data):
    reversed_cumvol = []
    for day in np.unique(data.index.date):
        reversed_cumvol.append(data[data.index.date == day][VOLUME].cumsum().values[::-1])

    reversed_cumvol = pd.Series(data = np.concatenate(reversed_cumvol), index = data.index)

    return reversed_cumvol

def get_adv_median(df):
    return df['volume'].groupby(df.index.date).sum().median()

def get_adv_mean(df):
    return df['volume'].groupby(df.index.date).sum().median()

def get_daily_VWAP(df):
    return df[TURNOVER].groupby(df.index.date).sum()/df[VOLUME].groupby(df.index.date).sum()
  
def get_vol_predictor_next_bin(data, static_vol_predictor, adv_median):
    volume_predictor_next_interval = data.groupby(data.index.date)[VOLUME].shift(1)
    volume_predictor_next_interval.fillna(int (adv_median.get('else')*static_vol_predictor.get('else').iloc[0]), inplace=True)

    return volume_predictor_next_interval
  
def vwap_static_execution_algo(data, norm_static_vol_predictor, amount_shares, order_side, start_time, end_time, day):
    if day.weekday() == 0:
        vol_predictor = norm_static_vol_predictor.get(0)[start_time:end_time]
    elif day.weekday() == 1:
        vol_predictor = norm_static_vol_predictor.get(1)[start_time:end_time]
    elif day.weekday() == 2:
        vol_predictor = norm_static_vol_predictor.get(2)[start_time:end_time]
    elif day.weekday() == 3:
        vol_predictor = norm_static_vol_predictor.get(3)[start_time:end_time]
    elif day.weekday() == 4:
        vol_predictor = norm_static_vol_predictor.get(4)[start_time:end_time]
    else:
        vol_predictor = norm_static_vol_predictor.get('else')[start_time:end_time]
        
    shares_per_interval = vol_predictor*amount_shares
    
    data['time'] = data.index.time
    data = data.reset_index()
    data = data.set_index(data.time)
    exec_price = (data[HIGH_PRICE]+data[LOW_PRICE])/2
    
    vwap_this_exec_this_day = sum(shares_per_interval*exec_price)/sum(shares_per_interval)
#     vwap_this_exec_this_day = sum(shares_per_interval*data[VWAP])/sum(shares_per_interval)

    return vwap_this_exec_this_day

def vwap_dynamic_execution_algo(data, reversed_cumvol, static_vol_predictor, volume_predictor_next_interval, amount_shares, order_side, start_time, end_time, day):
    shares_per_interval = []
    if day.weekday() == 0:
        shares_per_interval.append(static_vol_predictor.get(0).iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol[reversed_cumvol.index.weekday == 0].groupby(reversed_cumvol[reversed_cumvol.index.weekday == 0].index.time).median()
    elif day.weekday() == 1:
        shares_per_interval.append(static_vol_predictor.get(1).iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol[reversed_cumvol.index.weekday == 1].groupby(reversed_cumvol[reversed_cumvol.index.weekday == 1].index.time).median()
    elif day.weekday() == 2:
        shares_per_interval.append(static_vol_predictor.get(2).iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol[reversed_cumvol.index.weekday == 2].groupby(reversed_cumvol[reversed_cumvol.index.weekday == 2].index.time).median()
    elif day.weekday() == 3:
        shares_per_interval.append(static_vol_predictor.get(3).iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol[reversed_cumvol.index.weekday == 3].groupby(reversed_cumvol[reversed_cumvol.index.weekday == 3].index.time).median()
    elif day.weekday() == 4:
        shares_per_interval.append(static_vol_predictor.get(4).iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol[reversed_cumvol.index.weekday == 4].groupby(reversed_cumvol[reversed_cumvol.index.weekday == 4].index.time).median()
    else:
        shares_per_interval.append(static_vol_predictor.get('else').iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol.groupby(reversed_cumvol.index.time).median()

    for i in range(1, len(data)):
        num = data[VOLUME].iloc[:i].sum()+volume_predictor_next_interval[volume_predictor_next_interval.index.date == day].iloc[i]
        denom = data[VOLUME].iloc[:i].sum()+vol_predictor[i]
        op = amount_shares*(num/denom)
        shares_next_interval = op - sum(shares_per_interval)
        #     shares_next_interval = (amount_shares*((data['volume'].iloc[0:i].sum()+volume_predictor_next_interval.xs(day, level='Date').iloc[i])/(data['volume'].iloc[0:i].sum()+volPredictor[i])))-sum(shares_per_interval)
        shares_per_interval.append(shares_next_interval)

    exec_price = (data[HIGH_PRICE]+data[LOW_PRICE])/2
    
    vwap_this_exec_this_day = sum(shares_per_interval*exec_price)/sum(shares_per_interval)
#     vwap_this_exec_this_day = sum(shares_per_interval*data[VWAP])/sum(shares_per_interval)

    return vwap_this_exec_this_day

def backtest_algo_static(training_data, test_data, adv_median, norm_static_vol_predictor):
    
    backtest_sell_vwap = []
    backtest_buy_vwap = []

    for day in np.unique(test_data.index.date):
        data = test_data[test_data.index.date == day]

        if day.weekday() == 0:
            amount_shares = adv_median.get(0)*0.1
        elif day.weekday() == 1:
            amount_shares = adv_median.get(1)*0.1
        elif day.weekday() == 2:
            amount_shares = adv_median.get(2)*0.1
        elif day.weekday() == 3:
            amount_shares = adv_median.get(3)*0.1
        elif day.weekday() == 4:
            amount_shares = adv_median.get(4)*0.1
        else:
            amount_shares = adv_median.get('else')*0.1
            
        sell_exec = vwap_static_execution_algo(data, norm_static_vol_predictor, amount_shares, 'sell', data.index[0].time(), data.index[-1].time(), day)
        buy_exec = vwap_static_execution_algo(data, norm_static_vol_predictor, amount_shares, 'buy', data.index[0].time(), data.index[-1].time(), day)
        
        backtest_sell_vwap.append(sell_exec)
        backtest_buy_vwap.append(buy_exec)

    return backtest_buy_vwap, backtest_sell_vwap

def backtest_algo_dynamic(train_data, test_data, adv_median, static_vol_predictor):
    reversed_cum_vol = get_reversed_cum_vol(train_data)
    vol_predictor_next_bin = get_vol_predictor_next_bin(test_data, static_vol_predictor, adv_median)

    backtest_sell_vwap_dynamic = []
    backtest_buy_vwap_dynamic = []
    for day in np.unique(test_data.index.date):
        data = test_data[test_data.index.date == day]
        if day.weekday() == 0:
            amount_shares = adv_median.get(0)*0.1
        elif day.weekday() == 1:
            amount_shares = adv_median.get(1)*0.1
        elif day.weekday() == 2:
            amount_shares = adv_median.get(2)*0.1
        elif day.weekday() == 3:
            amount_shares = adv_median.get(3)*0.1
        elif day.weekday() == 4:
            amount_shares = adv_median.get(4)*0.1
        else:
            amount_shares = adv_median.get('else')*0.1
            
        backtest_sell_vwap_dynamic.append(vwap_dynamic_execution_algo(data, 
                                                                      reversed_cum_vol,
                                                                      static_vol_predictor,
                                                                      vol_predictor_next_bin,
                                                                      amount_shares,
                                                                      'sell', 
                                                                      data.index[0].time(),
                                                                      data.index[-1].time(),
                                                                      day))
        backtest_buy_vwap_dynamic.append(vwap_dynamic_execution_algo(data, 
                                                                     reversed_cum_vol,
                                                                     static_vol_predictor,
                                                                     vol_predictor_next_bin,
                                                                     amount_shares,
                                                                     'buy',
                                                                     data.index[0].time(),
                                                                     data.index[-1].time(),
                                                                     day))

    return backtest_buy_vwap_dynamic, backtest_sell_vwap_dynamic
  
def get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag):
    daily_vwap = test_daily_data[VWAP]
    weekdays_data_dict = get_weekdays_data_dict(train_intraday_data)
    static_vol_predictor = get_static_vol_predictor_by_weekday(train_intraday_data, weekdays_data_dict)
    adv_median = get_adv_median_by_weekday(train_intraday_data, weekdays_data_dict)

    if dynamic_flag:
        backtest_buy_vwap, backtest_sell_vwap = backtest_algo_dynamic(train_intraday_data, test_intraday_data, adv_median, static_vol_predictor)
    else:
        backtest_buy_vwap, backtest_sell_vwap = backtest_algo_static(train_intraday_data, test_intraday_data, adv_median, static_vol_predictor)

    static_vwap_comparison = pd.DataFrame(data=daily_vwap.values.tolist(), index=daily_vwap.index, columns=['market_vwap'])
    static_vwap_comparison['backtest_buy_vwap'] = backtest_buy_vwap
    static_vwap_comparison['backtest_sell_vwap'] = backtest_sell_vwap

    static_vwap_comparison['diff_vwap_bps_buy'] = 1e4*(static_vwap_comparison['backtest_buy_vwap']-static_vwap_comparison['market_vwap'])/static_vwap_comparison['market_vwap']
    static_vwap_comparison['diff_vwap_bps_sell'] = -1e4*(static_vwap_comparison['backtest_sell_vwap']-static_vwap_comparison['market_vwap'])/static_vwap_comparison['market_vwap']

    mean_bps_diff_buys = static_vwap_comparison['diff_vwap_bps_buy'].mean()
    sd_bps_diff_buys = static_vwap_comparison['diff_vwap_bps_buy'].std()
    mean_bps_diff_sells = static_vwap_comparison['diff_vwap_bps_sell'].mean()
    sd_bps_diff_sells = static_vwap_comparison['diff_vwap_bps_sell'].std()

    percentiles_diff_vwap_sells = {'1': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.01),
                               '5': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.05),
                               '95': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.95),
                               '99': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.99)}
    percentiles_diff_vwap_buys = {'1': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.01),
                               '5': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.05),
                               '95': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.95),
                               '99': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.99)}

    return mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells

def backtest_algo_static_by_regime(training_data, test_data, adv_median, static_vol_predictor):
    backtest_sell_vwap = []
    backtest_buy_vwap = []

    for day in np.unique(test_data.index.date):
        data = test_data[test_data.index.date == day]
        regime = data['regime'].iloc[0]
        
        if day.weekday() == 0:
            amount_shares = adv_median.get(regime).get(0)*0.1
        elif day.weekday() == 1:
            amount_shares = adv_median.get(regime).get(1)*0.1
        elif day.weekday() == 2:
            amount_shares = adv_median.get(regime).get(2)*0.1
        elif day.weekday() == 3:
            amount_shares = adv_median.get(regime).get(3)*0.1
        elif day.weekday() == 4:
            amount_shares = adv_median.get(regime).get(4)*0.1
        else:
            amount_shares = adv_median.get(regime).get('else')*0.1
            
        sell_exec = vwap_static_execution_algo(data,
                                               static_vol_predictor.get(regime),
                                               amount_shares, 'sell',
                                               data.index[0].time(),
                                               data.index[-1].time(), 
                                               day)
        buy_exec = vwap_static_execution_algo(data,
                                              static_vol_predictor.get(regime), 
                                              amount_shares,
                                              'buy',
                                              data.index[0].time(), 
                                              data.index[-1].time(),
                                              day)
        
        backtest_sell_vwap.append(sell_exec)
        backtest_buy_vwap.append(buy_exec)

    return backtest_buy_vwap, backtest_sell_vwap

def backtest_algo_dynamic_by_regime(training_data, test_data, adv_median, static_vol_predictor, regimes):
    regime_reversed_cum_vol = {}
    regime_vol_predictor_next_bin = {}
    for regime in regimes:
        regime_reversed_cum_vol[regime] = get_reversed_cum_vol(training_data[training_data['regime']==regime])
        regime_vol_predictor_next_bin[regime] = get_vol_predictor_next_bin(test_data[test_data['regime']==regime], 
                                                                           static_vol_predictor.get(regime),
                                                                           adv_median.get(regime))

    backtest_sell_vwap_dynamic = []
    backtest_buy_vwap_dynamic = []
    for day in np.unique(test_data.index.date):
        data = test_data[test_data.index.date == day]
        regime = data['regime'].iloc[0]
        if day.weekday() == 0:
            amount_shares = adv_median.get(regime).get(0)*0.1
        elif day.weekday() == 1:
            amount_shares = adv_median.get(regime).get(1)*0.1
        elif day.weekday() == 2:
            amount_shares = adv_median.get(regime).get(2)*0.1
        elif day.weekday() == 3:
            amount_shares = adv_median.get(regime).get(3)*0.1
        elif day.weekday() == 4:
            amount_shares = adv_median.get(regime).get(4)*0.1
        else:
            amount_shares = adv_median.get(regime).get('else')*0.1
        
        backtest_sell_vwap_dynamic.append(vwap_dynamic_execution_algo(data, 
                                                                      regime_reversed_cum_vol.get(regime),
                                                                      static_vol_predictor.get(regime),
                                                                      regime_vol_predictor_next_bin.get(regime),
                                                                      amount_shares,
                                                                      'sell', 
                                                                      data.index[0].time(),
                                                                      data.index[-1].time(),
                                                                      day))
        backtest_buy_vwap_dynamic.append(vwap_dynamic_execution_algo(data, 
                                                                     regime_reversed_cum_vol.get(regime),
                                                                     static_vol_predictor.get(regime),
                                                                     regime_vol_predictor_next_bin.get(regime),
                                                                     amount_shares,
                                                                     'buy',
                                                                     data.index[0].time(),
                                                                     data.index[-1].time(),
                                                                     day))
            
        

    return backtest_buy_vwap_dynamic, backtest_sell_vwap_dynamic

def get_algo_performance_by_regime(training_data, test_data, vwap_and_regime, dynamic_flag, regimes):
    regime_daily_vwap = {}
    regime_weekdays_data_dict = {}
    regime_static_vol_predictor = {}
    regime_adv_median = {}
    daily_vwap = vwap_and_regime[VWAP]
    for regime in regimes:
        regime_daily_vwap[regime] = vwap_and_regime[vwap_and_regime['regime'] == regime][VWAP]
        regime_weekdays_data_dict[regime] = get_weekdays_data_dict(training_data[training_data['regime'] == regime])
        regime_static_vol_predictor[regime] = get_static_vol_predictor_by_weekday(training_data[training_data['regime'] == regime],
                                                                                  regime_weekdays_data_dict[regime])
        regime_adv_median[regime] = get_adv_median_by_weekday(training_data[training_data['regime'] == regime], 
                                                              regime_weekdays_data_dict[regime])

    if dynamic_flag:
        backtest_buy_vwap, backtest_sell_vwap = backtest_algo_dynamic_by_regime(training_data, 
                                                                                test_data,
                                                                                regime_adv_median,
                                                                                regime_static_vol_predictor,
                                                                                regimes)
    else:
        backtest_buy_vwap, backtest_sell_vwap = backtest_algo_static_by_regime(training_data,
                                                                               test_data,
                                                                               regime_adv_median,
                                                                               regime_static_vol_predictor)

    static_vwap_comparison = pd.DataFrame(data=daily_vwap.values.tolist(), index=daily_vwap.index, columns=['market_vwap'])
    static_vwap_comparison['backtest_buy_vwap'] = backtest_buy_vwap
    static_vwap_comparison['backtest_sell_vwap'] = backtest_sell_vwap

    static_vwap_comparison['diff_vwap_bps_buy'] = 1e4*(static_vwap_comparison['backtest_buy_vwap']-static_vwap_comparison['market_vwap'])/static_vwap_comparison['market_vwap']
    static_vwap_comparison['diff_vwap_bps_sell'] = -1e4*(static_vwap_comparison['backtest_sell_vwap']-static_vwap_comparison['market_vwap'])/static_vwap_comparison['market_vwap']

    mean_bps_diff_buys = static_vwap_comparison['diff_vwap_bps_buy'].mean()
    sd_bps_diff_buys = static_vwap_comparison['diff_vwap_bps_buy'].std()
    mean_bps_diff_sells = static_vwap_comparison['diff_vwap_bps_sell'].mean()
    sd_bps_diff_sells = static_vwap_comparison['diff_vwap_bps_sell'].std()

    percentiles_diff_vwap_sells = {'1': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.01),
                               '5': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.05),
                               '95': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.95),
                               '99': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.99)}
    percentiles_diff_vwap_buys = {'1': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.01),
                               '5': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.05),
                               '95': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.95),
                               '99': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.99)}

    return mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells

def get_log_total_trade_vol(df):
    log_total_traded_vol = get_log(get_total_traded_vol(df))
    log_total_traded_vol.name = "log_total_traded_vol"
    
    return log_total_traded_vol 

def get_total_traded_vol(df):
    total_traded_vol = df.resample('B').first()[[VOLUME]]
    total_traded_vol.name = "total_traded_vol"

    return total_traded_vol

def get_log_open_close_returns(df):
    open_close_returns = np.log(df[OPEN_PRICE]/df[CLOSE_PRICE].shift(-1)).dropna()
    open_close_returns.name = "log_overnight_returns"

    return open_close_returns

def get_log_returns(df):
    close = df[CLOSE_PRICE].resample('B').last()
    log_daily_returns = np.log(close/close.shift(-1))
    log_daily_returns.name = "log_returns"

    return log_daily_returns

def get_high_low_spread(df):
    high_low_spread = (df[HIGH_PRICE]-df[LOW_PRICE])
    high_low_spread.name = "high_low_spread"

    return high_low_spread

def get_log(df):
    return np.log(df)

def get_antilog_adv_median(df):
    antilog_adv = np.exp(get_log(daily_data[VOLUME]).rolling(20, min_periods=5).median().dropna())
    antilog_adv.name = "antilog_adv"
    
    return antilog_adv

def get_money_flow_index(df):
    typical_price = (df[CLOSE_PRICE]+df[HIGH_PRICE]+df[LOW_PRICE])/3
    volume = df[VOLUME]
    money_flow_index = typical_price*volume
    money_flow_index.name = "money_flow_index"

    return money_flow_index

def get_beta_market_impact(df):
    adv_antilog = get_antilog_adv_median(df)
    X = df[VOLUME]/adv_antilog
    Y = ((df[VWAP]-df[OPEN_PRICE])/df[OPEN_PRICE])*1e4
    betas = Y/X
    betas.name = "betas_market_impact"
    
    return betas

def get_features_df(list_of_features):
    features_df = pd.concat(list_of_features, axis=1).dropna()
    features_df = features_df.replace([np.inf, -np.inf], np.nan)
    features_df = features_df.dropna()
    features_df = features_df[features_df != 0.0].shift(1).dropna()
#     features_df.rename(columns={VOLUME: "log_total_traded_vol"}, inplace=True)

    return features_df

def fit_model(model, full_data, train_data, list_of_features, pca_flag=True):
    X = train_data.values
    scaler = MinMaxScaler()
    scaler.fit(X)
    X_scaled = scaler.transform(X)

    if pca_flag:
        pca = PCA(n_components=.95)
        pca.fit(X_scaled)
        X_pca = pca.transform(X_scaled)
        fitted_model = model.fit(X_pca)
        X_full = full_data.values
        X_full_scaled = scaler.transform(X_full)
        X_full_pca = pca.transform(X_full_scaled)
        prediction = fitted_model.predict(X_full_pca)
    else:
        fitted_model = model.fit(X_scaled)
        X_full = full_data.values
        X_full_scaled = scaler.transform(X_full)
        prediction = fitted_model.predict(X_full_scaled)

    return prediction

def get_features_series_list(daily_data):
    log_returns = get_log_returns(daily_data)
    adv_antilog = get_antilog_adv_median(daily_data)
    mfi = get_money_flow_index(daily_data)
    betas_mkt_impact = get_beta_market_impact(daily_data)
    log_traded_vol = get_log_total_trade_vol(daily_data)
    
    features = [log_returns, adv_antilog, mfi, betas_mkt_impact, log_traded_vol]
    
    return features

In [4]:
def get_results_df(ticker, list_of_results):
    arr = np.array(list_of_results).reshape(1,17)
    columns_names = R_DF_COL_NAMES
    r_df = pd.DataFrame(data=arr, columns=columns_names, index=[ticker])
    
    return r_df

def get_full_algo_performance(daily_data, intraday_data, current_ticker, from_year = 1900):
    results_list = []
    year_from = daily_data.index[0].year
    pca_flag=False
    
    train_daily_data, test_daily_data = split_train_test_data(daily_data, 2)
    train_intraday_data, test_intraday_data = split_train_test_data(intraday_data, 2)

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag=False)
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag=True)
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))

    features_list = get_features_series_list(daily_data)
    features_df = get_features_df(features_list)

    best_gmm, features_gmm, results_gmm_df = get_best_gmm_aic_bic(features_list, pca_flag=pca_flag)
    best_bgm, features_bgm, results_bgm_df = get_best_bgm_aic_bic(features_list, pca_flag=pca_flag)
    best_ghmm, features_ghmm, results_ghmm_df= get_best_ghmm_aic_bic(features_list, pca_flag=pca_flag)

    train_features_df, test_features_df = split_train_test_data(features_df[features_gmm], 2)

    model_prediction = fit_model(best_gmm, features_df[features_gmm], train_features_df, features_gmm, pca_flag=pca_flag)
    regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")

    intraday_date = intraday_data.copy()
    intraday_date['date'] = pd.to_datetime(intraday_date.index.date)
    regime_intraday = pd.merge(intraday_date, regime, right_index=True, how='left', left_on='date').dropna()
    regime_intraday = regime_intraday.astype({"regime":"int32"})

    regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
    regime_daily = regime_daily.astype({"regime":"int32"})

    intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
    daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))
    
    train_features_df, test_features_df = split_train_test_data(features_df[features_bgm], 2)
    
    model_prediction = fit_model(best_bgm, features_df[features_bgm], train_features_df, features_bgm, pca_flag=pca_flag)
    regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")
    regime = regime.resample("5T").asfreq().fillna(method="ffill")

    regime_intraday = pd.merge(intraday_date, regime, right_index=True, how='left', left_on='date').dropna()
    regime_intraday = regime_intraday.astype({"regime":"int32"})

    regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
    regime_daily = regime_daily.astype({"regime":"int32"})

    intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
    daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))
    
    train_features_df, test_features_df = split_train_test_data(features_df[features_ghmm], 2)
    
    model_prediction = fit_model(best_ghmm, features_df[features_ghmm], train_features_df, features_ghmm, pca_flag=pca_flag)
    regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")
    regime = regime.resample("5T").asfreq().fillna(method="ffill")

    regime_intraday = pd.merge(intraday_date, regime, right_index=True, how='left', left_on='date').dropna()
    regime_intraday = regime_intraday.astype({"regime":"int32"})

    regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
    regime_daily = regime_daily.astype({"regime":"int32"})

    intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
    daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))
    
    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))
    results_list.append(from_year)
    
    results_df = get_results_df(current_ticker, results_list)
    
    return results_df

In [5]:
warnings.filterwarnings("ignore")
DATAPATH = os.getcwd()+"\\Data\\"
filepath_anon = os.path.join(DATAPATH, "volume_price_2014_18.csv")

In [6]:
files_list = []
for root, dirs, files in os.walk(DATAPATH):
    for file in files:
        files_list.append(file)

In [7]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[0]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -49713.46512293637
Scores obtained -> BIC: -49452.5222518122, AIC: -49974.407994060544, SC: 0.45856943572018466
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -47920.259678147464
Scores obtained -> BIC: -47659.31680702329, AIC: -48181.202549271635, SC: 0.45856943572018466
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 400, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -232288168.65812796
Scores obtained -> BIC: -232287907.71525

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
AAPL,1.396556,11.599807,1.055697,23.878009,1.218248,12.204901,0.794599,19.833242,1.300952,12.171341,0.562963,19.552535,1.315597,12.266464,0.698526,14.938616,1900.0


In [9]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[1]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -49137.23679937182
Scores obtained -> BIC: -48883.34357826292, AIC: -49391.13002048072, SC: 0.36817498665969095
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -49065.12126010032
Scores obtained -> BIC: -48811.228038991416, AIC: -49319.01448120922, SC: 0.36139801593840737
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 200, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -185181854.9789548
Scores obtained -> BIC: -185181601.085

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
AIR,0.90639,15.675521,0.370445,21.824672,0.704292,16.739214,0.549009,20.992999,0.797039,16.37085,0.483194,20.934555,0.886684,15.495813,0.331654,22.787592,1900.0


In [10]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[2]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -54190.294283740805
Scores obtained -> BIC: -53927.593454640584, AIC: -54452.99511284103, SC: 0.2226565660257407
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -54101.55492915321
Scores obtained -> BIC: -53838.85410005298, AIC: -54364.25575825343, SC: 0.22875581708991777
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -252973161.50148827
Scores obtained -> BIC: -252972898.800659

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
BBVA.MC,0.266778,12.583242,0.13542,9.797298,0.109441,12.482267,0.157663,9.855115,0.204867,12.462596,0.222935,9.885233,0.179276,12.458125,0.782849,11.251285,1900.0


In [21]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[3]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -42121.24204908013
Scores obtained -> BIC: -41867.287007611216, AIC: -42375.19709054904, SC: 0.4080436752172536
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -42127.56609763115
Scores obtained -> BIC: -41873.61105616224, AIC: -42381.52113910006, SC: 0.3870022148673622
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -165984183.63915417
Scores obtained -> BIC: -165983929.6841127, 

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
BMW,1.26112,12.162185,0.499717,10.58948,1.394496,12.775934,0.569786,12.383983,1.220997,12.539211,0.128976,11.991589,1.343005,13.308322,0.68094,14.690432,1900.0


In [7]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[4]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -37578.9356789164
Scores obtained -> BIC: -37335.86628352295, AIC: -37822.005074309855, SC: 0.11281079911425275
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -37952.77027734999
Scores obtained -> BIC: -37709.70088195654, AIC: -38195.839672743445, SC: 0.03387961609925383
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -112064948.02010027
Scores obtained -> BIC: -112064704.950704

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
BA,0.540487,18.645351,1.544329,63.028557,0.671836,21.194707,1.384319,61.985798,0.871525,21.208387,1.335872,61.052128,0.774279,23.047416,0.829397,69.125629,1900.0


In [8]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[5]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -48201.38108325094
Scores obtained -> BIC: -47944.56201396566, AIC: -48458.20015253622, SC: 0.42913662847482636
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -47201.99960586132
Scores obtained -> BIC: -46945.18053657604, AIC: -47458.8186751466, SC: 0.3077288523237861
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -202632728.02935487
Scores obtained -> BIC: -202632471.21028557,

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
CBK,0.603572,23.493326,2.974795,52.810821,0.531574,25.252083,0.813773,21.018474,0.367233,25.513056,0.981023,17.886947,0.452979,24.819498,1.255998,21.500784,1900.0


In [9]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[6]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800} using ['antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -30302.589694860733
Scores obtained -> BIC: -30130.52187312325, AIC: -30474.657516598218, SC: 0.2665582742926393
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 400, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -30281.540952379295
Scores obtained -> BIC: -30109.47313064181, AIC: -30453.60877411678, SC: 0.38472462093954835
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 200, 'algorithm': 'map'} using ['antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -88462614.17581408
Scores obtained -> BIC: -88462442.10799234, AIC: -88462786.24363582, SC: 0.29593648793

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
E-DGE,0.474081,6.121075,0.98286,17.181186,0.433139,5.845486,0.960531,17.250446,0.326132,6.09532,1.158661,17.431197,0.417354,5.772672,0.910374,17.233714,1900.0


In [11]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[8]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -49292.43705765411
Scores obtained -> BIC: -49041.9461133036, AIC: -49542.92800200462, SC: 0.2677558451540951
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -49200.79787365438
Scores obtained -> BIC: -48950.30692930387, AIC: -49451.28881800489, SC: 0.2677558451540951
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -170582614.73368025
Scores obtained -> BIC: -170582364.242735

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
ENGI,0.442662,8.249448,1.932665,29.826655,0.266005,7.59196,1.908224,26.915698,0.475933,8.610341,2.039622,26.884803,0.473452,8.658364,1.921555,25.729234,1900.0


In [9]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[11]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -43537.511761113856
Scores obtained -> BIC: -43294.97577131097, AIC: -43780.04775091674, SC: 0.3108116414354489
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -43448.39026060919
Scores obtained -> BIC: -43205.8542708063, AIC: -43690.92625041207, SC: 0.31630658968619557
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 200, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -121018275.84669065
Scores obtained -> BIC: -121018033.3107

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
GE,0.82966,19.244686,4.557188,50.963703,0.565566,20.536893,0.735221,23.477266,0.70878,19.662419,0.212981,24.276684,0.702781,19.802579,0.09801,23.114468,1900.0


In [10]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[12]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -50418.192108797855
Scores obtained -> BIC: -50156.48376233252, AIC: -50679.90045526319, SC: 0.5190927477715457
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -50341.73007842263
Scores obtained -> BIC: -50080.0217319573, AIC: -50603.438424887965, SC: 0.5190927477715457
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 200, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -236331193.64290512
Scores obtained -> BIC: -236330931.9345

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
GILD,0.199523,18.308264,1.196985,25.519445,0.131664,20.439463,0.988899,19.089083,0.059834,20.065031,1.03651,19.362304,0.104065,19.330765,1.128477,21.445808,1900.0


In [11]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[13]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -36334.48886412545
Scores obtained -> BIC: -36094.1406671307, AIC: -36574.83706112021, SC: 0.46897554899636723
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 400, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -36255.6311013885
Scores obtained -> BIC: -36015.28290439374, AIC: -36495.97929838325, SC: 0.46897554899636723
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -96702399.97414103
Scores obtained -> BIC: -96702159.62594403, A

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
E-GSK,0.080696,8.800301,0.579514,32.253741,0.118156,9.593992,0.332072,27.382576,0.259116,9.21672,0.095817,27.514847,0.144794,8.777235,0.27715,29.114177,1900.0


In [7]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[14]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -54212.876082991905
Scores obtained -> BIC: -53952.226573351894, AIC: -54473.52559263191, SC: 0.44410413687863076
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -54141.14075478923
Scores obtained -> BIC: -53880.49124514923, AIC: -54401.79026442924, SC: 0.44410413687863076
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 400, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -244662624.86149594
Scores obtained -> BIC: -244662364.2

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
IBE.MC,0.641527,7.381247,1.296622,9.758614,0.563427,7.520346,0.725571,10.561706,0.466294,7.441017,0.519942,9.509268,0.465401,7.461332,0.645248,7.757175,1900.0


In [8]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[15]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -69986.73414913545
Scores obtained -> BIC: -69724.41534792678, AIC: -70249.0529503441, SC: 0.1352696204695562
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -69976.60296694552
Scores obtained -> BIC: -69714.28416573686, AIC: -70238.92176815418, SC: 0.5837653314516119
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -308538330.354954
Scores obtained -> BIC: -308538068.03615284, AI

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
MEL.MC,2.964914,14.428485,4.59661,32.858181,5.689878,11.955058,3.8414,25.62191,6.371653,11.972397,3.568246,25.464532,5.308939,12.041709,3.592876,24.507373,1900.0


In [9]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[16]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -60155.279335302905
Scores obtained -> BIC: -59893.44817740392, AIC: -60417.110493201886, SC: 0.23588557402526716
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -60143.41753192597
Scores obtained -> BIC: -59881.586374026985, AIC: -60405.24868982495, SC: 0.19173873351196605
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 400, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -276684307.095999
Scores obtained -> BIC: -276684045.26

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
MSFT,1.657929,8.527853,1.790677,13.158808,1.556506,8.804837,1.239641,16.460207,1.712217,9.174056,1.686336,11.727775,1.668609,9.010596,0.903028,12.07658,1900.0


In [11]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[18]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -48312.05033792931
Scores obtained -> BIC: -48057.4212819401, AIC: -48566.679393918515, SC: 0.1360878661780655
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -48055.3961142284
Scores obtained -> BIC: -47800.767058239195, AIC: -48310.02517021761, SC: 0.3309160788367445
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -185691956.0270322
Scores obtained -> BIC: -185691701.3979762, A

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
NOKIA,0.086009,11.5771,0.249896,12.642312,0.195982,11.610466,0.253659,14.86872,0.064374,11.513144,0.290547,14.131441,0.089444,11.512577,0.391641,15.953926,1900.0


In [12]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[19]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -52208.62026222496
Scores obtained -> BIC: -52013.628972045124, AIC: -52403.6115524048, SC: 0.40315729386736776
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -53465.10155271906
Scores obtained -> BIC: -53204.064825542824, AIC: -53726.1382798953, SC: 0.40315729386736776
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -247628351.7986591
Scores obtained -> BIC: -247628090.7619

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
REE.MC,0.09871,11.124457,0.56101,11.219609,0.215237,11.643029,0.402391,12.941795,0.129565,11.521283,0.469637,12.718066,0.165544,11.549258,0.314586,12.506002,1900.0


In [13]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[20]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -50860.23346065102
Scores obtained -> BIC: -50603.31074208194, AIC: -51117.156179220096, SC: 0.3267564151532071
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -50761.02694733031
Scores obtained -> BIC: -50504.104228761236, AIC: -51017.94966589939, SC: 0.3267564151532071
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -209926577.9406885
Scores obtained -> BIC: -209926321.01796994

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
GLE,0.419535,14.966104,2.284036,33.529292,0.523752,15.691588,1.581185,29.363194,0.55806,15.34826,1.297808,28.940103,0.519884,15.658198,1.530818,28.061997,1900.0


In [14]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[21]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -44107.955682610074
Scores obtained -> BIC: -43865.762737468634, AIC: -44350.14862775152, SC: 0.40701423565104505
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -43801.286521924645
Scores obtained -> BIC: -43559.093576783205, AIC: -44043.47946706609, SC: 0.4082407293074416
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -120207390.25710732
Scores obtained -> BIC: -120207148.0641

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
TMUS,0.110875,11.210664,0.405008,20.176466,0.003075,11.626304,0.227036,13.687671,0.059097,11.530482,0.634274,17.522131,0.09499,11.306249,0.619024,18.066286,1900.0


In [15]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[22]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -50821.82747838804
Scores obtained -> BIC: -50559.07664325694, AIC: -51084.57831351914, SC: 0.3828172102997777
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 400, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -50945.93449256355
Scores obtained -> BIC: -50683.183657432455, AIC: -51208.68532769465, SC: 0.3828172102997777
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -248139946.9236449
Scores obtained -> BIC: -248139684.17280975,

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
TEF.MC,0.499464,8.237029,0.981296,14.400765,0.534034,8.353757,0.018242,7.806309,0.554722,8.348743,0.028648,7.872063,0.590341,8.459532,0.273992,7.867596,1900.0


In [16]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[23]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -38202.94085309982
Scores obtained -> BIC: -37962.11524738908, AIC: -38443.766458810554, SC: 0.3661082907964777
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -38120.67458248146
Scores obtained -> BIC: -37879.84897677072, AIC: -38361.50018819219, SC: 0.3178062980748289
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 200, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -105802776.48677117
Scores obtained -> BIC: -105802535.6611

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
E-TSCO,0.776956,10.879526,0.247111,28.709747,0.501435,10.036283,0.231492,26.656607,0.73012,9.349849,0.12971,26.411661,0.750176,10.403831,0.368679,27.728626,1900.0


In [17]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[24]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -45142.13285636662
Scores obtained -> BIC: -44885.31378708134, AIC: -45398.9519256519, SC: 0.126781497848353
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -45078.8101819694
Scores obtained -> BIC: -44821.99111268412, AIC: -45335.62925125468, SC: 0.05562762881369224
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 200, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -182798662.2259783
Scores obtained -> BIC: -182798405.40690905, AI

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
VIV,0.875276,11.210578,1.910846,16.729966,0.730438,9.812767,2.255052,18.328339,0.448667,10.254926,2.156773,18.390656,0.459484,9.923153,2.123206,20.331352,1900.0


In [18]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[25]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -41266.13459402225
Scores obtained -> BIC: -41023.258371979915, AIC: -41509.01081606459, SC: 0.060067675543206286
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 400, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -41196.08001918852
Scores obtained -> BIC: -40953.20379714618, AIC: -41438.95624123086, SC: 0.05971820383939019
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Combined BIC-AIC score of -118723710.00694957
Scores obtained -> BIC: -118723467.13072

Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
WMT,0.129241,6.494737,0.607454,8.863935,0.087651,7.01158,0.047236,9.987748,0.058395,7.17887,0.269019,10.410481,0.097132,6.839754,0.350295,8.390956,1900.0


In [None]:
all_results = None
for instrument in files_list:
    raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[3]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
    df = format_data(raw)
    df_less_days = df[df.index.year >= 1900]
    open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
    intraday_data = get_intraday_data(df_less_days, open_time, close_time)
    daily_data = get_daily_data(intraday_data)
    current_ticker = df.ticker.iloc[0]
    
    this_ticker_results = get_full_algo_performance(daily_data, intraday_data, current_ticker)
    if all_results == None:
        all_results = this_ticker_results
    else:
        all_results = pd.concat([all_results, this_ticker_results])