# Backtest VWAP performance

## Imports

In [1]:
import os
import pandas as pd
import numpy as np
import warnings
import math

from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, TimeSeriesSplit, GridSearchCV
from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
from sklearn.metrics import silhouette_score
from hmmlearn.hmm import GaussianHMM, GMMHMM, MultinomialHMM

from dateutil.relativedelta import relativedelta
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from itertools import chain, combinations

def powerset(iterable):
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(1, len(s)+1))

In [3]:
## GLOBAL VARS
## GLOBAL VARS
R_DF_COL_NAMES = ['mean_static', 'sd_static', 
                  'mean_dynamic', 'sd_dynamic',
                  'mean_static_gmm', 'sd_static_gmm', 
                  'mean_dynamic_gmm', 'sd_dynamic_gmm', 
                  'mean_static_bgm', 'sd_static_bgm', 
                  'mean_dynamic_bgm', 'sd_dynamic_bgm', 
                  'mean_static_ghmm', 'sd_static_ghmm', 
                  'mean_dynamic_ghmm', 'sd_dynamic_ghmm', 
                  'from_year']
DATE_TIME = 'date_time'
DATE = 'date'
TIME = 'time'
OPEN_PRICE = 'open'
HIGH_PRICE = 'high'
LOW_PRICE = 'low'
CLOSE_PRICE = 'close'
VOLUME = 'volume'
TURNOVER = 'turnover'
VWAP = 'vwap'
FEATURES = ['high_low_spread', "open_close_rets", "log_total_traded_vol", "daily_log_return", "short_term_vol", "long_term_vol", "money_flow_index"]

## Functions

In [4]:
def load_data(file):
    filepath = os.path.join(DATAPATH, file)
    df = pd.read_csv("file:///" + filepath, parse_dates=[['<DTYYYYMMDD>', '<TIME>']])

    return df

def format_data(df):
    returned_df = df.copy()
    returned_df.drop(labels=["<PER>", "<OPENINT>"], axis="columns", inplace=True)
    returned_df.columns = ['date_time', 'ticker', 'open', 'high', 'low', 'close', 'volume']
    returned_df.set_index('date_time', drop=True, inplace=True)
    returned_df = add_datetime(returned_df)

    return returned_df

def get_open_close_time(df):
    open_time = df.time.min()
    close_time = df.time.max()
    
    return open_time, close_time

def get_open_close_auction_time(df):
    open_time = df.time.min()
    auction_time = df.time.max()
    close_time = df[df[TIME] != df.time.max()].time.max()
    
    return open_time, close_time, auction_time

def get_intraday_data(df, start_time, end_time):
    df = df.between_time(open_time, end_time)
    df[TURNOVER] = df[VOLUME]*df[CLOSE_PRICE]
    df_resampled = df.resample('5T').agg({OPEN_PRICE: "first", 
                                          LOW_PRICE: "min", 
                                          HIGH_PRICE: "max", 
                                          CLOSE_PRICE: "last", 
                                          VOLUME: "sum", 
                                          TURNOVER: "sum"})
    df_resampled[VWAP] = df_resampled[TURNOVER]/df_resampled[VOLUME]
    df_resampled = df_resampled[(df_resampled.index.weekday != 5) | (df_resampled.index.weekday != 6)]
#     open_resampled = df_resampled.index.time.min()
#     close_resmapled = df_resampled.index.time.max()
    
#     print(open_resampled)
#     print(close_resampled)
    
#     df_resampled = df_resampled.between_time(open_resampled, close_resampled)
#     df_resampled.fillna(method='ffill', inplace=True)
    df_resampled.dropna(inplace=True)

    return df_resampled

def get_daily_data(df):
    daily_data = df.groupby(df.index.date).agg({OPEN_PRICE: "first", 
                                                LOW_PRICE: "min",
                                                HIGH_PRICE: "max", 
                                                CLOSE_PRICE: "last", 
                                                VOLUME: "sum", 
                                                TURNOVER: "sum"})
    daily_data[VWAP] = daily_data.turnover/daily_data.volume
    daily_data = daily_data[daily_data[VOLUME] > 0.1]
    daily_data = daily_data[(daily_data.index != 6) & (daily_data.index != 7)]
    daily_data.index = pd.to_datetime(daily_data.index)
    daily_data = daily_data.resample('B').first()
    daily_data.dropna(inplace=True)
    
    
    return daily_data

def split_train_test_data(df, size_in_years):
    training_data = df[df.index[0]:df.index[-1] - relativedelta(years=size_in_years, hours=-9, minutes = -5)]
    test_data = df[df.index[-1] - relativedelta(years=size_in_years, hours=-9):]

    return training_data, test_data

def get_weekdays_data_dict(df):
    weekdays_data_dict = {}
    weekdays_data_dict[0] = df[df.index.weekday == 0]
    weekdays_data_dict[1] = df[df.index.weekday == 1]
    weekdays_data_dict[2] = df[df.index.weekday == 2]
    weekdays_data_dict[3] = df[df.index.weekday == 3]
    weekdays_data_dict[4] = df[df.index.weekday == 4]
    weekdays_data_dict['else'] = df

    return weekdays_data_dict

def get_static_vol_predictor_by_weekday(data, weekdays_data_dict):
    static_vol_predictor = {}
    static_vol_predictor[0] = get_norm_static_vol_predictor(weekdays_data_dict.get(0))
    static_vol_predictor[1] = get_norm_static_vol_predictor(weekdays_data_dict.get(1))
    static_vol_predictor[2] = get_norm_static_vol_predictor(weekdays_data_dict.get(2))
    static_vol_predictor[3] = get_norm_static_vol_predictor(weekdays_data_dict.get(3))
    static_vol_predictor[4] = get_norm_static_vol_predictor(weekdays_data_dict.get(4))
    static_vol_predictor['else'] = get_norm_static_vol_predictor(data)

    return static_vol_predictor

def get_adv_median_by_weekday(data, weekdays_data_dict):
    adv_median = {}
    adv_median[0] = get_adv_median(weekdays_data_dict.get(0))
    adv_median[1] = get_adv_median(weekdays_data_dict.get(1))
    adv_median[2] = get_adv_median(weekdays_data_dict.get(2))
    adv_median[3] = get_adv_median(weekdays_data_dict.get(3))
    adv_median[4] = get_adv_median(weekdays_data_dict.get(4))
    adv_median['else'] = get_adv_median(data)

    return adv_median
  
def get_data_by_weekday(df, weekday):
    df_weekday = df[df.index.weekday == weekday]

    return df_weekday

def add_datetime(df):
    returned_df = df.copy()
    returned_df['date'] = pd.to_datetime(returned_df.index.date)
    returned_df['time'] = pd.to_datetime(returned_df.index, format = "%m-%d-%Y %H:%M:%S")
    returned_df['time'] = returned_df['time'].apply(lambda x: x.strftime('%H:%M:%S'))

    return returned_df

def get_norm_static_vol_predictor(df):
    static_volume_predictor = df.groupby(by=df.index.time)[VOLUME].median()
    norm_static_volume_predictor = static_volume_predictor/sum(static_volume_predictor)

    return norm_static_volume_predictor

def get_reversed_cum_vol(data):
    reversed_cumvol = []
    for day in np.unique(data.index.date):
        reversed_cumvol.append(data[data.index.date == day][VOLUME].cumsum().values[::-1])

    reversed_cumvol = pd.Series(data = np.concatenate(reversed_cumvol), index = data.index)

    return reversed_cumvol

def get_adv_median(df):
    return df['volume'].groupby(df.index.date).sum().median()

def get_adv_mean(df):
    return df['volume'].groupby(df.index.date).sum().median()

def get_daily_VWAP(df):
    return df[TURNOVER].groupby(df.index.date).sum()/df[VOLUME].groupby(df.index.date).sum()
  
def get_vol_predictor_next_bin(data, static_vol_predictor, adv_median):
    volume_predictor_next_interval = data.groupby(data.index.date)[VOLUME].shift(1)
    volume_predictor_next_interval.fillna(int (adv_median.get('else')*static_vol_predictor.get('else').iloc[0]), inplace=True)

    return volume_predictor_next_interval
  
def vwap_static_execution_algo(data, norm_static_vol_predictor, amount_shares, order_side, start_time, end_time, day):
    if day.weekday() == 0:
        vol_predictor = norm_static_vol_predictor.get(0)[start_time:end_time]
    elif day.weekday() == 1:
        vol_predictor = norm_static_vol_predictor.get(1)[start_time:end_time]
    elif day.weekday() == 2:
        vol_predictor = norm_static_vol_predictor.get(2)[start_time:end_time]
    elif day.weekday() == 3:
        vol_predictor = norm_static_vol_predictor.get(3)[start_time:end_time]
    elif day.weekday() == 4:
        vol_predictor = norm_static_vol_predictor.get(4)[start_time:end_time]
    else:
        vol_predictor = norm_static_vol_predictor.get('else')[start_time:end_time]
        
    shares_per_interval = vol_predictor*amount_shares
    
    data['time'] = data.index.time
    data = data.reset_index()
    data = data.set_index(data.time)
    exec_price = (data[HIGH_PRICE]+data[LOW_PRICE])/2
    
    vwap_this_exec_this_day = sum(shares_per_interval*exec_price)/sum(shares_per_interval)
#     vwap_this_exec_this_day = sum(shares_per_interval*data[VWAP])/sum(shares_per_interval)

    return vwap_this_exec_this_day

def vwap_dynamic_execution_algo(data, reversed_cumvol, static_vol_predictor, volume_predictor_next_interval, amount_shares, order_side, start_time, end_time, day):
    shares_per_interval = []
    if day.weekday() == 0:
        shares_per_interval.append(static_vol_predictor.get(0).iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol[reversed_cumvol.index.weekday == 0].groupby(reversed_cumvol[reversed_cumvol.index.weekday == 0].index.time).median()
    elif day.weekday() == 1:
        shares_per_interval.append(static_vol_predictor.get(1).iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol[reversed_cumvol.index.weekday == 1].groupby(reversed_cumvol[reversed_cumvol.index.weekday == 1].index.time).median()
    elif day.weekday() == 2:
        shares_per_interval.append(static_vol_predictor.get(2).iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol[reversed_cumvol.index.weekday == 2].groupby(reversed_cumvol[reversed_cumvol.index.weekday == 2].index.time).median()
    elif day.weekday() == 3:
        shares_per_interval.append(static_vol_predictor.get(3).iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol[reversed_cumvol.index.weekday == 3].groupby(reversed_cumvol[reversed_cumvol.index.weekday == 3].index.time).median()
    elif day.weekday() == 4:
        shares_per_interval.append(static_vol_predictor.get(4).iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol[reversed_cumvol.index.weekday == 4].groupby(reversed_cumvol[reversed_cumvol.index.weekday == 4].index.time).median()
    else:
        shares_per_interval.append(static_vol_predictor.get('else').iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol.groupby(reversed_cumvol.index.time).median()

    for i in range(1, len(data)):
        num = data[VOLUME].iloc[:i].sum()+volume_predictor_next_interval[volume_predictor_next_interval.index.date == day].iloc[i]
        denom = data[VOLUME].iloc[:i].sum()+vol_predictor[i]
        op = amount_shares*(num/denom)
        shares_next_interval = op - sum(shares_per_interval)
        #     shares_next_interval = (amount_shares*((data['volume'].iloc[0:i].sum()+volume_predictor_next_interval.xs(day, level='Date').iloc[i])/(data['volume'].iloc[0:i].sum()+volPredictor[i])))-sum(shares_per_interval)
        shares_per_interval.append(shares_next_interval)

    exec_price = (data[HIGH_PRICE]+data[LOW_PRICE])/2
    
    vwap_this_exec_this_day = sum(shares_per_interval*exec_price)/sum(shares_per_interval)
#     vwap_this_exec_this_day = sum(shares_per_interval*data[VWAP])/sum(shares_per_interval)

    return vwap_this_exec_this_day

def backtest_algo_static(training_data, test_data, adv_median, norm_static_vol_predictor):
    
    backtest_sell_vwap = []
    backtest_buy_vwap = []

    for day in np.unique(test_data.index.date):
        data = test_data[test_data.index.date == day]

        if day.weekday() == 0:
            amount_shares = adv_median.get(0)*0.1
        elif day.weekday() == 1:
            amount_shares = adv_median.get(1)*0.1
        elif day.weekday() == 2:
            amount_shares = adv_median.get(2)*0.1
        elif day.weekday() == 3:
            amount_shares = adv_median.get(3)*0.1
        elif day.weekday() == 4:
            amount_shares = adv_median.get(4)*0.1
        else:
            amount_shares = adv_median.get('else')*0.1
            
        sell_exec = vwap_static_execution_algo(data, norm_static_vol_predictor, amount_shares, 'sell', data.index[0].time(), data.index[-1].time(), day)
        buy_exec = vwap_static_execution_algo(data, norm_static_vol_predictor, amount_shares, 'buy', data.index[0].time(), data.index[-1].time(), day)
        
        backtest_sell_vwap.append(sell_exec)
        backtest_buy_vwap.append(buy_exec)

    return backtest_buy_vwap, backtest_sell_vwap

def backtest_algo_dynamic(train_data, test_data, adv_median, static_vol_predictor):
    reversed_cum_vol = get_reversed_cum_vol(train_data)
    vol_predictor_next_bin = get_vol_predictor_next_bin(test_data, static_vol_predictor, adv_median)

    backtest_sell_vwap_dynamic = []
    backtest_buy_vwap_dynamic = []
    for day in np.unique(test_data.index.date):
        data = test_data[test_data.index.date == day]
        if day.weekday() == 0:
            amount_shares = adv_median.get(0)*0.1
        elif day.weekday() == 1:
            amount_shares = adv_median.get(1)*0.1
        elif day.weekday() == 2:
            amount_shares = adv_median.get(2)*0.1
        elif day.weekday() == 3:
            amount_shares = adv_median.get(3)*0.1
        elif day.weekday() == 4:
            amount_shares = adv_median.get(4)*0.1
        else:
            amount_shares = adv_median.get('else')*0.1
            
        backtest_sell_vwap_dynamic.append(vwap_dynamic_execution_algo(data, 
                                                                      reversed_cum_vol,
                                                                      static_vol_predictor,
                                                                      vol_predictor_next_bin,
                                                                      amount_shares,
                                                                      'sell', 
                                                                      data.index[0].time(),
                                                                      data.index[-1].time(),
                                                                      day))
        backtest_buy_vwap_dynamic.append(vwap_dynamic_execution_algo(data, 
                                                                     reversed_cum_vol,
                                                                     static_vol_predictor,
                                                                     vol_predictor_next_bin,
                                                                     amount_shares,
                                                                     'buy',
                                                                     data.index[0].time(),
                                                                     data.index[-1].time(),
                                                                     day))

    return backtest_buy_vwap_dynamic, backtest_sell_vwap_dynamic
  
def get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag):
    daily_vwap = test_daily_data[VWAP]
    weekdays_data_dict = get_weekdays_data_dict(train_intraday_data)
    static_vol_predictor = get_static_vol_predictor_by_weekday(train_intraday_data, weekdays_data_dict)
    adv_median = get_adv_median_by_weekday(train_intraday_data, weekdays_data_dict)

    if dynamic_flag:
        backtest_buy_vwap, backtest_sell_vwap = backtest_algo_dynamic(train_intraday_data, test_intraday_data, adv_median, static_vol_predictor)
    else:
        backtest_buy_vwap, backtest_sell_vwap = backtest_algo_static(train_intraday_data, test_intraday_data, adv_median, static_vol_predictor)

    static_vwap_comparison = pd.DataFrame(data=daily_vwap.values.tolist(), index=daily_vwap.index, columns=['market_vwap'])
    static_vwap_comparison['backtest_buy_vwap'] = backtest_buy_vwap
    static_vwap_comparison['backtest_sell_vwap'] = backtest_sell_vwap

    static_vwap_comparison['diff_vwap_bps_buy'] = 1e4*(static_vwap_comparison['backtest_buy_vwap']-static_vwap_comparison['market_vwap'])/static_vwap_comparison['market_vwap']
    static_vwap_comparison['diff_vwap_bps_sell'] = -1e4*(static_vwap_comparison['backtest_sell_vwap']-static_vwap_comparison['market_vwap'])/static_vwap_comparison['market_vwap']

    mean_bps_diff_buys = static_vwap_comparison['diff_vwap_bps_buy'].mean()
    sd_bps_diff_buys = static_vwap_comparison['diff_vwap_bps_buy'].std()
    mean_bps_diff_sells = static_vwap_comparison['diff_vwap_bps_sell'].mean()
    sd_bps_diff_sells = static_vwap_comparison['diff_vwap_bps_sell'].std()

    percentiles_diff_vwap_sells = {'1': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.01),
                               '5': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.05),
                               '95': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.95),
                               '99': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.99)}
    percentiles_diff_vwap_buys = {'1': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.01),
                               '5': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.05),
                               '95': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.95),
                               '99': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.99)}

    return mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells

def backtest_algo_static_by_regime(training_data, test_data, adv_median, static_vol_predictor):
    backtest_sell_vwap = []
    backtest_buy_vwap = []

    for day in np.unique(test_data.index.date):
        data = test_data[test_data.index.date == day]
        regime = data['regime'].iloc[0]
        
        if day.weekday() == 0:
            amount_shares = adv_median.get(regime).get(0)*0.1
        elif day.weekday() == 1:
            amount_shares = adv_median.get(regime).get(1)*0.1
        elif day.weekday() == 2:
            amount_shares = adv_median.get(regime).get(2)*0.1
        elif day.weekday() == 3:
            amount_shares = adv_median.get(regime).get(3)*0.1
        elif day.weekday() == 4:
            amount_shares = adv_median.get(regime).get(4)*0.1
        else:
            amount_shares = adv_median.get(regime).get('else')*0.1
            
        sell_exec = vwap_static_execution_algo(data,
                                               static_vol_predictor.get(regime),
                                               amount_shares, 'sell',
                                               data.index[0].time(),
                                               data.index[-1].time(), 
                                               day)
        buy_exec = vwap_static_execution_algo(data,
                                              static_vol_predictor.get(regime), 
                                              amount_shares,
                                              'buy',
                                              data.index[0].time(), 
                                              data.index[-1].time(),
                                              day)
        
        backtest_sell_vwap.append(sell_exec)
        backtest_buy_vwap.append(buy_exec)

    return backtest_buy_vwap, backtest_sell_vwap

def backtest_algo_dynamic_by_regime(training_data, test_data, adv_median, static_vol_predictor, regimes):
    regime_reversed_cum_vol = {}
    regime_vol_predictor_next_bin = {}
    for regime in regimes:
        regime_reversed_cum_vol[regime] = get_reversed_cum_vol(training_data[training_data['regime']==regime])
        regime_vol_predictor_next_bin[regime] = get_vol_predictor_next_bin(test_data[test_data['regime']==regime], 
                                                                           static_vol_predictor.get(regime),
                                                                           adv_median.get(regime))

    backtest_sell_vwap_dynamic = []
    backtest_buy_vwap_dynamic = []
    for day in np.unique(test_data.index.date):
        data = test_data[test_data.index.date == day]
        regime = data['regime'].iloc[0]
        if day.weekday() == 0:
            amount_shares = adv_median.get(regime).get(0)*0.1
        elif day.weekday() == 1:
            amount_shares = adv_median.get(regime).get(1)*0.1
        elif day.weekday() == 2:
            amount_shares = adv_median.get(regime).get(2)*0.1
        elif day.weekday() == 3:
            amount_shares = adv_median.get(regime).get(3)*0.1
        elif day.weekday() == 4:
            amount_shares = adv_median.get(regime).get(4)*0.1
        else:
            amount_shares = adv_median.get(regime).get('else')*0.1
        
        backtest_sell_vwap_dynamic.append(vwap_dynamic_execution_algo(data, 
                                                                      regime_reversed_cum_vol.get(regime),
                                                                      static_vol_predictor.get(regime),
                                                                      regime_vol_predictor_next_bin.get(regime),
                                                                      amount_shares,
                                                                      'sell', 
                                                                      data.index[0].time(),
                                                                      data.index[-1].time(),
                                                                      day))
        backtest_buy_vwap_dynamic.append(vwap_dynamic_execution_algo(data, 
                                                                     regime_reversed_cum_vol.get(regime),
                                                                     static_vol_predictor.get(regime),
                                                                     regime_vol_predictor_next_bin.get(regime),
                                                                     amount_shares,
                                                                     'buy',
                                                                     data.index[0].time(),
                                                                     data.index[-1].time(),
                                                                     day))
            
        

    return backtest_buy_vwap_dynamic, backtest_sell_vwap_dynamic

def get_algo_performance_by_regime(training_data, test_data, vwap_and_regime, dynamic_flag, regimes):
    regime_daily_vwap = {}
    regime_weekdays_data_dict = {}
    regime_static_vol_predictor = {}
    regime_adv_median = {}
    daily_vwap = vwap_and_regime[VWAP]
    for regime in regimes:
        regime_daily_vwap[regime] = vwap_and_regime[vwap_and_regime['regime'] == regime][VWAP]
        regime_weekdays_data_dict[regime] = get_weekdays_data_dict(training_data[training_data['regime'] == regime])
        regime_static_vol_predictor[regime] = get_static_vol_predictor_by_weekday(training_data[training_data['regime'] == regime],
                                                                                  regime_weekdays_data_dict[regime])
        regime_adv_median[regime] = get_adv_median_by_weekday(training_data[training_data['regime'] == regime], 
                                                              regime_weekdays_data_dict[regime])

    if dynamic_flag:
        backtest_buy_vwap, backtest_sell_vwap = backtest_algo_dynamic_by_regime(training_data, 
                                                                                test_data,
                                                                                regime_adv_median,
                                                                                regime_static_vol_predictor,
                                                                                regimes)
    else:
        backtest_buy_vwap, backtest_sell_vwap = backtest_algo_static_by_regime(training_data,
                                                                               test_data,
                                                                               regime_adv_median,
                                                                               regime_static_vol_predictor)

    static_vwap_comparison = pd.DataFrame(data=daily_vwap.values.tolist(), index=daily_vwap.index, columns=['market_vwap'])
    static_vwap_comparison['backtest_buy_vwap'] = backtest_buy_vwap
    static_vwap_comparison['backtest_sell_vwap'] = backtest_sell_vwap

    static_vwap_comparison['diff_vwap_bps_buy'] = 1e4*(static_vwap_comparison['backtest_buy_vwap']-static_vwap_comparison['market_vwap'])/static_vwap_comparison['market_vwap']
    static_vwap_comparison['diff_vwap_bps_sell'] = -1e4*(static_vwap_comparison['backtest_sell_vwap']-static_vwap_comparison['market_vwap'])/static_vwap_comparison['market_vwap']

    mean_bps_diff_buys = static_vwap_comparison['diff_vwap_bps_buy'].mean()
    sd_bps_diff_buys = static_vwap_comparison['diff_vwap_bps_buy'].std()
    mean_bps_diff_sells = static_vwap_comparison['diff_vwap_bps_sell'].mean()
    sd_bps_diff_sells = static_vwap_comparison['diff_vwap_bps_sell'].std()

    percentiles_diff_vwap_sells = {'1': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.01),
                               '5': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.05),
                               '95': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.95),
                               '99': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.99)}
    percentiles_diff_vwap_buys = {'1': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.01),
                               '5': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.05),
                               '95': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.95),
                               '99': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.99)}

    return mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells

def get_log_total_trade_vol(df):
    log_total_traded_vol = get_log(get_total_traded_vol(df))
    log_total_traded_vol.name = "log_total_traded_vol"
    
    return log_total_traded_vol 

def get_total_traded_vol(df):
    total_traded_vol = df.resample('B').first()[[VOLUME]]
    total_traded_vol.name = "total_traded_vol"

    return total_traded_vol

def get_log_open_close_returns(df):
    open_close_returns = np.log(df[OPEN_PRICE]/df[CLOSE_PRICE].shift(-1)).dropna()
    open_close_returns.name = "log_overnight_returns"

    return open_close_returns

def get_log_returns(df):
    close = df[CLOSE_PRICE].resample('B').last()
    log_daily_returns = np.log(close/close.shift(-1))
    log_daily_returns.name = "log_returns"

    return log_daily_returns

def get_high_low_spread(df):
    high_low_spread = (df[HIGH_PRICE]-df[LOW_PRICE])
    high_low_spread.name = "high_low_spread"

    return high_low_spread

def get_log(df):
    return np.log(df)

def get_antilog_adv_median(df):
    antilog_adv = np.exp(get_log(daily_data[VOLUME]).rolling(20, min_periods=5).median().dropna())
    antilog_adv.name = "antilog_adv"
    
    return antilog_adv

def get_money_flow_index(df):
    typical_price = (df[CLOSE_PRICE]+df[HIGH_PRICE]+df[LOW_PRICE])/3
    volume = df[VOLUME]
    money_flow_index = typical_price*volume
    money_flow_index.name = "money_flow_index"

    return money_flow_index

def get_beta_market_impact(df):
    adv_antilog = get_antilog_adv_median(df)
    X = df[VOLUME]/adv_antilog
    Y = ((df[VWAP]-df[OPEN_PRICE])/df[OPEN_PRICE])*1e4
    betas = Y/X
    betas.name = "betas_market_impact"
    
    return betas

def get_features_df(list_of_features):
    features_df = pd.concat(list_of_features, axis=1).dropna()
    features_df = features_df.replace([np.inf, -np.inf], np.nan)
    features_df = features_df.dropna()
    features_df = features_df[features_df != 0.0].shift(1).dropna()
    features_df.rename(columns={VOLUME: "log_total_traded_vol"}, inplace=True)

    return features_df

def fit_model(model, full_data, train_data, list_of_features):
    X = train_data.values
    scaler = MinMaxScaler()
    scaler.fit(X)
    X_scaled = scaler.transform(X)

    if len(list_of_features) > 2:
        pca = PCA(n_components=.95)
        pca.fit(X_scaled)
        X_pca = pca.transform(X_scaled)
        fitted_model = model.fit(X_pca)
        X_full = full_data.values
        X_full_scaled = scaler.transform(X_full)
        X_full_pca = pca.transform(X_full_scaled)
        prediction = fitted_model.predict(X_full_pca)
    else:
        fitted_model = model.fit(X_scaled)
        X_full = full_data.values
        X_full_scaled = scaler.transform(X_full)
        prediction = fitted_model.predict(X_full_scaled)

    return prediction

def get_features_series_list(daily_data):
    log_returns = get_log_returns(daily_data)
    adv_antilog = get_antilog_adv_median(daily_data)
    mfi = get_money_flow_index(daily_data)
    betas_mkt_impact = get_beta_market_impact(daily_data)
    log_traded_vol = get_log_total_trade_vol(daily_data)
    
    features = [log_returns, adv_antilog, mfi, betas_mkt_impact, log_traded_vol]
    
    return features

def silhouette_scorer(model, train, test):
    model.fit(X)
    labels = np.unique(model.predict(test))
    sc = silhouette_score(test, labels)
    
    return sc

class EvaluatedGMM:
    
    def __init__(self, n_components, covariance_type, max_iter, train_data, test_data):
        self.n_components = n_components
        self.covariance_type = covariance_type
        self.max_iter = max_iter
        self.train_data = train_data[train_data.index.year < train_data.index[-1].year]
        self.validation_data = train_data[train_data.index.year >= train_data.index[-1].year]
        self.test_data = test_data
        self.scaler = MinMaxScaler().fit(self.train_data.values)
        self.features = train_data.columns
        if len(train_data.columns) > 2:
            self.pca = PCA(n_components=2).fit(self.scaler.transform(self.train_data.values))
        else:
            self.pca = None
        self.gmm = GaussianMixture(n_components=self.n_components, 
                                   covariance_type=self.covariance_type, 
                                   max_iter=self.max_iter, 
                                   n_init=100)
        
    def fit_model(self):
        if self.pca is not None:
            self.X_train = self.pca.transform(self.scaler.transform(self.train_data.values))
        else:
            self.X_train = self.scaler.transform(self.train_data.values)
        self.gmm_fitted = self.gmm.fit(self.X_train)
        
    def compute_silhouette_score(self):
        if self.pca is not None:
            validation = self.pca.transform(self.scaler.transform(self.validation_data.values))
        else:
            validation = self.scaler.transform(self.validation_data.values)
            
        prediction = self.gmm_fitted.predict(validation)
        labels = np.unique(prediction)
        if len(labels) < 2:
            self.silhouette_score = 0.25
        else:
            self.silhouette_score = silhouette_score(validation, prediction)  
        
        return self.silhouette_score
    
    def get_params(self):
        dict_params = {'n_components': self.n_components, 
                       'covariance_type': self.covariance_type, 
                       'max_iter': self.max_iter}
        
        return dict_params
    
    def get_features(self):
        return self.features
    
class EvaluatedGaussianHMM:
    
    def __init__(self, n_components, covariance_type, max_iter, algo_type, train_data, test_data):
        self.n_components = n_components
        self.covariance_type = covariance_type
        self.max_iter = max_iter
        self.algo_type = algo_type
        self.train_data = train_data[train_data.index.year < train_data.index[-1].year]
        self.validation_data = train_data[train_data.index.year >= train_data.index[-1].year]
        self.test_data = test_data
        self.scaler = MinMaxScaler().fit(self.train_data.values)
        self.features = train_data.columns
        if len(train_data.columns) > 2:
            self.pca = PCA(n_components=2).fit(self.scaler.transform(self.train_data.values))
        else:
            self.pca = None
        self.gaussianHMM = GaussianHMM(n_components=self.n_components, 
                                       covariance_type=self.covariance_type, 
                                       n_iter=self.max_iter, 
                                       algorithm=self.algo_type)
        
    def fit_model(self):
        if self.pca is not None:
            self.X_train = self.pca.transform(self.scaler.transform(self.train_data.values))
        else:
            self.X_train = self.scaler.transform(self.train_data.values)
        self.gaussianHMM_fitted = self.gaussianHMM.fit(self.X_train)
        
    def compute_silhouette_score(self):
        if self.pca is not None:
            validation = self.pca.transform(self.scaler.transform(self.validation_data.values))
        else:
            validation = self.scaler.transform(self.validation_data.values)
            
        prediction = self.gaussianHMM_fitted.predict(validation)
        labels = np.unique(prediction)
        if len(labels) < 2:
            self.silhouette_score = 0.25
        else:
            self.silhouette_score = silhouette_score(validation, prediction)   
        
        return self.silhouette_score
    
    def get_params(self):
        dict_params = {'n_components': self.n_components, 
                       'covariance_type': self.covariance_type, 
                       'max_iter': self.max_iter,
                       'algorithm': self.algo_type}
        
        return dict_params
    
    def get_features(self):
        return self.features
    
class EvaluatedBayesianGM:
    
    def __init__(self, n_components, covariance_type, max_iter, weight_concentration, train_data, test_data):
        self.n_components = n_components
        self.covariance_type = covariance_type
        self.max_iter = max_iter
        self.weight_concentration = weight_concentration
        self.train_data = train_data[train_data.index.year < train_data.index[-1].year]
        self.validation_data = train_data[train_data.index.year >= train_data.index[-1].year]
        self.test_data = test_data
        self.scaler = MinMaxScaler().fit(self.train_data.values)
        self.features = train_data.columns
        if len(train_data.columns) > 2:
            self.pca = PCA(n_components=2).fit(self.scaler.transform(self.train_data.values))
        else:
            self.pca = None
        self.bayesianGM = BayesianGaussianMixture(n_components=self.n_components, 
                                                  covariance_type=self.covariance_type, 
                                                  max_iter=self.max_iter, 
                                                  weight_concentration_prior_type=self.weight_concentration)
        
    def fit_model(self):
        if self.pca is not None:
            self.X_train = self.pca.transform(self.scaler.transform(self.train_data.values))
        else:
            self.X_train = self.scaler.transform(self.train_data.values)
        self.bayesianGM_fitted = self.bayesianGM.fit(self.X_train)
        
    def compute_silhouette_score(self):
        if self.pca is not None:
            validation = self.pca.transform(self.scaler.transform(self.validation_data.values))
        else:
            validation = self.scaler.transform(self.validation_data.values)
            
        prediction = self.bayesianGM_fitted.predict(validation)
        labels = np.unique(prediction)
        if len(labels) < 2:
            self.silhouette_score = 0.25
        else:
            self.silhouette_score = silhouette_score(validation, prediction) 
        
        return self.silhouette_score
    
    def get_params(self):
        dict_params = {'n_components': self.n_components, 
                       'covariance_type': self.covariance_type, 
                       'max_iter': self.max_iter, 
                       'weight_concentration_prior_type': self.weight_concentration}
        
        return dict_params
    
    def get_features(self):
        return self.features
    

def get_best_gmm(features):
    
    n_components = [2,3]
    cov_type = ['full', 'diag', 'spherical', 'tied']
    max_iter = [100, 200, 400, 800]

    dict_scores = {}
    dict_features = {}
    
    features_cross_product = list(powerset(features))
    features_df = get_features_df(features)
    
    train_data, test_data = split_train_test_data(features_df, 2)
    for n_comp in n_components:
        for cov in cov_type:
            for iters in max_iter:
                model = EvaluatedGMM(n_comp, cov, iters, train_data, test_data)
                model.fit_model()
                sc = model.compute_silhouette_score()
                dict_scores[sc] = model.get_params()
                dict_features[sc] = model.get_features()

    max_sc = max(list(dict_scores.keys()))
    max_params = dict_scores.get(max_sc)
    features_used = dict_features.get(max_sc)
    print("Optimal params are {} using {} obtaining a Silhouette Score of {}".format(max_params, list(features_used), max_sc))

    best_model = GaussianMixture(n_components=max_params.get('n_components'), 
                                 covariance_type=max_params.get('covariance_type'), 
                                 max_iter=max_params.get('max_iter'), 
                                 n_init=100)
    
    return best_model, features_used;

def get_best_ghmm(features):
    
    n_components = [2,3]
    cov_type = ['full', 'diag', 'spherical', 'tied']
    max_iter = [100, 200, 400, 800]
    algorithms = ['viterbi', 'map']
    
    dict_scores = {}
    dict_features = {}
    
    features_cross_product = list(powerset(features))
    features_df = get_features_df(features)
    
    train_data, test_data = split_train_test_data(features_df, 2)
    for n_comp in n_components:
        for cov in cov_type:
            for iters in max_iter:
                for algo in algorithms:
                    model = EvaluatedGaussianHMM(n_comp, cov, iters, algo, train_data, test_data)
                    model.fit_model()
                    sc = model.compute_silhouette_score()
                    dict_scores[sc] = model.get_params()
                    dict_features[sc] = model.get_features()

    max_sc = max(list(dict_scores.keys()))
    max_params = dict_scores.get(max_sc)
    features_used = dict_features.get(max_sc)
    print("Optimal params are {} using {} obtaining a Silhouette Score of {}".format(max_params, list(features_used), max_sc))

    best_model = GaussianHMM(n_components=max_params.get('n_components'), 
                             covariance_type=max_params.get('covariance_type'),
                             n_iter=max_params.get('max_iter'), 
                             algorithm=max_params.get('algorithm'))
    
    return best_model, features_used;

def get_best_bgm(features):
    
    n_components = [2,3]
    cov_type = ['full', 'diag', 'spherical', 'tied']
    max_iter = [100, 200, 400, 800]
    w_c_types = ['dirichlet_process', 'dirichlet_distribution']
    
    dict_scores = {}
    dict_features = {}
    
    features_cross_product = list(powerset(features))
    features_df = get_features_df(features)
    
    train_data, test_data = split_train_test_data(features_df, 2)
    for n_comp in n_components:
        for cov in cov_type:
            for iters in max_iter:
                for w_c_type in w_c_types:
                    model = EvaluatedBayesianGM(n_comp, cov, iters, w_c_type, train_data, test_data)
                    model.fit_model()
                    sc = model.compute_silhouette_score()
                    dict_scores[sc] = model.get_params()
                    dict_features[sc] = model.get_features()

    max_sc = max(list(dict_scores.keys()))
    max_params = dict_scores.get(max_sc)
    features_used = dict_features.get(max_sc)
    print("Optimal params are {} using {} obtaining a Silhouette Score of {}".format(max_params, list(features_used), max_sc))

    best_model = BayesianGaussianMixture(n_components=max_params.get('n_components'), 
                             covariance_type=max_params.get('covariance_type'),
                             max_iter=max_params.get('max_iter'), 
                             weight_concentration_prior_type=max_params.get('weight_concentration_prior_type'))
    
    return best_model, features_used;

In [5]:
def get_results_df(ticker, list_of_results):
    arr = np.array(list_of_results).reshape(1,17)
    columns_names = R_DF_COL_NAMES
    r_df = pd.DataFrame(data=arr, columns=columns_names, index=[ticker])
    
    return r_df

def get_full_algo_performance(daily_data, intraday_data, current_ticker, from_year = 1900):
    results_list = []
    year_from = daily_data.index[0].year
    
    train_daily_data, test_daily_data = split_train_test_data(daily_data, 2)
    train_intraday_data, test_intraday_data = split_train_test_data(intraday_data, 2)

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag=False)
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag=True)
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))

    features_list = get_features_series_list(daily_data)
    features_df = get_features_df(features_list)

    best_gmm, features_gmm = get_best_gmm(features_list)
    best_bgm, features_bgm = get_best_bgm(features_list)
    best_ghmm, features_ghmm = get_best_ghmm(features_list)
    
    ##test
    features_to_use = ["log_total_traded_vol", "money_flow_index"]
    gmm = GaussianMixture(n_components=3, covariance_type='full', max_iter=1000, n_init=100, random_state=3)
    ##test

    train_features_df, test_features_df = split_train_test_data(features_df[features_to_use], 2)

    model_prediction = fit_model(gmm, features_df[features_to_use], train_features_df, features_to_use)
    regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")

    intraday_date = intraday_data.copy()
    intraday_date['date'] = pd.to_datetime(intraday_date.index.date)
    regime_intraday = pd.merge(intraday_date, regime, right_index=True, how='left', left_on='date').dropna()
    regime_intraday = regime_intraday.astype({"regime":"int32"})

    regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
    regime_daily = regime_daily.astype({"regime":"int32"})

    intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
    daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))
    
    model_prediction = fit_model(best_bgm, features_df[features_to_use], train_features_df, features_to_use)
    regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")
    regime = regime.resample("5T").asfreq().fillna(method="ffill")

    regime_intraday = pd.merge(intraday_date, regime, right_index=True, how='left', left_on='date').dropna()
    regime_intraday = regime_intraday.astype({"regime":"int32"})

    regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
    regime_daily = regime_daily.astype({"regime":"int32"})

    intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
    daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))
    
    model_prediction = fit_model(best_ghmm, features_df[features_to_use], train_features_df, features_to_use)
    regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")
    regime = regime.resample("5T").asfreq().fillna(method="ffill")

    regime_intraday = pd.merge(intraday_date, regime, right_index=True, how='left', left_on='date').dropna()
    regime_intraday = regime_intraday.astype({"regime":"int32"})

    regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
    regime_daily = regime_daily.astype({"regime":"int32"})

    intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
    daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))
    
    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))
    results_list.append(from_year)
    
    results_df = get_results_df(current_ticker, results_list)
    
    return results_df
    
def printFullAlgoPerformance(daily_data, intraday_data, current_ticker, from_year = 1900):
    print("Backtesting {}. Using data from {}\n".format(current_ticker, daily_data.index[0].year))
    train_daily_data, test_daily_data = split_train_test_data(daily_data, 2)
    train_intraday_data, test_intraday_data = split_train_test_data(intraday_data, 2)

#     print("Backtesting performance with static predictor...\n")
#     mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag=False)
#     print("The performance of the algorithm using static predictor on " + current_ticker + " is")
#     print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))

#     print("-"*80)

#     print("Backtesting performance with dynamic predictor...\n")
#     mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag=True)
#     print("The performance of the algorithm using dynamic predictor on " + current_ticker + " is")
#     print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))

#     print("-"*80)

    features_list = get_features_series_list(daily_data)
    features_df = get_features_df(features_list)

    best_gmm, features_gmm = get_best_gmm(features_list)
    best_bgm, features_bgm = get_best_bgm(features_list)
    best_ghmm, features_ghmm = get_best_ghmm(features_list)
    
    ##test
    features_to_use = ["log_total_traded_vol", "money_flow_index"]
    gmm = GaussianMixture(n_components=3, covariance_type='full', max_iter=1000, n_init=100, random_state=3)
    ##test

    train_features_df, test_features_df = split_train_test_data(features_df[features_to_use], 2)
    
    print(train_features_df)
    print(test_features_df)

    model_prediction = fit_model(gmm, features_df[features_to_use], train_features_df, features_to_use)
    regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")
#     regime = regime.resample("5T").asfreq().fillna(method="ffill")

    intraday_date = intraday_data.copy()
    intraday_date['date'] = pd.to_datetime(intraday_date.index.date)
    regime_intraday = pd.merge(intraday_date, regime, right_index=True, how='left', left_on='date').dropna()
    regime_intraday = regime_intraday.astype({"regime":"int32"})

    regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
    regime_daily = regime_daily.astype({"regime":"int32"})

    intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
    daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

    #     print("Model used: GMM, features used: [{}]\n".format(", ".join(features_gmm)))
    print("Backtesting performance of static predictor using segmented data...\n")
    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
    print("The performance of the algorithm using static predictor with segmented data on " + current_ticker + " is")
    print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
    print("-"*80)

    print("Backtesting performance of dynamic predictor using segmented data ...\n")
    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
    print("The performance of the algorithm using dynamic predictor with segmented data on " + current_ticker + " is")
    print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
    print("-"*80)
    
#     train_features_df, test_features_df = split_train_test_data(features_df[["volume", "betas_market_impact", "money_flow_index"]], 2)

#     model_prediction = fit_model(best_bgm, features_df[["volume", "betas_market_impact", "money_flow_index"]], train_features_df, features_bgm)
    model_prediction = fit_model(best_bgm, features_df[features_to_use], train_features_df, features_to_use)
    regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")
    regime = regime.resample("5T").asfreq().fillna(method="ffill")

    regime_intraday = pd.merge(intraday_date, regime, right_index=True, how='left', left_on='date').dropna()
    regime_intraday = regime_intraday.astype({"regime":"int32"})

    regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
    regime_daily = regime_daily.astype({"regime":"int32"})

    intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
    daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

    #     print("Model used: GMM, features used: [{}]\n".format(", ".join(features_gmm)))
    print("Backtesting performance of static predictor using segmented data...\n")
    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
    print("The performance of the algorithm using static predictor with segmented data on " + current_ticker + " is")
    print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
    print("-"*80)

    print("Backtesting performance of dynamic predictor using segmented data ...\n")
    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
    print("The performance of the algorithm using dynamic predictor with segmented data on " + current_ticker + " is")
    print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
    print("-"*80)
    
#     train_features_df, test_features_df = split_train_test_data(features_df[["volume", "betas_market_impact", "money_flow_index"]], 2)
    
#     model_prediction = fit_model(best_ghmm, features_df[["volume", "betas_market_impact", "money_flow_index"]], train_features_df, features_ghmm)
    model_prediction = fit_model(best_ghmm, features_df[features_to_use], train_features_df, features_to_use)
    regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")
    regime = regime.resample("5T").asfreq().fillna(method="ffill")

    regime_intraday = pd.merge(intraday_date, regime, right_index=True, how='left', left_on='date').dropna()
    regime_intraday = regime_intraday.astype({"regime":"int32"})

    regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
    regime_daily = regime_daily.astype({"regime":"int32"})

    intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
    daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

    #     print("Model used: GMM, features used: [{}]\n".format(", ".join(features_gmm)))
    print("Backtesting performance of static predictor using segmented data...\n")
    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
    print("The performance of the algorithm using static predictor with segmented data on " + current_ticker + " is")
    print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
    print("-"*80)

    print("Backtesting performance of dynamic predictor using segmented data ...\n")
    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
    print("The performance of the algorithm using dynamic predictor with segmented data on " + current_ticker + " is")
    print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
    print("-"*80)

In [6]:
warnings.filterwarnings("ignore")
DATAPATH = os.getcwd()+"\\Data\\"
filepath_anon = os.path.join(DATAPATH, "volume_price_2014_18.csv")

In [7]:
files_list = []
for root, dirs, files in os.walk(DATAPATH):
    for file in files:
        files_list.append(file)

### Por item

In [46]:
df[df['date'] == '2020-03-09'].head(50)

Unnamed: 0_level_0,ticker,open,high,low,close,volume,date,time
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-03-09 09:31:00,AAPL,263.75,266.92,263.0,266.19,2158162,2020-03-09,09:31:00
2020-03-09 09:32:00,AAPL,266.19,267.25,265.16,267.1,304557,2020-03-09,09:32:00
2020-03-09 09:33:00,AAPL,267.12,267.93,266.87,267.84,306729,2020-03-09,09:33:00
2020-03-09 09:34:00,AAPL,267.87,268.13,266.64,267.41,313054,2020-03-09,09:34:00
2020-03-09 09:35:00,AAPL,267.47,267.64,267.19,267.43,46808,2020-03-09,09:35:00
2020-03-09 09:50:00,AAPL,267.45,269.3,265.01,269.25,536551,2020-03-09,09:50:00
2020-03-09 09:51:00,AAPL,269.16,269.25,264.36,268.13,350914,2020-03-09,09:51:00
2020-03-09 09:52:00,AAPL,268.27,270.44,267.01,267.25,322718,2020-03-09,09:52:00
2020-03-09 09:53:00,AAPL,267.28,269.5,267.08,269.5,274057,2020-03-09,09:53:00
2020-03-09 09:54:00,AAPL,269.5,272.84,269.43,272.58,375866,2020-03-09,09:54:00


In [45]:
intraday_data[intraday_data.index.date == pd.to_datetime('2020-03-09')]

Unnamed: 0_level_0,open,low,high,close,volume,turnover,vwap
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-03-09 09:30:00,263.75,263.00,268.13,267.41,3082502,8.216964e+08,266.567997
2020-03-09 09:35:00,267.47,267.19,267.64,267.43,46808,1.251786e+07,267.430000
2020-03-09 09:50:00,267.45,264.36,272.84,272.58,1860106,5.011152e+08,269.401437
2020-03-09 09:55:00,272.58,270.79,275.10,271.72,1533456,4.184631e+08,272.888889
2020-03-09 10:00:00,271.64,271.51,274.44,271.86,930362,2.538639e+08,272.865688
...,...,...,...,...,...,...,...
2020-03-09 15:35:00,270.96,270.93,273.80,273.17,825756,2.249774e+08,272.450163
2020-03-09 15:40:00,273.13,272.37,273.61,272.83,699281,1.908920e+08,272.983314
2020-03-09 15:45:00,272.83,271.66,273.65,273.44,805900,2.198859e+08,272.845191
2020-03-09 15:50:00,273.46,270.98,273.70,271.77,1071133,2.914611e+08,272.105388


### Probando con precio medio entre high y low

In [12]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[0]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6019629065587822
Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 400, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6054584896185055
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 100, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.3263155233956592


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
AAPL,1.396556,11.599807,1.055697,23.878009,1.297945,12.064786,0.380458,22.235763,1.392944,11.675734,0.945408,15.522455,1.346832,12.125675,0.990051,11.328981,1900.0


### Probando con precio medio entre high y low

In [8]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[0]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.601962906558782
Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6054584896185055
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 400, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.32631552339565945


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
AAPL,0.172362,1.136649,0.135569,2.404073,0.164319,1.181817,0.07475,2.248926,0.173006,1.145077,0.117962,1.604024,0.17038,1.19833,0.162334,1.307393,1900.0


In [9]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[1]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.705193036479779
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.705193036479779
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
AIR,0.09129,1.558926,0.040578,2.172817,0.075998,1.710671,0.070776,1.978492,0.069632,1.400107,0.11923,2.503843,0.092909,1.563379,0.034293,2.190229,1900.0


In [10]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[2]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.472094960350096
Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.491493229141817
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
BBVA.MC,0.018221,1.244839,0.025108,0.973098,0.002378,1.235923,0.011129,0.923723,0.011796,1.2342,0.002143,0.973919,0.015895,1.243448,0.041661,0.968049,1900.0


**BOEING**

In [11]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[4]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'tied', 'max_iter': 100} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.3779697166126222
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 400, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.3779697166126225
Optimal params are {'n_components': 2, 'covariance_type': 'tied', 'max_iter': 100, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.3186685105104767


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
BA,0.065086,1.851205,0.180401,6.280641,0.088409,2.145334,0.177647,5.994589,0.150649,2.187025,0.165323,5.583865,0.089365,1.914903,0.152562,6.917942,1900.0


In [12]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[5]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 100} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.502416844499417
Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5130444273895015
Optimal params are {'n_components': 2, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25196568026517097


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
CBK,0.062419,2.321645,0.256453,5.24347,0.036933,2.505276,0.145839,2.013812,0.047697,2.368665,0.028387,3.831598,0.033307,2.490096,0.154228,1.98783,1900.0


In [8]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[6]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5033747170848477
Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5044741130395229
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 100, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.4025288896346846


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
E-DGE,0.044524,0.608996,0.093752,1.714131,0.064285,0.802603,0.089479,1.409895,0.036593,0.633548,0.119952,1.663247,0.047796,0.663211,0.073806,1.556274,1900.0


### A partir de aquí, con ejecución bien de precios

In [14]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[8]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.49153912431388347
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.3344282523355486
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.38927660959361965


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
ENGI,0.442662,8.249448,1.932665,29.826655,0.286264,7.714994,1.929084,27.015995,0.554641,9.036291,1.907891,25.784267,0.571363,9.344834,1.820013,24.541639,1900.0


In [30]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[11]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5128728677893343
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5386410395929189
Optimal params are {'n_components': 2, 'covariance_type': 'tied', 'max_iter': 100, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.09268982251767185


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
GE,0.82966,19.244686,4.557188,50.963703,0.647877,19.731922,3.788462,39.057371,0.688943,20.481339,2.469481,37.526683,0.741453,19.879695,2.654356,35.081509,1900.0


In [31]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[12]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5953031399242293
Optimal params are {'n_components': 2, 'covariance_type': 'full', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5953031399242297
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 200, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of -0.15055811885738063


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
GILD,0.199523,18.308264,1.196985,25.519445,0.071681,19.533026,0.59028,25.15578,0.114911,18.991733,1.379861,29.45727,0.166583,18.659649,0.766778,18.185382,1900.0


In [32]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[13]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6251761518396748
Optimal params are {'n_components': 2, 'covariance_type': 'full', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5722119123823449
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 400, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5632817336851466


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
E-GSK,0.080696,8.800301,0.579514,32.253741,0.098244,9.535633,0.472138,28.867988,0.139984,9.237068,0.451525,29.697345,0.300463,9.499814,0.224447,28.08506,1900.0


In [33]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[14]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6577157350370492
Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6577157350370493
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 100, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.27383273245205597


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
IBE.MC,0.641527,7.381247,1.296622,9.758614,0.76301,7.366366,0.525529,8.338283,0.651443,7.360257,0.979542,8.389449,0.57586,7.335337,0.465046,7.822871,1900.0


In [34]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[15]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.7244535094732014
Optimal params are {'n_components': 2, 'covariance_type': 'full', 'max_iter': 400, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.7244535094732014
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.4387064482330712


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
MEL.MC,2.964914,14.428485,4.59661,32.858181,2.589506,14.090406,4.290855,29.248963,4.059364,13.020379,3.70309,28.183428,3.923666,14.293276,3.780634,25.664347,1900.0


In [35]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[16]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5413037698497153
Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5433283210662437
Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 100, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5280337780201557


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
MSFT,1.657929,8.527853,1.790677,13.158808,1.632105,9.206094,1.448348,15.062327,1.683343,8.98755,1.603884,14.658482,1.693401,8.612301,1.039795,8.810342,1900.0


In [37]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[18]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5032140390239093
Optimal params are {'n_components': 2, 'covariance_type': 'full', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5442147643531762
Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 400, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.29873091265383916


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
NOKIA,0.086009,11.5771,0.249896,12.642312,0.12581,11.64478,0.575889,13.196772,0.054513,11.582836,0.30726,13.633442,0.053845,11.563337,0.244171,14.363563,1900.0


In [38]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[19]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25
Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.2586739452529631
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
REE.MC,0.09871,11.124457,0.56101,11.219609,0.272763,11.109151,0.43036,11.640716,0.173455,11.106795,0.557082,11.523021,0.103369,11.498777,0.512012,12.884346,1900.0


In [9]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[20]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, auction_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'full', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.47946243016580625
Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.44661696936133477
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.2675280754776613


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
GLE,0.418003,14.968742,1.871768,32.362549,0.463511,14.071344,1.778475,34.369286,0.393302,14.390398,1.724783,33.306645,0.501227,15.920049,1.266063,28.013087,1900.0


In [10]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[21]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, auction_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.4550973818373307
Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.4550973818373307
Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 100, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.2252904979493954


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
TMUS,0.628226,15.093506,0.119691,25.075955,0.458968,15.527825,0.047892,20.119438,0.686215,15.361643,0.259411,20.40795,0.657298,15.250631,0.101092,20.335488,1900.0


In [None]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[22]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, auction_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'full', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.4331050085510489
Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5591032229692747
Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 400, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of -0.3064710113190657


## Backtest

In [46]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[3]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

**Test with auction time**

In [48]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[3]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, auction_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

In [49]:
get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5112033977090062
Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5098700544068736
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25


KeyboardInterrupt: 

**Test with auction time**

In [47]:
get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6187127688096521
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6187127688096521
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
BMW,0.125761,1.206045,0.052502,1.035624,0.14874,1.2373,0.03708,1.030391,0.138108,1.305583,0.068016,1.281392,0.139875,1.338019,0.065808,1.142081,1900.0


In [None]:
all_results = None
for instrument in files_list:
    raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[3]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
    df = format_data(raw)
    df_less_days = df[df.index.year >= 1900]
    open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
    intraday_data = get_intraday_data(df_less_days, open_time, close_time)
    daily_data = get_daily_data(intraday_data)
    current_ticker = df.ticker.iloc[0]
    
    this_ticker_results = get_full_algo_performance(daily_data, intraday_data, current_ticker)
    if all_results == None:
        all_results = this_ticker_results
    else:
        all_results = pd.concat([all_results, this_ticker_results])

In [36]:
printFullAlgoPerformance(daily_data, intraday_data, current_ticker)

Backtesting BMW. Using data from 2003

Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6187127688096522
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6187127688096521
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25
            log_total_traded_vol  money_flow_index
2003-06-27             14.716221      8.091281e+07
2003-07-03             14.231743      5.119830e+07
2003-07-04            

The performance of the algorithm using dynamic predictor with segmented data on BMW is
Mean: ±0.037080
Standard Dev: ±1.030391
--------------------------------------------------------------------------------
Backtesting performance of static predictor using segmented data...

The performance of the algorithm using static predictor with segmented data on BMW is
Mean: ±0.126713
Standard Dev: ±1.208794
--------------------------------------------------------------------------------
Backtesting performance of dynamic predictor using segmented data ...

The performance of the algorithm using dynamic predictor with segmented data on BMW is
Mean: ±0.023635
Standard Dev: ±1.174381
--------------------------------------------------------------------------------
Backtesting performance of static predictor using segmented data...

The performance of the algorithm using static predictor with segmented data on BMW is
Mean: ±0.139875
Standard Dev: ±1.338019
------------------------------------------

In [24]:
printFullAlgoPerformance(daily_data, intraday_data, current_ticker)

Backtesting BMW. Using data from 2003

Backtesting performance with static predictor...

The performance of the algorithm using static predictor on BMW is
Mean: ±0.125761
Standard Dev: ±1.206045
--------------------------------------------------------------------------------
Backtesting performance with dynamic predictor...

The performance of the algorithm using dynamic predictor on BMW is
Mean: ±0.052502
Standard Dev: ±1.035624
--------------------------------------------------------------------------------
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25
Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhou

The performance of the algorithm using dynamic predictor with segmented data on BMW is
Mean: ±0.061515
Standard Dev: ±1.280750
--------------------------------------------------------------------------------


In [None]:
printFullAlgoPerformance(daily_data, intraday_data, current_ticker)

In [None]:
printFullAlgoPerformance(daily_data, intraday_data, current_ticker)

In [17]:
printFullAlgoPerformance(daily_data, intraday_data, current_ticker)

Backtesting BMW. Using data from 2003

Backtesting performance with static predictor...

The performance of the algorithm using static predictor on BMW is
Mean: ±0.119796
Standard Dev: ±1.178328
--------------------------------------------------------------------------------
Backtesting performance with dynamic predictor...

The performance of the algorithm using dynamic predictor on BMW is
Mean: ±0.056874
Standard Dev: ±1.038553
--------------------------------------------------------------------------------
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.25
Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.3915074

KeyboardInterrupt: 

### Old try con subasta

In [47]:
printFullAlgoPerformance(daily_data, intraday_data, current_ticker)

Backtesting BMW. Using data from 2003

Backtesting performance with static predictor...

The performance of the algorithm using static predictor on BMW is
Mean: ±0.109193
Standard Dev: ±1.258882
--------------------------------------------------------------------------------
Backtesting performance with dynamic predictor...

The performance of the algorithm using dynamic predictor on BMW is
Mean: ±0.072437
Standard Dev: ±1.836598
--------------------------------------------------------------------------------
Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.3705803884123717
Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Scor

In [23]:
printFullAlgoPerformance(daily_data, intraday_data, current_ticker)

Backtesting BMW. Using data from 2003

Backtesting performance with static predictor...

The performance of the algorithm using static predictor on BMW is
Mean: ±0.109193
Standard Dev: ±1.258882
--------------------------------------------------------------------------------
Backtesting performance with dynamic predictor...

The performance of the algorithm using dynamic predictor on BMW is
Mean: ±0.072437
Standard Dev: ±1.836598
--------------------------------------------------------------------------------
Optimal params are {'n_components': 4, 'covariance_type': 'diag', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.3705803884123717
Optimal params are {'n_components': 4, 'covariance_type': 'spherical', 'max_iter': 400, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Scor

### Test

In [67]:
train_intraday_data, test_intraday_data = split_train_test_data(intraday_data, 2)
train_daily_data, test_daily_data = split_train_test_data(daily_data, 2)

mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag=True)

[87.84938620320398, 87.21973623113446, 87.3585464255511, 87.25355654620324, 88.28585973773629, 86.82714834486157, 86.86089905195837, 86.09707821800299, 86.00158864302881, 85.41299807768524, 86.1379409847325, 87.39685836416578, 87.41341502776574, 89.45065740188143, 89.87920448617912, 89.83897165754763, 90.23779596399461, 87.81354772390486, 87.55369730162559, 87.05504619336725, 86.28182943707542, 85.9766357669019, 86.12320118893739, 84.94924962068265, 85.61855510216662, 85.05772762545614, 85.34137385984891, 86.85214465975187, 87.28930530856296, 86.59709864399052, 86.54433953506789, 85.79052918511158, 84.87137325933651, 85.20823609722993, 85.04815446260899, 83.79488994196313, 85.22281590174235, 85.45613333287342, 84.32129298702586, 84.48740654250912, 85.5991944069995, 85.42210258135816, 85.3570486298831, 86.14877351616502, 86.06053076380773, 85.64190157036523, 87.17872680537866, 87.33310461405911, 87.08516338370008, 87.63727857011366, 87.27140771066829, 87.53783765396147, 87.1599899308941

In [17]:
features_list = get_features_series_list(daily_data)
features_df = get_features_df(features_list)

In [56]:
print("Backtesting {}. Using data from {}\n".format(current_ticker, daily_data.index[0].year))
train_daily_data, test_daily_data = split_train_test_data(daily_data, 2)
train_intraday_data, test_intraday_data = split_train_test_data(intraday_data, 2)

print("Backtesting performance with static predictor...\n")
mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag=False)
print("The performance of the algorithm using static predictor on " + current_ticker + " is")
print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))

print("-"*80)

print("Backtesting performance with dynamic predictor...\n")
mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag=True)
print("The performance of the algorithm using dynamic predictor on " + current_ticker + " is")
print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))

print("-"*80)

features_list = get_features_series_list(daily_data)
features_df = get_features_df(features_list)

best_gmm, features_gmm = get_best_gmm(features_list)
best_bgm, features_bgm = get_best_bgm(features_list)
best_ghmm, features_ghmm = get_best_ghmm(features_list)

train_features_df, test_features_df = split_train_test_data(features_df[features_gmm], 2)

model_prediction = fit_model(best_gmm, features_df[features_gmm], train_features_df, features_gmm)
regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")
regime = regime.resample("5T").asfreq().fillna(method="ffill")

regime_intraday = pd.merge(intraday_data, regime, left_index=True, right_index=True)
regime_intraday = regime_intraday.astype({"regime":"int32"})

regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
regime_daily = regime_daily.astype({"regime":"int32"})

intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

#     print("Model used: GMM, features used: [{}]\n".format(", ".join(features_gmm)))
print("Backtesting performance of static predictor using segmented data...\n")
mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
print("The performance of the algorithm using static predictor with segmented data on " + current_ticker + " is")
print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
print("-"*80)

Backtesting performance of static predictor using segmented data...

2017-10-10 09:05:00    88.370242
2017-10-10 09:10:00    88.331238
2017-10-10 09:15:00    88.398833
2017-10-10 09:20:00    88.534209
2017-10-10 09:25:00    88.411758
                         ...    
2017-10-10 17:15:00    88.388678
2017-10-10 17:20:00    88.481229
2017-10-10 17:25:00    88.463151
2017-10-10 17:30:00    88.460000
2017-10-10 17:35:00    88.420000
Name: vwap, Length: 103, dtype: float64
2017-10-10 09:05:00    88.370242
2017-10-10 09:10:00    88.331238
2017-10-10 09:15:00    88.398833
2017-10-10 09:20:00    88.534209
2017-10-10 09:25:00    88.411758
                         ...    
2017-10-10 17:15:00    88.388678
2017-10-10 17:20:00    88.481229
2017-10-10 17:25:00    88.463151
2017-10-10 17:30:00    88.460000
2017-10-10 17:35:00    88.420000
Name: vwap, Length: 103, dtype: float64
2017-10-11 09:05:00    88.598729
2017-10-11 09:10:00    88.432148
2017-10-11 09:15:00    88.426483
2017-10-11 09:20:00    88.

Name: vwap, Length: 103, dtype: float64
2017-11-06 09:05:00    89.708307
2017-11-06 09:10:00    89.696227
2017-11-06 09:15:00    89.731870
2017-11-06 09:20:00    89.975274
2017-11-06 09:25:00    90.027298
                         ...    
2017-11-06 17:15:00    90.300469
2017-11-06 17:20:00    90.282461
2017-11-06 17:25:00    90.221868
2017-11-06 17:30:00    90.190000
2017-11-06 17:35:00    89.970000
Name: vwap, Length: 103, dtype: float64
2017-11-07 09:05:00    88.403737
2017-11-07 09:10:00    88.113887
2017-11-07 09:15:00    88.088704
2017-11-07 09:20:00    88.135549
2017-11-07 09:25:00    88.092946
                         ...    
2017-11-07 17:15:00    87.473594
2017-11-07 17:20:00    87.468186
2017-11-07 17:25:00    87.504418
2017-11-07 17:30:00    87.510000
2017-11-07 17:35:00    87.420000
Name: vwap, Length: 103, dtype: float64
2017-11-07 09:05:00    88.403737
2017-11-07 09:10:00    88.113887
2017-11-07 09:15:00    88.088704
2017-11-07 09:20:00    88.135549
2017-11-07 09:25:00   

Name: vwap, Length: 103, dtype: float64
2017-11-27 09:05:00    85.533874
2017-11-27 09:10:00    85.674742
2017-11-27 09:15:00    85.541992
2017-11-27 09:20:00    85.687460
2017-11-27 09:25:00    85.692971
                         ...    
2017-11-27 17:15:00    85.784872
2017-11-27 17:20:00    85.759199
2017-11-27 17:25:00    85.745485
2017-11-27 17:30:00    85.590000
2017-11-27 17:35:00    85.690000
Name: vwap, Length: 103, dtype: float64
2017-11-28 09:05:00    85.083940
2017-11-28 09:10:00    85.004534
2017-11-28 09:15:00    85.004383
2017-11-28 09:20:00    85.104648
2017-11-28 09:25:00    84.873246
                         ...    
2017-11-28 17:15:00    84.797893
2017-11-28 17:20:00    84.947422
2017-11-28 17:25:00    84.928729
2017-11-28 17:30:00    84.980000
2017-11-28 17:35:00    84.950000
Name: vwap, Length: 103, dtype: float64
2017-11-28 09:05:00    85.083940
2017-11-28 09:10:00    85.004534
2017-11-28 09:15:00    85.004383
2017-11-28 09:20:00    85.104648
2017-11-28 09:25:00   

Name: vwap, Length: 103, dtype: float64
2017-12-28 09:05:00    87.000895
2017-12-28 09:10:00    87.019522
2017-12-28 09:15:00    86.987446
2017-12-28 09:20:00    87.001815
2017-12-28 09:25:00    86.987101
                         ...    
2017-12-28 17:15:00    87.020385
2017-12-28 17:20:00    86.994998
2017-12-28 17:25:00    86.977800
2017-12-28 17:30:00    86.950000
2017-12-28 17:35:00    87.140000
Name: vwap, Length: 103, dtype: float64
2017-12-29 09:05:00    87.226855
2017-12-29 09:10:00    87.209752
2017-12-29 09:15:00    87.182142
2017-12-29 09:20:00    87.123220
2017-12-29 09:25:00    87.139336
                         ...    
2017-12-29 13:45:00    87.151205
2017-12-29 13:50:00    87.050940
2017-12-29 13:55:00    87.094599
2017-12-29 14:00:00    86.989608
2017-12-29 14:05:00    86.830000
Name: vwap, Length: 61, dtype: float64
2017-12-29 09:05:00    87.226855
2017-12-29 09:10:00    87.209752
2017-12-29 09:15:00    87.182142
2017-12-29 09:20:00    87.123220
2017-12-29 09:25:00    

Name: vwap, Length: 103, dtype: float64
2018-01-25 09:05:00    93.908457
2018-01-25 09:10:00    93.979236
2018-01-25 09:15:00    93.962141
2018-01-25 09:20:00    93.957796
2018-01-25 09:25:00    94.180388
                         ...    
2018-01-25 17:15:00    93.188129
2018-01-25 17:20:00    93.049464
2018-01-25 17:25:00    93.117089
2018-01-25 17:30:00    93.090000
2018-01-25 17:35:00    93.250000
Name: vwap, Length: 103, dtype: float64
2018-01-26 09:05:00    92.602698
2018-01-26 09:10:00    92.783692
2018-01-26 09:15:00    92.940334
2018-01-26 09:20:00    93.037723
2018-01-26 09:25:00    92.829897
                         ...    
2018-01-26 17:15:00    93.499699
2018-01-26 17:20:00    93.519700
2018-01-26 17:25:00    93.572852
2018-01-26 17:30:00    93.660000
2018-01-26 17:35:00    93.830000
Name: vwap, Length: 103, dtype: float64
2018-01-26 09:05:00    92.602698
2018-01-26 09:10:00    92.783692
2018-01-26 09:15:00    92.940334
2018-01-26 09:20:00    93.037723
2018-01-26 09:25:00   

Name: vwap, Length: 103, dtype: float64
2018-02-23 09:05:00    87.442653
2018-02-23 09:10:00    87.441391
2018-02-23 09:15:00    87.335504
2018-02-23 09:20:00    87.240231
2018-02-23 09:25:00    87.269650
                         ...    
2018-02-23 17:15:00    86.749735
2018-02-23 17:20:00    86.261670
2018-02-23 17:25:00    86.415116
2018-02-23 17:30:00    86.750000
2018-02-23 17:35:00    87.050000
Name: vwap, Length: 103, dtype: float64
2018-02-26 09:05:00    87.823237
2018-02-26 09:10:00    88.089984
2018-02-26 09:15:00    87.910514
2018-02-26 09:20:00    87.937004
2018-02-26 09:25:00    88.024152
                         ...    
2018-02-26 17:15:00    87.736073
2018-02-26 17:20:00    87.752460
2018-02-26 17:25:00    87.751105
2018-02-26 17:30:00    87.800000
2018-02-26 17:35:00    87.750000
Name: vwap, Length: 103, dtype: float64
2018-02-26 09:05:00    87.823237
2018-02-26 09:10:00    88.089984
2018-02-26 09:15:00    87.910514
2018-02-26 09:20:00    87.937004
2018-02-26 09:25:00   

Name: vwap, Length: 103, dtype: float64
2018-03-23 09:05:00    84.280115
2018-03-23 09:10:00    84.229070
2018-03-23 09:15:00    84.173990
2018-03-23 09:20:00    84.162570
2018-03-23 09:25:00    83.981030
                         ...    
2018-03-23 17:15:00    83.868433
2018-03-23 17:20:00    83.902475
2018-03-23 17:25:00    84.155251
2018-03-23 17:30:00    84.270000
2018-03-23 17:35:00    84.180000
Name: vwap, Length: 103, dtype: float64
2018-03-26 09:05:00    84.682257
2018-03-26 09:10:00    84.816532
2018-03-26 09:15:00    84.870888
2018-03-26 09:20:00    84.903643
2018-03-26 09:25:00    84.780540
                         ...    
2018-03-26 17:15:00    85.179834
2018-03-26 17:20:00    85.012495
2018-03-26 17:25:00    85.197804
2018-03-26 17:30:00    85.110000
2018-03-26 17:35:00    85.210000
Name: vwap, Length: 103, dtype: float64
2018-03-26 09:05:00    84.682257
2018-03-26 09:10:00    84.816532
2018-03-26 09:15:00    84.870888
2018-03-26 09:20:00    84.903643
2018-03-26 09:25:00   

2018-04-24 09:05:00    90.549555
2018-04-24 09:10:00    90.604463
2018-04-24 09:15:00    90.557153
2018-04-24 09:20:00    90.504104
2018-04-24 09:25:00    90.595366
                         ...    
2018-04-24 17:15:00    91.024752
2018-04-24 17:20:00    91.044453
2018-04-24 17:25:00    91.070191
2018-04-24 17:30:00    91.090000
2018-04-24 17:35:00    91.130000
Name: vwap, Length: 103, dtype: float64
2018-04-24 09:05:00    90.549555
2018-04-24 09:10:00    90.604463
2018-04-24 09:15:00    90.557153
2018-04-24 09:20:00    90.504104
2018-04-24 09:25:00    90.595366
                         ...    
2018-04-24 17:15:00    91.024752
2018-04-24 17:20:00    91.044453
2018-04-24 17:25:00    91.070191
2018-04-24 17:30:00    91.090000
2018-04-24 17:35:00    91.130000
Name: vwap, Length: 103, dtype: float64
2018-04-25 09:05:00    90.211256
2018-04-25 09:10:00    90.266923
2018-04-25 09:15:00    90.405624
2018-04-25 09:20:00    90.496588
2018-04-25 09:25:00    90.610543
                         ... 

Name: vwap, Length: 103, dtype: float64
2018-05-11 09:05:00    92.326839
2018-05-11 09:10:00    92.276043
2018-05-11 09:15:00    92.188298
2018-05-11 09:20:00    92.343741
2018-05-11 09:25:00    92.421275
                         ...    
2018-05-11 17:15:00    92.200846
2018-05-11 17:20:00    92.176856
2018-05-11 17:25:00    92.197101
2018-05-11 17:30:00    92.220000
2018-05-11 17:35:00    92.180000
Name: vwap, Length: 103, dtype: float64
2018-05-14 09:05:00    92.001605
2018-05-14 09:10:00    91.967325
2018-05-14 09:15:00    91.833795
2018-05-14 09:20:00    91.823536
2018-05-14 09:25:00    91.730816
                         ...    
2018-05-14 17:15:00    91.795955
2018-05-14 17:20:00    91.879491
2018-05-14 17:25:00    91.905203
2018-05-14 17:30:00    91.910000
2018-05-14 17:35:00    91.830000
Name: vwap, Length: 103, dtype: float64
2018-05-14 09:05:00    92.001605
2018-05-14 09:10:00    91.967325
2018-05-14 09:15:00    91.833795
2018-05-14 09:20:00    91.823536
2018-05-14 09:25:00   

2018-05-29 09:05:00    86.501267
2018-05-29 09:10:00    86.562224
2018-05-29 09:15:00    86.559167
2018-05-29 09:20:00    86.399572
2018-05-29 09:25:00    86.359871
                         ...    
2018-05-29 17:15:00    86.067946
2018-05-29 17:20:00    85.975321
2018-05-29 17:25:00    86.069060
2018-05-29 17:30:00    86.010103
2018-05-29 17:35:00    86.080000
Name: vwap, Length: 103, dtype: float64
2018-05-29 09:05:00    86.501267
2018-05-29 09:10:00    86.562224
2018-05-29 09:15:00    86.559167
2018-05-29 09:20:00    86.399572
2018-05-29 09:25:00    86.359871
                         ...    
2018-05-29 17:15:00    86.067946
2018-05-29 17:20:00    85.975321
2018-05-29 17:25:00    86.069060
2018-05-29 17:30:00    86.010103
2018-05-29 17:35:00    86.080000
Name: vwap, Length: 103, dtype: float64
2018-05-30 09:05:00    85.540858
2018-05-30 09:10:00    85.405750
2018-05-30 09:15:00    85.393628
2018-05-30 09:20:00    85.418347
2018-05-30 09:25:00    85.501186
                         ... 

Name: vwap, Length: 103, dtype: float64
2018-06-14 09:05:00    84.193159
2018-06-14 09:10:00    84.374780
2018-06-14 09:15:00    84.297953
2018-06-14 09:20:00    84.181193
2018-06-14 09:25:00    84.245021
                         ...    
2018-06-14 17:15:00    86.170908
2018-06-14 17:20:00    86.209239
2018-06-14 17:25:00    86.207326
2018-06-14 17:30:00    86.310000
2018-06-14 17:35:00    86.270000
Name: vwap, Length: 103, dtype: float64
2018-06-15 09:05:00    86.696049
2018-06-15 09:10:00    86.684796
2018-06-15 09:15:00    86.904634
2018-06-15 09:20:00    86.939781
2018-06-15 09:25:00    87.070429
                         ...    
2018-06-15 17:15:00    85.968725
2018-06-15 17:20:00    85.988484
2018-06-15 17:25:00    85.943560
2018-06-15 17:30:00    85.880000
2018-06-15 17:35:00    85.740000
Name: vwap, Length: 103, dtype: float64
2018-06-15 09:05:00    86.696049
2018-06-15 09:10:00    86.684796
2018-06-15 09:15:00    86.904634
2018-06-15 09:20:00    86.939781
2018-06-15 09:25:00   

Name: vwap, Length: 103, dtype: float64
2018-06-29 09:05:00    78.695399
2018-06-29 09:10:00    78.767882
2018-06-29 09:15:00    78.552483
2018-06-29 09:20:00    78.447231
2018-06-29 09:25:00    78.545099
                         ...    
2018-06-29 17:15:00    77.824855
2018-06-29 17:20:00    77.810090
2018-06-29 17:25:00    77.859850
2018-06-29 17:30:00    77.840000
2018-06-29 17:35:00    77.630000
Name: vwap, Length: 103, dtype: float64
2018-07-02 09:05:00    76.782679
2018-07-02 09:10:00    76.850762
2018-07-02 09:15:00    76.877033
2018-07-02 09:20:00    76.844081
2018-07-02 09:25:00    76.709444
                         ...    
2018-07-02 17:15:00    77.815538
2018-07-02 17:20:00    77.868142
2018-07-02 17:25:00    77.864284
2018-07-02 17:30:00    77.809293
2018-07-02 17:35:00    77.730000
Name: vwap, Length: 103, dtype: float64
2018-07-02 09:05:00    76.782679
2018-07-02 09:10:00    76.850762
2018-07-02 09:15:00    76.877033
2018-07-02 09:20:00    76.844081
2018-07-02 09:25:00   

Name: vwap, Length: 103, dtype: float64
2018-07-27 09:05:00    83.521213
2018-07-27 09:10:00    83.720487
2018-07-27 09:15:00    83.807278
2018-07-27 09:20:00    83.888905
2018-07-27 09:25:00    83.982274
                         ...    
2018-07-27 17:15:00    82.873806
2018-07-27 17:20:00    82.937968
2018-07-27 17:25:00    83.014202
2018-07-27 17:30:00    82.979799
2018-07-27 17:35:00    82.910000
Name: vwap, Length: 103, dtype: float64
2018-07-30 09:05:00    82.524957
2018-07-30 09:10:00    82.666729
2018-07-30 09:15:00    82.745790
2018-07-30 09:20:00    82.765620
2018-07-30 09:25:00    82.762261
                         ...    
2018-07-30 17:15:00    83.044238
2018-07-30 17:20:00    83.070909
2018-07-30 17:25:00    83.064730
2018-07-30 17:30:00    83.050000
2018-07-30 17:35:00    82.970000
Name: vwap, Length: 103, dtype: float64
2018-07-30 09:05:00    82.524957
2018-07-30 09:10:00    82.666729
2018-07-30 09:15:00    82.745790
2018-07-30 09:20:00    82.765620
2018-07-30 09:25:00   

Name: vwap, Length: 103, dtype: float64
2018-08-24 09:05:00    81.197737
2018-08-24 09:10:00    81.199453
2018-08-24 09:15:00    81.213575
2018-08-24 09:20:00    81.110851
2018-08-24 09:25:00    81.178924
                         ...    
2018-08-24 17:15:00    81.253302
2018-08-24 17:20:00    81.243077
2018-08-24 17:25:00    81.246304
2018-08-24 17:30:00    81.290000
2018-08-24 17:35:00    81.290000
Name: vwap, Length: 103, dtype: float64
2018-08-27 09:05:00    82.178137
2018-08-27 09:10:00    82.362381
2018-08-27 09:15:00    82.447104
2018-08-27 09:20:00    82.386936
2018-08-27 09:25:00    82.339240
                         ...    
2018-08-27 17:15:00    82.989943
2018-08-27 17:20:00    82.989473
2018-08-27 17:25:00    83.016597
2018-08-27 17:30:00    83.060000
2018-08-27 17:35:00    83.190000
Name: vwap, Length: 103, dtype: float64
2018-08-27 09:05:00    82.178137
2018-08-27 09:10:00    82.362381
2018-08-27 09:15:00    82.447104
2018-08-27 09:20:00    82.386936
2018-08-27 09:25:00   

Name: vwap, Length: 103, dtype: float64
2018-09-21 09:05:00    86.274740
2018-09-21 09:10:00    86.317647
2018-09-21 09:15:00    86.572338
2018-09-21 09:20:00    86.620302
2018-09-21 09:25:00    86.414604
                         ...    
2018-09-21 17:15:00    85.796028
2018-09-21 17:20:00    85.743318
2018-09-21 17:25:00    85.693197
2018-09-21 17:30:00    85.820000
2018-09-21 17:35:00    85.770000
Name: vwap, Length: 103, dtype: float64
2018-09-24 09:05:00    85.191656
2018-09-24 09:10:00    85.011029
2018-09-24 09:15:00    84.930460
2018-09-24 09:20:00    84.980753
2018-09-24 09:25:00    85.011617
                         ...    
2018-09-24 17:15:00    83.598879
2018-09-24 17:20:00    83.627218
2018-09-24 17:25:00    83.623564
2018-09-24 17:30:00    83.639728
2018-09-24 17:35:00    83.500000
Name: vwap, Length: 103, dtype: float64
2018-09-24 09:05:00    85.191656
2018-09-24 09:10:00    85.011029
2018-09-24 09:15:00    84.930460
2018-09-24 09:20:00    84.980753
2018-09-24 09:25:00   

2018-10-10 09:05:00    76.063788
2018-10-10 09:10:00    75.976860
2018-10-10 09:15:00    75.960726
2018-10-10 09:20:00    75.908581
2018-10-10 09:25:00    75.777758
                         ...    
2018-10-10 17:15:00    75.421089
2018-10-10 17:20:00    75.452368
2018-10-10 17:25:00    75.484225
2018-10-10 17:30:00    75.420000
2018-10-10 17:35:00    75.350000
Name: vwap, Length: 103, dtype: float64
2018-10-10 09:05:00    76.063788
2018-10-10 09:10:00    75.976860
2018-10-10 09:15:00    75.960726
2018-10-10 09:20:00    75.908581
2018-10-10 09:25:00    75.777758
                         ...    
2018-10-10 17:15:00    75.421089
2018-10-10 17:20:00    75.452368
2018-10-10 17:25:00    75.484225
2018-10-10 17:30:00    75.420000
2018-10-10 17:35:00    75.350000
Name: vwap, Length: 103, dtype: float64
2018-10-11 09:05:00    73.940202
2018-10-11 09:10:00    74.292097
2018-10-11 09:15:00    74.745580
2018-10-11 09:20:00    74.792999
2018-10-11 09:25:00    74.579539
                         ... 

2018-10-26 09:05:00    73.749985
2018-10-26 09:10:00    73.777625
2018-10-26 09:15:00    73.592013
2018-10-26 09:20:00    73.784840
2018-10-26 09:25:00    73.728397
                         ...    
2018-10-26 17:15:00    74.249998
2018-10-26 17:20:00    74.365058
2018-10-26 17:25:00    74.434743
2018-10-26 17:30:00    74.490000
2018-10-26 17:35:00    75.020000
Name: vwap, Length: 103, dtype: float64
2018-10-26 09:05:00    73.749985
2018-10-26 09:10:00    73.777625
2018-10-26 09:15:00    73.592013
2018-10-26 09:20:00    73.784840
2018-10-26 09:25:00    73.728397
                         ...    
2018-10-26 17:15:00    74.249998
2018-10-26 17:20:00    74.365058
2018-10-26 17:25:00    74.434743
2018-10-26 17:30:00    74.490000
2018-10-26 17:35:00    75.020000
Name: vwap, Length: 103, dtype: float64
2018-10-29 09:05:00    75.449135
2018-10-29 09:10:00    75.577000
2018-10-29 09:15:00    75.699856
2018-10-29 09:20:00    76.080746
2018-10-29 09:25:00    75.923705
                         ... 

2018-11-15 09:05:00    73.825910
2018-11-15 09:10:00    74.057925
2018-11-15 09:15:00    74.223219
2018-11-15 09:20:00    74.239401
2018-11-15 09:25:00    74.030924
                         ...    
2018-11-15 17:15:00    74.040440
2018-11-15 17:20:00    74.062314
2018-11-15 17:25:00    74.031272
2018-11-15 17:30:00    74.120000
2018-11-15 17:35:00    74.200000
Name: vwap, Length: 103, dtype: float64
2018-11-15 09:05:00    73.825910
2018-11-15 09:10:00    74.057925
2018-11-15 09:15:00    74.223219
2018-11-15 09:20:00    74.239401
2018-11-15 09:25:00    74.030924
                         ...    
2018-11-15 17:15:00    74.040440
2018-11-15 17:20:00    74.062314
2018-11-15 17:25:00    74.031272
2018-11-15 17:30:00    74.120000
2018-11-15 17:35:00    74.200000
Name: vwap, Length: 103, dtype: float64
2018-11-16 09:05:00    74.610886
2018-11-16 09:10:00    74.829217
2018-11-16 09:15:00    74.816548
2018-11-16 09:20:00    74.637595
2018-11-16 09:25:00    74.638790
                         ... 

Name: vwap, Length: 103, dtype: float64
2018-11-30 09:05:00    72.656637
2018-11-30 09:10:00    72.345331
2018-11-30 09:15:00    72.163093
2018-11-30 09:20:00    72.172788
2018-11-30 09:25:00    72.192103
                         ...    
2018-11-30 17:15:00    72.199514
2018-11-30 17:20:00    72.291999
2018-11-30 17:25:00    72.303073
2018-11-30 17:30:00    72.200000
2018-11-30 17:35:00    72.210000
Name: vwap, Length: 103, dtype: float64
2018-12-03 09:05:00    76.649699
2018-12-03 09:10:00    76.464569
2018-12-03 09:15:00    76.779579
2018-12-03 09:20:00    77.142811
2018-12-03 09:25:00    76.677425
                         ...    
2018-12-03 17:15:00    75.768662
2018-12-03 17:20:00    75.859921
2018-12-03 17:25:00    75.773589
2018-12-03 17:30:00    75.700000
2018-12-03 17:35:00    75.660000
Name: vwap, Length: 103, dtype: float64
2018-12-03 09:05:00    76.649699
2018-12-03 09:10:00    76.464569
2018-12-03 09:15:00    76.779579
2018-12-03 09:20:00    77.142811
2018-12-03 09:25:00   

Name: vwap, Length: 103, dtype: float64
2019-01-04 09:05:00    70.147053
2019-01-04 09:10:00    70.150704
2019-01-04 09:15:00    69.937455
2019-01-04 09:20:00    70.105706
2019-01-04 09:25:00    70.111344
                         ...    
2019-01-04 17:15:00    71.540410
2019-01-04 17:20:00    71.508443
2019-01-04 17:25:00    71.560315
2019-01-04 17:30:00    71.650000
2019-01-04 17:35:00    71.710000
Name: vwap, Length: 103, dtype: float64
2019-01-07 09:05:00    71.894880
2019-01-07 09:10:00    72.001596
2019-01-07 09:15:00    71.674053
2019-01-07 09:20:00    71.583066
2019-01-07 09:25:00    71.672584
                         ...    
2019-01-07 17:15:00    72.119984
2019-01-07 17:20:00    72.104967
2019-01-07 17:25:00    72.028430
2019-01-07 17:30:00    72.030000
2019-01-07 17:35:00    72.120000
Name: vwap, Length: 103, dtype: float64
2019-01-07 09:05:00    71.894880
2019-01-07 09:10:00    72.001596
2019-01-07 09:15:00    71.674053
2019-01-07 09:20:00    71.583066
2019-01-07 09:25:00   

Name: vwap, Length: 103, dtype: float64
2019-02-01 09:05:00    73.454541
2019-02-01 09:10:00    73.589164
2019-02-01 09:15:00    73.742627
2019-02-01 09:20:00    73.670162
2019-02-01 09:25:00    73.776582
                         ...    
2019-02-01 17:15:00    73.933291
2019-02-01 17:20:00    73.992907
2019-02-01 17:25:00    74.169715
2019-02-01 17:30:00    74.120000
2019-02-01 17:35:00    74.140000
Name: vwap, Length: 103, dtype: float64
2019-02-04 09:05:00    74.165634
2019-02-04 09:10:00    73.738790
2019-02-04 09:15:00    73.568058
2019-02-04 09:20:00    73.616239
2019-02-04 09:25:00    73.522982
                         ...    
2019-02-04 17:15:00    73.017707
2019-02-04 17:20:00    72.979462
2019-02-04 17:25:00    72.977941
2019-02-04 17:30:00    72.980000
2019-02-04 17:35:00    73.240000
Name: vwap, Length: 103, dtype: float64
2019-02-04 09:05:00    74.165634
2019-02-04 09:10:00    73.738790
2019-02-04 09:15:00    73.568058
2019-02-04 09:20:00    73.616239
2019-02-04 09:25:00   

2019-02-28 09:05:00    73.961733
2019-02-28 09:10:00    73.773410
2019-02-28 09:15:00    73.843997
2019-02-28 09:20:00    73.879243
2019-02-28 09:25:00    73.782867
                         ...    
2019-02-28 17:15:00    74.324477
2019-02-28 17:20:00    74.329717
2019-02-28 17:25:00    74.366896
2019-02-28 17:30:00    74.420000
2019-02-28 17:35:00    74.310000
Name: vwap, Length: 103, dtype: float64
2019-02-28 09:05:00    73.961733
2019-02-28 09:10:00    73.773410
2019-02-28 09:15:00    73.843997
2019-02-28 09:20:00    73.879243
2019-02-28 09:25:00    73.782867
                         ...    
2019-02-28 17:15:00    74.324477
2019-02-28 17:20:00    74.329717
2019-02-28 17:25:00    74.366896
2019-02-28 17:30:00    74.420000
2019-02-28 17:35:00    74.310000
Name: vwap, Length: 103, dtype: float64
2019-03-01 09:05:00    74.845065
2019-03-01 09:10:00    74.888183
2019-03-01 09:15:00    74.872358
2019-03-01 09:20:00    74.800628
2019-03-01 09:25:00    74.890106
                         ... 

2019-03-15 09:05:00    73.894916
2019-03-15 09:10:00    73.889594
2019-03-15 09:15:00    73.942378
2019-03-15 09:20:00    73.897412
2019-03-15 09:25:00    73.965035
                         ...    
2019-03-15 17:15:00    74.172976
2019-03-15 17:20:00    74.206883
2019-03-15 17:25:00    74.218204
2019-03-15 17:30:00    74.200000
2019-03-15 17:35:00    74.090000
Name: vwap, Length: 103, dtype: float64
2019-03-15 09:05:00    73.894916
2019-03-15 09:10:00    73.889594
2019-03-15 09:15:00    73.942378
2019-03-15 09:20:00    73.897412
2019-03-15 09:25:00    73.965035
                         ...    
2019-03-15 17:15:00    74.172976
2019-03-15 17:20:00    74.206883
2019-03-15 17:25:00    74.218204
2019-03-15 17:30:00    74.200000
2019-03-15 17:35:00    74.090000
Name: vwap, Length: 103, dtype: float64
2019-03-18 09:05:00    74.041323
2019-03-18 09:10:00    74.188422
2019-03-18 09:15:00    74.298070
2019-03-18 09:20:00    74.194535
2019-03-18 09:25:00    74.171754
                         ... 

Name: vwap, Length: 103, dtype: float64
2019-04-17 09:05:00    77.039233
2019-04-17 09:10:00    76.907242
2019-04-17 09:15:00    76.630316
2019-04-17 09:20:00    76.551921
2019-04-17 09:25:00    76.520053
                         ...    
2019-04-17 17:15:00    77.391912
2019-04-17 17:20:00    77.361679
2019-04-17 17:25:00    77.296065
2019-04-17 17:30:00    77.230000
2019-04-17 17:35:00    77.250000
Name: vwap, Length: 103, dtype: float64
2019-04-18 09:05:00    77.046387
2019-04-18 09:10:00    76.999642
2019-04-18 09:15:00    77.114478
2019-04-18 09:20:00    77.262330
2019-04-18 09:25:00    77.333536
                         ...    
2019-04-18 17:15:00    77.670155
2019-04-18 17:20:00    77.592628
2019-04-18 17:25:00    77.579661
2019-04-18 17:30:00    77.600000
2019-04-18 17:35:00    77.750000
Name: vwap, Length: 103, dtype: float64
2019-04-18 09:05:00    77.046387
2019-04-18 09:10:00    76.999642
2019-04-18 09:15:00    77.114478
2019-04-18 09:20:00    77.262330
2019-04-18 09:25:00   

Name: vwap, Length: 103, dtype: float64
2019-05-07 09:05:00    73.341946
2019-05-07 09:10:00    73.233268
2019-05-07 09:15:00    73.435543
2019-05-07 09:20:00    73.580388
2019-05-07 09:25:00    73.338872
                         ...    
2019-05-07 17:15:00    71.898580
2019-05-07 17:20:00    71.932786
2019-05-07 17:25:00    71.823727
2019-05-07 17:30:00    71.840000
2019-05-07 17:35:00    72.280000
Name: vwap, Length: 103, dtype: float64
2019-05-08 09:05:00    71.596520
2019-05-08 09:10:00    71.587009
2019-05-08 09:15:00    71.521584
2019-05-08 09:20:00    71.505021
2019-05-08 09:25:00    71.554690
                         ...    
2019-05-08 17:15:00    71.542203
2019-05-08 17:20:00    71.493637
2019-05-08 17:25:00    71.452014
2019-05-08 17:30:00    71.520231
2019-05-08 17:35:00    71.380000
Name: vwap, Length: 103, dtype: float64
2019-05-08 09:05:00    71.596520
2019-05-08 09:10:00    71.587009
2019-05-08 09:15:00    71.521584
2019-05-08 09:20:00    71.505021
2019-05-08 09:25:00   

Name: vwap, Length: 103, dtype: float64
2019-06-06 09:05:00    62.405293
2019-06-06 09:10:00    62.443690
2019-06-06 09:15:00    62.707742
2019-06-06 09:20:00    62.865870
2019-06-06 09:25:00    62.781936
                         ...    
2019-06-06 17:15:00    62.339661
2019-06-06 17:20:00    62.339456
2019-06-06 17:25:00    62.276727
2019-06-06 17:30:00    62.299334
2019-06-06 17:35:00    62.340000
Name: vwap, Length: 103, dtype: float64
2019-06-07 09:05:00    62.649762
2019-06-07 09:10:00    62.597454
2019-06-07 09:15:00    62.523632
2019-06-07 09:20:00    62.453222
2019-06-07 09:25:00    62.391416
                         ...    
2019-06-07 17:15:00    62.424870
2019-06-07 17:20:00    62.407551
2019-06-07 17:25:00    62.309462
2019-06-07 17:30:00    62.289561
2019-06-07 17:35:00    62.320000
Name: vwap, Length: 103, dtype: float64
2019-06-07 09:05:00    62.649762
2019-06-07 09:10:00    62.597454
2019-06-07 09:15:00    62.523632
2019-06-07 09:20:00    62.453222
2019-06-07 09:25:00   

Name: vwap, Length: 103, dtype: float64
2019-06-24 09:05:00    64.040236
2019-06-24 09:10:00    63.943829
2019-06-24 09:15:00    64.046447
2019-06-24 09:20:00    63.916798
2019-06-24 09:25:00    63.920702
                         ...    
2019-06-24 17:15:00    63.761725
2019-06-24 17:20:00    63.716315
2019-06-24 17:25:00    63.705462
2019-06-24 17:30:00    63.730000
2019-06-24 17:35:00    63.640000
Name: vwap, Length: 103, dtype: float64
2019-06-25 09:05:00    63.287490
2019-06-25 09:10:00    63.346100
2019-06-25 09:15:00    63.471323
2019-06-25 09:20:00    63.478325
2019-06-25 09:25:00    63.511753
                         ...    
2019-06-25 17:15:00    63.252240
2019-06-25 17:20:00    63.277222
2019-06-25 17:25:00    63.281950
2019-06-25 17:30:00    63.280000
2019-06-25 17:35:00    63.150000
Name: vwap, Length: 103, dtype: float64
2019-06-25 09:05:00    63.287490
2019-06-25 09:10:00    63.346100
2019-06-25 09:15:00    63.471323
2019-06-25 09:20:00    63.478325
2019-06-25 09:25:00   

Name: vwap, Length: 103, dtype: float64
2019-07-23 09:05:00    68.176453
2019-07-23 09:10:00    68.418442
2019-07-23 09:15:00    68.547410
2019-07-23 09:20:00    68.648714
2019-07-23 09:25:00    69.017590
                         ...    
2019-07-23 17:15:00    69.389198
2019-07-23 17:20:00    69.419436
2019-07-23 17:25:00    69.338404
2019-07-23 17:30:00    69.350000
2019-07-23 17:35:00    69.340000
Name: vwap, Length: 103, dtype: float64
2019-07-24 09:05:00    69.090459
2019-07-24 09:10:00    68.832959
2019-07-24 09:15:00    68.586783
2019-07-24 09:20:00    68.572508
2019-07-24 09:25:00    68.730726
                         ...    
2019-07-24 17:15:00    70.048071
2019-07-24 17:20:00    69.964626
2019-07-24 17:25:00    69.895167
2019-07-24 17:30:00    69.890000
2019-07-24 17:35:00    70.010000
Name: vwap, Length: 103, dtype: float64
2019-07-24 09:05:00    69.090459
2019-07-24 09:10:00    68.832959
2019-07-24 09:15:00    68.586783
2019-07-24 09:20:00    68.572508
2019-07-24 09:25:00   

2019-08-21 09:05:00    60.064231
2019-08-21 09:10:00    60.164410
2019-08-21 09:15:00    60.202840
2019-08-21 09:20:00    60.200516
2019-08-21 09:25:00    60.149526
                         ...    
2019-08-21 17:15:00    60.593216
2019-08-21 17:20:00    60.576990
2019-08-21 17:25:00    60.576270
2019-08-21 17:30:00    60.578943
2019-08-21 17:35:00    60.590000
Name: vwap, Length: 103, dtype: float64
2019-08-21 09:05:00    60.064231
2019-08-21 09:10:00    60.164410
2019-08-21 09:15:00    60.202840
2019-08-21 09:20:00    60.200516
2019-08-21 09:25:00    60.149526
                         ...    
2019-08-21 17:15:00    60.593216
2019-08-21 17:20:00    60.576990
2019-08-21 17:25:00    60.576270
2019-08-21 17:30:00    60.578943
2019-08-21 17:35:00    60.590000
Name: vwap, Length: 103, dtype: float64
2019-08-22 09:05:00    60.471285
2019-08-22 09:10:00    60.391720
2019-08-22 09:15:00    60.556897
2019-08-22 09:20:00    60.625244
2019-08-22 09:25:00    60.580381
                         ... 

Name: vwap, Length: 103, dtype: float64
2019-09-05 09:05:00    62.119890
2019-09-05 09:10:00    62.048025
2019-09-05 09:15:00    61.964009
2019-09-05 09:20:00    61.884864
2019-09-05 09:25:00    61.858924
                         ...    
2019-09-05 17:15:00    62.285928
2019-09-05 17:20:00    62.274634
2019-09-05 17:25:00    62.290702
2019-09-05 17:30:00    62.270000
2019-09-05 17:35:00    62.320000
Name: vwap, Length: 103, dtype: float64
2019-09-05 09:05:00    62.119890
2019-09-05 09:10:00    62.048025
2019-09-05 09:15:00    61.964009
2019-09-05 09:20:00    61.884864
2019-09-05 09:25:00    61.858924
                         ...    
2019-09-05 17:15:00    62.285928
2019-09-05 17:20:00    62.274634
2019-09-05 17:25:00    62.290702
2019-09-05 17:30:00    62.270000
2019-09-05 17:35:00    62.320000
Name: vwap, Length: 103, dtype: float64
2019-09-06 09:05:00    62.635309
2019-09-06 09:10:00    62.666451
2019-09-06 09:15:00    62.738213
2019-09-06 09:20:00    62.728193
2019-09-06 09:25:00   

Name: vwap, Length: 103, dtype: float64
2019-09-19 09:05:00    64.445246
2019-09-19 09:10:00    64.629539
2019-09-19 09:15:00    64.543331
2019-09-19 09:20:00    64.598901
2019-09-19 09:25:00    64.622916
                         ...    
2019-09-19 17:15:00    65.008870
2019-09-19 17:20:00    65.020167
2019-09-19 17:25:00    65.062568
2019-09-19 17:30:00    65.050837
2019-09-19 17:35:00    64.990000
Name: vwap, Length: 103, dtype: float64
2019-09-20 09:05:00    65.174003
2019-09-20 09:10:00    65.087211
2019-09-20 09:15:00    64.994717
2019-09-20 09:20:00    65.058187
2019-09-20 09:25:00    65.056589
                         ...    
2019-09-20 17:15:00    65.145149
2019-09-20 17:20:00    65.103728
2019-09-20 17:25:00    65.022651
2019-09-20 17:30:00    65.040000
2019-09-20 17:35:00    65.150000
Name: vwap, Length: 103, dtype: float64
2019-09-20 09:05:00    65.174003
2019-09-20 09:10:00    65.087211
2019-09-20 09:15:00    64.994717
2019-09-20 09:20:00    65.058187
2019-09-20 09:25:00   

In [47]:
print("Backtesting performance of dynamic predictor using segmented data ...\n")
mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
print("The performance of the algorithm using dynamic predictor with segmented data on " + current_ticker + " is")
print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
print("-"*80)

Backtesting performance of dynamic predictor using segmented data ...

Len result: 503
Len daily_vwap: 503
The performance of the algorithm using dynamic predictor with segmented data on BMW is
Mean: ±0.866964
Standard Dev: ±13.576209
--------------------------------------------------------------------------------


In [61]:
train_features_df, test_features_df = split_train_test_data(features_df[['betas_market_impact']], 2)

model_prediction = fit_model(best_gmm, features_df[['betas_market_impact']], train_features_df, ['betas_market_impact'])
regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")
regime = regime.resample("5T").asfreq().fillna(method="ffill")

regime_intraday = pd.merge(intraday_data, regime, left_index=True, right_index=True)
regime_intraday = regime_intraday.astype({"regime":"int32"})

regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
regime_daily = regime_daily.astype({"regime":"int32"})

intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

#     print("Model used: GMM, features used: [{}]\n".format(", ".join(features_gmm)))
print("Backtesting performance of static predictor using segmented data...\n")
mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
print("The performance of the algorithm using static predictor with segmented data on " + current_ticker + " is")
print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
print("-"*80)

print("Backtesting performance of dynamic predictor using segmented data ...\n")
mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
print("The performance of the algorithm using dynamic predictor with segmented data on " + current_ticker + " is")
print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
print("-"*80)

Backtesting performance of static predictor using segmented data...

(103,)
156450.6
(103,)
(103,)
(103,)
156450.6
(103,)
(103,)
(103,)
160572.7
(103,)
(103,)
(103,)
160572.7
(103,)
(103,)
(103,)
164151.15000000002
(103,)
(103,)
(103,)
164151.15000000002
(103,)
(103,)
(103,)
173336.2
(103,)
(103,)
(103,)
173336.2
(103,)
(103,)
(103,)
138200.9
(103,)
(103,)
(103,)
138200.9
(103,)
(103,)
(103,)
156450.6
(103,)
(103,)
(103,)
156450.6
(103,)
(103,)
(103,)
160572.7
(103,)
(103,)
(103,)
160572.7
(103,)
(103,)
(103,)
164151.15000000002
(103,)
(103,)
(103,)
164151.15000000002
(103,)
(103,)
(103,)
173336.2
(103,)
(103,)
(103,)
173336.2
(103,)
(103,)
(103,)
138200.9
(103,)
(103,)
(103,)
138200.9
(103,)
(103,)
(103,)
156450.6
(103,)
(103,)
(103,)
156450.6
(103,)
(103,)
(103,)
160572.7
(103,)
(103,)
(103,)
160572.7
(103,)
(103,)
(103,)
164151.15000000002
(103,)
(103,)
(103,)
164151.15000000002
(103,)
(103,)
(103,)
173336.2
(103,)
(103,)
(103,)
173336.2
(103,)
(103,)
(103,)
138200.9
(103,)
(103,)
(

(103,)
156450.6
(103,)
(103,)
(103,)
156450.6
(103,)
(103,)
(103,)
160572.7
(103,)
(103,)
(103,)
160572.7
(103,)
(103,)
(103,)
164151.15000000002
(103,)
(103,)
(103,)
164151.15000000002
(103,)
(103,)
(103,)
173336.2
(103,)
(103,)
(103,)
173336.2
(103,)
(103,)
(103,)
138200.9
(103,)
(103,)
(103,)
138200.9
(103,)
(103,)
(103,)
156450.6
(103,)
(103,)
(103,)
156450.6
(103,)
(103,)
(0,)
nan
(103,)
(0,)


ValueError: operands could not be broadcast together with shapes (0,) (103,) 

In [14]:
features_list = get_features_series_list(daily_data)
features_df = get_features_df(features_list)

best_gmm, features_gmm = get_best_gmm(features_list)
best_bgm, features_bgm = get_best_bgm(features_list)
best_ghmm, features_ghmm = get_best_ghmm(features_list)

##recuperar features

train_gmm, test_gmm = split_train_test_data(features_df[features_gmm], 2)

model_prediction = fit_model(best_gmm, features_df[features_gmm], train_gmm, features_gmm)
regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")
regime = regime.resample("5T").asfreq().fillna(method="ffill")

Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 100} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.3543256696604773
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.40578204755528824
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 200, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.4871622821911892


In [17]:
regime_intraday = pd.merge(intraday_data, regime, left_index=True, right_index=True)
regime_intraday = regime_intraday.astype({"regime":"int32"})

regime_daily = pd.merge(intraday_data, regime, left_index=True, right_index=True)
regime_daily = regime_daily.astype({"regime":"int32"})

intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

In [74]:
n_components = [2,3,4]
cov_type = ['full', 'diag', 'spherical', 'tied']
max_iter = [100, 200, 400, 800]

dict_scores = {}
dict_features = {}
for features_combination in list(powerset(features_list)):
    features_df = get_features_df(features_combination)
    train_data, test_data = split_train_test_data(features_df, 2)
    for n_comp in n_components:
        for cov in cov_type:
            for iters in max_iter:
                model = EvaluatedGMM(n_comp, cov, iters, train_data, test_data)
                model.fit_model()
                sc = model.compute_silhouette_score()
                dict_scores[sc] = model.get_params()
                dict_features[sc] = model.get_features()

max_sc = max(list(dict_scores.keys()))
max_params = dict_scores.get(max_sc)
features_used = dict_features.get(max_sc)
print("Optimal params are {} using {} obtaining a Silhouette Score of {}".format(max_params, list(features_used), max_sc))

best_model = GaussianMixture(n_components=max_params.get('n_components'), 
                             covariance_type=max_params.get('covariance_type'), 
                             max_iter=max_params.get('max_iter'), 
                             n_init=100)

ValueError: Number of labels is 1. Valid values are 2 to n_samples - 1 (inclusive)

# COMPARACIÓN

In [42]:
intraday_data

Unnamed: 0_level_0,open,low,high,close,volume,turnover,vwap
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2003-06-19 17:35:00,32.00,31.96,32.02,31.96,41910,1339443.60,31.960000
2003-06-20 09:05:00,31.70,31.61,31.76,31.73,19388,614962.85,31.718736
2003-06-20 09:10:00,31.71,31.65,31.73,31.72,12301,390105.50,31.713316
2003-06-20 09:15:00,31.73,31.65,31.78,31.75,10060,319272.22,31.736801
2003-06-20 09:20:00,31.80,31.80,31.97,31.84,25388,809844.84,31.898725
...,...,...,...,...,...,...,...
2019-10-11 17:15:00,64.77,64.77,64.87,64.77,33398,2164312.07,64.803643
2019-10-11 17:20:00,64.76,64.72,64.86,64.84,55348,3585546.75,64.781867
2019-10-11 17:25:00,64.83,64.80,64.87,64.83,56676,3674304.05,64.829982
2019-10-11 17:30:00,64.84,64.80,64.84,64.80,6646,430660.80,64.800000


In [None]:
print("Backtesting {}. Using data from {}\n".format(current_ticker, daily_data.index[0].year))
first_year = intraday_data.index[0].year
if from_year >= first_year:
    intraday_data = intraday_data[intraday_data.index.year >= from_year]

print("Backtesting {}. Using data from {}\n".format(ticker, formatted_df.index[0].year))

enough_data = True

last_year = intraday_data.index.year.unique()[-1]
algo_df = prepareDataframe(formatted_df)
train = algo_df[algo_df.index.year < last_year]
test = algo_df[algo_df.index.year >= last_year]

## If test data has less than 150 business days, use previous year
if len(test) < 150*102:
    train = algo_df[algo_df.index.year < (last_year - 1)]
    test = algo_df[algo_df.index.year >= (last_year - 1)]
    enough_data = False

In [43]:
intraday_data

Unnamed: 0_level_0,open,low,high,close,volume,turnover,vwap
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2003-06-19 17:35:00,32.00,31.96,32.02,31.96,41910,1339443.60,31.960000
2003-06-20 09:05:00,31.70,31.61,31.76,31.73,19388,614962.85,31.718736
2003-06-20 09:10:00,31.71,31.65,31.73,31.72,12301,390105.50,31.713316
2003-06-20 09:15:00,31.73,31.65,31.78,31.75,10060,319272.22,31.736801
2003-06-20 09:20:00,31.80,31.80,31.97,31.84,25388,809844.84,31.898725
...,...,...,...,...,...,...,...
2019-10-11 17:15:00,64.77,64.77,64.87,64.77,33398,2164312.07,64.803643
2019-10-11 17:20:00,64.76,64.72,64.86,64.84,55348,3585546.75,64.781867
2019-10-11 17:25:00,64.83,64.80,64.87,64.83,56676,3674304.05,64.829982
2019-10-11 17:30:00,64.84,64.80,64.84,64.80,6646,430660.80,64.800000
