# Backtest VWAP performance

## Imports

In [1]:
import os
import pandas as pd
import numpy as np
import warnings
import math

from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, TimeSeriesSplit, GridSearchCV
from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
from sklearn.metrics import silhouette_score
from hmmlearn.hmm import GaussianHMM, GMMHMM, MultinomialHMM
from best_model_selection import *

from dateutil.relativedelta import relativedelta
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from itertools import chain, combinations

def powerset(iterable):
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(3, len(s)+1))

In [3]:
## GLOBAL VARS
## GLOBAL VARS
R_DF_COL_NAMES = ['mean_static', 'sd_static', 
                  'mean_dynamic', 'sd_dynamic',
                  'mean_static_gmm', 'sd_static_gmm', 
                  'mean_dynamic_gmm', 'sd_dynamic_gmm', 
                  'mean_static_bgm', 'sd_static_bgm', 
                  'mean_dynamic_bgm', 'sd_dynamic_bgm', 
                  'mean_static_ghmm', 'sd_static_ghmm', 
                  'mean_dynamic_ghmm', 'sd_dynamic_ghmm', 
                  'from_year']
DATE_TIME = 'date_time'
DATE = 'date'
TIME = 'time'
OPEN_PRICE = 'open'
HIGH_PRICE = 'high'
LOW_PRICE = 'low'
CLOSE_PRICE = 'close'
VOLUME = 'volume'
TURNOVER = 'turnover'
VWAP = 'vwap'
FEATURES = ['high_low_spread', "open_close_rets", "log_total_traded_vol", "daily_log_return", "short_term_vol", "long_term_vol", "money_flow_index"]

## Functions

In [4]:
def load_data(file):
    filepath = os.path.join(DATAPATH, file)
    df = pd.read_csv("file:///" + filepath, parse_dates=[['<DTYYYYMMDD>', '<TIME>']])

    return df

def format_data(df):
    returned_df = df.copy()
    returned_df.drop(labels=["<PER>", "<OPENINT>"], axis="columns", inplace=True)
    returned_df.columns = ['date_time', 'ticker', 'open', 'high', 'low', 'close', 'volume']
    returned_df.set_index('date_time', drop=True, inplace=True)
    returned_df = add_datetime(returned_df)

    return returned_df

def get_open_close_time(df):
    open_time = df.time.min()
    close_time = df.time.max()
    
    return open_time, close_time

def get_open_close_auction_time(df):
    open_time = df.time.min()
    auction_time = df.time.max()
    close_time = df[df[TIME] != df.time.max()].time.max()
    
    return open_time, close_time, auction_time

def get_intraday_data(df, start_time, end_time):
    df = df.between_time(open_time, end_time)
    df[TURNOVER] = df[VOLUME]*df[CLOSE_PRICE]
    df_resampled = df.resample('5T').agg({OPEN_PRICE: "first", 
                                          LOW_PRICE: "min", 
                                          HIGH_PRICE: "max", 
                                          CLOSE_PRICE: "last", 
                                          VOLUME: "sum", 
                                          TURNOVER: "sum"})
    df_resampled[VWAP] = df_resampled[TURNOVER]/df_resampled[VOLUME]
    df_resampled = df_resampled[(df_resampled.index.weekday != 5) | (df_resampled.index.weekday != 6)]
#     open_resampled = df_resampled.index.time.min()
#     close_resmapled = df_resampled.index.time.max()
    
#     print(open_resampled)
#     print(close_resampled)
#     df_resampled = df_resampled.between_time(open_resampled, close_resampled)
#     df_resampled.fillna(method='ffill', inplace=True)
    df_resampled.dropna(inplace=True)

    return df_resampled

def get_daily_data(df):
    daily_data = df.groupby(df.index.date).agg({OPEN_PRICE: "first", 
                                                LOW_PRICE: "min",
                                                HIGH_PRICE: "max", 
                                                CLOSE_PRICE: "last", 
                                                VOLUME: "sum", 
                                                TURNOVER: "sum"})
    daily_data[VWAP] = daily_data.turnover/daily_data.volume
    daily_data = daily_data[daily_data[VOLUME] > 0.1]
    daily_data = daily_data[(daily_data.index != 6) & (daily_data.index != 7)]
    daily_data.index = pd.to_datetime(daily_data.index)
    daily_data = daily_data.resample('B').first()
    daily_data.dropna(inplace=True)
    
    
    return daily_data

def split_train_test_data(df, size_in_years):
    training_data = df[df.index[0]:df.index[-1] - relativedelta(years=size_in_years, hours=-9, minutes = -5)]
    test_data = df[df.index[-1] - relativedelta(years=size_in_years, hours=-9):]

    return training_data, test_data

def get_weekdays_data_dict(df):
    weekdays_data_dict = {}
    weekdays_data_dict[0] = df[df.index.weekday == 0]
    weekdays_data_dict[1] = df[df.index.weekday == 1]
    weekdays_data_dict[2] = df[df.index.weekday == 2]
    weekdays_data_dict[3] = df[df.index.weekday == 3]
    weekdays_data_dict[4] = df[df.index.weekday == 4]
    weekdays_data_dict['else'] = df

    return weekdays_data_dict

def get_static_vol_predictor_by_weekday(data, weekdays_data_dict):
    static_vol_predictor = {}
    static_vol_predictor[0] = get_norm_static_vol_predictor(weekdays_data_dict.get(0))
    static_vol_predictor[1] = get_norm_static_vol_predictor(weekdays_data_dict.get(1))
    static_vol_predictor[2] = get_norm_static_vol_predictor(weekdays_data_dict.get(2))
    static_vol_predictor[3] = get_norm_static_vol_predictor(weekdays_data_dict.get(3))
    static_vol_predictor[4] = get_norm_static_vol_predictor(weekdays_data_dict.get(4))
    static_vol_predictor['else'] = get_norm_static_vol_predictor(data)

    return static_vol_predictor

def get_adv_median_by_weekday(data, weekdays_data_dict):
    adv_median = {}
    adv_median[0] = get_adv_median(weekdays_data_dict.get(0))
    adv_median[1] = get_adv_median(weekdays_data_dict.get(1))
    adv_median[2] = get_adv_median(weekdays_data_dict.get(2))
    adv_median[3] = get_adv_median(weekdays_data_dict.get(3))
    adv_median[4] = get_adv_median(weekdays_data_dict.get(4))
    adv_median['else'] = get_adv_median(data)

    return adv_median
  
def get_data_by_weekday(df, weekday):
    df_weekday = df[df.index.weekday == weekday]

    return df_weekday

def add_datetime(df):
    returned_df = df.copy()
    returned_df['date'] = pd.to_datetime(returned_df.index.date)
    returned_df['time'] = pd.to_datetime(returned_df.index, format = "%m-%d-%Y %H:%M:%S")
    returned_df['time'] = returned_df['time'].apply(lambda x: x.strftime('%H:%M:%S'))

    return returned_df

def get_norm_static_vol_predictor(df):
    static_volume_predictor = df.groupby(by=df.index.time)[VOLUME].median()
    norm_static_volume_predictor = static_volume_predictor/sum(static_volume_predictor)

    return norm_static_volume_predictor

def get_reversed_cum_vol(data):
    reversed_cumvol = []
    for day in np.unique(data.index.date):
        reversed_cumvol.append(data[data.index.date == day][VOLUME].cumsum().values[::-1])

    reversed_cumvol = pd.Series(data = np.concatenate(reversed_cumvol), index = data.index)

    return reversed_cumvol

def get_adv_median(df):
    return df['volume'].groupby(df.index.date).sum().median()

def get_adv_mean(df):
    return df['volume'].groupby(df.index.date).sum().median()

def get_daily_VWAP(df):
    return df[TURNOVER].groupby(df.index.date).sum()/df[VOLUME].groupby(df.index.date).sum()
  
def get_vol_predictor_next_bin(data, static_vol_predictor, adv_median):
    volume_predictor_next_interval = data.groupby(data.index.date)[VOLUME].shift(1)
    volume_predictor_next_interval.fillna(int (adv_median.get('else')*static_vol_predictor.get('else').iloc[0]), inplace=True)

    return volume_predictor_next_interval
  
def vwap_static_execution_algo(data, norm_static_vol_predictor, amount_shares, order_side, start_time, end_time, day):
    if day.weekday() == 0:
        vol_predictor = norm_static_vol_predictor.get(0)[start_time:end_time]
    elif day.weekday() == 1:
        vol_predictor = norm_static_vol_predictor.get(1)[start_time:end_time]
    elif day.weekday() == 2:
        vol_predictor = norm_static_vol_predictor.get(2)[start_time:end_time]
    elif day.weekday() == 3:
        vol_predictor = norm_static_vol_predictor.get(3)[start_time:end_time]
    elif day.weekday() == 4:
        vol_predictor = norm_static_vol_predictor.get(4)[start_time:end_time]
    else:
        vol_predictor = norm_static_vol_predictor.get('else')[start_time:end_time]
        
    shares_per_interval = vol_predictor*amount_shares
    
    data['time'] = data.index.time
    data = data.reset_index()
    data = data.set_index(data.time)
    exec_price = (data[HIGH_PRICE]+data[LOW_PRICE])/2
    
    vwap_this_exec_this_day = sum(shares_per_interval*exec_price)/sum(shares_per_interval)
#     vwap_this_exec_this_day = sum(shares_per_interval*data[VWAP])/sum(shares_per_interval)

    return vwap_this_exec_this_day

def vwap_dynamic_execution_algo(data, reversed_cumvol, static_vol_predictor, volume_predictor_next_interval, amount_shares, order_side, start_time, end_time, day):
    shares_per_interval = []
    if day.weekday() == 0:
        shares_per_interval.append(static_vol_predictor.get(0).iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol[reversed_cumvol.index.weekday == 0].groupby(reversed_cumvol[reversed_cumvol.index.weekday == 0].index.time).median()
    elif day.weekday() == 1:
        shares_per_interval.append(static_vol_predictor.get(1).iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol[reversed_cumvol.index.weekday == 1].groupby(reversed_cumvol[reversed_cumvol.index.weekday == 1].index.time).median()
    elif day.weekday() == 2:
        shares_per_interval.append(static_vol_predictor.get(2).iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol[reversed_cumvol.index.weekday == 2].groupby(reversed_cumvol[reversed_cumvol.index.weekday == 2].index.time).median()
    elif day.weekday() == 3:
        shares_per_interval.append(static_vol_predictor.get(3).iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol[reversed_cumvol.index.weekday == 3].groupby(reversed_cumvol[reversed_cumvol.index.weekday == 3].index.time).median()
    elif day.weekday() == 4:
        shares_per_interval.append(static_vol_predictor.get(4).iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol[reversed_cumvol.index.weekday == 4].groupby(reversed_cumvol[reversed_cumvol.index.weekday == 4].index.time).median()
    else:
        shares_per_interval.append(static_vol_predictor.get('else').iloc[0]*amount_shares)
        vol_predictor = reversed_cumvol.groupby(reversed_cumvol.index.time).median()

    for i in range(1, len(data)):
        num = data[VOLUME].iloc[:i].sum()+volume_predictor_next_interval[volume_predictor_next_interval.index.date == day].iloc[i]
        denom = data[VOLUME].iloc[:i].sum()+vol_predictor[i]
        op = amount_shares*(num/denom)
        shares_next_interval = op - sum(shares_per_interval)
        #     shares_next_interval = (amount_shares*((data['volume'].iloc[0:i].sum()+volume_predictor_next_interval.xs(day, level='Date').iloc[i])/(data['volume'].iloc[0:i].sum()+volPredictor[i])))-sum(shares_per_interval)
        shares_per_interval.append(shares_next_interval)

    exec_price = (data[HIGH_PRICE]+data[LOW_PRICE])/2
    
    vwap_this_exec_this_day = sum(shares_per_interval*exec_price)/sum(shares_per_interval)
#     vwap_this_exec_this_day = sum(shares_per_interval*data[VWAP])/sum(shares_per_interval)

    return vwap_this_exec_this_day

def backtest_algo_static(training_data, test_data, adv_median, norm_static_vol_predictor):
    
    backtest_sell_vwap = []
    backtest_buy_vwap = []

    for day in np.unique(test_data.index.date):
        data = test_data[test_data.index.date == day]

        if day.weekday() == 0:
            amount_shares = adv_median.get(0)*0.1
        elif day.weekday() == 1:
            amount_shares = adv_median.get(1)*0.1
        elif day.weekday() == 2:
            amount_shares = adv_median.get(2)*0.1
        elif day.weekday() == 3:
            amount_shares = adv_median.get(3)*0.1
        elif day.weekday() == 4:
            amount_shares = adv_median.get(4)*0.1
        else:
            amount_shares = adv_median.get('else')*0.1
            
        sell_exec = vwap_static_execution_algo(data, norm_static_vol_predictor, amount_shares, 'sell', data.index[0].time(), data.index[-1].time(), day)
        buy_exec = vwap_static_execution_algo(data, norm_static_vol_predictor, amount_shares, 'buy', data.index[0].time(), data.index[-1].time(), day)
        
        backtest_sell_vwap.append(sell_exec)
        backtest_buy_vwap.append(buy_exec)

    return backtest_buy_vwap, backtest_sell_vwap

def backtest_algo_dynamic(train_data, test_data, adv_median, static_vol_predictor):
    reversed_cum_vol = get_reversed_cum_vol(train_data)
    vol_predictor_next_bin = get_vol_predictor_next_bin(test_data, static_vol_predictor, adv_median)

    backtest_sell_vwap_dynamic = []
    backtest_buy_vwap_dynamic = []
    for day in np.unique(test_data.index.date):
        data = test_data[test_data.index.date == day]
        if day.weekday() == 0:
            amount_shares = adv_median.get(0)*0.1
        elif day.weekday() == 1:
            amount_shares = adv_median.get(1)*0.1
        elif day.weekday() == 2:
            amount_shares = adv_median.get(2)*0.1
        elif day.weekday() == 3:
            amount_shares = adv_median.get(3)*0.1
        elif day.weekday() == 4:
            amount_shares = adv_median.get(4)*0.1
        else:
            amount_shares = adv_median.get('else')*0.1
            
        backtest_sell_vwap_dynamic.append(vwap_dynamic_execution_algo(data, 
                                                                      reversed_cum_vol,
                                                                      static_vol_predictor,
                                                                      vol_predictor_next_bin,
                                                                      amount_shares,
                                                                      'sell', 
                                                                      data.index[0].time(),
                                                                      data.index[-1].time(),
                                                                      day))
        backtest_buy_vwap_dynamic.append(vwap_dynamic_execution_algo(data, 
                                                                     reversed_cum_vol,
                                                                     static_vol_predictor,
                                                                     vol_predictor_next_bin,
                                                                     amount_shares,
                                                                     'buy',
                                                                     data.index[0].time(),
                                                                     data.index[-1].time(),
                                                                     day))

    return backtest_buy_vwap_dynamic, backtest_sell_vwap_dynamic
  
def get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag):
    daily_vwap = test_daily_data[VWAP]
    weekdays_data_dict = get_weekdays_data_dict(train_intraday_data)
    static_vol_predictor = get_static_vol_predictor_by_weekday(train_intraday_data, weekdays_data_dict)
    adv_median = get_adv_median_by_weekday(train_intraday_data, weekdays_data_dict)

    if dynamic_flag:
        backtest_buy_vwap, backtest_sell_vwap = backtest_algo_dynamic(train_intraday_data, test_intraday_data, adv_median, static_vol_predictor)
    else:
        backtest_buy_vwap, backtest_sell_vwap = backtest_algo_static(train_intraday_data, test_intraday_data, adv_median, static_vol_predictor)

    static_vwap_comparison = pd.DataFrame(data=daily_vwap.values.tolist(), index=daily_vwap.index, columns=['market_vwap'])
    static_vwap_comparison['backtest_buy_vwap'] = backtest_buy_vwap
    static_vwap_comparison['backtest_sell_vwap'] = backtest_sell_vwap

    static_vwap_comparison['diff_vwap_bps_buy'] = 1e4*(static_vwap_comparison['backtest_buy_vwap']-static_vwap_comparison['market_vwap'])/static_vwap_comparison['market_vwap']
    static_vwap_comparison['diff_vwap_bps_sell'] = -1e4*(static_vwap_comparison['backtest_sell_vwap']-static_vwap_comparison['market_vwap'])/static_vwap_comparison['market_vwap']

    mean_bps_diff_buys = static_vwap_comparison['diff_vwap_bps_buy'].mean()
    sd_bps_diff_buys = static_vwap_comparison['diff_vwap_bps_buy'].std()
    mean_bps_diff_sells = static_vwap_comparison['diff_vwap_bps_sell'].mean()
    sd_bps_diff_sells = static_vwap_comparison['diff_vwap_bps_sell'].std()

    percentiles_diff_vwap_sells = {'1': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.01),
                               '5': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.05),
                               '95': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.95),
                               '99': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.99)}
    percentiles_diff_vwap_buys = {'1': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.01),
                               '5': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.05),
                               '95': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.95),
                               '99': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.99)}

    return mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells

def backtest_algo_static_by_regime(training_data, test_data, adv_median, static_vol_predictor):
    backtest_sell_vwap = []
    backtest_buy_vwap = []

    for day in np.unique(test_data.index.date):
        data = test_data[test_data.index.date == day]
        regime = data['regime'].iloc[0]
        
        if day.weekday() == 0:
            amount_shares = adv_median.get(regime).get(0)*0.1
        elif day.weekday() == 1:
            amount_shares = adv_median.get(regime).get(1)*0.1
        elif day.weekday() == 2:
            amount_shares = adv_median.get(regime).get(2)*0.1
        elif day.weekday() == 3:
            amount_shares = adv_median.get(regime).get(3)*0.1
        elif day.weekday() == 4:
            amount_shares = adv_median.get(regime).get(4)*0.1
        else:
            amount_shares = adv_median.get(regime).get('else')*0.1
            
        sell_exec = vwap_static_execution_algo(data,
                                               static_vol_predictor.get(regime),
                                               amount_shares, 'sell',
                                               data.index[0].time(),
                                               data.index[-1].time(), 
                                               day)
        buy_exec = vwap_static_execution_algo(data,
                                              static_vol_predictor.get(regime), 
                                              amount_shares,
                                              'buy',
                                              data.index[0].time(), 
                                              data.index[-1].time(),
                                              day)
        
        backtest_sell_vwap.append(sell_exec)
        backtest_buy_vwap.append(buy_exec)

    return backtest_buy_vwap, backtest_sell_vwap

def backtest_algo_dynamic_by_regime(training_data, test_data, adv_median, static_vol_predictor, regimes):
    regime_reversed_cum_vol = {}
    regime_vol_predictor_next_bin = {}
    for regime in regimes:
        regime_reversed_cum_vol[regime] = get_reversed_cum_vol(training_data[training_data['regime']==regime])
        regime_vol_predictor_next_bin[regime] = get_vol_predictor_next_bin(test_data[test_data['regime']==regime], 
                                                                           static_vol_predictor.get(regime),
                                                                           adv_median.get(regime))

    backtest_sell_vwap_dynamic = []
    backtest_buy_vwap_dynamic = []
    for day in np.unique(test_data.index.date):
        data = test_data[test_data.index.date == day]
        regime = data['regime'].iloc[0]
        if day.weekday() == 0:
            amount_shares = adv_median.get(regime).get(0)*0.1
        elif day.weekday() == 1:
            amount_shares = adv_median.get(regime).get(1)*0.1
        elif day.weekday() == 2:
            amount_shares = adv_median.get(regime).get(2)*0.1
        elif day.weekday() == 3:
            amount_shares = adv_median.get(regime).get(3)*0.1
        elif day.weekday() == 4:
            amount_shares = adv_median.get(regime).get(4)*0.1
        else:
            amount_shares = adv_median.get(regime).get('else')*0.1
        
        backtest_sell_vwap_dynamic.append(vwap_dynamic_execution_algo(data, 
                                                                      regime_reversed_cum_vol.get(regime),
                                                                      static_vol_predictor.get(regime),
                                                                      regime_vol_predictor_next_bin.get(regime),
                                                                      amount_shares,
                                                                      'sell', 
                                                                      data.index[0].time(),
                                                                      data.index[-1].time(),
                                                                      day))
        backtest_buy_vwap_dynamic.append(vwap_dynamic_execution_algo(data, 
                                                                     regime_reversed_cum_vol.get(regime),
                                                                     static_vol_predictor.get(regime),
                                                                     regime_vol_predictor_next_bin.get(regime),
                                                                     amount_shares,
                                                                     'buy',
                                                                     data.index[0].time(),
                                                                     data.index[-1].time(),
                                                                     day))
            
        

    return backtest_buy_vwap_dynamic, backtest_sell_vwap_dynamic

def get_algo_performance_by_regime(training_data, test_data, vwap_and_regime, dynamic_flag, regimes):
    regime_daily_vwap = {}
    regime_weekdays_data_dict = {}
    regime_static_vol_predictor = {}
    regime_adv_median = {}
    daily_vwap = vwap_and_regime[VWAP]
    for regime in regimes:
        regime_daily_vwap[regime] = vwap_and_regime[vwap_and_regime['regime'] == regime][VWAP]
        regime_weekdays_data_dict[regime] = get_weekdays_data_dict(training_data[training_data['regime'] == regime])
        regime_static_vol_predictor[regime] = get_static_vol_predictor_by_weekday(training_data[training_data['regime'] == regime],
                                                                                  regime_weekdays_data_dict[regime])
        regime_adv_median[regime] = get_adv_median_by_weekday(training_data[training_data['regime'] == regime], 
                                                              regime_weekdays_data_dict[regime])

    if dynamic_flag:
        backtest_buy_vwap, backtest_sell_vwap = backtest_algo_dynamic_by_regime(training_data, 
                                                                                test_data,
                                                                                regime_adv_median,
                                                                                regime_static_vol_predictor,
                                                                                regimes)
    else:
        backtest_buy_vwap, backtest_sell_vwap = backtest_algo_static_by_regime(training_data,
                                                                               test_data,
                                                                               regime_adv_median,
                                                                               regime_static_vol_predictor)

    static_vwap_comparison = pd.DataFrame(data=daily_vwap.values.tolist(), index=daily_vwap.index, columns=['market_vwap'])
    static_vwap_comparison['backtest_buy_vwap'] = backtest_buy_vwap
    static_vwap_comparison['backtest_sell_vwap'] = backtest_sell_vwap

    static_vwap_comparison['diff_vwap_bps_buy'] = 1e4*(static_vwap_comparison['backtest_buy_vwap']-static_vwap_comparison['market_vwap'])/static_vwap_comparison['market_vwap']
    static_vwap_comparison['diff_vwap_bps_sell'] = -1e4*(static_vwap_comparison['backtest_sell_vwap']-static_vwap_comparison['market_vwap'])/static_vwap_comparison['market_vwap']

    mean_bps_diff_buys = static_vwap_comparison['diff_vwap_bps_buy'].mean()
    sd_bps_diff_buys = static_vwap_comparison['diff_vwap_bps_buy'].std()
    mean_bps_diff_sells = static_vwap_comparison['diff_vwap_bps_sell'].mean()
    sd_bps_diff_sells = static_vwap_comparison['diff_vwap_bps_sell'].std()

    percentiles_diff_vwap_sells = {'1': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.01),
                               '5': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.05),
                               '95': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.95),
                               '99': static_vwap_comparison['diff_vwap_bps_sell'].quantile(0.99)}
    percentiles_diff_vwap_buys = {'1': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.01),
                               '5': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.05),
                               '95': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.95),
                               '99': static_vwap_comparison['diff_vwap_bps_buy'].quantile(0.99)}

    return mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells

def get_log_total_trade_vol(df):
    log_total_traded_vol = get_log(get_total_traded_vol(df))
    log_total_traded_vol.name = "log_total_traded_vol"
    
    return log_total_traded_vol 

def get_total_traded_vol(df):
    total_traded_vol = df.resample('B').first()[[VOLUME]]
    total_traded_vol.name = "total_traded_vol"

    return total_traded_vol

def get_log_open_close_returns(df):
    open_close_returns = np.log(df[OPEN_PRICE]/df[CLOSE_PRICE].shift(-1)).dropna()
    open_close_returns.name = "log_overnight_returns"

    return open_close_returns

def get_log_returns(df):
    close = df[CLOSE_PRICE].resample('B').last()
    log_daily_returns = np.log(close/close.shift(-1))
    log_daily_returns.name = "log_returns"

    return log_daily_returns

def get_high_low_spread(df):
    high_low_spread = (df[HIGH_PRICE]-df[LOW_PRICE])
    high_low_spread.name = "high_low_spread"

    return high_low_spread

def get_log(df):
    return np.log(df)

def get_antilog_adv_median(df):
    antilog_adv = np.exp(get_log(daily_data[VOLUME]).rolling(20, min_periods=5).median().dropna())
    antilog_adv.name = "antilog_adv"
    
    return antilog_adv

def get_money_flow_index(df):
    typical_price = (df[CLOSE_PRICE]+df[HIGH_PRICE]+df[LOW_PRICE])/3
    volume = df[VOLUME]
    money_flow_index = typical_price*volume
    money_flow_index.name = "money_flow_index"

    return money_flow_index

def get_beta_market_impact(df):
    adv_antilog = get_antilog_adv_median(df)
    X = df[VOLUME]/adv_antilog
    Y = ((df[VWAP]-df[OPEN_PRICE])/df[OPEN_PRICE])*1e4
    betas = Y/X
    betas.name = "betas_market_impact"
    
    return betas

def get_features_df(list_of_features):
    features_df = pd.concat(list_of_features, axis=1).dropna()
    features_df = features_df.replace([np.inf, -np.inf], np.nan)
    features_df = features_df.dropna()
    features_df = features_df[features_df != 0.0].shift(1).dropna()
#     features_df.rename(columns={VOLUME: "log_total_traded_vol"}, inplace=True)

    return features_df

def fit_model(model, full_data, train_data, list_of_features, pca_flag=True):
    X = train_data.values
    scaler = MinMaxScaler()
    scaler.fit(X)
    X_scaled = scaler.transform(X)

    if pca_flag:
        pca = PCA(n_components=.95)
        pca.fit(X_scaled)
        X_pca = pca.transform(X_scaled)
        fitted_model = model.fit(X_pca)
        X_full = full_data.values
        X_full_scaled = scaler.transform(X_full)
        X_full_pca = pca.transform(X_full_scaled)
        prediction = fitted_model.predict(X_full_pca)
    else:
        fitted_model = model.fit(X_scaled)
        X_full = full_data.values
        X_full_scaled = scaler.transform(X_full)
        prediction = fitted_model.predict(X_full_scaled)

    return prediction

def get_features_series_list(daily_data):
    log_returns = get_log_returns(daily_data)
    adv_antilog = get_antilog_adv_median(daily_data)
    mfi = get_money_flow_index(daily_data)
    betas_mkt_impact = get_beta_market_impact(daily_data)
    log_traded_vol = get_log_total_trade_vol(daily_data)
    
    features = [log_returns, adv_antilog, mfi, betas_mkt_impact, log_traded_vol]
    
    return features



In [5]:
def get_results_df(ticker, list_of_results):
    arr = np.array(list_of_results).reshape(1,17)
    columns_names = R_DF_COL_NAMES
    r_df = pd.DataFrame(data=arr, columns=columns_names, index=[ticker])
    
    return r_df

def get_full_algo_performance(daily_data, intraday_data, current_ticker, from_year = 1900):
    results_list = []
    year_from = daily_data.index[0].year
    pca_flag=False
    
    train_daily_data, test_daily_data = split_train_test_data(daily_data, 2)
    train_intraday_data, test_intraday_data = split_train_test_data(intraday_data, 2)

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag=False)
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag=True)
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))

    features_list = get_features_series_list(daily_data)
    features_df = get_features_df(features_list)

    best_gmm, features_gmm, scores_df = get_best_gmm(features_list, pca_flag=pca_flag)
    best_bgm, features_bgm, scores_df = get_best_bgm(features_list, pca_flag=pca_flag)
    best_ghmm, features_ghmm, scores_df = get_best_ghmm(features_list, pca_flag=pca_flag)
    
    ##test
#     features_to_use = ["log_total_traded_vol", "money_flow_index"]
#     gmm = GaussianMixture(n_components=3, covariance_type='full', max_iter=1000, n_init=100, random_state=3)
    ##test

    train_features_df, test_features_df = split_train_test_data(features_df[features_gmm], 2)

    model_prediction = fit_model(best_gmm, features_df[features_gmm], train_features_df, features_gmm, pca_flag=pca_flag)
    regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")

    intraday_date = intraday_data.copy()
    intraday_date['date'] = pd.to_datetime(intraday_date.index.date)
    regime_intraday = pd.merge(intraday_date, regime, right_index=True, how='left', left_on='date').dropna()
    regime_intraday = regime_intraday.astype({"regime":"int32"})

    regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
    regime_daily = regime_daily.astype({"regime":"int32"})

    intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
    daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))
    
    model_prediction = fit_model(best_bgm, features_df[features_bgm], train_features_df, features_bgm, pca_flag=pca_flag)
    regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")
    regime = regime.resample("5T").asfreq().fillna(method="ffill")

    regime_intraday = pd.merge(intraday_date, regime, right_index=True, how='left', left_on='date').dropna()
    regime_intraday = regime_intraday.astype({"regime":"int32"})

    regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
    regime_daily = regime_daily.astype({"regime":"int32"})

    intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
    daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))
    
    model_prediction = fit_model(best_ghmm, features_df[features_ghmm], train_features_df, features_ghmm, pca_flag=pca_flag)
    regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")
    regime = regime.resample("5T").asfreq().fillna(method="ffill")

    regime_intraday = pd.merge(intraday_date, regime, right_index=True, how='left', left_on='date').dropna()
    regime_intraday = regime_intraday.astype({"regime":"int32"})

    regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
    regime_daily = regime_daily.astype({"regime":"int32"})

    intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
    daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))
    
    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
    results_list.append(np.abs(mean_bps_diff_buys))
    results_list.append(np.abs(sd_bps_diff_buys))
    results_list.append(from_year)
    
    results_df = get_results_df(current_ticker, results_list)
    
    return results_df
    
def printFullAlgoPerformance(daily_data, intraday_data, current_ticker, from_year = 1900):
    print("Backtesting {}. Using data from {}\n".format(current_ticker, daily_data.index[0].year))
    train_daily_data, test_daily_data = split_train_test_data(daily_data, 2)
    train_intraday_data, test_intraday_data = split_train_test_data(intraday_data, 2)

#     print("Backtesting performance with static predictor...\n")
#     mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag=False)
#     print("The performance of the algorithm using static predictor on " + current_ticker + " is")
#     print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))

#     print("-"*80)

#     print("Backtesting performance with dynamic predictor...\n")
#     mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance(train_intraday_data, test_intraday_data, train_daily_data, test_daily_data, dynamic_flag=True)
#     print("The performance of the algorithm using dynamic predictor on " + current_ticker + " is")
#     print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))

#     print("-"*80)

    features_list = get_features_series_list(daily_data)
    features_df = get_features_df(features_list)

    best_gmm, features_gmm = get_best_gmm(features_list)
    best_bgm, features_bgm = get_best_bgm(features_list)
    best_ghmm, features_ghmm = get_best_ghmm(features_list)
    
    ##test
    features_to_use = ["log_total_traded_vol", "money_flow_index"]
    gmm = GaussianMixture(n_components=3, covariance_type='full', max_iter=1000, n_init=100, random_state=3)
    ##test

    train_features_df, test_features_df = split_train_test_data(features_df[features_to_use], 2)
    
    print(train_features_df)
    print(test_features_df)

    model_prediction = fit_model(gmm, features_df[features_to_use], train_features_df, features_to_use)
    regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")
#     regime = regime.resample("5T").asfreq().fillna(method="ffill")

    intraday_date = intraday_data.copy()
    intraday_date['date'] = pd.to_datetime(intraday_date.index.date)
    regime_intraday = pd.merge(intraday_date, regime, right_index=True, how='left', left_on='date').dropna()
    regime_intraday = regime_intraday.astype({"regime":"int32"})

    regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
    regime_daily = regime_daily.astype({"regime":"int32"})

    intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
    daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

    #     print("Model used: GMM, features used: [{}]\n".format(", ".join(features_gmm)))
    print("Backtesting performance of static predictor using segmented data...\n")
    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
    print("The performance of the algorithm using static predictor with segmented data on " + current_ticker + " is")
    print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
    print("-"*80)

    print("Backtesting performance of dynamic predictor using segmented data ...\n")
    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
    print("The performance of the algorithm using dynamic predictor with segmented data on " + current_ticker + " is")
    print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
    print("-"*80)
    
#     train_features_df, test_features_df = split_train_test_data(features_df[["volume", "betas_market_impact", "money_flow_index"]], 2)

#     model_prediction = fit_model(best_bgm, features_df[["volume", "betas_market_impact", "money_flow_index"]], train_features_df, features_bgm)
    model_prediction = fit_model(best_bgm, features_df[features_to_use], train_features_df, features_to_use)
    regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")
    regime = regime.resample("5T").asfreq().fillna(method="ffill")

    regime_intraday = pd.merge(intraday_date, regime, right_index=True, how='left', left_on='date').dropna()
    regime_intraday = regime_intraday.astype({"regime":"int32"})

    regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
    regime_daily = regime_daily.astype({"regime":"int32"})

    intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
    daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

    #     print("Model used: GMM, features used: [{}]\n".format(", ".join(features_gmm)))
    print("Backtesting performance of static predictor using segmented data...\n")
    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
    print("The performance of the algorithm using static predictor with segmented data on " + current_ticker + " is")
    print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
    print("-"*80)

    print("Backtesting performance of dynamic predictor using segmented data ...\n")
    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
    print("The performance of the algorithm using dynamic predictor with segmented data on " + current_ticker + " is")
    print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
    print("-"*80)
    
#     train_features_df, test_features_df = split_train_test_data(features_df[["volume", "betas_market_impact", "money_flow_index"]], 2)
    
#     model_prediction = fit_model(best_ghmm, features_df[["volume", "betas_market_impact", "money_flow_index"]], train_features_df, features_ghmm)
    model_prediction = fit_model(best_ghmm, features_df[features_to_use], train_features_df, features_to_use)
    regime = pd.Series(data = model_prediction, index = features_df.index).rename("regime")
    regime = regime.resample("5T").asfreq().fillna(method="ffill")

    regime_intraday = pd.merge(intraday_date, regime, right_index=True, how='left', left_on='date').dropna()
    regime_intraday = regime_intraday.astype({"regime":"int32"})

    regime_daily = pd.merge(daily_data, regime, left_index=True, right_index=True)
    regime_daily = regime_daily.astype({"regime":"int32"})

    intraday_regime_train, intraday_regime_test = split_train_test_data(regime_intraday, 2)
    daily_regime_train, daily_regime_test = split_train_test_data(regime_daily, 2)

    #     print("Model used: GMM, features used: [{}]\n".format(", ".join(features_gmm)))
    print("Backtesting performance of static predictor using segmented data...\n")
    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=False, regimes=regime_intraday['regime'].unique())
    print("The performance of the algorithm using static predictor with segmented data on " + current_ticker + " is")
    print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
    print("-"*80)

    print("Backtesting performance of dynamic predictor using segmented data ...\n")
    mean_bps_diff_buys, sd_bps_diff_buys, mean_bps_diff_sells, sd_bps_diff_sells, percentiles_diff_vwap_buys, percentiles_diff_vwap_sells = get_algo_performance_by_regime(intraday_regime_train, intraday_regime_test, daily_regime_test[[VWAP, "regime"]], dynamic_flag=True, regimes=regime_intraday['regime'].unique())
    print("The performance of the algorithm using dynamic predictor with segmented data on " + current_ticker + " is")
    print("Mean: ±%f\nStandard Dev: ±%f" % (np.abs(mean_bps_diff_buys), sd_bps_diff_buys))
    print("-"*80)

In [6]:
warnings.filterwarnings("ignore")
DATAPATH = os.getcwd()+"\\Data\\"
filepath_anon = os.path.join(DATAPATH, "volume_price_2014_18.csv")

In [7]:
files_list = []
for root, dirs, files in os.walk(DATAPATH):
    for file in files:
        files_list.append(file)

### Por item

In [46]:
df[df['date'] == '2020-03-09'].head(50)

Unnamed: 0_level_0,ticker,open,high,low,close,volume,date,time
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-03-09 09:31:00,AAPL,263.75,266.92,263.0,266.19,2158162,2020-03-09,09:31:00
2020-03-09 09:32:00,AAPL,266.19,267.25,265.16,267.1,304557,2020-03-09,09:32:00
2020-03-09 09:33:00,AAPL,267.12,267.93,266.87,267.84,306729,2020-03-09,09:33:00
2020-03-09 09:34:00,AAPL,267.87,268.13,266.64,267.41,313054,2020-03-09,09:34:00
2020-03-09 09:35:00,AAPL,267.47,267.64,267.19,267.43,46808,2020-03-09,09:35:00
2020-03-09 09:50:00,AAPL,267.45,269.3,265.01,269.25,536551,2020-03-09,09:50:00
2020-03-09 09:51:00,AAPL,269.16,269.25,264.36,268.13,350914,2020-03-09,09:51:00
2020-03-09 09:52:00,AAPL,268.27,270.44,267.01,267.25,322718,2020-03-09,09:52:00
2020-03-09 09:53:00,AAPL,267.28,269.5,267.08,269.5,274057,2020-03-09,09:53:00
2020-03-09 09:54:00,AAPL,269.5,272.84,269.43,272.58,375866,2020-03-09,09:54:00


In [45]:
intraday_data[intraday_data.index.date == pd.to_datetime('2020-03-09')]

Unnamed: 0_level_0,open,low,high,close,volume,turnover,vwap
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-03-09 09:30:00,263.75,263.00,268.13,267.41,3082502,8.216964e+08,266.567997
2020-03-09 09:35:00,267.47,267.19,267.64,267.43,46808,1.251786e+07,267.430000
2020-03-09 09:50:00,267.45,264.36,272.84,272.58,1860106,5.011152e+08,269.401437
2020-03-09 09:55:00,272.58,270.79,275.10,271.72,1533456,4.184631e+08,272.888889
2020-03-09 10:00:00,271.64,271.51,274.44,271.86,930362,2.538639e+08,272.865688
...,...,...,...,...,...,...,...
2020-03-09 15:35:00,270.96,270.93,273.80,273.17,825756,2.249774e+08,272.450163
2020-03-09 15:40:00,273.13,272.37,273.61,272.83,699281,1.908920e+08,272.983314
2020-03-09 15:45:00,272.83,271.66,273.65,273.44,805900,2.198859e+08,272.845191
2020-03-09 15:50:00,273.46,270.98,273.70,271.77,1071133,2.914611e+08,272.105388


### Probando con precio medio entre high y low

In [12]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[0]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6019629065587822
Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 400, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6054584896185055
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 100, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.3263155233956592


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
AAPL,1.396556,11.599807,1.055697,23.878009,1.297945,12.064786,0.380458,22.235763,1.392944,11.675734,0.945408,15.522455,1.346832,12.125675,0.990051,11.328981,1900.0


### Probando con precio medio entre high y low

In [8]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[0]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.601962906558782
Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6054584896185055
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 400, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.32631552339565945


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
AAPL,0.172362,1.136649,0.135569,2.404073,0.164319,1.181817,0.07475,2.248926,0.173006,1.145077,0.117962,1.604024,0.17038,1.19833,0.162334,1.307393,1900.0


In [9]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[1]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.705193036479779
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.705193036479779
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
AIR,0.09129,1.558926,0.040578,2.172817,0.075998,1.710671,0.070776,1.978492,0.069632,1.400107,0.11923,2.503843,0.092909,1.563379,0.034293,2.190229,1900.0


In [10]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[2]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.472094960350096
Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.491493229141817
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
BBVA.MC,0.018221,1.244839,0.025108,0.973098,0.002378,1.235923,0.011129,0.923723,0.011796,1.2342,0.002143,0.973919,0.015895,1.243448,0.041661,0.968049,1900.0


**BOEING**

In [11]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[4]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'tied', 'max_iter': 100} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.3779697166126222
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 400, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.3779697166126225
Optimal params are {'n_components': 2, 'covariance_type': 'tied', 'max_iter': 100, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.3186685105104767


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
BA,0.065086,1.851205,0.180401,6.280641,0.088409,2.145334,0.177647,5.994589,0.150649,2.187025,0.165323,5.583865,0.089365,1.914903,0.152562,6.917942,1900.0


In [12]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[5]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 100} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.502416844499417
Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5130444273895015
Optimal params are {'n_components': 2, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25196568026517097


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
CBK,0.062419,2.321645,0.256453,5.24347,0.036933,2.505276,0.145839,2.013812,0.047697,2.368665,0.028387,3.831598,0.033307,2.490096,0.154228,1.98783,1900.0


In [8]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[6]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5033747170848477
Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5044741130395229
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 100, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.4025288896346846


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
E-DGE,0.044524,0.608996,0.093752,1.714131,0.064285,0.802603,0.089479,1.409895,0.036593,0.633548,0.119952,1.663247,0.047796,0.663211,0.073806,1.556274,1900.0


### A partir de aquí, con ejecución bien de precios

In [14]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[8]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.49153912431388347
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.3344282523355486
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.38927660959361965


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
ENGI,0.442662,8.249448,1.932665,29.826655,0.286264,7.714994,1.929084,27.015995,0.554641,9.036291,1.907891,25.784267,0.571363,9.344834,1.820013,24.541639,1900.0


In [30]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[11]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5128728677893343
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5386410395929189
Optimal params are {'n_components': 2, 'covariance_type': 'tied', 'max_iter': 100, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.09268982251767185


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
GE,0.82966,19.244686,4.557188,50.963703,0.647877,19.731922,3.788462,39.057371,0.688943,20.481339,2.469481,37.526683,0.741453,19.879695,2.654356,35.081509,1900.0


In [31]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[12]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5953031399242293
Optimal params are {'n_components': 2, 'covariance_type': 'full', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5953031399242297
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 200, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of -0.15055811885738063


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
GILD,0.199523,18.308264,1.196985,25.519445,0.071681,19.533026,0.59028,25.15578,0.114911,18.991733,1.379861,29.45727,0.166583,18.659649,0.766778,18.185382,1900.0


In [32]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[13]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6251761518396748
Optimal params are {'n_components': 2, 'covariance_type': 'full', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5722119123823449
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 400, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5632817336851466


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
E-GSK,0.080696,8.800301,0.579514,32.253741,0.098244,9.535633,0.472138,28.867988,0.139984,9.237068,0.451525,29.697345,0.300463,9.499814,0.224447,28.08506,1900.0


In [33]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[14]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6577157350370492
Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6577157350370493
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 100, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.27383273245205597


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
IBE.MC,0.641527,7.381247,1.296622,9.758614,0.76301,7.366366,0.525529,8.338283,0.651443,7.360257,0.979542,8.389449,0.57586,7.335337,0.465046,7.822871,1900.0


In [34]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[15]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.7244535094732014
Optimal params are {'n_components': 2, 'covariance_type': 'full', 'max_iter': 400, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.7244535094732014
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.4387064482330712


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
MEL.MC,2.964914,14.428485,4.59661,32.858181,2.589506,14.090406,4.290855,29.248963,4.059364,13.020379,3.70309,28.183428,3.923666,14.293276,3.780634,25.664347,1900.0


In [35]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[16]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5413037698497153
Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5433283210662437
Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 100, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5280337780201557


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
MSFT,1.657929,8.527853,1.790677,13.158808,1.632105,9.206094,1.448348,15.062327,1.683343,8.98755,1.603884,14.658482,1.693401,8.612301,1.039795,8.810342,1900.0


In [37]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[18]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5032140390239093
Optimal params are {'n_components': 2, 'covariance_type': 'full', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5442147643531762
Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 400, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.29873091265383916


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
NOKIA,0.086009,11.5771,0.249896,12.642312,0.12581,11.64478,0.575889,13.196772,0.054513,11.582836,0.30726,13.633442,0.053845,11.563337,0.244171,14.363563,1900.0


In [38]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[19]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25
Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.2586739452529631
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
REE.MC,0.09871,11.124457,0.56101,11.219609,0.272763,11.109151,0.43036,11.640716,0.173455,11.106795,0.557082,11.523021,0.103369,11.498777,0.512012,12.884346,1900.0


In [9]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[20]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, auction_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'full', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.47946243016580625
Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.44661696936133477
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.2675280754776613


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
GLE,0.418003,14.968742,1.871768,32.362549,0.463511,14.071344,1.778475,34.369286,0.393302,14.390398,1.724783,33.306645,0.501227,15.920049,1.266063,28.013087,1900.0


In [10]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[21]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, auction_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.4550973818373307
Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.4550973818373307
Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 100, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.2252904979493954


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
TMUS,0.628226,15.093506,0.119691,25.075955,0.458968,15.527825,0.047892,20.119438,0.686215,15.361643,0.259411,20.40795,0.657298,15.250631,0.101092,20.335488,1900.0


In [11]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[22]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, auction_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'full', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.4331050085510489
Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5591032229692747
Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 400, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of -0.3064710113190657


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
TEF.MC,0.710262,13.773641,0.167855,13.978297,0.348496,10.320449,0.67034,21.663034,0.356181,10.323648,0.925239,21.708843,0.333857,9.864071,1.025524,22.580202,1900.0


In [12]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[23]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, auction_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 100} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5735606734855675
Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 400, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6022933698524995
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5229394154534579


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
E-TSCO,0.776956,10.879526,0.248084,28.709273,0.641521,10.304015,0.051522,27.148338,0.709295,10.262502,0.220987,26.859087,0.746793,10.558768,0.281199,27.157256,1900.0


In [8]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[24]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, auction_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5901338782219286
Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.5901338782219278
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
VIV,0.876139,11.209795,1.911583,16.197323,0.869809,10.158327,2.045499,17.815336,0.633924,10.50182,1.963376,17.275445,0.882187,11.36131,1.95478,16.469104,1900.0


In [9]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[25]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, auction_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 400} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6387915310122773
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6678109194742713
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800, 'algorithm': 'viterbi'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.49917154757365007


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
WMT,0.077632,8.874011,0.386089,11.748196,0.125229,10.621167,0.282899,8.253769,0.046488,9.870742,0.415481,12.271869,0.155769,9.672265,0.25084,9.298319,1900.0


## Backtest

In [47]:
get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6187127688096521
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.6187127688096521
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'log_total_traded_vol'] obtaining a Silhouette Score of 0.25


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
BMW,0.125761,1.206045,0.052502,1.035624,0.14874,1.2373,0.03708,1.030391,0.138108,1.305583,0.068016,1.281392,0.139875,1.338019,0.065808,1.142081,1900.0


In [None]:
all_results = None
for instrument in files_list:
    raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[3]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
    df = format_data(raw)
    df_less_days = df[df.index.year >= 1900]
    open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
    intraday_data = get_intraday_data(df_less_days, open_time, close_time)
    daily_data = get_daily_data(intraday_data)
    current_ticker = df.ticker.iloc[0]
    
    this_ticker_results = get_full_algo_performance(daily_data, intraday_data, current_ticker)
    if all_results == None:
        all_results = this_ticker_results
    else:
        all_results = pd.concat([all_results, this_ticker_results])

### Optimizando modelo

In [19]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[0]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 800} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.6019655729048375
Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.6019655729048375
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800, 'algorithm': 'map'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.5175625333056264


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
AAPL,1.396556,11.599807,1.055697,23.878009,1.410235,11.485118,1.714385,17.017828,1.465687,11.479375,1.480913,16.343285,1.358213,12.199604,1.303468,13.340874,1900.0


In [20]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[1]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'tied', 'max_iter': 800} using ['money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.7293866583126822
Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.7160714226473224
Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.4275628979413119


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
AIR,0.90639,15.675521,0.370445,21.824672,0.912774,15.870969,0.338775,21.888062,0.940687,15.684749,0.415419,21.79724,0.940687,15.684749,0.415419,21.79724,1900.0


In [8]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[2]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 800} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.5825774262978707
Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.5825774262978707
Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 800, 'algorithm': 'map'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.5825774262978707


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
BBVA.MC,0.266778,12.583242,0.13542,9.797298,0.146774,12.610578,0.110495,9.416146,0.243545,12.54143,0.20968,9.433075,0.241888,12.5552,0.316381,9.56322,1900.0


In [9]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[3]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 800} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.6593863615344363
Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.6593863615344363
Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.6169800548862833


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
BMW,1.26112,12.162185,0.499717,10.58948,1.523618,13.473716,0.991175,13.620807,1.358151,13.324877,0.691496,13.576865,1.222004,12.130519,0.563303,10.636721,1900.0


In [10]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[4]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 400} using ['log_returns', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.44332871705314963
Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['antilog_adv', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.4244260747609857
Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.4060982937028613


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
BA,0.540487,18.645351,1.544329,63.028557,1.098496,22.764184,0.593006,55.344016,0.674554,18.459424,1.061292,62.050682,0.674531,18.461562,1.065048,62.045347,1900.0


In [11]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[5]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 400} using ['money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.5122221389129115
Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_process'} using ['money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.5313579425314044
Optimal params are {'n_components': 2, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'money_flow_index', 'betas_market_impact'] obtaining a Silhouette Score of 0.4941825346709258


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
CBK,0.603572,23.493326,2.974795,52.810821,0.336519,25.363131,1.345855,20.355564,0.315237,25.185222,1.447788,19.962747,0.607175,23.503256,2.899194,52.651024,1900.0


In [8]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[6]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800} using ['money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.620169179475285
Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_process'} using ['money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.6088846236903803
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.6101499668584108


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
E-DGE,0.474081,6.121075,0.98286,17.181186,0.440568,6.470698,1.001339,16.563088,0.42314,6.323802,0.905903,17.647829,0.368475,5.586555,0.860385,18.915563,1900.0


In [10]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[8]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'spherical', 'max_iter': 200} using ['log_returns', 'antilog_adv', 'money_flow_index'] obtaining a Silhouette Score of 0.633919810641398
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 400, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index'] obtaining a Silhouette Score of 0.633919810641398
Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'antilog_adv', 'money_flow_index'] obtaining a Silhouette Score of 0.582705679745969


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
ENGI,0.442662,8.249448,1.932665,29.826655,0.392805,7.995978,1.75669,25.275127,0.448689,8.686734,2.059922,26.305974,0.487881,8.797283,1.921617,25.346271,1900.0


In [13]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[11]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'diag', 'max_iter': 800} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.5812087154521326
Optimal params are {'n_components': 4, 'covariance_type': 'diag', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.5759393461714769
Optimal params are {'n_components': 2, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'money_flow_index', 'betas_market_impact'] obtaining a Silhouette Score of 0.38163283926136454


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
GE,0.82966,19.244686,4.557188,50.963703,0.545857,19.818586,2.139846,36.326973,0.755268,19.867788,2.031265,34.196239,0.801532,19.290887,4.394137,50.858747,1900.0


In [8]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[12]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'spherical', 'max_iter': 800} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.6316071016266339
Optimal params are {'n_components': 4, 'covariance_type': 'spherical', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.6316071016266339
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800, 'algorithm': 'map'} using ['money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.5308498330231192


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
GILD,0.199523,18.308264,1.196985,25.519445,0.241561,18.653743,0.973346,22.085533,0.150796,18.692089,0.99992,20.592166,0.157696,18.268276,1.210519,25.481787,1900.0


In [9]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[13]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 800} using ['antilog_adv', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.677061529130971
Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_process'} using ['antilog_adv', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.677061529130971
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'algorithm': 'map'} using ['antilog_adv', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.677061529130971


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
E-GSK,0.080696,8.800301,0.579514,32.253741,0.217374,8.606517,0.447085,32.709991,0.210388,9.500779,0.404074,30.461113,0.072137,8.008455,0.624109,33.61243,1900.0


In [10]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[14]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 800} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.6330493855189759
Optimal params are {'n_components': 2, 'covariance_type': 'full', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.6408744914067704
Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 800, 'algorithm': 'map'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.668543457073951


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
IBE.MC,0.641527,7.381247,1.296622,9.758614,0.717818,7.309807,0.902914,8.088339,0.641266,7.383605,1.180116,10.39296,0.640363,7.376157,1.050765,7.790668,1900.0


In [11]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[15]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800} using ['money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.8267796537534926
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.8267796537534926
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.8267796537534926


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
MEL.MC,2.964914,14.428485,4.59661,32.858181,2.846163,14.832046,5.109781,33.193637,2.713636,14.832699,4.723235,32.859039,2.992869,13.901704,4.793368,30.344673,1900.0


In [12]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[16]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'spherical', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'betas_market_impact'] obtaining a Silhouette Score of 0.5716518002226753
Optimal params are {'n_components': 4, 'covariance_type': 'spherical', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'betas_market_impact'] obtaining a Silhouette Score of 0.5716518002226753
Optimal params are {'n_components': 3, 'covariance_type': 'diag', 'max_iter': 800, 'algorithm': 'viterbi'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.5226039785214852


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
MSFT,1.657929,8.527853,1.790677,13.158808,1.612425,8.895652,2.210287,11.546039,1.688512,8.962219,2.255762,11.598821,1.656063,8.524418,1.869918,13.163509,1900.0


In [13]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[18]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800} using ['log_returns', 'money_flow_index', 'betas_market_impact'] obtaining a Silhouette Score of 0.715050658909821
Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'money_flow_index', 'betas_market_impact'] obtaining a Silhouette Score of 0.715050658909821
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'money_flow_index', 'betas_market_impact'] obtaining a Silhouette Score of 0.715050658909821


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
NOKIA,0.086009,11.5771,0.249896,12.642312,0.188361,11.720024,0.219981,13.703734,0.285286,12.241989,0.141115,13.956781,0.037091,11.5788,0.477674,12.974398,1900.0


In [14]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[19]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.6086116887167146
Optimal params are {'n_components': 4, 'covariance_type': 'full', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.6086116887167146
Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 800, 'algorithm': 'map'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.6086116887167146


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
REE.MC,0.09871,11.124457,0.56101,11.219609,0.218779,11.536446,0.302737,13.603668,0.117056,11.523966,0.521802,12.969526,0.141787,11.276001,0.446988,12.23702,1900.0


In [15]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[20]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'full', 'max_iter': 800} using ['log_returns', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.4764402980397033
Optimal params are {'n_components': 2, 'covariance_type': 'spherical', 'max_iter': 100, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.4764402980397033
Optimal params are {'n_components': 2, 'covariance_type': 'full', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.4185650998656366


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
GLE,0.419535,14.966104,2.284036,33.529292,0.544593,15.395719,1.846478,32.606842,0.663618,14.695238,1.091437,32.179131,0.439241,14.457026,1.75217,33.134898,1900.0


In [16]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[21]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800} using ['money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.7855403566167458
Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.832682782563471
Optimal params are {'n_components': 2, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.8696302010556539


ValueError: operands could not be broadcast together with shapes (3091,5) (3,) (3091,5) 

In [18]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[22]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 4, 'covariance_type': 'diag', 'max_iter': 800} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.5200279427929585
Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.5386301420807847
Optimal params are {'n_components': 3, 'covariance_type': 'spherical', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'money_flow_index', 'betas_market_impact'] obtaining a Silhouette Score of 0.3778147994816214


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
TEF.MC,0.499464,8.237029,0.981296,14.400765,0.526809,8.368276,0.237542,8.158519,0.543551,8.397913,0.11684,8.034925,0.425467,8.244023,1.115768,14.427633,1900.0


In [19]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[23]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'diag', 'max_iter': 800} using ['log_returns', 'antilog_adv', 'betas_market_impact'] obtaining a Silhouette Score of 0.650846845596423
Optimal params are {'n_components': 4, 'covariance_type': 'diag', 'max_iter': 800, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['log_returns', 'antilog_adv', 'betas_market_impact'] obtaining a Silhouette Score of 0.650846845596423
Optimal params are {'n_components': 3, 'covariance_type': 'full', 'max_iter': 800, 'algorithm': 'map'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.5966740681312116


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
E-TSCO,0.776956,10.879526,0.247111,28.709747,0.567405,10.530537,0.099493,28.19005,0.739847,10.446305,0.558721,26.776425,0.755948,10.864529,0.265341,28.723516,1900.0


In [20]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[24]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.6838865855093906
Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 400, 'weight_concentration_prior_type': 'dirichlet_distribution'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.6838865855093906
Optimal params are {'n_components': 4, 'covariance_type': 'spherical', 'max_iter': 800, 'algorithm': 'map'} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.6838865855093906


Unnamed: 0,mean_static,sd_static,mean_dynamic,sd_dynamic,mean_static_gmm,sd_static_gmm,mean_dynamic_gmm,sd_dynamic_gmm,mean_static_bgm,sd_static_bgm,mean_dynamic_bgm,sd_dynamic_bgm,mean_static_ghmm,sd_static_ghmm,mean_dynamic_ghmm,sd_dynamic_ghmm,from_year
VIV,0.875276,11.210578,1.910846,16.729966,0.815107,10.299091,2.255008,19.749641,0.710729,10.952248,1.974044,17.22798,0.425668,9.930285,2.23854,20.663601,1900.0


In [21]:
raw = pd.read_csv("file:///" + os.path.join(DATAPATH, files_list[25]), parse_dates=[['<DTYYYYMMDD>', '<TIME>']])
df = format_data(raw)
df_less_days = df[df.index.year >= 1900]
open_time, close_time, auction_time = get_open_close_auction_time(df_less_days)
intraday_data = get_intraday_data(df_less_days, open_time, close_time)
daily_data = get_daily_data(intraday_data)
current_ticker = df.ticker.iloc[0]

get_full_algo_performance(daily_data, intraday_data, current_ticker)

Optimal params are {'n_components': 2, 'covariance_type': 'tied', 'max_iter': 800} using ['antilog_adv', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.822659835780838
Optimal params are {'n_components': 4, 'covariance_type': 'tied', 'max_iter': 200, 'weight_concentration_prior_type': 'dirichlet_process'} using ['log_returns', 'antilog_adv', 'money_flow_index', 'betas_market_impact', 'volume'] obtaining a Silhouette Score of 0.7692635349144581
Optimal params are {'n_components': 3, 'covariance_type': 'tied', 'max_iter': 800, 'algorithm': 'map'} using ['log_returns', 'money_flow_index', 'volume'] obtaining a Silhouette Score of 0.6315975805759716


ValueError: operands could not be broadcast together with shapes (3133,5) (3,) (3133,5) 