### Historical Shape Indicator (HSI): ETL for Adviser Validation

In [1]:
import pandas as pd
from pandas import DatetimeIndex
import datetime 
import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
import scipy
from scipy import stats as scs
import statsmodels
from statsmodels import stats
from statsmodels.stats import weightstats
from statsmodels.stats.power import TTestIndPower
import sys
import warnings
if not sys.warnoptions:
    warnings.simplefilter("ignore")
import seaborn as sb
sb.set()
from alpha_vantage.timeseries import TimeSeries
from datetime import datetime, timedelta
np.set_printoptions(precision=2)
%matplotlib inline
from joblib import dump, load
import pickle

#### Raw Data

In [2]:
def get_raw(sym='V'):
    '''
    download the full daily adjusted price history for one symbol

    sym: ticker symbol passed to Alpha Vantage
    returns: the data dictionary from get_daily_adjusted
             (the accompanying metadata is discarded)

    The API key is read from the ALPHAVANTAGE_API_KEY environment
    variable so it does not have to be stored in the notebook; the
    original placeholder string is used when the variable is not set.
    '''
    api_key = os.environ.get('ALPHAVANTAGE_API_KEY', 'enter your access key')
    ts = TimeSeries(key=api_key)
    # the call returns (data, metadata); only the daily price data is used
    data, _meta_data = ts.get_daily_adjusted(sym, outputsize='full')
    return data

#### Format Raw Data

In [3]:
def format_raw(raw_dict):
    '''
    turn the raw download dictionary into a tidy dataframe

    raw_dict: mapping of date string -> mapping of numbered field -> value
    returns: dataframe with one row per date, short column names,
             rows sorted by ascending date and 'date' as datetime
    '''
    # numbered API field names -> short column names
    column_names = {
        'index': 'date',
        '1. open': 'open',
        '2. high': 'high',
        '3. low': 'low',
        '4. close': 'close',
        '5. adjusted close': 'adj_close',
        '6. volume': 'volume',
        '7. dividend amount': 'dividend',
        '8. split coefficient': 'split',
    }

    # dates arrive as the outer keys, so transpose to get one row per date
    frame = pd.DataFrame.from_dict(raw_dict.copy()).T
    frame = frame.reset_index(level=0)
    frame = frame.rename(index=str, columns=column_names)

    # order oldest first, then renumber the rows
    frame = frame.sort_values(by='date', ascending=True).reset_index(drop=True)
    frame.date = pd.to_datetime(frame.date)

    return frame

In [4]:
def scale_adjusted(df_raw):
    '''
    rescale the OHLC prices onto the adjusted-close basis

    df_raw: dataframe with date, open, high, low, close and adj_close
    returns: new dataframe holding date plus the four scaled price columns
    '''
    source = df_raw.copy()

    # per-row ratio of adjusted close to close
    factor = (source['adj_close'].to_numpy().astype(float)
              / source['close'].to_numpy().astype(float))

    scaled = pd.DataFrame()
    scaled['date'] = source['date'].copy()
    for column in ('open', 'high', 'low', 'close'):
        scaled[column] = source[column].to_numpy().astype(float)*factor

    return scaled

#### Preprocess Data

In [5]:
def compute_log_returns(prices):
    '''
    log return of each price relative to the previous row
    (the first row has no predecessor and comes back as NaN)
    '''
    log_prices = np.log(prices)
    return log_prices.diff()

In [6]:
def shift_returns(returns, shift_n):
    '''
    move the return series by shift_n rows for trade assessment
    (a negative shift_n pulls later rows up to earlier positions)
    '''
    shifted = returns.shift(periods=shift_n)
    return shifted

In [7]:
def compute_proj(prices, lookahead_days):
    '''
    fractional price change from each row to the row lookahead_days later
    rows without a price that far ahead come back as NaN
    '''
    future_prices = prices.shift(-lookahead_days)
    change = future_prices - prices
    return change/prices

In [8]:
def compute_day_shape(prices, sigmas, dayspan):
    '''
    classify each row by its price move over dayspan rows, in sigma units
    3 = up more than one sigma, 1 = down more than one sigma,
    2 = within one sigma either way, 0 = no comparison possible (NaN ratio)
    '''
    move = prices - prices.shift(dayspan)
    ratio = move/sigmas

    rose = ratio > 1
    fell = ratio < -1
    flat = (ratio >= -1) & (ratio <= 1)

    return 1*fell + 2*flat + 3*rose

In [9]:
def compute_shape(dayshape, dayspan):
    '''
    pack five day shapes, spaced dayspan rows apart, into one ordinal
    the oldest shape takes the ten-thousands digit, the current one the units
    rows lacking the full history come back as NaN
    '''
    total = 10000*dayshape.shift(4*dayspan)
    for weight, steps_back in ((1000, 3), (100, 2), (10, 1)):
        total = total + weight*dayshape.shift(steps_back*dayspan)
    return total + dayshape

In [10]:
def preprocess(df, lookahead_days=7):
    '''
    compute statistics
    add return parameters
    add lookahead projections (7 days unless lookahead_days is given)
    use day shape spans of 1, 3 and 5 days
    build shape ordinals

    df: dataframe whose columns are, in this order, date, open, high,
        low, close (the price blocks are picked with label slices that
        rely on that column order)
    lookahead_days: number of rows ahead used for the 'proj' column
    returns: dataframe with date, pe1/sd1, pe3/sd3, pe5/sd5, log_ret,
             shift_ret, proj, ds1/ds3/ds5 and integer shp1/shp3/shp5,
             with the first 25 rows removed
    '''
    df_for = df.copy()

    # names for the OHLC columns taken 1 to 4 rows ahead
    shifts = [['o1','h1','l1','c1'],
              ['o2','h2','l2','c2'],
              ['o3','h3','l3','c3'],
              ['o4','h4','l4','c4'],
             ]
    # append the forward-shifted prices; the insertion order matters
    # because the spans below are selected with label slices
    for j, shift in enumerate(shifts, start=1):
        df_for[shift[0]] = df_for.open.shift(-j)
        df_for[shift[1]] = df_for.high.shift(-j)
        df_for[shift[2]] = df_for.low.shift(-j)
        df_for[shift[3]] = df_for.close.shift(-j)

    # price blocks covering 1, 3 and 5 consecutive rows
    p1_col = df_for.loc[:,"open":"close"].astype(float)
    p3_col = df_for.loc[:,"open":"c2"].astype(float)
    p5_col = df_for.loc[:,"open":"c4"].astype(float)
    p_cols = [p1_col, p3_col, p5_col]

    # price estimate (row mean) and standard deviation for each span
    stat_names = [['pe1','sd1'],['pe3','sd3'],['pe5','sd5']]
    for stat, p_col in zip(stat_names, p_cols):
        df_for[stat[0]] = p_col.mean(axis=1)
        df_for[stat[1]] = p_col.std(axis=1)

    # keep date but leave raw data behind
    df_prep = df_for[['date','pe1','sd1','pe3','sd3','pe5','sd5']].copy()

    # daily log returns based on the 1 day price estimates
    df_prep['log_ret'] = compute_log_returns(df_prep['pe1'])

    # next row's return, aligned with the current row
    df_prep['shift_ret'] = shift_returns(df_prep['log_ret'],-1)

    # projected fractional change lookahead_days rows ahead
    df_prep['proj'] = compute_proj(df_prep['pe1'], lookahead_days)

    # day shapes for each span
    dayshapes = ['ds1','ds3','ds5']
    dayspans = [1,3,5]
    for shape, stat, span in zip(dayshapes, stat_names, dayspans):
        df_prep[shape] = compute_day_shape(df_prep[stat[0]], df_prep[stat[1]], span)

    # five-step shape ordinals for each span
    shapes = ['shp1','shp3','shp5']
    for shape, dayshape, span in zip(shapes, dayshapes, dayspans):
        df_prep[shape] = compute_shape(df_prep[dayshape], span)

    # trim the head (its shape ordinals lack the lagged history and are
    # NaN, which would break the integer cast), then format
    df_trim = df_prep[25:].copy()
    df_trim[['shp1','shp3','shp5']] = df_trim[['shp1','shp3','shp5']].astype(int)

    return df_trim

In [11]:
def test_train_split(df_mkt, test_year):
    '''
    split preprocessed data into train and test dataframes
    train data comes from years prior to test year
    data in years beyond the test year is not used
    '''
    df = df_mkt.copy()
    years = df.date.map(lambda x: x.strftime('%Y')).astype(int)
    
    #train = years < test_year for 3 years behind
    train = ((test_year-4 < years) & (years < test_year))
    test = np.isin(years, test_year)

    df_train = df[train].copy()
    df_test = df[test].copy()
    
    return df_train, df_test

#### Shape Ranks

In [12]:
def compute_shaperank(df_train, shapename):
    '''
    rank the shapes found in one shape column of the train data

    df_train: preprocessed train dataframe (needs shapename and 'proj')
    shapename: name of the shape column, e.g. 'shp1'
    returns: dataframe with shape, p_avg, p_std, p_srs and HSI,
             sorted by ascending p_srs; HSI is p_srs min-max scaled
    '''
    shape_col = df_train[shapename]
    proj_col = df_train['proj']

    distinct = list(set(shape_col))
    # rows belonging to other shapes are zeroed by the mask (not dropped),
    # so the mean and standard deviation below run over every row
    masked = [proj_col*(shape_col==code) for code in distinct]

    ranking = pd.DataFrame()
    ranking['shape'] = distinct
    ranking['p_avg'] = [series.mean() for series in masked]
    ranking['p_std'] = [series.std() for series in masked]

    # shape ratio as a mini sharpe
    ranking['p_srs'] = ranking['p_avg']/ranking['p_std']
    ranking = ranking.sort_values(by=['p_srs']).reset_index(drop=True)

    # min-max scale the ratios into the indicator
    lowest = ranking['p_srs'].min()
    spread = ranking['p_srs'].max() - lowest
    ranking['HSI'] = (ranking['p_srs'] - lowest)/spread

    return ranking

In [13]:
def build_hsi(df_train):
    '''
    rank the shapes of all three spans on the train data
    returns the three rank tables side by side, keyed by
    'shp1', 'shp3' and 'shp5' on the top column level
    '''
    rank_tables = {name: compute_shaperank(df_train, name)
                   for name in ('shp1', 'shp3', 'shp5')}
    return pd.concat(rank_tables, axis=1)

In [14]:
def assign_hsi(df, df_shape):
    '''
    for daily market data
    lookup the HSI figures given shape ordinals
    return updated dataframe with daily HSC assignment

    df: market dataframe with shp1, shp3 and shp5 columns
    df_shape: shape tables keyed by 'shp1', 'shp3', 'shp5' on the top
              column level, each holding 'shape' and 'HSI' columns
    returns: copy of df with hsi1, hsi3, hsi5 and HSC added

    Shapes missing from the table get a neutral 0.5. HSC is the mean of
    the two HSI values that lie closest together; when several pairs
    tie for closest, the pair with the larger sum is used.
    '''
    df_mkt = df.copy()

    # HSI lookups
    shapenames = ['shp1','shp3','shp5']
    hsi_names = ['hsi1','hsi3','hsi5']

    for sname, hsi_name in zip(shapenames, hsi_names):
        table = df_shape[sname]
        # one dictionary per span replaces a dataframe scan per market row
        hsi_by_shape = dict(zip(table['shape'].tolist(), table['HSI'].tolist()))
        # assign 0.5's for unknown shapes
        df_mkt[hsi_name] = [hsi_by_shape.get(shp, 0.5)
                            for shp in df_mkt[sname].tolist()]

    # compile three into the average of the two closest
    v1, v2, v3 = (df_mkt[name].to_numpy(dtype=float) for name in hsi_names)
    diffs = np.abs(np.column_stack([v1-v2, v2-v3, v1-v3]))
    sums = np.column_stack([v1+v2, v2+v3, v1+v3])
    # mark the pair(s) with the smallest gap in each row
    closest = diffs == diffs.min(axis=1, keepdims=True)
    df_mkt['HSC'] = (closest*sums).max(axis=1)/2

    return df_mkt

#### Trade Rules

In [15]:
def compute_trades(indicator, highT, lowT):
    '''
    compare the indicator to the two thresholds, day by day
    while out: enter when the indicator rises above highT
    while in: exit when the indicator drops below lowT
    returns a list of 1 (in) / 0 (out), one entry per indicator value
    '''
    position = 0
    in_out = []
    for value in indicator:
        if position:
            # holding: leave only on a drop below the low threshold
            if value < lowT:
                position = 0
        elif value > highT:
            # flat: enter only on a rise above the high threshold
            position = 1
        in_out.append(position)
    return in_out

In [16]:
def opt_tresh(seedLow, seedHigh, step_range, df):
    '''
    grid search for the threshold pair with the best integrated return
    20 low values starting at seedLow and 20 high values starting at
    seedHigh are tried, each spaced step_range/20 apart
    returns (low, high); stays (0, 0) when no pair gives a return above 0
    '''
    frame = df.copy()
    best_low, best_high, best_return = 0, 0, 0

    for low_step in range(20):
        low_cut = seedLow + step_range*low_step/20

        for high_step in range(20):
            high_cut = seedHigh + step_range*high_step/20

            in_out = compute_trades(frame['HSC'], high_cut, low_cut)
            gated = frame['shift_ret']*in_out
            pct_return = (np.exp(gated[1:].T.sum())-1)*100

            # strict comparison: the first pair to reach a level keeps it
            if pct_return > best_return:
                best_low, best_high, best_return = low_cut, high_cut, pct_return

    return best_low, best_high

In [17]:
def thresholds(df_train):
    '''
    determine trade rule thresholds by successive refinement
    starts from (0.25, 0.75) and searches windows of width 0.5, 0.25
    and 0.125, each centred on the previous best pair
    '''
    # drop the last 7 rows before searching
    trimmed = df_train.iloc[:-7].copy()

    low, high = 0.25, 0.75
    for width in (0.5, 0.25, 0.125):
        low, high = opt_tresh(low-(width/2), high-(width/2), width, trimmed)

    return low, high

In [18]:
def add_class_fields(df_trade):
    '''
    add the classifier's symbol category and year fields

    df_trade: dataframe with 'symbol' and 'date' columns
    returns: copy with 'sym' (category looked up in dict_symbolcat.pkl),
             'date' converted to datetime and 'year' added
    '''
    out = df_trade.copy()

    # symbol -> category mapping stored next to the notebook
    with open('dict_symbolcat.pkl', 'rb') as handle:
        category_of = pickle.load(handle)
    out['sym'] = [category_of[symbol] for symbol in out.symbol.tolist()]

    # calendar year of each row
    out.date = pd.to_datetime(out.date)
    out['year'] = out['date'].map(lambda stamp: stamp.year)

    return out

In [19]:
def class_trades(df_trade):
    '''
    Import trade dataframe
    Load trained classifier
    Predict trade classifications
    Return updated dataframe

    df_trade: dataframe holding every column listed in class_cols
    returns: copy with 'year' as float plus 'pred' (predicted class)
             and 'prob' (second column of predict_proba);
             an empty input is returned as an unchanged copy
    '''
    df = df_trade.copy()

    if len(df) > 0:
        # load models
        # NOTE: joblib files are pickles; only load a model file you trust
        logreg = load('logreg_model.joblib')
        # specify the same features as model trainings
        class_cols = ['HSC',
            'ds1', 'ds3', 'ds5',
            'hsi1', 'hsi3', 'hsi5',
            'pe1', 'pe3', 'pe5',
            'sd1', 'sd3', 'sd5',
            'shp1', 'shp3', 'shp5',
            'sym','year']
        # cast before copying the features so the float year is what
        # the model actually receives
        df['year'] = df['year'].astype(float)
        df_class = df[class_cols].copy()
        # model predictions
        df['pred'] = logreg.predict(df_class)
        df['prob'] = logreg.predict_proba(df_class)[:, 1]

    return df

In [20]:
def find_trade_masks(trade_array):
    '''
    Import optimal trade in/out array (1 = in, 0 = out)
    Export buy and sell day masks of the same length
    Each 0->1 step marks the row before the step and the row after it
    as buy days; each 1->0 step does the same for sell days
    '''
    trades = trade_array.copy()
    num_days = len(trades)
    # number of rows flagged per threshold crossing
    late = 2

    steps = np.diff(trades)
    offsets = np.arange(late)

    def _mask_for(step_value):
        # rows where the in/out state changes by step_value on the next row
        crossings = np.where(steps==step_value)[0]
        # widen each crossing by the offsets, dropping duplicates
        days = np.unique((crossings[:, None] + offsets).ravel())
        # discard anything past the end of the data
        days = days[days<num_days]
        mask = np.zeros(num_days, dtype=int)
        mask[days] = 1
        return mask

    return _mask_for(1), _mask_for(-1)

In [21]:
def assign_actions(df_trade, lowT, highT):
    '''
    flag the rows on which a buy or a sell is allowed

    df_trade: dataframe with 'trade', 'HSC' and 'pred' columns
    returns: copy with 0/1 'buyday' and 'sellday' columns
    a buy day must sit in a buy window, have HSC above highT and pred == 1
    a sell day must sit in a sell window, have HSC below lowT and pred == 1
    '''
    out = df_trade.copy()

    # windows around the unclassified trade entries and exits
    buy_window, sell_window = find_trade_masks(out['trade'].to_numpy())

    # threshold checks on the indicator itself
    hsc = out['HSC'].to_numpy()
    above_high = (hsc>highT)*1
    below_low = (hsc<lowT)*1

    # classifier agreement
    classifier_yes = (out['pred'].to_numpy()==1)*1

    out['buyday'] = (buy_window*above_high)*classifier_yes
    out['sellday'] = (sell_window*below_low)*classifier_yes

    return out

In [22]:
def find_trade_idxs(b_mask, s_mask):
    '''
    import buy and sell masks (1 marks a buy day / sell day)
    return list of (buy index, sell index) tuples

    Each buy day is paired with the first sell day after it, and then
    with every sell day that follows that one without a gap. A buy day
    with no sell day after it produces no tuple; previously an all-zero
    remainder made argmax return position 0, which paired the buy with
    the very next row even though that row was not a sell day.
    '''
    buys = np.asarray(b_mask)
    sells = np.asarray(s_mask)
    s_num = len(sells)

    find = []
    for i in np.flatnonzero(buys == 1):
        i = int(i)
        # sell days strictly after this buy
        later_sells = np.flatnonzero(sells[i+1:] == 1)
        if later_sells.size == 0:
            # nothing to sell into: skip this buy
            continue
        s_index = int(later_sells[0]) + i + 1
        # permutate out the run of consecutive sell days for this buy
        while True:
            find.append((i, s_index))
            if s_index+1 < s_num and sells[s_index+1] == 1:
                s_index = s_index + 1
            else:
                break
    return find

In [23]:
def update_indays(df_trade):
    '''
    import trade dataframe with 'buyday' and 'sellday' columns
    build column of in-days covering every buy/sell index pair
    return updated dataframe
    '''
    out = df_trade.copy()

    # (buy index, sell index) pairs from the two day masks
    pairs = find_trade_idxs(out['buyday'].to_numpy(), out['sellday'].to_numpy())

    # mark every row from the buy through the sell, inclusive
    held = np.zeros(len(df_trade))
    for first_day, last_day in pairs:
        held[first_day:last_day+1] = 1
    out['inday'] = held

    return out

#### Analysis Functions

In [24]:
def compute_trade_returns(df):
    '''
    returns earned on in-days only
    each shifted return is multiplied by the matching in-day flag
    '''
    held_flag = df['inday']
    return df['shift_ret'].mul(held_flag)

In [25]:
def statistical_test(df):
    '''
    Unequal variance t-test on samples of equal size
    Compares the shifted returns of in-days against those of out-days.
    The larger group (or the in-day group when the sizes match) is
    redrawn with np.random.choice down to the size of the other group.
    Two-sided test of the null hypothesis that both samples have the
    same average (expected) value; a small p_value rejects it.
    Returns (t_value, p_value).
    '''
    held = df.loc[df['inday']==1, 'shift_ret'].dropna()
    flat = df.loc[df['inday']==0, 'shift_ret'].dropna()

    # equalise the sample sizes by random draws (unseeded)
    if len(held)<len(flat):
        flat = np.asarray(np.random.choice(flat, len(held)))
    else:
        held = np.asarray(np.random.choice(held, len(flat)))

    outcome = statsmodels.stats.weightstats.ttest_ind(held, flat,
                                                      alternative="two-sided",
                                                      usevar="unequal")
    return outcome[0], outcome[1]

In [26]:
def get_expected_return(returns):
    '''
    integrated return in percent
    sums the log returns after the first entry and converts the total
    to a simple percentage
    '''
    total_log_return = returns[1:].T.sum()
    growth = np.exp(total_log_return)
    return (growth-1)*100

In [27]:
def get_volatility(returns):
    '''
    annualized volatility
    standard deviation of the returns scaled by the square root of 252
    '''
    annual_factor = np.sqrt(252)
    spread = np.std(returns)
    return spread*annual_factor

In [28]:
def get_years(df_mkt):
    '''
    compute years for sharpe

    df_mkt: dataframe with a datetime 'date' column
    returns: time from the first to the last row, as a float number
             of 365-day years
    '''
    dates = df_mkt['date'].reset_index(drop=True)
    # np.asscalar no longer exists in current NumPy; dividing the single
    # time span by a timedelta already gives a plain float
    return (dates.iloc[-1] - dates.iloc[0])/timedelta(days=365)

In [29]:
def get_sharpe(returns, years, vol_year):
    '''
    sharpe ratio assuming a 3.5% risk free interest rate
    the integrated return (log returns after the first entry) is spread
    evenly over years, then the excess over the risk free rate is
    divided by vol_year
    '''
    risk_free = 0.035
    total_return = np.exp(returns[1:].T.sum())-1
    yearly_return = total_return/years
    excess = yearly_return - risk_free
    return excess / vol_year

In [30]:
def get_benchmark(df_mkt, exp_return):
    '''
    beat-the-market figure in percentage points
    reads the index returns from spy_index_102719.csv, keeps the dates
    also present in df_mkt and applies df_mkt's in-day flags to them
    converts the summed log returns to a simple percentage
    returns exp_return minus that benchmark percentage
    '''
    spy = pd.read_csv('spy_index_102719.csv')
    spy['date'] = pd.to_datetime(spy['date'])

    # index returns lined up with the strategy's in-day flags
    joined = spy[['date', 'shift_ret']].merge(df_mkt[['date','inday']], on='date', how='inner')
    gated = joined['shift_ret']*joined['inday']

    bench_pct = (np.exp(gated[1:].T.sum())-1)*100
    return exp_return - bench_pct

#### ETL Pipeline

In [31]:
def run_etl(ticker, equity, test_years=(2017, 2018, 2019)):
    '''
    run ETL pipeline for one ticker

    ticker: symbol to download and evaluate
    equity: descriptive name stored alongside the results
    test_years: years evaluated one at a time; each is tested with
                shapes and thresholds trained on the years before it
    returns: (summary dataframe with one row per test year,
              shape table, low threshold, high threshold), the last
              three taken from the final test year
    raises: ValueError when test_years is empty
    '''
    if len(test_years) == 0:
        raise ValueError('test_years must contain at least one year')

    print('Runnning ETL for '+ ticker)
    dict_raw = get_raw(ticker)

    print('formatting')
    df_for = format_raw(dict_raw)
    df_scale = scale_adjusted(df_for)

    print('preprocessing')
    df_pre = preprocess(df_scale)
    df_pre['symbol'] = ticker

    print('begin test itterations')
    cols = ['symbol','equity','test_year','price$','lowT','highT','#trades','in_days',
            't-val','p-val','exp_ret%','volatility','years','sharpe','beat%']
    # one list of values per test year, in the order given by cols
    rows = []

    for test_year in test_years:

        print('starting test year {}'.format(test_year))
        results = [ticker, equity, test_year]
        print('test-train split')
        # the last 7 rows are left out of the split
        df_train, df_test = test_train_split(df_pre[:-7], test_year)
        # last price estimate of the test year
        est_price = float(df_test['pe1'].iloc[-1])
        results.append(est_price)

        print('training shapes')
        df_shape = build_hsi(df_train)
        df_train = assign_hsi(df_train, df_shape)
        df_test = assign_hsi(df_test, df_shape)

        print('optimizing trade thresholds')
        lowT, highT = thresholds(df_train)
        results.append(lowT)
        results.append(highT)

        print('computing unclassified trades')
        trades = compute_trades(df_test['HSC'], highT, lowT)
        df_test['trade'] = trades

        print('computing classifications')
        df_test = add_class_fields(df_test)
        df_test = class_trades(df_test)

        print('computing classified trades')
        df_test = assign_actions(df_test, lowT, highT)
        df_test = update_indays(df_test)
        indays = df_test['inday'].to_numpy()
        # count exits, plus one if still holding on the last row
        num_trades = ((np.diff(indays))==-1).sum() + indays[-1]
        results.append(num_trades)

        print('evaluating performance')
        returns = compute_trade_returns(df_test)
        results.append(np.count_nonzero(returns))

        tval, pval = statistical_test(df_test)
        results.append(tval)
        results.append(pval)
        print('t-value, p-value = ', tval, pval)

        exp_ret = get_expected_return(returns)
        results.append(exp_ret)
        print('expected return = ', exp_ret)

        vol = get_volatility(returns)
        results.append(vol)
        print('volatility = ', vol)

        # own name, so the sequence being iterated is not rebound
        span_years = get_years(df_test)
        results.append(span_years)
        print('years = ', span_years)

        sharpe = get_sharpe(returns, span_years, vol)
        results.append(sharpe)
        print('sharpe ratio = ', sharpe)

        beat_percent = get_benchmark(df_test, exp_ret)
        results.append(beat_percent)
        print('beat percent = ', beat_percent)

        print('saving result')
        rows.append(results)

    print('formatting summary')
    # build the summary once; DataFrame.append is gone from current pandas
    df_res = pd.DataFrame(rows, columns=cols)
    df_res.test_year = df_res.test_year.astype(int)
    df_res.in_days = df_res.in_days.astype(int)

    return df_res, df_shape, lowT, highT

#### Run Adviser Survey ETL

In [32]:
# import candidates
df_stocks = pd.read_csv('cherries_102919.csv')
t_list = df_stocks.symbol.tolist()
e_list = df_stocks.equity.tolist()
# the loop writes one shape file per ticker into this folder
os.makedirs('hsi_data', exist_ok=True)
# run etl
res_frames = []
ts_list = []
hi_list = []
low_list = []
hiccup = []
for ticker, equity in zip(t_list,e_list):
    try:
        df_results, df_shape, lowT, highT = run_etl(ticker, equity)
        res_frames.append(df_results)
        df_shape.to_csv("hsi_data/{}_hsi.csv".format(ticker), index=False)
        ts_list.append(ticker)
        hi_list.append(highT)
        low_list.append(lowT)
    except Exception as err:
        # keep the survey running, but say which ticker failed and why;
        # Exception (not a bare except) lets a keyboard interrupt stop the run
        print('ETL failed for {}: {!r}'.format(ticker, err))
        hiccup.append(ticker)

# combine the per-ticker summaries in one step
df_res = pd.concat(res_frames, ignore_index=True) if res_frames else pd.DataFrame()
df_res.to_csv('adviser_survey_102919.csv', index=None)
df_thrsh = pd.DataFrame({'ticker':ts_list,'lowT':low_list,'highT':hi_list})
df_thrsh.to_csv('hsi_thrsh.csv',index=False)
df_res.head()

Runnning ETL for ENTG
formatting
preprocessing
begin test itterations
starting test year 2017
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  -0.715666513192296 0.47554331048367715
expected return =  71.2526495736371
volatility =  0.18995801057119255
years =  0.9863013698630136
sharpe ratio =  3.6188139420864815
beat percent =  58.53491641038153
saving result
starting test year 2018
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  1.5023467788836526 0.1355096787689119
expected return =  -1.2546139648548782
volatility =  0.23654806157887312
years =  0.9945205479452055
sharpe ratio =  -0.20129213516733
beat percent =  -5.167195065972308
saving result
starting test year 2019
test-train split
training shapes
optimiz

sharpe ratio =  -0.4907368218592815
beat percent =  -9.81501615253687
saving result
starting test year 2019
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  -0.0029515746600560326 0.9976498043766064
expected return =  8.159224948229959
volatility =  0.11603232849813577
years =  0.7917808219178082
sharpe ratio =  0.5864661658873653
beat percent =  2.934895294564276
saving result
formatting summary
Runnning ETL for WCN
formatting
preprocessing
begin test itterations
starting test year 2017
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  1.5141613061223231 0.1317203119716696
expected return =  31.288880663081507
volatility =  0.07846761142592926
years =  0.9863013698630136
sharpe ratio =  3.5968277786447884
beat pe

t-value, p-value =  -1.0427024652196255 0.29821059594019445
expected return =  20.44768910280881
volatility =  0.09781465167560995
years =  0.9863013698630136
sharpe ratio =  1.7616670396106906
beat percent =  11.36951138223712
saving result
starting test year 2018
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  1.855002674704002 0.06523238793482602
expected return =  2.410525838243771
volatility =  0.08395318717736414
years =  0.9945205479452055
sharpe ratio =  -0.12818965709873498
beat percent =  1.1335512482635535
saving result
starting test year 2019
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  -0.18955742512298196 0.8501950023805089
expected return =  36.267090919450084
volatility =  0.07416578677705539

training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  0.4217594647047642 0.6737965981374776
expected return =  20.110357075030926
volatility =  0.07421337027636979
years =  0.9863013698630136
sharpe ratio =  2.275825437797121
beat percent =  9.170614209530358
saving result
starting test year 2018
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  0.8823425451271784 0.37861973028817253
expected return =  4.221596737399325
volatility =  0.09168460419024274
years =  0.9945205479452055
sharpe ratio =  0.08124114513383732
beat percent =  0.13934469038823227
saving result
starting test year 2019
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
eval

expected return =  10.990483133601693
volatility =  0.17315772882441435
years =  0.9945205479452055
sharpe ratio =  0.43607852857316315
beat percent =  6.045323032447404
saving result
starting test year 2019
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  1.323904786509966 0.18725195345368617
expected return =  48.867705650545126
volatility =  0.19476565487402234
years =  0.7917808219178082
sharpe ratio =  2.9891681173174844
beat percent =  35.23688913289909
saving result
formatting summary
Runnning ETL for LDOS
formatting
preprocessing
begin test itterations
starting test year 2017
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  -0.4486348233849946 0.6542435168776306
expected return =  11.532023389363477
volat

sharpe ratio =  0.8952242328983352
beat percent =  -3.457195759633679
saving result
starting test year 2018
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  0.6421511770756886 0.5215081222658935
expected return =  36.32299183333465
volatility =  0.13531839330878773
years =  0.9945205479452055
sharpe ratio =  2.440401315935354
beat percent =  35.28900732113691
saving result
starting test year 2019
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  -0.48823529201403865 0.6264357289450749
expected return =  8.724666780855884
volatility =  0.1498454622543326
years =  0.7917808219178082
sharpe ratio =  0.5017864876915682
beat percent =  -9.302346584234499
saving result
formatting summary
Runnning ETL for TGT
formatting


starting test year 2017
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  0.41552509183571146 0.6781826258091039
expected return =  37.09360679564813
volatility =  0.1586529174351026
years =  0.9863013698630136
sharpe ratio =  2.14990031890673
beat percent =  23.412852808746432
saving result
starting test year 2018
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  -0.24567570285762513 0.8062750118381636
expected return =  -6.561886626711488
volatility =  0.21054988187084817
years =  0.9945205479452055
sharpe ratio =  -0.4796032266706227
beat percent =  -7.171732568064093
saving result
starting test year 2019
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classif

starting test year 2019
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  1.2466156095688956 0.21523850752696627
expected return =  24.565107102920948
volatility =  0.08194895967253922
years =  0.7917808219178082
sharpe ratio =  3.358814483842501
beat percent =  8.903589783112254
saving result
formatting summary
Runnning ETL for AIN
formatting
preprocessing
begin test itterations
starting test year 2017
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  0.1955039030368718 0.8453000845143123
expected return =  29.82788429125156
volatility =  0.14910751398691005
years =  0.9863013698630136
sharpe ratio =  1.7934817466215052
beat percent =  18.03560813562115
saving result
starting test year 2018
test-train split
traini

sharpe ratio =  2.3296962205929215
beat percent =  11.798027798356792
saving result
starting test year 2018
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  1.5220221662489994 0.12946641220801255
expected return =  14.296785897203822
volatility =  0.10798349848820475
years =  0.9945205479452055
sharpe ratio =  1.0071498163790562
beat percent =  0.3689656942490238
saving result
starting test year 2019
test-train split
training shapes
optimizing trade thresholds
computing unclassified trades
computing classifications
computing classified trades
evaluating performance
t-value, p-value =  1.6035977825463286 0.11231860474912844
expected return =  21.1873203536441
volatility =  0.1459910712809446
years =  0.7917808219178082
sharpe ratio =  1.5931845841587542
beat percent =  0.4304414861864494
saving result
formatting summary
Runnning ETL for ADI
formattin

Unnamed: 0,symbol,equity,test_year,price$,lowT,highT,#trades,in_days,t-val,p-val,exp_ret%,volatility,years,sharpe,beat%
0,ENTG,ENTEGRIS INC,2017,30.315661,0.35625,0.7,8.0,188,-0.715667,0.475543,71.25265,0.189958,0.986301,3.618814,58.534916
1,ENTG,ENTEGRIS INC,2018,27.665078,0.3125,0.725,4.0,187,1.502347,0.13551,-1.254614,0.236548,0.994521,-0.201292,-5.167195
2,ENTG,ENTEGRIS INC,2019,48.4425,0.29375,0.725,6.0,126,1.499221,0.135945,67.910416,0.185745,0.791781,4.429156,44.592937
3,JBSS,SANFILIPPO JOHN B&SON,2017,59.578032,0.2875,0.6375,5.0,141,0.657639,0.511526,7.095994,0.140648,0.986301,0.26268,-0.664858
4,JBSS,SANFILIPPO JOHN B&SON,2018,53.946187,0.45625,0.65,5.0,157,2.179768,0.030543,13.416253,0.207075,0.994521,0.482443,9.462504


In [33]:
# tickers whose ETL run raised an error and was skipped in the survey loop
print(hiccup)

['BLD', 'CTLT', 'CTRE', 'KNSL', 'TPVG', 'FCAP', 'MGIC']


### Disclaimer: This notebook is intended for educational purposes only and is not recommended for real trading.