In [None]:
%pylab inline
from tmqr.settings import *
from tmqrfeed import DataManager
from pymongo import MongoClient
from tmqrindex.index_base import IndexBase
from datetime import datetime
import re
import pandas as pd


import seaborn as sns
#import cufflinks as cf
#cf.go_offline()

import pandas as pd
import numpy as np
import scipy

#import matplotlib
#%matplotlib notebook

figsize(15,7)

In [None]:
from sklearn import (pipeline, preprocessing, ensemble, neighbors, linear_model, neural_network, cluster, metrics, decomposition,
                     naive_bayes, calibration, svm, multioutput,
                     feature_selection, discriminant_analysis, model_selection, multiclass
                    )

In [None]:
# Shared MongoDB handle; MONGO_CONNSTR and MONGO_DB presumably come from the
# `from tmqr.settings import *` star-import at the top of the notebook.
client = MongoClient(MONGO_CONNSTR)
db = client[MONGO_DB]

# Data manager that is handed to IndexBase.deserialize() when stored indexes are loaded.
dm = DataManager()

In [None]:
def get_exo_list(exo_filter='*', return_names=True):
    """
    Look up EXO records in the MongoDB 'index_data' collection by name.

    :param exo_filter: name pattern; every '*' is turned into the regex '.*'
                       (e.g. 'ES_Bullish*'), matching ignores case
    :param return_names: True  -> return only the 'name' field of every match,
                         False -> return the matching MongoDB documents themselves
    :return: list of EXO names, or list of documents when return_names is false
    """
    # NOTE(review): only '*' is translated; any other regex metacharacter in the
    # filter (such as '.') is passed to the regex engine unchanged.
    name_pattern = re.compile(exo_filter.replace('*', '.*'), re.IGNORECASE)
    cursor = db['index_data'].find({'name': name_pattern})

    if not return_names:
        return list(cursor)
    return [doc['name'] for doc in cursor]

In [None]:
def universal_fisher_transform(series, transform_with='arctanh'):
    """
    Apply an expanding-window Fisher-style transform to a series.

    Every value is first standardized with a StandardScaler fitted on all values up to
    and including that position, then rescaled into [-0.999, 0.999] with a MinMaxScaler
    fitted the same way, and finally passed through the selected function.

    :param series: pandas Series of numeric values
    :param transform_with: 'arctanh' or 'tanh'
    :return: pandas Series with the transformed values
    :raises ValueError: if transform_with is not one of the supported options
    """
    transforms = {'arctanh': np.arctanh, 'tanh': np.tanh}
    # Validate up front: the two expanding passes below refit a scaler for every row,
    # so a misspelled option should fail before that work is done, and with a clear message.
    if transform_with not in transforms:
        raise ValueError('Unsupported transform_with {!r}, expected one of {}'.format(
            transform_with, sorted(transforms)))

    def _last_scaled(window, scaler):
        # np.asarray() accepts the window whether pandas passes it as an ndarray or a Series
        column = np.asarray(window).reshape(-1, 1)
        return scaler.fit_transform(column).ravel()[-1]

    # Standardize (zero mean, unit variance) using only the data seen so far
    series = series.expanding().apply(
        lambda x: _last_scaled(x, preprocessing.StandardScaler()))

    # Limit it to -0.999 <= x <= 0.999, again using only the data seen so far
    series = series.expanding().apply(
        lambda x: _last_scaled(x, preprocessing.MinMaxScaler(feature_range=(-0.999, 0.999))))

    return transforms[transform_with](series)

In [None]:
# Name filter handed to get_exo_list(); '*' acts as a wildcard there.
#exo_filter = 'neutralOnly'     # example of a narrower filter
exo_filter = '*'  # bare wildcard: no restriction on the EXO name

# Deserialize every matching index document (read-only) and key it by its index name.
exo_dict = {}
for exo in get_exo_list(exo_filter, return_names=False):
    idx = IndexBase.deserialize(dm, exo, as_readonly=True)
    exo_dict[idx.index_name] = idx

In [None]:
# One status line per loaded index: too short, stale, or fine.
for exo_name, exo in exo_dict.items():
    exo_df = exo.data
    n_rows = len(exo_df)

    if n_rows < 200:
        # Fewer than 200 rows is reported as missing data
        status = "{0:<70} [NODATA DataLen: {1}]".format(exo.index_name, n_rows)
    elif (datetime.now().date() - exo_df.index.date[-1]).days > 4:
        # Last row is more than 4 calendar days old
        status = "{0:<70} [DELAYED: LastDate: {1}]".format(exo.index_name, exo_df.index[-1])
    else:
        status = "{0:<70} [OK]".format(exo.index_name)

    print(status)

In [None]:
# Data frame of one index; the cells that follow read its c, h, l and v columns.
exo_df = exo_dict['US.6C_ContFutEOD'].data

In [None]:
# VWAP over the whole history: cumulative (close * volume) divided by cumulative volume.
cum_turnover = (exo_df.c * exo_df.v).cumsum()
cum_volume = exo_df.v.cumsum()
vwap = cum_turnover / cum_volume

# Rolling 250-bar alternative:
#vwap = (exo_df.c * exo_df.v).rolling(250).mean() / exo_df.v.rolling(250).mean()

In [None]:
# Close price together with the VWAP and the plain expanding mean of the close.
price_ax = exo_df.c.plot()
vwap.plot(ax=price_ax, label='VWAP', legend=True)

expanding_mean_close = exo_df.c.expanding().mean()
expanding_mean_close.plot(ax=price_ax, label='Expanding mean', legend=True);

#### BBands style

In [None]:
exo_df.c.plot()

vwap.plot()

# Every band uses the same expanding deviation of the VWAP (first argument of
# expanding() is min_periods), so it is computed once and scaled per band.
vwap_sigma = vwap.expanding(20).std()
for n_sigma in (2, 4, 6, 8, 10):
    (vwap + n_sigma*vwap_sigma).plot()
    (vwap - n_sigma*vwap_sigma).plot()

# Keep the y-axis on the price range; the wide bands would otherwise stretch it
plt.ylim([exo_df.c.min(), exo_df.c.max()])

#### Top/bottom finder

## Continuous futures version

In [None]:
def _anchored_midas_vwap(bars, anchors):
    """
    Build a piecewise VWAP of the mid price that restarts at every anchor bar.

    The first segment runs from the first bar of ``bars`` to the first anchor; every
    following segment runs from the previous anchor to the current one (both ends
    included). Inside a segment the value is

        (cumsum(mid * v) - mid[0] * v[0]) / (cumsum(v) - v[0])

    so the first bar of a segment is 0/0 = NaN. Because consecutive segments share
    their boundary bar, that NaN replaces the previous segment's last value there.
    Bars after the last anchor stay NaN.

    :param bars: DataFrame with 'h', 'l' and 'v' columns
    :param anchors: index labels of ``bars`` at which the VWAP is re-anchored, in order
    :return: float Series on ``bars.index``
    """
    mid_price = (bars.h + bars.l) / 2
    out = pd.Series(np.nan, index=bars.index)

    seg_start = None  # None -> open-ended slice start, i.e. the first bar of the frame
    for anchor in anchors:
        seg_price = mid_price.loc[seg_start:anchor]
        seg_volume = bars.v.loc[seg_start:anchor]

        turnover = seg_price * seg_volume
        seg_vwap = ((turnover.cumsum() - turnover.iloc[0]) /
                    (seg_volume.cumsum() - seg_volume.iloc[0]))

        out.loc[seg_vwap.index] = seg_vwap
        seg_start = anchor

    return out


# Rolling extremes over a window of 1% of the history length
high_rollingmax = exo_df.h.rolling(int(exo_df.h.size * 0.01)).max()
low_rollingmin = exo_df.l.rolling(int(exo_df.l.size * 0.01)).min()

# Bars that set a new rolling high / rolling low are the anchor points
highest_high = exo_df.h >= high_rollingmax
lowest_low = exo_df.l <= low_rollingmin

highest_high = highest_high[highest_high]
lowest_low = lowest_low[lowest_low]

# High-anchored curve goes to the top finder, low-anchored curve to the bottom finder
# (including the very first low-anchored segment).
topfinder_ser = _anchored_midas_vwap(exo_df, highest_high.index)
botfinder_ser = _anchored_midas_vwap(exo_df, lowest_low.index)
        
'''if lowest_low[i] == True:
    midas_close = exo_df.c.ix[highest_high.index[i]:]#.iloc[:i]
    midas_volume = exo_df.v.ix[highest_high.index[i]:]#.iloc[:i]

    price_volume_onstart = pd.Series((midas_close[0] * midas_volume[0]), index=midas_close.index)
    volume_onstart = pd.Series(midas_volume[0], index=midas_close.index)

    midas_vwap = ((midas_close * midas_volume).cumsum() - price_volume_onstart) / (midas_volume.cumsum() - volume_onstart)

    botfinder_ser[midas_vwap.index] = midas_vwap'''

In [None]:
# Mid price (average of high and low) of the last 500 bars with both finder curves on top.
last_bars = slice(-500, None)

#exo_df.c.iloc[-500:].plot()
mid_price = (exo_df.h + exo_df.l)/2
mid_price.iloc[last_bars].plot()

#low_rollingmin.iloc[-500:].plot()
# Forward-fill first so the gaps in the curves are bridged before the window is cut
bottom_curve = botfinder_ser.ffill()
bottom_curve.iloc[last_bars].plot()

top_curve = topfinder_ser.ffill()
top_curve.iloc[last_bars].plot();

In [None]:
# Close price with the (forward-filled) top-minus-bottom finder spread on a secondary axis.
exo_df.c.plot()
#botfinder_ser.iloc[-500:].plot()
#topfinder_ser.iloc[-500:].plot();

finder_spread = topfinder_ser - botfinder_ser
finder_spread.ffill().plot(secondary_y=True)

In [None]:
# Two ways of computing the mid price, drawn on the same axes for comparison.
arithmetic_mid = (exo_df.h + exo_df.l)/2
arithmetic_mid.plot()

x = pd.Series(np.mean([exo_df.h, exo_df.l], axis=0), index=exo_df.index)

x.plot()

#### Calendar-period VWAP (yearly, quarterly, weekly, monthly)

In [None]:
# One column per calendar year; the VWAP restarts on the first bar of that year and
# the column is NaN outside of it.
for y in np.unique(exo_df.index.year):
    in_year = exo_df.index.year == y
    exo_df_slice = exo_df[in_year]

    cum_turnover_y = (exo_df_slice.c * exo_df_slice.v).cumsum()
    vwap_y = cum_turnover_y / exo_df_slice.v.cumsum()

    exo_df['vwap_yearly_{}'.format(y)] = vwap_y

In [None]:
# One column per (quarter, year); the VWAP restarts on the first bar of the quarter and
# the column is NaN outside of it. Combinations without any bars yield an all-NaN column.
for y in np.unique(exo_df.index.year):
    for q in np.unique(exo_df.index.quarter):
        exo_df_slice = exo_df[(exo_df.index.year == y) & (exo_df.index.quarter == q)]

        # Dedicated name: assigning to `vwap` here would overwrite the full-history VWAP
        # series that the earlier plotting cells read.
        vwap_qtr = ((exo_df_slice.c * exo_df_slice.v).cumsum() / exo_df_slice.v.cumsum())

        exo_df['vwap_qtr_{}_{}'.format(q, y)] = vwap_qtr

In [None]:
# `weekofyear` is the ISO week number, and an ISO week belongs to the year that contains
# its Thursday. Pairing it with the calendar year would pool e.g. 30-31 Dec 2013
# (ISO week 1 of 2014) with the first week of January 2013, so the year used for
# grouping is taken from the Thursday of each bar's week.
week_thursday = exo_df.index + pd.to_timedelta(np.asarray(3 - exo_df.index.dayofweek), unit='D')
iso_year = np.asarray(week_thursday.year)
iso_week = np.asarray(exo_df.index.weekofyear)

# One column per (ISO week, ISO year); the VWAP restarts on the first bar of the week
# and the column is NaN outside of it.
for y in np.unique(iso_year):
    for w in np.unique(iso_week):
        exo_df_slice = exo_df[(iso_year == y) & (iso_week == w)]

        # Dedicated name: assigning to `vwap` here would overwrite the full-history VWAP
        # series that the earlier plotting cells read.
        vwap_weekly = ((exo_df_slice.c * exo_df_slice.v).cumsum() / exo_df_slice.v.cumsum())

        exo_df['vwap_weekly_{}_{}'.format(w, y)] = vwap_weekly

In [None]:
# One column per (month, year); the VWAP restarts on the first bar of the month and
# the column is NaN outside of it. Combinations without any bars yield an all-NaN column.
for y in np.unique(exo_df.index.year):
    for m in np.unique(exo_df.index.month):
        exo_df_slice = exo_df[(exo_df.index.year == y) & (exo_df.index.month == m)]

        # Dedicated name: assigning to `vwap` here would overwrite the full-history VWAP
        # series that the earlier plotting cells read.
        vwap_monthly = ((exo_df_slice.c * exo_df_slice.v).cumsum() / exo_df_slice.v.cumsum())

        exo_df['vwap_monthly_{}_{}'.format(m, y)] = vwap_monthly

In [None]:
# Last 200 bars of the close plus each family of period VWAP columns. A family is
# collapsed into a single line by summing its columns row-wise.
exo_df.c.iloc[-200:].plot(legend=False);

for column_pattern in ('vwap_weekly', 'vwap_month', 'vwap_q', 'vwap_y'):
    family_sum = exo_df.filter(regex=column_pattern).sum(axis=1)
    family_sum.iloc[-200:].plot(legend=False);

In [None]:
# Monthly VWAP columns (one green line per column) and the close, last 800 bars.
recent = slice(-800, None)

#exo_df.filter(regex='vwap_yearly').sum(axis=1).plot()
#exo_df.filter(regex='vwap_qtr').plot(cmap='jet')
#exo_df.filter(regex='vwap_weekly').iloc[-200:].plot(cmap='jet', legend=False)
monthly_vwaps = exo_df.filter(regex='vwap_month')
monthly_vwaps.iloc[recent].plot(c='g', legend=False)
exo_df.c.iloc[recent].plot()

#(exo_df.c - exo_df.filter(regex='vwap_yearly').sum(axis=1)).plot(secondary_y=True)

In [None]:
# Distance of the close from the quarterly VWAP, with an expanding median and
# +/- 0.5 sigma bands around that median.
#c_vwap_diff = (exo_df.c - exo_df.filter(regex='vwap_yearly').sum(axis=1))
#c_vwap_diff =  c_vwap_diff - c_vwap_diff.expanding().mean()
quarterly_vwap = exo_df.filter(regex='vwap_qtr').sum(axis=1)
c_vwap_diff = exo_df.c - quarterly_vwap

c_vwap_diff_expmedian = c_vwap_diff.expanding().median()

# NOTE(review): std() is taken over the whole series while the median is expanding,
# so the band width uses data from later bars -- confirm this is intended.
half_sigma = c_vwap_diff.std()*0.5

c_vwap_diff.plot(label='close - vwap', legend=True)
c_vwap_diff_expmedian.plot(label='median(close - vwap)', legend=True)


#(c_vwap_diff_expmedian + c_vwap_diff.std()*2).plot(label='+2 sigma', legend=True)
#(c_vwap_diff_expmedian - c_vwap_diff.std()*2).plot(label='-2 sigma', legend=True)

(c_vwap_diff_expmedian + half_sigma).plot(label='+0.5 sigma', legend=True)
(c_vwap_diff_expmedian - half_sigma).plot(label='-0.5 sigma', legend=True)
#exo_df.filter(regex='vwap_yearly').sum(axis=1).plot(label='VWAP yearly', legend=True, secondary_y=True, alpha=0.25)

exo_df.c.plot(secondary_y=True, label='close', legend=True, alpha=0.5);

In [None]:
# Shape of the close-minus-VWAP distribution: skewness, kurtosis and a kernel density plot.
print(c_vwap_diff.skew())
print(c_vwap_diff.kurt())
c_vwap_diff.plot.kde()
# Vertical marker at the sample mean (axvline is presumably provided by %pylab).
axvline(c_vwap_diff.mean())

In [None]:
import statsmodels.api as sm


# Q-Q plot of the non-missing close-minus-VWAP values; fit=True and line='45' are passed
# so the sample is fitted before being compared against the 45-degree reference line.
sm.qqplot(c_vwap_diff.dropna(), fit=True, line='45')

# Accumulation strategy


In [None]:
# Switch to the HO index and start the accumulation frame from its close column only.
exo_df = exo_dict['US.HO_ContFutEOD'].data

acc_df = pd.DataFrame({'c': exo_df.c})

In [None]:
# Create a new week trigger
# shift(-1, 'B') moves every date back one business day, so the week number differs on the
# first business day of a week (Monday); shift(1, 'B') would flag the last one (Friday).
# NOTE(review): the comparison uses calendar business days, not the previous row, so a week
# whose Monday has no bar gets no flag -- confirm whether that matters for this data.
is_week_start = np.asarray(acc_df.index.week != acc_df.index.shift(-1, 'B').week)
acc_df.loc[is_week_start, 'new_week'] = True

# Number the weeks with a running count of the flags. The flag sits on the FIRST bar of a
# week, so the count keeps Monday..Friday under one id; back-filling a cumsum of the sparse
# flag column would instead attach Tuesday..Friday to the following Monday.
acc_df['week_id'] = pd.Series(is_week_start, index=acc_df.index).cumsum()

In [None]:
# Inspect the most recent 20 rows of the accumulation frame.
acc_df.tail(20)

In [None]:
# Quarterly VWAP: one column per (quarter, year), NaN outside of that quarter
for y in np.unique(exo_df.index.year):
    for q in np.unique(exo_df.index.quarter):
        exo_df_slice = exo_df[(exo_df.index.year == y) & (exo_df.index.quarter == q)]
        vwap_qtr = ((exo_df_slice.c * exo_df_slice.v).cumsum() / exo_df_slice.v.cumsum())

        exo_df['vwap_qtr_{}_{}'.format(q, y)] = vwap_qtr

# A bar has a value in at most one quarter column, so the row-wise sum collapses the
# columns into a single quarterly-VWAP series.
c_vwap_diff = (exo_df.c - exo_df.filter(regex='vwap_qtr').sum(axis=1))

acc_df['close_vwap_diff'] = c_vwap_diff

# Bands: expanding median +/- half a standard deviation.
# NOTE(review): std() is taken over the whole history while the median is expanding, so the
# band width (and therefore the regimes) uses data from later bars -- confirm this is intended.
diff_expmedian = acc_df['close_vwap_diff'].expanding().median()
half_sigma = acc_df['close_vwap_diff'].std()*0.5

acc_df['close_vwap_diff_+05 sigma'] = diff_expmedian + half_sigma
acc_df['close_vwap_diff_-05 sigma'] = diff_expmedian - half_sigma

# Mark the regimes
#  regime_0 = neutral (inside the bands), regime_1 = underweight (below the lower band),
#  regime_2 = overweight (above the upper band)
regime_0_condition = ((acc_df['close_vwap_diff'] < acc_df['close_vwap_diff_+05 sigma']) &
                      (acc_df['close_vwap_diff'] > acc_df['close_vwap_diff_-05 sigma']))

regime_1_condition = (acc_df['close_vwap_diff'] < acc_df['close_vwap_diff_-05 sigma'])

regime_2_condition = (acc_df['close_vwap_diff'] > acc_df['close_vwap_diff_+05 sigma'])

# Store the masks directly: comparisons against NaN are already False, and a plain
# assignment also resets flags left over from an earlier run of this cell, which
# `.loc[cond] = True` followed by an in-place fillna on the column cannot do.
acc_df['regime_0'] = regime_0_condition
acc_df['regime_1'] = regime_1_condition
acc_df['regime_2'] = regime_2_condition

In [None]:
# Trading logic
# 1) weekly quantity of units to sell, and the per-day base size derived from it
n_units_per_week = 100
n_units_per_day = n_units_per_week / 10

# 2) unit size per regime (already multiplied by the per-day base size)
reg_0_mul = n_units_per_day * 1
reg_1_mul = n_units_per_day
reg_2_mul = n_units_per_day * 5

# Start every sizing column empty
for sizing_column in ('units', 'buy_n_units', 'sell_n_units'):
    acc_df[sizing_column] = np.nan

# Bars that belong to no regime keep NaN units
regime_units = (
    ('regime_0', reg_0_mul),
    ('regime_1', reg_1_mul),
    ('regime_2', reg_2_mul),
)
for regime_column, units_for_regime in regime_units:
    acc_df.loc[acc_df[regime_column] == True, 'units'] = units_for_regime

'''for week_id in acc_df.week_id.unique():
    # Get cumulative sum of units for every week id
    units_slice = acc_df.copy().query('week_id == {}'.format(week_id)).units.cumsum()
    # Limit this sum
    units_slice.loc[units_slice > n_units_per_week] = n_units_per_week
    
    acc_df.loc[units_slice.index, 'units'] = units_slice'''

In [None]:
# Summary statistics of the per-bar unit sizes.
acc_df.units.describe()

In [None]:
# Echo the sizing parameters used for the plot below.
print('N units_per_week', n_units_per_week)
print('N  units per day', n_units_per_day)

# The reg_*_mul values are unit sizes: they are already multiplied by n_units_per_day.
print('Regime 0 multiplier', reg_0_mul)
print('Regime 1 multiplier', reg_1_mul)
print('Regime 2 multiplier', reg_2_mul)

# Close price with the per-bar unit size on a secondary axis.
acc_df.c.plot(label='close', legend=True)
acc_df.units.plot(secondary_y=True, label='units', legend=True);

In [None]:
# Zoom in on the first 100 bars of the unit series.
acc_df.units.iloc[:100].plot()

In [None]:
# Cumulative sum of (bar-to-bar close change * units) against the cumulative close change.
# NOTE(review): units on a bar are derived from that bar's close and are multiplied by the
# change INTO that same close; if this is meant as a PnL curve, the units would normally be
# lagged by one bar -- confirm the intent.
(acc_df.c.diff() * (acc_df.units*1)).cumsum().plot(label='close * units', legend=True)
(acc_df.c.diff() *1).cumsum().plot(secondary_y=True, label='close', legend=True);

In [None]:
# Running total of the per-bar units over the whole history.
acc_df.units.cumsum().plot()