In [None]:
import sys
sys.path.append('../..')
from backtester import matlab, backtester
from backtester.analysis import *
import pandas as pd
import numpy as np
import glob, os

import itertools

import statsmodels.tsa.stattools as ts

from backtester.exoinfo import EXOInfo
from exobuilder.data.exostorage import EXOStorage
from IPython.core.display import display, HTML

import pandas as pd
import numpy as np
import scipy
import pprint

%pylab inline
figsize(20,10)

In [None]:
def hurst(ts, max_lag=100):
    """Estimate the Hurst exponent of the time series vector ts.

    For every lag in ``[2, max_lag)`` the function computes
    ``sqrt(std(ts[t + lag] - ts[t]))``, fits a straight line to
    ``log(tau)`` against ``log(lag)`` and returns twice the slope.

    Parameters
    ----------
    ts : array-like
        One-dimensional sequence of values (list, ndarray or pandas Series).
        Inside this function the name shadows the notebook-level ``ts`` alias
        of ``statsmodels.tsa.stattools``; the module alias is not used here.
    max_lag : int, optional
        Exclusive upper bound of the lag range. The default of 100 reproduces
        the previously hard-coded ``range(2, 100)``.

    Returns
    -------
    float
        The estimated exponent, or ``nan`` when fewer than two lags yield a
        finite, strictly positive dispersion (series too short, constant
        differences, or NaNs in the input).
    """
    # Work on a plain float ndarray. With two pandas Series, NumPy ufuncs align
    # the operands on their index labels, so ``ts[lag:] - ts[:-lag]`` would
    # subtract every overlapping point from itself instead of from its lagged
    # neighbour.
    values = np.asarray(ts, dtype=float)

    # Cap the lags so that each one leaves at least two differences to measure.
    lags = np.arange(2, min(max_lag, len(values) - 1))
    if len(lags) < 2:
        return np.nan

    # Square root of the (population) standard deviation of the lagged differences
    tau = np.array([np.sqrt(np.std(values[lag:] - values[:-lag])) for lag in lags])

    # log(0) and NaN would poison the regression, so fit only the usable lags.
    usable = np.isfinite(tau) & (tau > 0)
    if usable.sum() < 2:
        return np.nan

    # Slope of the log-log fit; the exponent is twice that slope.
    slope = np.polyfit(np.log(lags[usable]), np.log(tau[usable]), 1)[0]
    return slope * 2.0

#hurst(ticker)

In [None]:
#
# random time series
#
# The calendar is the same for every synthetic system, so build it once
# instead of on each loop iteration.
date = pd.date_range(start='2015-01', end='2016-10', freq='D')

# Local seeded generator: re-running the notebook reproduces the same walks
# and NumPy's global random state is left untouched.
rng = np.random.RandomState(42)

random_walks = []
for i in range(100):
    # Daily steps are integers drawn uniformly from [-100, 100]
    price = rng.randint(-100, 101, size=len(date))
    ser = pd.Series(price, index=date, name='rng_system' + str(i))
    # cumsum() turns the steps into a random-walk path and keeps the name
    random_walks.append(ser.cumsum())

# Assemble the frame in one step rather than inserting 100 columns one by one;
# the column labels come from the Series names, in creation order.
df = pd.concat(random_walks, axis=1)

In [None]:
# Only Cont Fut Flag
# When True, the EXO loading loop keeps only series whose name contains 'Fut';
# when False, every EXO returned by the storage is loaded.
only_cont_fut = False

In [None]:
# Loading global setting for MongoDB etc.
from scripts.settings import *

# A missing settings_local module is tolerated (presumably it holds optional
# per-machine overrides). Any other failure raised while importing it is no
# longer swallowed, so a broken local settings file is reported instead of
# being silently ignored.
try:
    from scripts.settings_local import *
except ImportError:
    pass

# MONGO_CONNSTR / MONGO_EXO_DB are expected to come from the star imports above.
storage = EXOStorage(MONGO_CONNSTR, MONGO_EXO_DB)
pp = pprint.PrettyPrinter(indent=4)

exo_list = storage.exo_list()
pp.pprint(exo_list)


# One column per loaded EXO, labelled with the EXO name.
df = pd.DataFrame()

for e in exo_list:
    exo_series, exo_dict = storage.load_series(e)

    # With the flag set, skip every EXO whose name does not contain 'Fut'.
    if only_cont_fut and 'Fut' not in exo_dict['name']:
        continue

    # Only the first column of the loaded frame is used.
    exo_series = exo_series[exo_series.columns[0]]
    # NOTE(review): column assignment aligns each series to the index set by
    # the first EXO stored in `df`; dates outside that index are dropped.
    df[exo_dict['name']] = exo_series
#exo_series.plot()

In [None]:
# Preview the first rows of the assembled price frame
df.head()

# Two-leg spreads

In [None]:
# Build every unordered pair of columns directly.
# Enumerating all subset sizes first would create 2**n tuples for n columns,
# of which only the two-element ones are kept afterwards; asking itertools for
# pairs only yields the same pairs, in the same order, at n*(n-1)/2 cost.
combinations_l = list(itertools.combinations(df.columns, 2))

In [None]:
# Wrap the tuples of column names in a Series so they can be filtered with pandas
combination_ser = pd.Series(combinations_l)

In [None]:
# Keep only the combinations made of exactly two columns and renumber them from 0
combination_ser = combination_ser[combination_ser.str.len() == 2].reset_index(drop=True)

In [None]:
# Show the last few column combinations
combination_ser.tail()

In [None]:
# For every pair of columns add two derived columns to `df`:
# the sum of the legs ('<first>+<second>') and their difference ('<first>-<second>').
for pair in combination_ser:
    first_leg, second_leg = pair

    df[first_leg + '+' + second_leg] = df[first_leg] + df[second_leg]
    df[first_leg + '-' + second_leg] = df[first_leg] - df[second_leg]

In [None]:
# Preview the frame, which now also holds the '+' and '-' spread columns
df.head()

In [None]:
# Per-column statistics: one dict of metrics per column of `df`, with the
# column names collected in a parallel list (used later as the frame index).
temp_data_l = []
temp_name_l = []

for col in df.columns:
    ticker = df[col].dropna()

    # Augmented Dickey-Fuller test with the lag count capped at 1.
    adf = ts.adfuller(ticker, maxlag=1)
    adf_teststats = adf[0]
    adf_pvalue = adf[1]

    d = {
        # Pass the raw values: NumPy ufuncs align two pandas Series on their
        # index labels, which breaks the lagged differences inside hurst().
        'hurst': hurst(ticker.values),
        'adf_teststats': adf_teststats,
        'adf_pvalue': np.round(adf_pvalue, decimals=4),
    }

    for period in [2, 5, 10]:
        # NOTE(review): the first positional argument of ewm() is the centre of
        # mass (`com`), not the span; spelled out here, behaviour unchanged.
        ema = ticker.ewm(com=period).mean()

        crossup = CrossUp(ticker, ema)
        crossdn = CrossDown(ticker, ema)

        # Positional arrays: integer `[]` lookups on a label-indexed Series are
        # deprecated in recent pandas, while the loop below is purely positional.
        up_flags = np.asarray(crossup)
        dn_flags = np.asarray(crossdn)

        # The explicit comparisons keep the previous treatment of entries that
        # are neither True nor False (they leave the counter untouched).
        # TODO confirm CrossUp/CrossDown return pure booleans, then simplify.
        crossed = (up_flags == True) | (dn_flags == True)
        quiet = (up_flags == False) | (dn_flags == False)

        days_wo_crossings_count = np.zeros_like(ticker.values)
        days_wo_crossings_counter = 0

        for pos in range(len(ticker)):
            if crossed[pos]:
                # A crossing in either direction restarts the streak.
                days_wo_crossings_counter = 0
            elif quiet[pos]:
                days_wo_crossings_counter += 1
            else:
                continue
            days_wo_crossings_count[pos] = days_wo_crossings_counter

        # Average length of the running "days since last crossing" counter
        d['avg_days_wo_crossings_ema' + str(period)] = pd.Series(days_wo_crossings_count).mean()

    temp_data_l.append(d)
    temp_name_l.append(ticker.name)

In [None]:
# Scratch check: rounded ADF p-value of the last column processed by the
# statistics loop (`adf_pvalue` is the value left over from its final iteration).
np.round(adf_pvalue, decimals=2)

In [None]:
# Full ADF result for the ZN_ContFut column, called with adfuller's default
# arguments (the statistics loop passes 1 as the second argument instead).
ts.adfuller(df.ZN_ContFut.dropna())

In [None]:
# One row per series (indexed by its name); rows with any missing metric are dropped.
ticker_stats_df = pd.DataFrame(temp_data_l, index=temp_name_l).dropna()

# Add up every per-EMA "days without crossings" average into one ranking score.
crossing_columns = ticker_stats_df.filter(like='crossings', axis=1)
ticker_stats_df['avg_days_wo_crossings_sum'] = crossing_columns.sum(axis=1)

# Highest score first; the displayed copy gets a positional index so a row
# number can be read off for the selector cell.
sorted_df = ticker_stats_df.sort_values(by='avg_days_wo_crossings_sum', ascending=False)
sorted_df.reset_index()

In [None]:
#
# Select an index of ticker or spread from DF above
#
# Row position in `sorted_df` of the series to inspect
i = 10

selected_name = sorted_df.index[i]
print(selected_name)
df[selected_name].plot()

In [None]:
# Visual check of the "days without EMA crossings" counter for the series
# chosen by the selector `i`.
# The ranking already stored in `sorted_df` is reused, and the loop below uses
# its own index variable: overwriting `i` would make a re-run of this cell
# pick a different series (or fail with an out-of-range position).
ticker = df[sorted_df.index[i]]

# NOTE(review): the first positional argument of ewm() is the centre of mass
# (`com`), not the span; spelled out here, behaviour unchanged.
ema = ticker.ewm(com=10).mean()

crossup = CrossUp(ticker, ema)
crossdn = CrossDown(ticker, ema)

ticker.plot()
crossup.plot(secondary_y=True)
crossdn.plot(secondary_y=True)

# Positional arrays: integer `[]` lookups on a label-indexed Series are
# deprecated in recent pandas, while the loop below is purely positional.
up_flags = np.asarray(crossup)
dn_flags = np.asarray(crossdn)

# The explicit comparisons keep the previous treatment of entries that are
# neither True nor False (they leave the counter untouched).
# TODO confirm CrossUp/CrossDown return pure booleans, then simplify.
crossed = (up_flags == True) | (dn_flags == True)
quiet = (up_flags == False) | (dn_flags == False)

days_wo_crossings_count = np.zeros_like(ticker.values)
days_wo_crossings_counter = 0

for pos in range(len(ticker)):
    if crossed[pos]:
        # A crossing in either direction restarts the streak.
        days_wo_crossings_counter = 0
    elif quiet[pos]:
        days_wo_crossings_counter += 1
    else:
        continue
    days_wo_crossings_count[pos] = days_wo_crossings_counter

# NOTE(review): unlike the statistics loop, this series is not passed through
# dropna(), so the mean drawn below can differ from the tabulated one.
ticker.plot()
ema.plot()

days_wo_crossings_ser = pd.Series(days_wo_crossings_count, index=ticker.index)
days_wo_crossings_ser.plot(secondary_y=True)

# Horizontal guides at the mean and the 80th percentile of the counter
axhline(days_wo_crossings_ser.mean())

axhline(days_wo_crossings_ser.quantile(0.8))