In [1]:
import sys
import os
from os import sep
from os.path import dirname, realpath
from pathlib import Path
import logging

def _ensure_trailing_sep(part):
    """Return `part` ending in a path separator; an empty string is returned unchanged."""
    if not part or part.endswith(sep) or (os.altsep and part.endswith(os.altsep)):
        return part
    return part + sep

def get_cwd(fname, subdir, crunch_dir=realpath(Path.home()) +sep +'crunch' +sep):
    """
    Convenience function to build the path string of a notebook file inside crunch.

    Jupyter Notebook in Anaconda invokes the Python interpreter in Anaconda's subdirectory,
    so sys.argv[0] does not point at the notebook. The string built here is what fix_path
    writes into sys.argv[0] to compensate. In the future a better way to do this should be
    preferred.

    Args:
        fname: file name of the notebook, e.g. 'test.ipynb'.
        subdir: sub directory of crunch_dir that contains the file. A trailing separator
            is optional; one is added when missing.
        crunch_dir: root directory of the project, defaults to '<home>/crunch/'. A trailing
            separator is optional; one is added when missing.

    Returns:
        The string crunch_dir + subdir + fname with the parts separated by exactly the
        separators needed to keep them apart.
    """
    # Plain concatenation silently glued the parts together ('crunch' + 'recon' + 'f')
    # whenever a caller forgot the trailing separator, so normalise both directory parts.
    return _ensure_trailing_sep(crunch_dir) +_ensure_trailing_sep(subdir) +fname
    
def fix_path(cwd):
    """
    Patch the interpreter state so a jupyter notebook behaves like any script in crunch.

    sys.argv[0] is overwritten with `cwd`, and the absolute path of the parent of the
    current working directory is appended to sys.path unless it is already listed there.
    """
    sys.argv[0] = cwd
    parent_dir = os.path.abspath('..')
    if parent_dir in sys.path:
        # Already importable from the parent directory; avoid duplicate entries.
        return
    sys.path.append(parent_dir)

# Point sys.argv[0] at crunch/recon/test.ipynb and put the parent of the working directory
# on sys.path; this runs before the project imports further down in this cell.
fix_path(get_cwd('test.ipynb', 'recon' +sep))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numba import jit, vectorize, float64

from ipywidgets import interact, interactive, fixed
from IPython.display import display

# Raise the pandas display limits so that wide/long frames are shown with less truncation.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 100)

from common_util import DT_HOURLY_FREQ, DT_CAL_DAILY_FREQ, DT_BIZ_DAILY_FREQ, get_custom_biz_freq, query_df, search_df, chained_filter, benchmark
from data.data_api import DataAPI
from data.access_util import col_subsetters as cs
from ignore.common import dum

In [2]:
	# Route log records (DEBUG and above) to stdout so they appear in the cell output.
	logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)

	# Row filter handed to search_df; presumably keeps rows whose 'id' is before 2018 - TODO confirm.
	date_range = {'id': ('lt', 2018)}

	# Record attributes used to select the raw US equity index data.
	search_terms = dict(stage='raw', raw_cat='us_equity_index')

	# Both mappings are keyed by the record's `basis` attribute.
	price_dfs = {}
	price_recs = {}
	for rec, df in DataAPI.generate(search_terms):
		price_recs[rec.basis] = rec
		row_selector = search_df(df, date_range)
		price_dfs[rec.basis] = df.loc[row_selector]
	logging.info('pricing loaded')

INFO:root:pricing loaded


In [3]:
	# Record attributes used to select the 'thresh' mutation of the US equity index data.
	search_terms = dict(stage='mutate', mutate_type='thresh', raw_cat='us_equity_index')

	# Both mappings are keyed by the record's `basis` attribute; the row filter reuses the
	# `date_range` defined in the price-loading cell.
	thresh_dfs = {}
	thresh_recs = {}
	for rec, df in DataAPI.generate(search_terms):
		thresh_recs[rec.basis] = rec
		row_selector = search_df(df, date_range)
		thresh_dfs[rec.basis] = df.loc[row_selector]
	logging.info('threshes loaded')

                           pba_oc_fth-af_spread_savg  \
id                                                     
1998-01-01 01:00:00+00:00                        NaN   
1998-01-01 02:00:00+00:00                        NaN   
1998-01-01 03:00:00+00:00                        NaN   
1998-01-01 04:00:00+00:00                        NaN   
1998-01-01 05:00:00+00:00                        NaN   
1998-01-01 06:00:00+00:00                        NaN   
1998-01-01 07:00:00+00:00                        NaN   
1998-01-01 08:00:00+00:00                        NaN   
1998-01-01 09:00:00+00:00                        NaN   
1998-01-01 10:00:00+00:00                        NaN   
1998-01-01 11:00:00+00:00                        NaN   
1998-01-01 12:00:00+00:00                        NaN   
1998-01-01 13:00:00+00:00                        NaN   
1998-01-01 14:00:00+00:00                        NaN   
1998-01-01 15:00:00+00:00                        NaN   
1998-01-01 16:00:00+00:00                       

                           pba_oc_fth-af_spread_savg  \
id                                                     
1998-01-01 01:00:00+00:00                        NaN   
1998-01-01 02:00:00+00:00                        NaN   
1998-01-01 03:00:00+00:00                        NaN   
1998-01-01 04:00:00+00:00                        NaN   
1998-01-01 05:00:00+00:00                        NaN   
1998-01-01 06:00:00+00:00                        NaN   
1998-01-01 07:00:00+00:00                        NaN   
1998-01-01 08:00:00+00:00                        NaN   
1998-01-01 09:00:00+00:00                        NaN   
1998-01-01 10:00:00+00:00                        NaN   
1998-01-01 11:00:00+00:00                        NaN   
1998-01-01 12:00:00+00:00                        NaN   
1998-01-01 13:00:00+00:00                        NaN   
1998-01-01 14:00:00+00:00                        NaN   
1998-01-01 15:00:00+00:00                        NaN   
1998-01-01 16:00:00+00:00                       

                           pba_oc_fth-af_spread_savg  \
id                                                     
1998-01-01 01:00:00+00:00                        NaN   
1998-01-01 02:00:00+00:00                        NaN   
1998-01-01 03:00:00+00:00                        NaN   
1998-01-01 04:00:00+00:00                        NaN   
1998-01-01 05:00:00+00:00                        NaN   
1998-01-01 06:00:00+00:00                        NaN   
1998-01-01 07:00:00+00:00                        NaN   
1998-01-01 08:00:00+00:00                        NaN   
1998-01-01 09:00:00+00:00                        NaN   
1998-01-01 10:00:00+00:00                        NaN   
1998-01-01 11:00:00+00:00                        NaN   
1998-01-01 12:00:00+00:00                        NaN   
1998-01-01 13:00:00+00:00                        NaN   
1998-01-01 14:00:00+00:00                        NaN   
1998-01-01 15:00:00+00:00                        NaN   
1998-01-01 16:00:00+00:00                       

                           pba_oc_fth-af_spread_savg  \
id                                                     
1998-01-01 01:00:00+00:00                        NaN   
1998-01-01 02:00:00+00:00                        NaN   
1998-01-01 03:00:00+00:00                        NaN   
1998-01-01 04:00:00+00:00                        NaN   
1998-01-01 05:00:00+00:00                        NaN   
1998-01-01 06:00:00+00:00                        NaN   
1998-01-01 07:00:00+00:00                        NaN   
1998-01-01 08:00:00+00:00                        NaN   
1998-01-01 09:00:00+00:00                        NaN   
1998-01-01 10:00:00+00:00                        NaN   
1998-01-01 11:00:00+00:00                        NaN   
1998-01-01 12:00:00+00:00                        NaN   
1998-01-01 13:00:00+00:00                        NaN   
1998-01-01 14:00:00+00:00                        NaN   
1998-01-01 15:00:00+00:00                        NaN   
1998-01-01 16:00:00+00:00                       

In [4]:
# List the basis names for which a thresh frame was loaded.
print(thresh_dfs.keys())

dict_keys(['dow_jones_raw_0', 'sp_500_raw_1', 'nasdaq_100_raw_2', 'russell_2000_raw_3'])


In [6]:
# Work on the S&P 500 thresh frame only, keeping just the rows that contain no NaN.
thresh = thresh_dfs['sp_500_raw_1'].dropna()

In [8]:
# Derive the frequency object later passed to thresh.shift(freq=...).
# NOTE(review): presumably a business-day offset built from the dates present in `thresh`;
# confirm against common_util.get_custom_biz_freq.
custom_freq = get_custom_biz_freq(thresh)

In [17]:
# Shift the index (not the values) of `thresh` by one `custom_freq` step, then forward-fill
# any remaining NaN. `.ffill()` replaces `fillna(method='ffill')`, whose `method` argument is
# deprecated in recent pandas releases; the result is the same.
shifted = thresh.shift(freq=custom_freq).ffill()



In [18]:
# Display the shifted frame (truncated according to the pandas display options set above).
shifted

Unnamed: 0_level_0,pba_oc_fth-af_spread_savg,pba_oc_fth-af_spread_std,pba_oc_fth-af_spread_eavg,pba_oc_fth-af_spread_estd,pba_oc_fth-af_spread_smed,pba_oc_fth-af_spread_smax,pba_oc_fth-af_spread_smin,pba_oc_fth-af_spread_ssec,pba_oc_fth-af_spread_sfin,pba_oc_fth-af_spread_swhl,pba_oc_fth-of_spread_prev,pba_oc_fth-of_spread_xavg,pba_oc_fth-of_spread_xmed,pba_oc_fth-of_spread_xstd,pba_oc_fth-of_spread_xmax,pba_oc_fth-of_spread_xmin,pba_oc_fth-af_ansr_savg,pba_oc_fth-af_ansr_std,pba_oc_fth-af_ansr_eavg,pba_oc_fth-af_ansr_estd,pba_oc_fth-af_ansr_smed,pba_oc_fth-af_ansr_smax,pba_oc_fth-af_ansr_smin,pba_oc_fth-af_ansr_ssec,pba_oc_fth-af_ansr_sfin,...,vol_ac_fth-af_ansr_ssec,vol_ac_fth-af_ansr_sfin,vol_ac_fth-af_ansr_swhl,vol_ac_fth-of_ansr_prev,vol_ac_fth-of_ansr_xavg,vol_ac_fth-of_ansr_xmed,vol_ac_fth-of_ansr_xstd,vol_ac_fth-of_ansr_xmax,vol_ac_fth-of_ansr_xmin,vol_ac_fth-af_log_savg,vol_ac_fth-af_log_std,vol_ac_fth-af_log_eavg,vol_ac_fth-af_log_estd,vol_ac_fth-af_log_smed,vol_ac_fth-af_log_smax,vol_ac_fth-af_log_smin,vol_ac_fth-af_log_ssec,vol_ac_fth-af_log_sfin,vol_ac_fth-af_log_swhl,vol_ac_fth-of_log_prev,vol_ac_fth-of_log_xavg,vol_ac_fth-of_log_xmed,vol_ac_fth-of_log_xstd,vol_ac_fth-of_log_xmax,vol_ac_fth-of_log_xmin
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
1998-01-05 16:00:00+00:00,2.03000,1.860706,0.757927,0.379092,1.695,6.34,0.39,6.34,1.47,4.57,0.66,0.740000,0.660,0.396106,1.17,0.39,0.002095,0.001925,0.000781,0.000390,0.001744,0.006556,0.000402,0.006556,0.001510,...,0.002022,0.020431,0.064580,0.009075,0.010865,0.010865,0.002532,0.012656,0.009075,0.690776,0.005451,0.693143,0.007690,0.691016,0.697674,0.682879,0.692136,0.682879,0.660324,0.697674,0.692237,0.692237,0.007690,0.697674,0.686799
1998-01-05 17:00:00+00:00,2.03000,1.860706,1.235308,0.813250,1.695,6.34,0.39,6.34,1.47,4.57,2.12,1.085000,0.915,0.762037,2.12,0.39,0.002095,0.001925,0.001274,0.000840,0.001744,0.006556,0.000402,0.006556,0.001510,...,0.002022,0.020431,0.064580,0.006490,0.009407,0.009075,0.003096,0.012656,0.006490,0.690776,0.005451,0.691684,0.005357,0.691016,0.697674,0.682879,0.692136,0.682879,0.660324,0.689897,0.691457,0.689897,0.005603,0.697674,0.686799
1998-01-05 18:00:00+00:00,2.03000,1.860706,1.448000,0.747679,1.695,6.34,0.39,6.34,1.47,4.57,1.92,1.252000,1.170,0.758268,2.12,0.39,0.002095,0.001925,0.001493,0.000771,0.001744,0.006556,0.000402,0.006556,0.001510,...,0.002022,0.020431,0.064580,0.004247,0.008117,0.007783,0.003612,0.012656,0.004247,0.690776,0.005451,0.693068,0.004478,0.691016,0.697674,0.682879,0.692136,0.682879,0.660324,0.695269,0.692410,0.692583,0.004956,0.697674,0.686799
1998-01-05 19:00:00+00:00,2.03000,1.860706,1.654062,0.719455,1.695,6.34,0.39,6.34,1.47,4.57,2.17,1.405000,1.545,0.774874,2.17,0.39,0.002095,0.001925,0.001706,0.000742,0.001744,0.006556,0.000402,0.006556,0.001510,...,0.002022,0.020431,0.064580,0.002022,0.006898,0.006490,0.004149,0.012656,0.002022,0.690776,0.005451,0.692741,0.003545,0.691016,0.697674,0.682879,0.692136,0.682879,0.660324,0.692136,0.692355,0.692136,0.004294,0.697674,0.686799
1998-01-05 20:00:00+00:00,2.03000,1.860706,2.911971,2.368423,1.695,6.34,0.39,6.34,1.47,4.57,6.34,2.110000,1.920,1.994877,6.34,0.39,0.002095,0.001925,0.003008,0.002451,0.001744,0.006556,0.000402,0.006556,0.001510,...,0.002022,0.020431,0.064580,0.020431,0.009154,0.007783,0.006656,0.020431,0.002022,0.690776,0.005451,0.689492,0.005969,0.691016,0.697674,0.682879,0.692136,0.682879,0.660324,0.682879,0.690776,0.691016,0.005451,0.697674,0.682879
1998-01-06 16:00:00+00:00,2.21500,1.563092,2.516425,1.276866,2.225,4.99,0.18,1.82,1.13,2.03,3.17,2.323333,3.050,1.363867,3.17,0.75,0.002265,0.001595,0.002574,0.001305,0.002273,0.005095,0.000185,0.001868,0.001158,...,0.010619,0.010135,0.016997,1.120592,0.562117,0.562117,0.789803,1.120592,0.003642,0.767168,0.181749,0.951898,0.315863,0.694971,1.138023,0.687823,0.687823,0.688067,0.684612,1.138023,0.914674,0.914674,0.315863,1.138023,0.691324
1998-01-06 17:00:00+00:00,2.21500,1.563092,2.556231,0.974757,2.225,4.99,0.18,1.82,1.13,2.03,2.63,2.400000,2.840,1.124100,3.17,0.75,0.002265,0.001595,0.002611,0.000996,0.002273,0.005095,0.000185,0.001868,0.001158,...,0.010619,0.010135,0.016997,0.012045,0.378760,0.012045,0.642460,1.120592,0.003642,0.767168,0.181749,0.838278,0.257265,0.694971,1.138023,0.687823,0.687823,0.688067,0.684612,0.699151,0.842833,0.699151,0.255672,1.138023,0.691324
1998-01-06 18:00:00+00:00,2.21500,1.563092,3.312254,1.500894,2.225,4.99,0.18,1.82,1.13,2.03,4.99,2.918000,3.050,1.513050,4.99,0.75,0.002265,0.001595,0.003382,0.001532,0.002273,0.005095,0.000185,0.001868,0.001158,...,0.010619,0.010135,0.016997,0.010970,0.286812,0.011507,0.555866,1.120592,0.003642,0.767168,0.181749,0.784333,0.207099,0.694971,1.138023,0.687823,0.687823,0.688067,0.684612,0.698617,0.806779,0.698884,0.220858,1.138023,0.691324
1998-01-06 19:00:00+00:00,2.21500,1.563092,2.418295,2.010446,2.225,4.99,0.18,1.82,1.13,2.03,0.18,2.461667,2.840,1.755248,4.99,0.18,0.002265,0.001595,0.002470,0.002052,0.002273,0.005095,0.000185,0.001868,0.001158,...,0.010619,0.010135,0.016997,0.010619,0.231574,0.010970,0.496988,1.120592,0.003642,0.767168,0.181749,0.750461,0.170661,0.694971,1.138023,0.687823,0.687823,0.688067,0.684612,0.687823,0.782988,0.698617,0.198529,1.138023,0.687823
1998-01-06 20:00:00+00:00,2.21500,1.563092,2.257687,1.724452,2.225,4.99,0.18,1.82,1.13,2.03,1.82,2.370000,2.630,1.620566,4.99,0.18,0.002265,0.001595,0.002308,0.001760,0.002273,0.005095,0.000185,0.001868,0.001158,...,0.010619,0.010135,0.016997,0.010135,0.194667,0.010795,0.453619,1.120592,0.003642,0.767168,0.181749,0.729904,0.141430,0.694971,1.138023,0.687823,0.687823,0.688067,0.684612,0.688067,0.767168,0.694971,0.181749,1.138023,0.687823


In [14]:
# Group the rows of `thresh` into business-day ('B') bins on its index.
# NOTE(review): this rebinds `shifted`, which an earlier cell assigns to the shifted frame,
# to a groupby object; this cell's execution count (14) is also lower than that of the cells
# above it, so the saved outputs may not match a top-to-bottom run. Consider a distinct name.
shifted = thresh.groupby(pd.Grouper(freq='B'))