# dataset_util_test

In [1]:
import sys
import os
from os import sep
from os.path import dirname, realpath
from pathlib import Path
from functools import reduce
import logging
# Route root-logger records of level DEBUG and above to stdout so they appear in the cell output.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

def get_cwd(fname, subdir, crunch_dir=realpath(Path.home()) +sep +'crunch' +sep):
    """
    Build the location string of a file inside the crunch project.

    The three pieces are glued together in the order crunch_dir, subdir, fname by plain
    concatenation; no path separator is inserted, so crunch_dir and subdir need to carry
    their own trailing separator.

    Args:
        fname: name of the file (e.g. the notebook's file name)
        subdir: sub-directory of the project that holds the file, with trailing separator
        crunch_dir: project root with trailing separator; the default is the 'crunch'
            directory under the user's home directory, computed once at definition time

    Returns:
        The concatenated path string.
    """
    project_subdir = crunch_dir + subdir
    return project_subdir + fname
    
def fix_path(cwd):
    """
    Patch the interpreter state so a notebook can run like any other script in crunch.

    Overwrites sys.argv[0] with `cwd`, then makes sure the absolute path of the parent of
    the current working directory is on sys.path (it is appended only when missing).

    Args:
        cwd: value to store in sys.argv[0]
    """
    sys.argv[0] = cwd
    parent_dir = os.path.abspath('..')
    if parent_dir in sys.path:
        # Already importable from the parent directory; nothing to add.
        return
    sys.path.append(parent_dir)

# Set sys.argv[0] to this notebook's path under the project's 'recon' sub-directory and put the
# parent of the working directory on sys.path, before the project-local imports further down.
fix_path(get_cwd('dataset_util_test.ipynb', 'recon' +sep))

import numpy as np
import pandas as pd
from dask import delayed

from ipywidgets import interact, interactive, fixed
from IPython.display import display

# Display limits for rendered frames: up to 100 rows and 50 columns.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 50

from common_util import MUTATE_DIR, DT_HOURLY_FREQ, DT_CAL_DAILY_FREQ, is_valid, is_type, null_fn, identity_fn, pd_abs, pd_is_empty, ser_range_center_clip, pd_common_idx_rows, midx_split
from common_util import load_json, dump_json, ser_shift, df_sk_mw_transform, arr_nonzero, df_rows_in_year, get_variants, remove_dups_list, list_get_dict, is_empty_df, search_df, benchmark
from data.data_api import DataAPI
from recon.common import DATASET_DIR
from recon.dataset_util import *

DEBUG:matplotlib:$HOME=/home/kev
DEBUG:matplotlib:CONFIGDIR=/home/kev/.config/matplotlib
DEBUG:matplotlib:matplotlib data path: /home/kev/miniconda3/lib/python3.7/site-packages/matplotlib/mpl-data
DEBUG:matplotlib:loaded rc file /home/kev/miniconda3/lib/python3.7/site-packages/matplotlib/mpl-data/matplotlibrc
DEBUG:matplotlib:matplotlib version 3.1.0
DEBUG:matplotlib:interactive is False
DEBUG:matplotlib:platform is linux


CRITICAL:root:script location: /home/kev/crunch/recon/dataset_util_test.ipynb
CRITICAL:root:using project dir: /home/kev/crunch/


## Init DataAPI

In [2]:
# NOTE(review): __init__ is called on the class itself rather than on an instance; presumably
# DataAPI keeps class-level state that this call populates -- confirm against data.data_api.
DataAPI.__init__()
# Same logging setup as in the first cell. NOTE(review): basicConfig is documented to do nothing
# when the root logger already has handlers, so this may be redundant -- confirm whether
# DataAPI.__init__() alters the logging configuration.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

## System Test

In [5]:
# Smoke test: run prep_dataset on 'xg1.json' and list the keys found under ['features']['dfs'].
# NOTE(review): where 'xg1.json' is looked up is decided inside prep_dataset -- confirm the location.
list(prep_dataset('xg1.json')['features']['dfs'].keys())

[['dow_jones',
  'dc',
  'dc',
  'trmi3_etf3_news_forecast_dc',
  'trmi3_etf3_news_forecast_dc'],
 ['dow_jones',
  'dc',
  'dc',
  'trmi3_etf3_news_fundamental_dc',
  'trmi3_etf3_news_fundamental_dc'],
 ['dow_jones',
  'dc',
  'dc',
  'trmi3_etf3_news_sentiment_dc',
  'trmi3_etf3_news_sentiment_dc'],
 ['dow_jones',
  'dc',
  'dc',
  'trmi3_etf3_social_forecast_dc',
  'trmi3_etf3_social_forecast_dc'],
 ['dow_jones',
  'dc',
  'dc',
  'trmi3_etf3_social_fundamental_dc',
  'trmi3_etf3_social_fundamental_dc'],
 ['dow_jones',
  'dc',
  'dc',
  'trmi3_etf3_social_sentiment_dc',
  'trmi3_etf3_social_sentiment_dc'],
 ['dow_jones',
  'dc',
  'dc',
  'trmi3_mkt3_news_bank_dc',
  'trmi3_mkt3_news_bank_dc'],
 ['dow_jones',
  'dc',
  'dc',
  'trmi3_mkt3_news_bond_dc',
  'trmi3_mkt3_news_bond_dc'],
 ['dow_jones',
  'dc',
  'dc',
  'trmi3_mkt3_news_stock_dc',
  'trmi3_mkt3_news_stock_dc'],
 ['dow_jones',
  'dc',
  'dc',
  'trmi3_mkt3_social_bank_dc',
  'trmi3_mkt3_social_bank_dc'],
 ['dow_jones',
  '