In [43]:
import sys
sys.path.append('../')

import pandas as pd
import numpy as np
import math
from timeit import default_timer as timer
from datetime import datetime, timedelta
import numba
from ensemble_processing import load_data, load, save
from tsfresh import extract_relevant_features
from tsfresh.utilities.dataframe_functions import make_forecasting_frame
from tsfresh.utilities.dataframe_functions import impute

In [2]:
pd.options.display.max_rows=500

In [44]:
# experiment_symbols = ['WAX', 'IVV', 'WESN', 'IAA', 'XRO', 'MTB', 'MXR', 'RCP', 'ISX', 'IMI']
experiment_symbols = ['WAX']

In [46]:
all_df = pd.read_pickle('../data/ml-20180714-data.pkl.gz', compression='gzip')
all_df.head(100)

Unnamed: 0,symbol,quoteDate,lastTradePriceOnly,adjustedPrice,volume,daysHigh,daysLow,previousClose,change,changeInPercent,...,Future12WeekReturn,Future12WeekRiskAdjustedReturn,Future26WeekDividend,Future26WeekPrice,Future26WeekReturn,Future26WeekRiskAdjustedReturn,Future52WeekDividend,Future52WeekPrice,Future52WeekReturn,Future52WeekRiskAdjustedReturn
0,CG1,2018-05-14,0.43,0.43,27759,0.43,0.43,0.43,,,...,,,,,,,,,,
1,CG1,2018-05-15,0.425,0.425,45633,0.43,0.425,0.43,-0.005,-0.011628,...,,,,,,,,,,
2,CG1,2018-05-17,0.42,0.42,287372,0.43,0.415,0.425,-0.005,-0.011765,...,,,,,,,,,,
3,CG1,2018-05-18,0.465,0.465,42373,0.465,0.42,0.42,0.045,0.107143,...,,,,,,,,,,
4,CG1,2018-05-21,0.51,0.51,229346,0.51,0.47,0.465,0.045,0.096774,...,,,,,,,,,,
5,CG1,2018-05-22,0.5,0.5,2378,0.5,0.5,0.51,-0.01,-0.019608,...,,,,,,,,,,
6,CG1,2018-05-23,0.49,0.49,92622,0.5,0.49,0.5,-0.01,-0.02,...,,,,,,,,,,
7,CG1,2018-05-24,0.485,0.485,2922,0.485,0.485,0.49,-0.005,-0.010204,...,,,,,,,,,,
8,CG1,2018-05-25,0.47,0.47,20404,0.47,0.465,0.485,-0.015,-0.030928,...,,,,,,,,,,
9,CG1,2018-05-28,0.465,0.465,8805,0.465,0.465,0.47,-0.005,-0.010638,...,,,,,,,,,,


In [47]:
train_x_dfs = []

for symbol in experiment_symbols:
    # Filter to model data for this symbol and re-set the pandas indexes
    model_data = all_df.loc[all_df['symbol'] == symbol]

    print('Symbol:', symbol, 'number of records:', len(model_data))
    train_x_dfs.append(model_data)

# Create concatenated dataframe with all data
print('Creating concatenated dataframe')
df_train_x = pd.concat(train_x_dfs)

del train_x_dfs
del all_df

Symbol: WAX number of records: 2294
Creating concatenated dataframe


In [8]:
df_train_x.describe()

Unnamed: 0,lastTradePriceOnly,adjustedPrice,volume,daysHigh,daysLow,previousClose,change,changeInPercent,52WeekHigh,52WeekLow,...,Future12WeekReturn,Future12WeekRiskAdjustedReturn,Future26WeekDividend,Future26WeekPrice,Future26WeekReturn,Future26WeekRiskAdjustedReturn,Future52WeekDividend,Future52WeekPrice,Future52WeekReturn,Future52WeekRiskAdjustedReturn
count,18275.0,18229.0,18275.0,18273.0,18273.0,18275.0,17932.0,17950.0,18275.0,18273.0,...,16543.0,16543.0,15936.0,15936.0,15936.0,15936.0,14882.0,14882.0,14882.0,14882.0
mean,38.88829,37.297523,616469.3,39.083229,38.725643,38.908596,-0.019991,0.288922,40.229813,30.857351,...,1.608522,-347.826843,0.240974,38.880257,1.783825,-693.938049,0.5205,40.226662,7.269506,-1078.868652
std,73.050064,71.151283,3872008.0,73.286949,72.839081,73.070587,0.586617,0.462492,74.174255,59.839161,...,74.247093,1415.953491,0.565421,72.971367,87.106598,2999.984619,1.116301,74.804466,145.029449,4683.137695
min,0.0006,0.00093,2.0,0.0006,0.0006,0.0006,-20.100006,-1.0,0.002,0.0,...,-88.571426,-19250.304688,0.0,0.00093,-93.333336,-36588.335938,0.0,0.00093,-95.002502,-53526.023438
25%,0.011,0.06417,11400.0,0.01157,0.01079,0.011,-0.01,-0.005428,0.231,0.042,...,-14.301637,-52.626282,0.0,0.063,-27.028788,-73.209957,0.0,0.035,-49.997913,-139.240852
50%,0.845,0.839,58100.0,0.85,0.84,0.84,0.0,0.005291,1.1842,0.5734,...,0.0,0.0,0.0,0.7862,0.0,0.0,0.0,0.64167,0.0,0.0
75%,42.689999,39.52,241250.0,42.939999,42.490002,42.700001,0.005,1.0,42.200001,33.900002,...,8.481688,0.106535,0.05,40.349998,13.922661,0.084489,0.117857,41.307501,24.407178,0.094072
max,364.299988,364.299988,182500300.0,364.480011,363.299988,364.299988,13.290009,2.0,364.480011,305.790009,...,4233.333496,1913.516724,4.45821,359.850006,3400.0,1340.731079,6.16798,359.850006,4900.0,4466.275391


In [59]:
df_time_series = pd.DataFrame()
df_time_series['symbol'] = df_train_x['symbol']
df_time_series['quoteDate'] = df_train_x['quoteDate']
df_time_series['Future8WeekReturn'] = df_train_x['Future8WeekReturn']
df_time_series.dropna(axis=0, how='any', inplace=True)
df_time_series.set_index('quoteDate', inplace=True)
df_time_series

Unnamed: 0_level_0,symbol,Future8WeekReturn
quoteDate,Unnamed: 1_level_1,Unnamed: 2_level_1
2007-07-02,WAX,-5.080055
2007-07-03,WAX,-5.537912
2007-07-04,WAX,-5.080055
2007-07-05,WAX,-3.380115
2007-07-06,WAX,-3.814983
2007-07-09,WAX,-4.664954
2007-07-10,WAX,-4.625984
2007-07-11,WAX,-2.990431
2007-07-12,WAX,-2.549911
2007-07-13,WAX,-3.814983


In [63]:
df_shift, y = make_forecasting_frame(df_time_series["Future8WeekReturn"], kind="return", 
                                     max_timeshift=40, rolling_direction=1)



In [64]:
df_shift

Unnamed: 0,time,value,id,kind
80145,2007-07-02,-5.080055,2007-07-03,return
78071,2007-07-02,-5.080055,2007-07-04,return
80146,2007-07-03,-5.537912,2007-07-04,return
75998,2007-07-02,-5.080055,2007-07-05,return
78072,2007-07-03,-5.537912,2007-07-05,return
80147,2007-07-04,-5.080055,2007-07-05,return
73926,2007-07-02,-5.080055,2007-07-06,return
75999,2007-07-03,-5.537912,2007-07-06,return
78073,2007-07-04,-5.080055,2007-07-06,return
80148,2007-07-05,-3.380115,2007-07-06,return


In [65]:
y

quoteDate
2007-07-03    -5.537912
2007-07-04    -5.080055
2007-07-05    -3.380115
2007-07-06    -3.814983
2007-07-09    -4.664954
2007-07-10    -4.625984
2007-07-11    -2.990431
2007-07-12    -2.549911
2007-07-13    -3.814983
2007-07-16    -3.814983
2007-07-17    -2.990431
2007-07-18    -4.267567
2007-07-19    -4.625984
2007-07-20    -4.684399
2007-07-23    -3.830885
2007-07-24    -4.684399
2007-07-25    -4.230085
2007-07-26    -2.990431
2007-07-27    -3.409091
2007-07-30    -1.292407
2007-07-31    -1.286950
2007-08-01    -2.160743
2007-08-02    -2.573899
2007-08-03    -2.151619
2007-08-06    -0.851927
2007-08-07    -0.851927
2007-08-08    -0.851927
2007-08-09    -1.709230
2007-08-10     0.000000
2007-08-13     0.872211
2007-08-14     1.303728
2007-08-15     3.095975
2007-08-16     9.719222
2007-08-17     7.718405
2007-08-20    15.566773
2007-08-21    15.052163
2007-08-22    14.585666
2007-08-23    12.043541
2007-08-24    13.088044
2007-08-27    10.680133
2007-08-28    11.680185
2007-0

In [66]:
X = extract_relevant_features(df_shift, y, column_id="id", column_sort="time", column_value="value", 
                              # impute_function=impute, 
                              ml_task='regression', show_warnings=True)

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  similarity_ratio = A / B
  return sum_product / ((len(x) - lag) * np.var(x))
  complex_result = (np.issubdtype(in1.dtype, complex) or
  np.issubdtype(in2.dtype, complex))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  slope = r_num / ssxm
  t = r * np.sqrt(df / ((1.0 - r + TINY)*(1.0 + r + TINY)))
  sterrest = np.sqrt((1 - r**2) * ssym / ssxm / df)
  se = -1 * np.log(similarity_ratio)
  .format(nperseg, input_length))
  similarity_ratio = A / B
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  return np.polyfit(result.x_mean, result.y_mean, deg=m)
  return np.polyfit(result.x_mean, result.y_mean, deg=m)
  return sum_product / ((len(x) - lag) * np.var(x))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_lengt

  .format(nperseg, input_length))
  return np.dot(wresid, wresid) / self.df_resid
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))


  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nper

  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
  .format(nperseg, input_length))
Feature Extraction: 100%|██████████| 20/20 [01:13<00:00,  3.70s/it]
 'value__agg_linear_trend__f_agg_"max"__chunk_len_50__attr_"rvalue"'
 'value__agg_linear_trend__f_agg_"max"__chunk_len_50__attr_"slope"'
 'value__agg_linear_trend__f_agg_"max"__chunk_len_50__attr_"stderr"'
 'value__agg_linear_trend__f_agg_"mean"__chunk_len_50__attr_"intercept"'
 'value__agg_linear_trend__f_agg_"mean"__chunk_len_50__attr_"rvalue"'
 'value__agg_linear_trend__f_agg_"mean"__chunk_len_50__attr_"slope"'
 'value__agg_linear_trend__f_agg_"mean"__chunk_len_50__attr_"stderr"'
 'value__agg_linear_trend__f_agg_"min"__chunk_len_50__attr_"intercept"'
 'value__agg_linear_trend__f_agg_"min"__chunk_len_50__attr_"rvalue"'
 'value__agg_linear_trend__f_agg_"min"__chunk_len_50__attr_"slope"'
 'value__agg_linear_trend__f_agg_"min"__chunk_len_50__attr_"stderr"'
 'value__agg_linear_trend__f_a















In [67]:
X

variable,value__ar_coefficient__k_10__coeff_0,"value__change_quantiles__f_agg_""mean""__isabs_False__qh_1.0__ql_0.0",value__mean_change,value__time_reversal_asymmetry_statistic__lag_3,value__time_reversal_asymmetry_statistic__lag_1,"value__agg_linear_trend__f_agg_""min""__chunk_len_5__attr_""slope""",value__time_reversal_asymmetry_statistic__lag_2,"value__agg_linear_trend__f_agg_""mean""__chunk_len_5__attr_""slope""","value__agg_linear_trend__f_agg_""mean""__chunk_len_10__attr_""slope""","value__linear_trend__attr_""slope""",...,"value__augmented_dickey_fuller__attr_""pvalue""","value__augmented_dickey_fuller__attr_""teststat""",value__longest_strike_above_mean,"value__cwt_coefficients__widths_(2, 5, 10, 20)__coeff_9__w_10","value__change_quantiles__f_agg_""var""__isabs_True__qh_0.4__ql_0.2",value__has_duplicate_max,value__autocorrelation__lag_7,"value__fft_coefficient__coeff_11__attr_""abs""","value__change_quantiles__f_agg_""mean""__isabs_True__qh_0.2__ql_0.0","value__agg_linear_trend__f_agg_""mean""__chunk_len_5__attr_""intercept"""
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2007-07-03,0.678590,0.000000,0.009044,0.000000,0.000000,0.039158,0.000000,0.062590,0.111510,0.014690,...,3.760395e-01,-1.808926,1.0,8.968632,0.000000e+00,0.0,0.243004,9.076981,0.000000,3.751022
2007-07-04,0.678590,-0.457857,-0.457857,0.000000,0.000000,0.039158,0.000000,0.062590,0.111510,-0.457857,...,3.760395e-01,-1.808926,1.0,8.968632,0.000000e+00,0.0,0.243004,9.076981,0.000000,3.751022
2007-07-05,0.678590,0.000000,0.000000,0.000000,0.000000,0.039158,0.000000,0.062590,0.111510,0.000000,...,3.760395e-01,-1.808926,1.0,8.968632,0.000000e+00,1.0,0.243004,9.076981,0.000000,3.751022
2007-07-06,0.678590,0.566647,0.566647,0.000000,48.878498,0.039158,0.000000,0.062590,0.111510,0.555768,...,3.760395e-01,-1.808926,1.0,8.968632,0.000000e+00,0.0,0.243004,9.076981,0.000000,3.751022
2007-07-09,0.678590,0.316268,0.316268,0.000000,45.264324,0.039158,57.165169,0.062590,0.111510,0.468794,...,3.760395e-01,-1.808926,2.0,8.968632,0.000000e+00,0.0,0.243004,9.076981,0.000000,3.751022
2007-07-10,0.678590,0.083020,0.083020,0.000000,24.089735,0.872958,43.635376,-0.086330,0.111510,0.255550,...,3.760395e-01,-1.808926,2.0,8.968632,0.000000e+00,0.0,0.243004,9.076981,0.457857,-4.578624
2007-07-11,0.678590,0.075679,0.075679,14.896950,12.884875,0.872958,34.694756,-0.066845,0.111510,0.156186,...,3.760395e-01,-1.808926,2.0,8.968632,0.000000e+00,0.0,0.243004,9.076981,0.457857,-4.578624
2007-07-12,0.678590,0.298518,0.298518,48.890240,20.620901,0.872958,28.916252,0.484834,0.111510,0.238065,...,3.760395e-01,-1.808926,2.0,8.968632,0.000000e+00,0.0,-1.346206,9.076981,0.457857,-4.578624
2007-07-13,0.678590,0.316268,0.316268,62.612339,24.039415,0.872958,30.582733,0.870804,0.111510,0.289772,...,7.869246e-01,-0.903176,2.0,8.968632,0.000000e+00,0.0,-1.687007,9.076981,0.457857,-4.578624
2007-07-16,0.678590,0.140564,0.140564,43.340668,19.245911,0.872958,29.077965,0.849371,0.111510,0.231286,...,9.585321e-01,0.000000,3.0,-5.494963,0.000000e+00,0.0,-1.357159,9.076981,0.457857,-4.578624


In [24]:
df_time_series[df_time_series.isnull().any(axis=1)]

Unnamed: 0,symbol,quoteDate,adjustedPrice


In [37]:
y = pd.DataFrame(data={
    'adjustedPrice': df_time_series['adjustedPrice'].values
    },
                 index=df_time_series[['symbol', 'quoteDate']])
y['adjustedPrice']

(WAX, 2007-07-02 00:00:00)    0.5059
(WAX, 2007-07-03 00:00:00)    0.5038
(WAX, 2007-07-04 00:00:00)    0.5059
(WAX, 2007-07-05 00:00:00)    0.5059
(WAX, 2007-07-06 00:00:00)    0.5059
(WAX, 2007-07-09 00:00:00)    0.5059
(WAX, 2007-07-10 00:00:00)    0.5080
(WAX, 2007-07-11 00:00:00)    0.5016
(WAX, 2007-07-12 00:00:00)    0.5059
(WAX, 2007-07-13 00:00:00)    0.5059
(WAX, 2007-07-16 00:00:00)    0.5059
(WAX, 2007-07-17 00:00:00)    0.5016
(WAX, 2007-07-18 00:00:00)    0.5038
(WAX, 2007-07-19 00:00:00)    0.5080
(WAX, 2007-07-20 00:00:00)    0.5038
(WAX, 2007-07-23 00:00:00)    0.5038
(WAX, 2007-07-24 00:00:00)    0.5038
(WAX, 2007-07-25 00:00:00)    0.5059
(WAX, 2007-07-26 00:00:00)    0.5016
(WAX, 2007-07-27 00:00:00)    0.5016
(WAX, 2007-07-30 00:00:00)    0.4952
(WAX, 2007-07-31 00:00:00)    0.4973
(WAX, 2007-08-01 00:00:00)    0.4952
(WAX, 2007-08-02 00:00:00)    0.4973
(WAX, 2007-08-03 00:00:00)    0.4973
(WAX, 2007-08-06 00:00:00)    0.4930
(WAX, 2007-08-07 00:00:00)    0.4930
(

In [None]:
X = extract_relevant_features(df_time_series, y['adjustedPrice'], 
                              column_id='symbol', column_sort='quoteDate', show_warnings=True)