In [149]:
import numpy as np
import talib
import pandas as pd
import pathlib
from talib import abstract

In [150]:
PATH = r'./data.csv'
df = pd.read_csv(PATH, thousands=',')

In [151]:
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,"Dec 29, 2017",14392.6,14398.5,15109.8,13951.1,118.88K,-0.04%
1,"Dec 28, 2017",14398.7,15416.3,15505.5,13466.1,170.37K,-6.60%
2,"Dec 27, 2017",15416.6,15757.0,16514.6,14534.7,138.71K,-2.16%
3,"Dec 26, 2017",15756.6,13830.2,16094.7,13748.5,143.14K,13.90%
4,"Dec 25, 2017",13833.5,13790.0,14467.4,13010.7,107.48K,0.32%
...,...,...,...,...,...,...,...
2185,"Jan 05, 2012",6.9,5.6,7.2,5.6,182.33K,24.78%
2186,"Jan 04, 2012",5.6,4.9,5.7,4.8,131.17K,14.14%
2187,"Jan 03, 2012",4.9,5.2,5.3,4.7,125.17K,-6.51%
2188,"Jan 02, 2012",5.2,5.3,5.5,4.8,69.15K,-0.95%


### Splitting

Whole data set: 01.01.2012 - 29.12.2017 | 2190 (should be 2168)

- 02.01.2012 - 29.04.2012 | 120
- 30.04.2012 - 19.07.2016 (training) | 1539
- 20.07.2016 - 29.12.2017 (test) | 509

In [152]:
df = df.rename(columns={"Date": "date", 'Price': 'close', 'Open': 'open', "High": "high", "Low": "low", "Vol.": "volume", "Change %": "change"})

In [153]:
df['date'] = pd.to_datetime(df['date'])
df = df.set_index(df['date'])
df = df.sort_index()

In [154]:
df["change"] = df["change"].str.rstrip('%').astype('float')

In [155]:
df.volume = (df.volume.replace(r'[KMB]+$', '', regex=True).astype(float) * df.volume.str.extract(r'[\d\.]+([KMB]+)', expand=False).fillna(1).replace(['K','M', 'B'], [10**3, 10**6, 10**9]).astype(int))

In [156]:
df["close"] = df["close"].astype('float')

In [157]:
df = df.astype({"close": "float64", "low": "float64", "high": "float64", "open": "float64"})

In [158]:
bins = [-100, -11, -9, -7, -5, -3, -1, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1, 3 , 5, 7, 9, 11, 100]
df['binned'] = pd.cut(df['change'], bins)

In [159]:
df

Unnamed: 0_level_0,date,close,open,high,low,volume,change,binned
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2012-01-01,2012-01-01,5.3,4.7,5.5,4.6,108510.0,11.65,"(11.0, 100.0]"
2012-01-02,2012-01-02,5.2,5.3,5.5,4.8,69150.0,-0.95,"(-1.0, -0.8]"
2012-01-03,2012-01-03,4.9,5.2,5.3,4.7,125170.0,-6.51,"(-7.0, -5.0]"
2012-01-04,2012-01-04,5.6,4.9,5.7,4.8,131170.0,14.14,"(11.0, 100.0]"
2012-01-05,2012-01-05,6.9,5.6,7.2,5.6,182330.0,24.78,"(11.0, 100.0]"
...,...,...,...,...,...,...,...,...
2017-12-25,2017-12-25,13833.5,13790.0,14467.4,13010.7,107480.0,0.32,"(0.2, 0.4]"
2017-12-26,2017-12-26,15756.6,13830.2,16094.7,13748.5,143140.0,13.90,"(11.0, 100.0]"
2017-12-27,2017-12-27,15416.6,15757.0,16514.6,14534.7,138710.0,-2.16,"(-3.0, -1.0]"
2017-12-28,2017-12-28,14398.7,15416.3,15505.5,13466.1,170370.0,-6.60,"(-7.0, -5.0]"


In [160]:
indicators = [
    {
        "name": "BBANDS"
    },
    {
        "name": "DEMA"
    },
    {
        "name": "EMA"
    },
    {
        "name": "HT_TRENDLINE"
    },
    {
        "name": "KAMA"
    },
    {
        "name": "MIDPOINT"
    },
    {
        "name": "MIDPRICE"
    },
    {
        "name": "SAR"
    },
    {
        "name": "SAREXT"
    },
    {
        "name": "SMA",
        "display_name": "SMA_3",
        "params": {
            "timeperiod": 3
        }
    },
    {
        "name": "SMA",
        "display_name": "SMA_5",
        "params": {
            "timeperiod": 5
        }
    },
    {
        "name": "SMA",
        "display_name": "SMA_10",
        "params": {
            "timeperiod": 10
        }
    },
    {
        "name": "SMA",
        "display_name": "SMA_20",
        "params": {
            "timeperiod": 20
        }
    },
    {
        "name": "T3"
    },
    {
        "name": "TEMA"
    },
    {
        "name": "TRIMA"
    },
    {
        "name": "WMA"
    },
    {
        "name": "ADX",
        "display_name": "ADX_14",
        "params": {
            "timeperiod": 14
        }
    },
    {
        "name": "ADX",
        "display_name": "ADX_20",
        "params": {
            "timeperiod": 20
        }
    },
    {
        "name": "ADXR"
    },
    {
        "name": "APO"
    },
    {
        "name": "AROONOSC"
    },
    {
        "name": "BOP"
    },
    {
        "name": "CCI",
        "display_name": "CCI_3",
        "params": {
            "timeperiod": 3
        }
    },
    {
        "name": "CCI",
        "display_name": "CCI_5",
        "params": {
            "timeperiod": 5
        }
    },
    {
        "name": "CCI",
        "display_name": "CCI_10",
        "params": {
            "timeperiod": 10
        }
    },
    {
        "name": "CCI",
        "display_name": "CCI_14",
        "params": {
            "timeperiod": 14
        }
    },
    {
        "name": "CMO"
    },
    {
        "name": "DX"
    },
    {
        "name": "BOP"
    },
    {
        "name": "MACD"
    },
    {
        "name": "MINUS_DI"
    },
    {
        "name": "MINUS_DM"
    },
    {
        "name": "MOM",
        "display_name": "MOM_1",
        "params": {
            "timeperiod": 1
        }
    },
    {
        "name": "MOM",
        "display_name": "MOM_3",
        "params": {
            "timeperiod": 3
        }
    },
    {
        "name": "MOM",
        "display_name": "MOM_5",
        "params": {
            "timeperiod": 5
        }
    },
    {
        "name": "MOM",
        "display_name": "MOM_10",
        "params": {
            "timeperiod": 10
        }
    },
    {
        "name": "PLUS_DI"
    },
    {
        "name": "PLUS_DM"
    },
    {
        "name": "PPO"
    },
    {
        "name": "ROC"
    },
    {
        "name": "ROCR"
    },
    {
        "name": "ROCR100"
    },
    {
        "name": "RSI",
        "display_name": "RSI_5",
        "params": {
            "timeperiod": 5
        }
    },
    {
        "name": "RSI",
        "display_name": "RSI_10",
        "params": {
            "timeperiod": 10
        }
    },
    {
        "name": "RSI",
        "display_name": "RSI_15",
        "params": {
            "timeperiod": 15
        }
    },
    {
        "name": "STOCH"
    },
    {
        "name": "STOCHF"
    },
    {
        "name": "TRIX"
    },
    {
        "name": "ULTOSC"
    },
    {
        "name": "WILLR"
    },
    {
        "name": "ATR"
    },
    {
        "name": "NATR"
    },
    {
        "name": "TRANGE"
    },
    {
        "name": "CDL2CROWS"
    },
    {
        "name": "CDL3BLACKCROWS"
    },
    {
        "name": "CDL3INSIDE"
    },
    {
        "name": "CDL3LINESTRIKE"
    },
    {
        "name": "CDL3OUTSIDE"
    },
    {
        "name": "CDL3STARSINSOUTH"
    },
    {
        "name": "CDL3WHITESOLDIERS"
    },
    {
        "name": "CDLABANDONEDBABY"
    },
    {
        "name": "CDLADVANCEBLOCK"
    },
    {
        "name": "CDLBELTHOLD"
    },
    {
        "name": "CDLBREAKAWAY"
    },
    {
        "name": "CDLCLOSINGMARUBOZU"
    },
    {
        "name": "CDLCONCEALBABYSWALL"
    },
    {
        "name": "CDLCOUNTERATTACK"
    },
    {
        "name": "CDLDARKCLOUDCOVER"
    },
    {
        "name": "CDLDOJI"
    },
    {
        "name": "CDLDOJISTAR"
    },
    {
        "name": "CDLDRAGONFLYDOJI"
    },
    {
        "name": "CDLENGULFING"
    },
    {
        "name": "CDLEVENINGDOJISTAR"
    },
    {
        "name": "CDLEVENINGSTAR"
    },
    {
        "name": "CDLGAPSIDESIDEWHITE"
    },
    {
        "name": "CDLGRAVESTONEDOJI"
    },
    {
        "name": "CDLHAMMER"
    },
    {
        "name": "CDLHANGINGMAN"
    },
    {
        "name": "CDLHARAMI"
    },
    {
        "name": "CDLHARAMICROSS"
    },
    {
        "name": "CDLHIGHWAVE"
    },
    {
        "name": "CDLHIKKAKE"
    },
    {
        "name": "CDLHIKKAKEMOD"
    },
    {
        "name": "CDLHOMINGPIGEON"
    },
    {
        "name": "CDLIDENTICAL3CROWS"
    },
    {
        "name": "CDLINNECK"
    },
    {
        "name": "CDLINVERTEDHAMMER"
    },
    {
        "name": "CDLKICKING"
    },
    {
        "name": "CDLKICKINGBYLENGTH"
    },
    {
        "name": "CDLLADDERBOTTOM"
    },
    {
        "name": "CDLLONGLEGGEDDOJI"
    },
    {
        "name": "CDLLONGLINE"
    },
    {
        "name": "CDLMARUBOZU"
    },
    {
        "name": "CDLMATCHINGLOW"
    },
    {
        "name": "CDLMATHOLD"
    },
    {
        "name": "CDLMORNINGDOJISTAR"
    },
    {
        "name": "CDLMORNINGSTAR"
    },
    {
        "name": "CDLONNECK"
    },
    {
        "name": "CDLPIERCING"
    },
    {
        "name": "CDLRICKSHAWMAN"
    },
    {
        "name": "CDLRISEFALL3METHODS"
    },
    {
        "name": "CDLSEPARATINGLINES"
    },
    {
        "name": "CDLSHOOTINGSTAR"
    },
    {
        "name": "CDLSHORTLINE"
    },
    {
        "name": "CDLSPINNINGTOP"
    },
    {
        "name": "CDLSTALLEDPATTERN"
    },
    {
        "name": "CDLSTICKSANDWICH"
    },
    {
        "name": "CDLTAKURI"
    },
    {
        "name": "CDLTASUKIGAP"
    },
    {
        "name": "CDLTHRUSTING"
    },
    {
        "name": "CDLTRISTAR"
    },
    {
        "name": "CDLUNIQUE3RIVER"
    },
    {
        "name": "CDLUPSIDEGAP2CROWS"
    },
    {
        "name": "CDLXSIDEGAP3METHODS"
    },
    {
        "name": "HT_DCPERIOD"
    },
    {
        "name": "HT_DCPHASE"
    },
    {
        "name": "HT_TRENDMODE"
    }
]

In [168]:
fn = abstract.Function("CDLINNECK")
fn

{'name': 'CDLINNECK', 'group': 'Pattern Recognition', 'display_name': 'In-Neck Pattern', 'function_flags': ['Output is a candlestick'], 'input_names': OrderedDict([('prices', ['open', 'high', 'low', 'close'])]), 'parameters': OrderedDict(), 'output_flags': OrderedDict([('integer', ['Line'])]), 'output_names': ['integer']}

In [169]:
fn = abstract.Function("RSI")
fn

{'name': 'RSI', 'group': 'Momentum Indicators', 'display_name': 'Relative Strength Index', 'function_flags': ['Function has an unstable period'], 'input_names': OrderedDict([('price', 'close')]), 'parameters': OrderedDict([('timeperiod', 14)]), 'output_flags': OrderedDict([('real', ['Line'])]), 'output_names': ['real']}

## Indicators

In [162]:
for indicator in indicators:
    name = indicator.get("name")
    params = indicator.get("params")
    display_name = indicator.get("display_name")
    
    fn = abstract.Function(name)
    
    if params == None:
        res = fn(df)
    else:
        res = fn(df, **params)
    
    name = display_name if display_name != None else name
    
    if isinstance(res, pd.core.series.Series):
        res = res.rename(name)
    elif isinstance(res, pd.core.frame.DataFrame):
        new_columns = {}
        for col in res:
            new_columns[col] = name + "_" + col
        res.rename(columns=new_columns, inplace=True)

    df =  pd.concat([df, res], axis=1)


In [164]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [171]:
df.describe()

Unnamed: 0,close,open,high,low,volume,change,BBANDS_upperband,BBANDS_middleband,BBANDS_lowerband,DEMA,EMA,HT_TRENDLINE,KAMA,MIDPOINT,MIDPRICE,SAR,SAREXT,SMA_3,SMA_5,SMA_10,SMA_20,T3,TEMA,TRIMA,WMA,ADX_14,ADX_20,ADXR,APO,AROONOSC,BOP,CCI_3,CCI_5,CCI_10,CCI_14,CMO,DX,BOP.1,MACD_macd,MACD_macdsignal,MACD_macdhist,MINUS_DI,MINUS_DM,MOM_1,MOM_3,MOM_5,MOM_10,PLUS_DI,PLUS_DM,PPO,ROC,ROCR,ROCR100,RSI_5,RSI_10,RSI_15,STOCH_slowk,STOCH_slowd,STOCHF_fastk,STOCHF_fastd,TRIX,ULTOSC,WILLR,ATR,NATR,TRANGE,CDL2CROWS,CDL3BLACKCROWS,CDL3INSIDE,CDL3LINESTRIKE,CDL3OUTSIDE,CDL3STARSINSOUTH,CDL3WHITESOLDIERS,CDLABANDONEDBABY,CDLADVANCEBLOCK,CDLBELTHOLD,CDLBREAKAWAY,CDLCLOSINGMARUBOZU,CDLCONCEALBABYSWALL,CDLCOUNTERATTACK,CDLDARKCLOUDCOVER,CDLDOJI,CDLDOJISTAR,CDLDRAGONFLYDOJI,CDLENGULFING,CDLEVENINGDOJISTAR,CDLEVENINGSTAR,CDLGAPSIDESIDEWHITE,CDLGRAVESTONEDOJI,CDLHAMMER,CDLHANGINGMAN,CDLHARAMI,CDLHARAMICROSS,CDLHIGHWAVE,CDLHIKKAKE,CDLHIKKAKEMOD,CDLHOMINGPIGEON,CDLIDENTICAL3CROWS,CDLINNECK,CDLINVERTEDHAMMER,CDLKICKING,CDLKICKINGBYLENGTH,CDLLADDERBOTTOM,CDLLONGLEGGEDDOJI,CDLLONGLINE,CDLMARUBOZU,CDLMATCHINGLOW,CDLMATHOLD,CDLMORNINGDOJISTAR,CDLMORNINGSTAR,CDLONNECK,CDLPIERCING,CDLRICKSHAWMAN,CDLRISEFALL3METHODS,CDLSEPARATINGLINES,CDLSHOOTINGSTAR,CDLSHORTLINE,CDLSPINNINGTOP,CDLSTALLEDPATTERN,CDLSTICKSANDWICH,CDLTAKURI,CDLTASUKIGAP,CDLTHRUSTING,CDLTRISTAR,CDLUNIQUE3RIVER,CDLUPSIDEGAP2CROWS,CDLXSIDEGAP3METHODS,HT_DCPERIOD,HT_DCPHASE,HT_TRENDMODE
count,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2186.0,2186.0,2186.0,2132.0,2161.0,2127.0,2160.0,2177.0,2177.0,2189.0,2189.0,2188.0,2186.0,2181.0,2171.0,2166.0,2103.0,2161.0,2161.0,2163.0,2151.0,2150.0,2165.0,2176.0,2190.0,2188.0,2186.0,2181.0,2177.0,2176.0,2176.0,2190.0,2157.0,2157.0,2157.0,2176.0,2177.0,2189.0,2187.0,2185.0,2180.0,2176.0,2177.0,2165.0,2180.0,2180.0,2180.0,2185.0,2180.0,2175.0,2182.0,2182.0,2184.0,2184.0,2102.0,2162.0,2177.0,2176.0,2176.0,2189.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2190.0,2158.0,2127.0,2190.0
mean,913.453881,906.881598,943.633379,869.993744,57980.694064,0.608178,978.248713,901.638747,825.02878,923.14535,829.647675,843.68661,893.964431,875.737575,864.403169,859.995213,122.832991,907.707541,901.638747,887.057437,853.531435,911.899071,953.56112,820.158928,856.278178,30.667763,26.245509,30.62499,53.347378,19.26208,0.078595,14.334261,19.983153,25.792665,30.251472,12.949944,30.714448,0.078595,46.101216,44.207742,1.893474,19.707006,166.002005,6.572545,20.206904,33.762059,67.74422,23.793216,237.657543,2.541106,5.647159,1.056472,105.647159,57.079715,56.669907,56.436042,58.6782,58.689474,58.684604,58.70265,0.359899,55.892303,-39.886721,63.143624,6.189315,73.860576,0.0,0.0,0.045662,0.0,0.0,0.0,0.136986,0.0,-1.73516,2.60274,0.0,3.881279,0.0,0.0,0.0,20.091324,0.0,2.876712,0.0,0.0,0.0,0.0,1.689498,2.420091,-1.826484,0.045662,0.0,1.506849,0.045662,-0.045662,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.589041,5.6621,1.415525,1.780822,0.0,0.0,0.0,0.0,0.0,14.520548,0.0,-0.273973,0.0,3.150685,2.465753,-0.273973,0.0,2.785388,0.0,0.0,0.0,0.0,0.0,0.0,23.253379,139.881984,0.835616
std,2104.922934,2085.197192,2194.919523,1965.082588,55408.184755,9.219659,2303.025809,2059.696648,1825.517557,2071.414782,1732.400919,1744.877263,2000.55247,1959.651809,1909.587091,1987.46103,2162.136974,2083.337987,2059.696648,2003.530474,1851.05439,2081.035642,2176.163801,1699.925048,1843.007031,12.510125,10.800765,10.85981,287.434738,62.350138,0.499714,79.500812,93.348421,105.885877,109.334422,29.893527,22.280462,0.499714,219.315446,206.209572,55.20742,8.341633,519.807346,148.910497,281.036528,362.70215,533.582877,9.702586,775.944739,8.971563,25.126007,0.25126,25.126007,21.649765,16.877958,14.575256,23.43225,21.945722,27.381255,23.43547,0.752663,10.838901,27.430002,184.868609,5.77714,272.301438,0.0,0.0,2.136869,0.0,0.0,0.0,3.699475,0.0,13.060746,40.072823,0.0,29.746985,0.0,0.0,0.0,40.077482,3.02268,16.718953,0.0,0.0,0.0,0.0,12.890743,15.370757,13.393815,2.136869,0.0,40.015001,48.972986,5.654731,0.0,0.0,0.0,0.0,0.0,0.0,0.0,39.697521,41.167243,20.784213,13.228408,0.0,0.0,0.0,0.0,0.0,35.238835,0.0,12.459814,0.0,47.927634,50.879361,5.228258,0.0,16.459165,0.0,0.0,0.0,0.0,0.0,0.0,5.750955,90.841002,0.370708
min,4.2,4.2,4.4,3.9,400.0,-57.21,4.469666,4.32,-35.663811,4.515636,4.87546,4.659007,4.951621,4.6,4.55,3.9,-19870.6,4.3,4.32,4.42,4.635,4.221865,4.756985,4.730417,4.795699,6.700395,6.732028,8.721737,-399.953846,-100.0,-1.0,-200.0,-166.666667,-323.925172,-347.314578,-77.37623,0.004974,-1.0,-187.825709,-161.543681,-743.722423,2.006072,0.105697,-1967.1,-3858.7,-5400.7,-4573.6,4.184615,0.19529,-51.792188,-80.792997,0.19207,19.207003,1.441934,6.656087,12.37679,2.131628e-14,5.555556,0.0,2.131628e-14,-1.061734,19.278034,-100.0,0.100052,1.295009,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,-100.0,0.0,-100.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,-100.0,-200.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,0.0,-100.0,-100.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.351935,-44.872591,0.0
25%,115.0,114.35,120.2,108.2,22102.5,-1.06,124.560808,115.57,104.480753,131.888803,116.909278,123.069772,133.922844,117.95,114.3,109.607957,-293.206003,115.25,115.57,116.94,119.9875,121.66341,138.238018,121.074583,119.944946,21.177458,18.229307,23.163817,-4.258333,-35.714286,-0.325131,-70.866962,-57.943759,-53.664333,-53.626732,-6.903192,12.238619,-0.325131,-1.192002,-1.065991,-1.580227,13.564668,11.499641,-2.8,-5.0,-6.2,-9.85,16.724463,17.447785,-1.971149,-3.517227,0.964828,96.482773,41.871719,45.387644,46.787993,39.17546,40.740741,36.423411,39.18263,-0.079353,48.45456,-61.889764,6.496676,3.450894,4.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.945852,61.63238,1.0
50%,375.8,375.2,384.15,365.05,43440.0,0.16,392.849605,375.34,348.974139,374.89447,388.775052,379.815173,367.813812,373.35,380.6,378.992047,11.50687,375.033333,375.34,372.93,372.855,377.255591,381.619016,373.465417,374.574624,28.62367,24.679685,29.181362,0.999359,35.714286,0.062446,38.411804,37.740632,42.424242,45.492461,9.417917,26.017595,0.062446,1.574043,1.912036,0.064521,19.64822,39.612382,0.1,0.4,0.6,1.2,22.229097,44.283053,1.533137,2.165002,1.02165,102.165002,56.730006,55.145222,54.524522,61.89141,61.38757,62.436441,61.90476,0.263631,55.821488,-36.666667,15.308349,4.910145,11.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.090524,166.12547,1.0
75%,655.075,654.65,668.925,636.7,75200.0,2.03,684.487793,654.725,620.977702,682.077099,637.447461,660.352059,683.173348,661.5,650.9,664.760952,414.064117,654.683333,654.725,656.92,655.0575,658.851381,686.360101,652.547917,656.515484,38.561027,32.556693,37.130307,16.037821,78.571429,0.5,100.0,97.701149,103.874883,109.430703,32.015568,45.709885,0.5,14.747136,14.461681,2.324049,25.169317,108.697555,5.3,12.15,17.7,27.4,29.216564,129.102265,5.874639,11.027778,1.110278,111.027778,73.456986,68.369934,65.666681,79.73112,78.327633,82.888658,79.75948,0.571034,63.840335,-14.893617,36.503882,7.170891,36.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.225334,198.345008,1.0
max,19345.5,19346.6,19870.6,18750.9,572350.0,336.84,20484.508314,18502.4,16949.990271,17782.679503,14310.7791,14813.410104,17313.38892,17094.45,16450.8,19870.6,15342.723561,19127.833333,18502.4,17554.51,16270.985,18671.268168,19147.42253,16033.192917,15638.452903,76.40445,63.668866,71.200492,3640.313462,100.0,3.595126,100.0,166.666667,321.558401,368.46198,91.977831,93.132365,3.595126,2558.563724,2311.091085,618.132001,54.022318,8106.22168,3100.7,5226.4,5937.6,7117.1,62.221704,8206.038364,45.074154,493.369176,5.933692,593.369176,99.433936,96.90767,95.804324,100.0,100.0,100.0,100.0,4.351175,82.973607,-0.0,1922.252835,98.804056,4948.0,0.0,0.0,100.0,0.0,0.0,0.0,100.0,0.0,0.0,100.0,0.0,100.0,0.0,0.0,0.0,100.0,100.0,100.0,0.0,0.0,0.0,0.0,100.0,100.0,0.0,100.0,0.0,100.0,200.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,100.0,100.0,100.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,100.0,0.0,100.0,100.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,45.457506,314.373168,1.0


In [172]:
init_start = "2012-01-02"
init_end = "2012-04-29"

train_start = "2012-04-30"
train_end = "2016-07-19"

test_start = "2016-07-20"
test_end = "2017-12-29"

In [173]:
# create train test partition
init = df[init_start:init_end]
train = df[train_start:train_end]
test  = df[test_start:test_end]

print('Init Dataset:',init.shape)
print('Train Dataset:',train.shape)
print('Test Dataset:',test.shape)

Init Dataset: (119, 132)
Train Dataset: (1542, 132)
Test Dataset: (528, 132)


# Machine learning magic

In [174]:
from sklearn.tree import DecisionTreeClassifier