In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## Read files

In [2]:
import os

In [3]:
files = os.listdir('data')

In [4]:
files

['BTC-USD.csv', '^GSPC.csv', '^IXIC.csv']

In [5]:
dataframes = dict()

In [6]:
for f in files:
    df = pd.read_csv('data/' + f)
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)
    df.columns = df.columns.str.lower()
    dataframes[f.split('.')[0]] = df[df.index > '1990'].copy()

## Append Target Columns

In [7]:
import talib

In [8]:
for k in dataframes.keys():
    for f in talib.get_function_groups()['Pattern Recognition']:
        dataframes[k][f]  = getattr(talib, f)(dataframes[k]['open'], dataframes[k]['high'], 
                                              dataframes[k]['low'], dataframes[k]['close'])

In [9]:
for k in dataframes.keys():
    dataframes[k]['bullishPin'] = (
        (dataframes[k]['close'] > ((dataframes[k]['high'] + dataframes[k]['low']) /2)).astype('int') *
        (dataframes[k]['open'] > ((dataframes[k]['high'] + dataframes[k]['low']) /2)).astype('int') *
        (np.abs(dataframes[k]['open'] - dataframes[k]['close']) /
         np.abs(dataframes[k][['open', 'close']].min(axis=1) - dataframes[k]['low'] + 0.00001))
    )
    
    dataframes[k]['bearishPin'] = (
        (dataframes[k]['close'] < ((dataframes[k]['high'] + dataframes[k]['low']) / 2)).astype('int') *
        (dataframes[k]['open'] < ((dataframes[k]['high'] + dataframes[k]['low']) / 2)).astype('int') *
        (np.abs(dataframes[k]['open'] - dataframes[k]['close']) /
         np.abs(dataframes[k][['open', 'close']].min(axis=1) - dataframes[k]['high'] + 0.00001))
    )
    
    dataframes[k]['threshold'] = ((np.random.randn(dataframes[k].shape[0]) / 20) + 0.33)
    
    dataframes[k]['bullishPin'] = ((dataframes[k]['bullishPin'] > 0) & (dataframes[k]['bullishPin'] < dataframes[k]['threshold'])).astype('int')
    
    dataframes[k]['bearishPin'] = ((dataframes[k]['bearishPin'] > 0) & (dataframes[k]['bearishPin'] < dataframes[k]['threshold'])).astype('int')
    
    dataframes[k].drop(['threshold'], axis=1, inplace=True)

In [10]:
dataframes['BTC-USD']

Unnamed: 0_level_0,open,high,low,close,adj close,volume,CDL2CROWS,CDL3BLACKCROWS,CDL3INSIDE,CDL3LINESTRIKE,...,CDLSTICKSANDWICH,CDLTAKURI,CDLTASUKIGAP,CDLTHRUSTING,CDLTRISTAR,CDLUNIQUE3RIVER,CDLUPSIDEGAP2CROWS,CDLXSIDEGAP3METHODS,bullishPin,bearishPin
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-11-05,14133.733398,15706.404297,14102.088867,15579.848633,15579.848633,40856321438,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-11-06,15579.729492,15903.437500,15226.839844,15565.880859,15565.880859,39837841971,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2020-11-07,15565.880859,15737.095703,14423.203125,14833.753906,14833.753906,35024953705,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-11-08,14833.753906,15637.320313,14744.110352,15479.567383,15479.567383,26632075028,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Find columns with mostly the same values

In [11]:
to_drop = list()
for f in talib.get_function_groups()['Pattern Recognition']:
    total = 0
    size = 0
    for k in dataframes.keys():
        total += dataframes[k][f].sum()
        size += dataframes[k].shape[0]
    if total < size * 0.01:
        to_drop.append(f)

In [12]:
for k in dataframes.keys():
    dataframes[k].drop(to_drop, axis=1, inplace=True)

## Building Model

#### Prepare training and test sets

In [13]:
inputs = dict()

In [14]:
for k in dataframes.keys():
    # We need to normalize the data. Otherwise, data from multiple sources will vary too much.
    # We do this by dividing the whole row with its mean. We can also divide by open or close, but
    # one column would always be 1 in that case.
    
    divisor = dataframes[k][['open', 'high', 'low', 'close']].mean(axis=1)
    inputs[k] = dataframes[k][['open', 'high', 'low', 'close']].div(divisor, axis=0)

In [15]:
train = pd.concat([inputs[k][['open', 'high', 'low', 'close']].iloc[:-500] for k in inputs.keys()], axis=0)

In [16]:
test = pd.concat([inputs[k][['open', 'high', 'low', 'close']].iloc[-500:] for k in inputs.keys()], axis=0)

In [17]:
train

Unnamed: 0_level_0,open,high,low,close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-09-17,1.010664,1.015675,0.981502,0.992159
2014-09-18,1.043498,1.043498,0.943556,0.969448
2014-09-19,1.039936,1.049087,0.942905,0.968073
2014-09-20,0.976457,1.047272,0.964606,1.011665
2014-09-21,1.012296,1.023064,0.975325,0.989315
...,...,...,...,...
2018-11-06,0.995946,1.006084,0.995242,1.002728
2018-11-07,0.991964,1.008862,0.990603,1.008571
2018-11-08,1.001161,1.004181,0.995261,0.999397
2018-11-09,1.005886,1.006671,0.989855,0.997588


In [18]:
test

Unnamed: 0_level_0,open,high,low,close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-06-29,1.027629,1.027641,0.953679,0.991051
2019-06-30,1.043769,1.065323,0.944660,0.946247
2019-07-01,1.011995,1.050378,0.945670,0.991956
2019-07-02,1.007476,1.038256,0.926526,1.027742
2019-07-03,0.949676,1.050623,0.949676,1.050025
...,...,...,...,...
2020-11-02,1.003914,1.009442,0.987548,0.999096
2020-11-03,0.994071,1.009854,0.991025,1.005050
2020-11-04,0.993123,1.012175,0.988822,1.005881
2020-11-05,0.996829,1.005935,0.994114,1.003122


In [19]:
candle_models = dict()

In [20]:
y_train = pd.concat([dataframes[k].drop(['open', 'high', 'low', 'close', 'adj close', 'volume'], axis=1).iloc[:-500] for k in dataframes.keys()], axis=0)

In [21]:
y_test = pd.concat([dataframes[k].drop(['open', 'high', 'low', 'close', 'adj close', 'volume'], axis=1).iloc[-500:] for k in dataframes.keys()], axis=0)

In [22]:
from catboost import CatBoostClassifier

In [23]:
for c in y_train.columns:
    print('Training model for pattern %s' % (c))
    model = CatBoostClassifier(n_estimators=100, random_state=1, auto_class_weights='SqrtBalanced')
    model.fit(train, y_train[c])
    candle_models[c] = model

Training model for pattern CDL3INSIDE
Learning rate set to 0.5
0:	learn: 0.6240574	total: 60.7ms	remaining: 6.01s
1:	learn: 0.5029075	total: 66.7ms	remaining: 3.27s
2:	learn: 0.4454711	total: 73.2ms	remaining: 2.37s
3:	learn: 0.4164473	total: 78.9ms	remaining: 1.89s
4:	learn: 0.3971233	total: 84.8ms	remaining: 1.61s
5:	learn: 0.3889474	total: 90.6ms	remaining: 1.42s
6:	learn: 0.3802485	total: 96.6ms	remaining: 1.28s
7:	learn: 0.3703471	total: 103ms	remaining: 1.18s
8:	learn: 0.3662617	total: 109ms	remaining: 1.1s
9:	learn: 0.3565288	total: 115ms	remaining: 1.04s
10:	learn: 0.3518440	total: 122ms	remaining: 983ms
11:	learn: 0.3483193	total: 128ms	remaining: 935ms
12:	learn: 0.3458583	total: 134ms	remaining: 894ms
13:	learn: 0.3426361	total: 140ms	remaining: 860ms
14:	learn: 0.3372704	total: 147ms	remaining: 832ms
15:	learn: 0.3350357	total: 153ms	remaining: 806ms
16:	learn: 0.3299605	total: 161ms	remaining: 784ms
17:	learn: 0.3271419	total: 167ms	remaining: 761ms
18:	learn: 0.3256534	to

60:	learn: 0.0107569	total: 352ms	remaining: 225ms
61:	learn: 0.0105618	total: 358ms	remaining: 219ms
62:	learn: 0.0103973	total: 364ms	remaining: 214ms
63:	learn: 0.0102871	total: 369ms	remaining: 208ms
64:	learn: 0.0101803	total: 374ms	remaining: 202ms
65:	learn: 0.0098770	total: 380ms	remaining: 196ms
66:	learn: 0.0096453	total: 385ms	remaining: 190ms
67:	learn: 0.0094704	total: 391ms	remaining: 184ms
68:	learn: 0.0093282	total: 397ms	remaining: 178ms
69:	learn: 0.0091095	total: 403ms	remaining: 173ms
70:	learn: 0.0088780	total: 409ms	remaining: 167ms
71:	learn: 0.0086494	total: 415ms	remaining: 161ms
72:	learn: 0.0084793	total: 421ms	remaining: 156ms
73:	learn: 0.0083602	total: 426ms	remaining: 150ms
74:	learn: 0.0082876	total: 432ms	remaining: 144ms
75:	learn: 0.0082394	total: 437ms	remaining: 138ms
76:	learn: 0.0081741	total: 442ms	remaining: 132ms
77:	learn: 0.0081316	total: 448ms	remaining: 126ms
78:	learn: 0.0079910	total: 453ms	remaining: 120ms
79:	learn: 0.0077480	total: 459

35:	learn: 0.0936454	total: 179ms	remaining: 317ms
36:	learn: 0.0910383	total: 183ms	remaining: 312ms
37:	learn: 0.0891676	total: 190ms	remaining: 310ms
38:	learn: 0.0867508	total: 194ms	remaining: 304ms
39:	learn: 0.0857464	total: 198ms	remaining: 297ms
40:	learn: 0.0850166	total: 202ms	remaining: 291ms
41:	learn: 0.0840327	total: 206ms	remaining: 285ms
42:	learn: 0.0837083	total: 210ms	remaining: 279ms
43:	learn: 0.0812412	total: 215ms	remaining: 274ms
44:	learn: 0.0795832	total: 220ms	remaining: 269ms
45:	learn: 0.0769267	total: 225ms	remaining: 264ms
46:	learn: 0.0754685	total: 230ms	remaining: 259ms
47:	learn: 0.0749688	total: 234ms	remaining: 253ms
48:	learn: 0.0734805	total: 238ms	remaining: 248ms
49:	learn: 0.0715807	total: 242ms	remaining: 242ms
50:	learn: 0.0698948	total: 247ms	remaining: 237ms
51:	learn: 0.0689599	total: 252ms	remaining: 233ms
52:	learn: 0.0683502	total: 257ms	remaining: 228ms
53:	learn: 0.0680344	total: 261ms	remaining: 222ms
54:	learn: 0.0674139	total: 265

98:	learn: 0.1666644	total: 564ms	remaining: 5.7ms
99:	learn: 0.1663722	total: 570ms	remaining: 0us
Training model for pattern CDLCLOSINGMARUBOZU
Learning rate set to 0.5
0:	learn: 0.7122139	total: 6.6ms	remaining: 654ms
1:	learn: 0.5650662	total: 12.6ms	remaining: 617ms
2:	learn: 0.4802787	total: 18.4ms	remaining: 596ms
3:	learn: 0.4202449	total: 24.2ms	remaining: 580ms
4:	learn: 0.3912142	total: 30.3ms	remaining: 576ms
5:	learn: 0.3649447	total: 36.2ms	remaining: 568ms
6:	learn: 0.3512824	total: 41.8ms	remaining: 556ms
7:	learn: 0.3355670	total: 47.8ms	remaining: 550ms
8:	learn: 0.3196311	total: 53.3ms	remaining: 539ms
9:	learn: 0.3080081	total: 59.3ms	remaining: 534ms
10:	learn: 0.2998191	total: 66.6ms	remaining: 538ms
11:	learn: 0.2950630	total: 72ms	remaining: 528ms
12:	learn: 0.2926323	total: 77.7ms	remaining: 520ms
13:	learn: 0.2859179	total: 83.4ms	remaining: 512ms
14:	learn: 0.2797726	total: 89.5ms	remaining: 507ms
15:	learn: 0.2754387	total: 95.2ms	remaining: 500ms
16:	learn:

69:	learn: 0.0004190	total: 410ms	remaining: 176ms
70:	learn: 0.0004125	total: 416ms	remaining: 170ms
71:	learn: 0.0004009	total: 422ms	remaining: 164ms
72:	learn: 0.0003902	total: 428ms	remaining: 158ms
73:	learn: 0.0003859	total: 434ms	remaining: 152ms
74:	learn: 0.0003758	total: 440ms	remaining: 147ms
75:	learn: 0.0003672	total: 448ms	remaining: 141ms
76:	learn: 0.0003524	total: 454ms	remaining: 135ms
77:	learn: 0.0003433	total: 459ms	remaining: 130ms
78:	learn: 0.0003351	total: 466ms	remaining: 124ms
79:	learn: 0.0003257	total: 472ms	remaining: 118ms
80:	learn: 0.0003134	total: 477ms	remaining: 112ms
81:	learn: 0.0003078	total: 483ms	remaining: 106ms
82:	learn: 0.0003020	total: 489ms	remaining: 100ms
83:	learn: 0.0002974	total: 495ms	remaining: 94.3ms
84:	learn: 0.0002917	total: 501ms	remaining: 88.4ms
85:	learn: 0.0002862	total: 508ms	remaining: 82.7ms
86:	learn: 0.0002786	total: 514ms	remaining: 76.8ms
87:	learn: 0.0002729	total: 520ms	remaining: 70.8ms
88:	learn: 0.0002665	total

36:	learn: 0.0967586	total: 178ms	remaining: 304ms
37:	learn: 0.0960465	total: 183ms	remaining: 298ms
38:	learn: 0.0947630	total: 187ms	remaining: 292ms
39:	learn: 0.0943594	total: 192ms	remaining: 287ms
40:	learn: 0.0939594	total: 196ms	remaining: 282ms
41:	learn: 0.0931628	total: 201ms	remaining: 277ms
42:	learn: 0.0925269	total: 205ms	remaining: 272ms
43:	learn: 0.0916979	total: 210ms	remaining: 267ms
44:	learn: 0.0907182	total: 215ms	remaining: 263ms
45:	learn: 0.0892982	total: 219ms	remaining: 258ms
46:	learn: 0.0887891	total: 224ms	remaining: 252ms
47:	learn: 0.0884627	total: 228ms	remaining: 247ms
48:	learn: 0.0873981	total: 232ms	remaining: 242ms
49:	learn: 0.0870908	total: 237ms	remaining: 237ms
50:	learn: 0.0867229	total: 241ms	remaining: 232ms
51:	learn: 0.0865025	total: 245ms	remaining: 226ms
52:	learn: 0.0860342	total: 250ms	remaining: 221ms
53:	learn: 0.0855573	total: 255ms	remaining: 217ms
54:	learn: 0.0849218	total: 259ms	remaining: 212ms
55:	learn: 0.0846406	total: 263

95:	learn: 0.0041065	total: 574ms	remaining: 23.9ms
96:	learn: 0.0040399	total: 579ms	remaining: 17.9ms
97:	learn: 0.0039955	total: 586ms	remaining: 12ms
98:	learn: 0.0039509	total: 592ms	remaining: 5.98ms
99:	learn: 0.0039102	total: 597ms	remaining: 0us
Training model for pattern CDLGRAVESTONEDOJI
Learning rate set to 0.280216
0:	learn: 0.4891599	total: 4.54ms	remaining: 450ms
1:	learn: 0.3652628	total: 9.05ms	remaining: 443ms
2:	learn: 0.2888613	total: 13.6ms	remaining: 439ms
3:	learn: 0.2335627	total: 17.8ms	remaining: 427ms
4:	learn: 0.1942412	total: 22ms	remaining: 417ms
5:	learn: 0.1666650	total: 26.9ms	remaining: 421ms
6:	learn: 0.1466780	total: 32.1ms	remaining: 427ms
7:	learn: 0.1312205	total: 36.4ms	remaining: 419ms
8:	learn: 0.1192919	total: 40.7ms	remaining: 412ms
9:	learn: 0.1122615	total: 45.2ms	remaining: 407ms
10:	learn: 0.1055275	total: 50ms	remaining: 404ms
11:	learn: 0.1002470	total: 54.5ms	remaining: 400ms
12:	learn: 0.0971239	total: 58.9ms	remaining: 394ms
13:	lear

72:	learn: 0.1362854	total: 329ms	remaining: 122ms
73:	learn: 0.1357699	total: 334ms	remaining: 117ms
74:	learn: 0.1351085	total: 339ms	remaining: 113ms
75:	learn: 0.1348993	total: 343ms	remaining: 108ms
76:	learn: 0.1344531	total: 348ms	remaining: 104ms
77:	learn: 0.1335365	total: 352ms	remaining: 99.4ms
78:	learn: 0.1331507	total: 357ms	remaining: 94.8ms
79:	learn: 0.1329869	total: 361ms	remaining: 90.3ms
80:	learn: 0.1327192	total: 366ms	remaining: 85.8ms
81:	learn: 0.1325678	total: 371ms	remaining: 81.5ms
82:	learn: 0.1319000	total: 376ms	remaining: 77.1ms
83:	learn: 0.1311324	total: 381ms	remaining: 72.6ms
84:	learn: 0.1307014	total: 386ms	remaining: 68.1ms
85:	learn: 0.1301884	total: 390ms	remaining: 63.5ms
86:	learn: 0.1296511	total: 394ms	remaining: 58.9ms
87:	learn: 0.1293139	total: 399ms	remaining: 54.4ms
88:	learn: 0.1286200	total: 404ms	remaining: 49.9ms
89:	learn: 0.1281798	total: 408ms	remaining: 45.3ms
90:	learn: 0.1279819	total: 412ms	remaining: 40.7ms
91:	learn: 0.1275

61:	learn: 0.0909198	total: 280ms	remaining: 172ms
62:	learn: 0.0900594	total: 284ms	remaining: 167ms
63:	learn: 0.0894315	total: 289ms	remaining: 162ms
64:	learn: 0.0890452	total: 293ms	remaining: 158ms
65:	learn: 0.0882035	total: 298ms	remaining: 154ms
66:	learn: 0.0870217	total: 302ms	remaining: 149ms
67:	learn: 0.0864608	total: 308ms	remaining: 145ms
68:	learn: 0.0858934	total: 312ms	remaining: 140ms
69:	learn: 0.0850867	total: 316ms	remaining: 136ms
70:	learn: 0.0841199	total: 321ms	remaining: 131ms
71:	learn: 0.0832970	total: 325ms	remaining: 126ms
72:	learn: 0.0823269	total: 330ms	remaining: 122ms
73:	learn: 0.0815458	total: 335ms	remaining: 118ms
74:	learn: 0.0798608	total: 340ms	remaining: 113ms
75:	learn: 0.0793968	total: 345ms	remaining: 109ms
76:	learn: 0.0789370	total: 349ms	remaining: 104ms
77:	learn: 0.0782496	total: 353ms	remaining: 99.7ms
78:	learn: 0.0772138	total: 357ms	remaining: 95ms
79:	learn: 0.0767067	total: 362ms	remaining: 90.4ms
80:	learn: 0.0763011	total: 36

41:	learn: 0.0509989	total: 191ms	remaining: 263ms
42:	learn: 0.0492363	total: 195ms	remaining: 259ms
43:	learn: 0.0469639	total: 200ms	remaining: 254ms
44:	learn: 0.0458823	total: 204ms	remaining: 250ms
45:	learn: 0.0432498	total: 209ms	remaining: 245ms
46:	learn: 0.0417002	total: 214ms	remaining: 241ms
47:	learn: 0.0409290	total: 218ms	remaining: 236ms
48:	learn: 0.0404340	total: 223ms	remaining: 232ms
49:	learn: 0.0398995	total: 227ms	remaining: 227ms
50:	learn: 0.0383445	total: 232ms	remaining: 223ms
51:	learn: 0.0371239	total: 237ms	remaining: 218ms
52:	learn: 0.0354602	total: 241ms	remaining: 214ms
53:	learn: 0.0349348	total: 245ms	remaining: 209ms
54:	learn: 0.0340526	total: 249ms	remaining: 204ms
55:	learn: 0.0335121	total: 254ms	remaining: 199ms
56:	learn: 0.0331841	total: 258ms	remaining: 194ms
57:	learn: 0.0321670	total: 262ms	remaining: 190ms
58:	learn: 0.0309956	total: 267ms	remaining: 185ms
59:	learn: 0.0304892	total: 272ms	remaining: 181ms
60:	learn: 0.0301281	total: 277

10:	learn: 0.3086960	total: 64.3ms	remaining: 521ms
11:	learn: 0.3022552	total: 69.8ms	remaining: 512ms
12:	learn: 0.2977659	total: 76.1ms	remaining: 509ms
13:	learn: 0.2921035	total: 81.5ms	remaining: 501ms
14:	learn: 0.2870703	total: 87.8ms	remaining: 497ms
15:	learn: 0.2825770	total: 93.4ms	remaining: 490ms
16:	learn: 0.2776780	total: 99.1ms	remaining: 484ms
17:	learn: 0.2735441	total: 104ms	remaining: 476ms
18:	learn: 0.2713216	total: 111ms	remaining: 471ms
19:	learn: 0.2681208	total: 116ms	remaining: 464ms
20:	learn: 0.2632635	total: 122ms	remaining: 459ms
21:	learn: 0.2614193	total: 127ms	remaining: 451ms
22:	learn: 0.2574307	total: 133ms	remaining: 444ms
23:	learn: 0.2556313	total: 138ms	remaining: 436ms
24:	learn: 0.2540511	total: 143ms	remaining: 429ms
25:	learn: 0.2526066	total: 148ms	remaining: 422ms
26:	learn: 0.2497741	total: 154ms	remaining: 416ms
27:	learn: 0.2484604	total: 159ms	remaining: 408ms
28:	learn: 0.2456557	total: 165ms	remaining: 403ms
29:	learn: 0.2434632	tot

91:	learn: 0.1223699	total: 531ms	remaining: 46.2ms
92:	learn: 0.1218732	total: 536ms	remaining: 40.4ms
93:	learn: 0.1218155	total: 541ms	remaining: 34.6ms
94:	learn: 0.1217590	total: 546ms	remaining: 28.8ms
95:	learn: 0.1209631	total: 552ms	remaining: 23ms
96:	learn: 0.1208636	total: 558ms	remaining: 17.3ms
97:	learn: 0.1201703	total: 563ms	remaining: 11.5ms
98:	learn: 0.1196357	total: 569ms	remaining: 5.74ms
99:	learn: 0.1187339	total: 574ms	remaining: 0us
Training model for pattern CDLMATCHINGLOW
Learning rate set to 0.280216
0:	learn: 0.5244036	total: 4.91ms	remaining: 486ms
1:	learn: 0.4273132	total: 9.25ms	remaining: 453ms
2:	learn: 0.3663015	total: 13.5ms	remaining: 435ms
3:	learn: 0.3235804	total: 17.9ms	remaining: 429ms
4:	learn: 0.2935207	total: 22.2ms	remaining: 422ms
5:	learn: 0.2749017	total: 26.7ms	remaining: 418ms
6:	learn: 0.2580459	total: 32ms	remaining: 426ms
7:	learn: 0.2438068	total: 36.6ms	remaining: 421ms
8:	learn: 0.2335748	total: 41.1ms	remaining: 416ms
9:	learn

65:	learn: 0.0274603	total: 308ms	remaining: 158ms
66:	learn: 0.0268911	total: 312ms	remaining: 154ms
67:	learn: 0.0264099	total: 317ms	remaining: 149ms
68:	learn: 0.0259432	total: 322ms	remaining: 145ms
69:	learn: 0.0250061	total: 326ms	remaining: 140ms
70:	learn: 0.0247886	total: 331ms	remaining: 135ms
71:	learn: 0.0244726	total: 335ms	remaining: 130ms
72:	learn: 0.0240699	total: 339ms	remaining: 125ms
73:	learn: 0.0236989	total: 344ms	remaining: 121ms
74:	learn: 0.0231340	total: 348ms	remaining: 116ms
75:	learn: 0.0229394	total: 352ms	remaining: 111ms
76:	learn: 0.0223631	total: 356ms	remaining: 106ms
77:	learn: 0.0219634	total: 361ms	remaining: 102ms
78:	learn: 0.0213898	total: 366ms	remaining: 97.2ms
79:	learn: 0.0210801	total: 370ms	remaining: 92.4ms
80:	learn: 0.0209434	total: 374ms	remaining: 87.7ms
81:	learn: 0.0207223	total: 379ms	remaining: 83.1ms
82:	learn: 0.0203193	total: 383ms	remaining: 78.5ms
83:	learn: 0.0202006	total: 388ms	remaining: 73.8ms
84:	learn: 0.0199579	tota

48:	learn: 0.0672309	total: 218ms	remaining: 227ms
49:	learn: 0.0665406	total: 223ms	remaining: 223ms
50:	learn: 0.0659838	total: 228ms	remaining: 219ms
51:	learn: 0.0650157	total: 232ms	remaining: 214ms
52:	learn: 0.0629740	total: 237ms	remaining: 210ms
53:	learn: 0.0625044	total: 242ms	remaining: 206ms
54:	learn: 0.0618526	total: 246ms	remaining: 201ms
55:	learn: 0.0607979	total: 251ms	remaining: 197ms
56:	learn: 0.0583807	total: 255ms	remaining: 192ms
57:	learn: 0.0573462	total: 259ms	remaining: 188ms
58:	learn: 0.0569286	total: 263ms	remaining: 183ms
59:	learn: 0.0558157	total: 268ms	remaining: 178ms
60:	learn: 0.0553159	total: 272ms	remaining: 174ms
61:	learn: 0.0546320	total: 276ms	remaining: 169ms
62:	learn: 0.0535623	total: 280ms	remaining: 165ms
63:	learn: 0.0528678	total: 285ms	remaining: 160ms
64:	learn: 0.0511331	total: 289ms	remaining: 156ms
65:	learn: 0.0505929	total: 293ms	remaining: 151ms
66:	learn: 0.0494185	total: 298ms	remaining: 147ms
67:	learn: 0.0486987	total: 302

22:	learn: 0.0047130	total: 130ms	remaining: 436ms
23:	learn: 0.0043712	total: 136ms	remaining: 429ms
24:	learn: 0.0040336	total: 142ms	remaining: 426ms
25:	learn: 0.0038178	total: 147ms	remaining: 419ms
26:	learn: 0.0034601	total: 153ms	remaining: 413ms
27:	learn: 0.0031691	total: 158ms	remaining: 407ms
28:	learn: 0.0029682	total: 164ms	remaining: 401ms
29:	learn: 0.0028161	total: 170ms	remaining: 396ms
30:	learn: 0.0024431	total: 176ms	remaining: 391ms
31:	learn: 0.0022195	total: 182ms	remaining: 387ms
32:	learn: 0.0020737	total: 188ms	remaining: 381ms
33:	learn: 0.0018306	total: 193ms	remaining: 375ms
34:	learn: 0.0017139	total: 199ms	remaining: 369ms
35:	learn: 0.0016930	total: 204ms	remaining: 363ms
36:	learn: 0.0015800	total: 210ms	remaining: 357ms
37:	learn: 0.0014703	total: 215ms	remaining: 351ms
38:	learn: 0.0014107	total: 221ms	remaining: 346ms
39:	learn: 0.0013453	total: 226ms	remaining: 339ms
40:	learn: 0.0013245	total: 231ms	remaining: 333ms
41:	learn: 0.0011729	total: 237

0:	learn: 0.7677389	total: 6.92ms	remaining: 685ms
1:	learn: 0.6535137	total: 12.8ms	remaining: 626ms
2:	learn: 0.5950558	total: 18.3ms	remaining: 592ms
3:	learn: 0.5621286	total: 24.1ms	remaining: 578ms
4:	learn: 0.5389533	total: 29.8ms	remaining: 565ms
5:	learn: 0.5240375	total: 35.6ms	remaining: 558ms
6:	learn: 0.5131475	total: 41.7ms	remaining: 555ms
7:	learn: 0.5058876	total: 47.3ms	remaining: 544ms
8:	learn: 0.5016967	total: 53.2ms	remaining: 538ms
9:	learn: 0.4983070	total: 58.7ms	remaining: 528ms
10:	learn: 0.4964338	total: 64.2ms	remaining: 519ms
11:	learn: 0.4930848	total: 70ms	remaining: 513ms
12:	learn: 0.4914272	total: 75.5ms	remaining: 505ms
13:	learn: 0.4901029	total: 81ms	remaining: 498ms
14:	learn: 0.4879201	total: 86.9ms	remaining: 493ms
15:	learn: 0.4861674	total: 92.9ms	remaining: 488ms
16:	learn: 0.4848499	total: 98.5ms	remaining: 481ms
17:	learn: 0.4826903	total: 104ms	remaining: 475ms
18:	learn: 0.4802729	total: 110ms	remaining: 469ms
19:	learn: 0.4779144	total: 

64:	learn: 0.0067728	total: 291ms	remaining: 157ms
65:	learn: 0.0065108	total: 297ms	remaining: 153ms
66:	learn: 0.0063337	total: 301ms	remaining: 148ms
67:	learn: 0.0061217	total: 306ms	remaining: 144ms
68:	learn: 0.0060026	total: 311ms	remaining: 140ms
69:	learn: 0.0057980	total: 316ms	remaining: 135ms
70:	learn: 0.0057201	total: 320ms	remaining: 131ms
71:	learn: 0.0055976	total: 324ms	remaining: 126ms
72:	learn: 0.0054320	total: 328ms	remaining: 121ms
73:	learn: 0.0052906	total: 332ms	remaining: 117ms
74:	learn: 0.0051708	total: 337ms	remaining: 112ms
75:	learn: 0.0050711	total: 342ms	remaining: 108ms
76:	learn: 0.0049095	total: 346ms	remaining: 103ms
77:	learn: 0.0048389	total: 350ms	remaining: 98.8ms
78:	learn: 0.0047035	total: 355ms	remaining: 94.3ms
79:	learn: 0.0045707	total: 359ms	remaining: 89.8ms
80:	learn: 0.0045135	total: 364ms	remaining: 85.4ms
81:	learn: 0.0044243	total: 368ms	remaining: 80.8ms
82:	learn: 0.0043195	total: 373ms	remaining: 76.4ms
83:	learn: 0.0042706	tota

34:	learn: 0.1131302	total: 201ms	remaining: 373ms
35:	learn: 0.1103913	total: 206ms	remaining: 367ms
36:	learn: 0.1088274	total: 212ms	remaining: 361ms
37:	learn: 0.1053199	total: 218ms	remaining: 356ms
38:	learn: 0.1035674	total: 224ms	remaining: 350ms
39:	learn: 0.1007837	total: 229ms	remaining: 344ms
40:	learn: 0.0971224	total: 235ms	remaining: 337ms
41:	learn: 0.0946821	total: 240ms	remaining: 332ms
42:	learn: 0.0921002	total: 246ms	remaining: 326ms
43:	learn: 0.0900557	total: 252ms	remaining: 321ms
44:	learn: 0.0870499	total: 258ms	remaining: 315ms
45:	learn: 0.0851673	total: 264ms	remaining: 309ms
46:	learn: 0.0840159	total: 269ms	remaining: 303ms
47:	learn: 0.0808043	total: 275ms	remaining: 298ms
48:	learn: 0.0790506	total: 280ms	remaining: 292ms
49:	learn: 0.0779217	total: 286ms	remaining: 286ms
50:	learn: 0.0765664	total: 292ms	remaining: 280ms
51:	learn: 0.0745707	total: 297ms	remaining: 274ms
52:	learn: 0.0733454	total: 303ms	remaining: 268ms
53:	learn: 0.0711635	total: 308

0:	learn: 0.4844524	total: 6.13ms	remaining: 606ms
1:	learn: 0.3619948	total: 10.7ms	remaining: 526ms
2:	learn: 0.2825674	total: 14.9ms	remaining: 483ms
3:	learn: 0.2281915	total: 19.2ms	remaining: 461ms
4:	learn: 0.1881012	total: 23.7ms	remaining: 450ms
5:	learn: 0.1609999	total: 28.5ms	remaining: 446ms
6:	learn: 0.1403940	total: 33.2ms	remaining: 441ms
7:	learn: 0.1240382	total: 37.2ms	remaining: 428ms
8:	learn: 0.1112672	total: 41.5ms	remaining: 420ms
9:	learn: 0.1013971	total: 45.9ms	remaining: 413ms
10:	learn: 0.0935151	total: 50.3ms	remaining: 407ms
11:	learn: 0.0880488	total: 54.4ms	remaining: 399ms
12:	learn: 0.0831295	total: 58.8ms	remaining: 393ms
13:	learn: 0.0798841	total: 64ms	remaining: 393ms
14:	learn: 0.0765457	total: 68.7ms	remaining: 389ms
15:	learn: 0.0734033	total: 73.3ms	remaining: 385ms
16:	learn: 0.0705577	total: 78.4ms	remaining: 383ms
17:	learn: 0.0674936	total: 82.8ms	remaining: 377ms
18:	learn: 0.0656834	total: 87.1ms	remaining: 371ms
19:	learn: 0.0641035	tot

86:	learn: 0.0390446	total: 399ms	remaining: 59.5ms
87:	learn: 0.0390390	total: 402ms	remaining: 54.8ms
88:	learn: 0.0385124	total: 407ms	remaining: 50.3ms
89:	learn: 0.0382889	total: 412ms	remaining: 45.7ms
90:	learn: 0.0382278	total: 417ms	remaining: 41.2ms
91:	learn: 0.0380323	total: 421ms	remaining: 36.6ms
92:	learn: 0.0379063	total: 425ms	remaining: 32ms
93:	learn: 0.0376546	total: 430ms	remaining: 27.4ms
94:	learn: 0.0375981	total: 435ms	remaining: 22.9ms
95:	learn: 0.0371455	total: 440ms	remaining: 18.3ms
96:	learn: 0.0369851	total: 445ms	remaining: 13.8ms
97:	learn: 0.0369363	total: 449ms	remaining: 9.17ms
98:	learn: 0.0367179	total: 454ms	remaining: 4.58ms
99:	learn: 0.0367127	total: 457ms	remaining: 0us


In [24]:
from sklearn.metrics import f1_score, classification_report

In [25]:
score = 0
for c in y_train.columns:
    score += f1_score(y_test[c], candle_models[c].predict(test), average='macro')

In [26]:
score

20.673912237970722

In [27]:
import joblib

In [28]:
joblib.dump(candle_models, 'MLTA/candle_models.joblib')

['MLTA/candle_models.joblib']

In [29]:
for k in candle_models.keys():
    print(candle_models[k].classes_)

[-100    0  100]
[-100    0  100]
[-100    0  100]
[  0 100]
[-100    0  100]
[-100    0  100]
[-100    0  100]
[  0 100]
[  0 100]
[-100    0  100]
[  0 100]
[  0 100]
[-100    0  100]
[  0 100]
[  0 100]
[  0 100]
[  0 100]
[-100    0  100]
[-100    0  100]
[  0 100]
[  0 100]
[  0 100]
[  0 100]
[  0 100]
[-100    0  100]
[-100    0  100]
[-100    0  100]
[  0 100]
[  0 100]
[-100    0  100]
[  0 100]
[0 1]
[0 1]


In [30]:
candle_models['CDLDOJI'].classes_

array([  0, 100], dtype=int64)

In [31]:
candle_models.keys()

dict_keys(['CDL3INSIDE', 'CDL3LINESTRIKE', 'CDL3OUTSIDE', 'CDL3WHITESOLDIERS', 'CDLBELTHOLD', 'CDLCLOSINGMARUBOZU', 'CDLCOUNTERATTACK', 'CDLDOJI', 'CDLDRAGONFLYDOJI', 'CDLGAPSIDESIDEWHITE', 'CDLGRAVESTONEDOJI', 'CDLHAMMER', 'CDLHARAMI', 'CDLHOMINGPIGEON', 'CDLINVERTEDHAMMER', 'CDLLADDERBOTTOM', 'CDLLONGLEGGEDDOJI', 'CDLLONGLINE', 'CDLMARUBOZU', 'CDLMATCHINGLOW', 'CDLMORNINGDOJISTAR', 'CDLMORNINGSTAR', 'CDLPIERCING', 'CDLRICKSHAWMAN', 'CDLRISEFALL3METHODS', 'CDLSEPARATINGLINES', 'CDLSHORTLINE', 'CDLSTICKSANDWICH', 'CDLTAKURI', 'CDLTASUKIGAP', 'CDLUNIQUE3RIVER', 'bullishPin', 'bearishPin'])