In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## Read files

In [2]:
import os

In [3]:
files = os.listdir('data')

In [4]:
files

['BTC-USD.csv', '^GSPC.csv', '^IXIC.csv']

In [5]:
dataframes = dict()

In [6]:
for f in files:
    df = pd.read_csv('data/' + f)
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)
    dataframes[f.split('.')[0]] = df[df.index > '1990'].copy()

## Append Target Columns

In [7]:
import talib

In [8]:
for k in dataframes.keys():
    for f in talib.get_function_groups()['Pattern Recognition']:
        dataframes[k][f]  = getattr(talib, f)(dataframes[k]['Open'], dataframes[k]['High'], 
                                              dataframes[k]['Low'], dataframes[k]['Close'])

In [9]:
to_drop = list()
for f in talib.get_function_groups()['Pattern Recognition']:
    total = 0
    size = 0
    for k in dataframes.keys():
        total += dataframes[k][f].sum()
        size += dataframes[k].shape[0]
    if total < size * 0.01:
        to_drop.append(f)

In [10]:
for k in dataframes.keys():
    dataframes[k].drop(to_drop, axis=1, inplace=True)

## Building Model

Because candlestick patterns are very well-defined and clear cut, I expect any classifier to return probability of only 0 and 1. To get more useful data about how close a candle is to a pattern, I deliberately add uncertainty by compressing inputs first through an autoencoder.

### Autoencoder

#### Prepare training and test sets

In [11]:
inputs = dict()

In [12]:
for k in dataframes.keys():
    # We need to normalize the data. Otherwise, data from multiple sources will vary too much.
    # We do this by dividing the whole row with its mean. We can also divide by open or close, but
    # one column would always be 1 in that case.
    
    divisor = dataframes[k][['Open', 'High', 'Low', 'Close']].mean(axis=1)
    inputs[k] = dataframes[k][['Open', 'High', 'Low', 'Close']].div(divisor, axis=0)

In [13]:
train = pd.concat([inputs[k][['Open', 'High', 'Low', 'Close']].iloc[:-500] for k in inputs.keys()], axis=0)

In [14]:
test = pd.concat([inputs[k][['Open', 'High', 'Low', 'Close']].iloc[-500:] for k in inputs.keys()], axis=0)

In [15]:
train

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-09-17,1.010664,1.015675,0.981502,0.992159
2014-09-18,1.043498,1.043498,0.943556,0.969448
2014-09-19,1.039936,1.049087,0.942905,0.968073
2014-09-20,0.976457,1.047272,0.964606,1.011665
2014-09-21,1.012296,1.023064,0.975325,0.989315
...,...,...,...,...
2018-11-06,0.995946,1.006084,0.995242,1.002728
2018-11-07,0.991964,1.008862,0.990603,1.008571
2018-11-08,1.001161,1.004181,0.995261,0.999397
2018-11-09,1.005886,1.006671,0.989855,0.997588


In [16]:
test

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-06-29,1.027629,1.027641,0.953679,0.991051
2019-06-30,1.043769,1.065323,0.944660,0.946247
2019-07-01,1.011995,1.050378,0.945670,0.991956
2019-07-02,1.007476,1.038256,0.926526,1.027742
2019-07-03,0.949676,1.050623,0.949676,1.050025
...,...,...,...,...
2020-11-02,1.003914,1.009442,0.987548,0.999096
2020-11-03,0.994071,1.009854,0.991025,1.005050
2020-11-04,0.993123,1.012175,0.988822,1.005881
2020-11-05,0.996829,1.005935,0.994114,1.003122


In [17]:
from sklearn.decomposition import PCA

In [18]:
pca = PCA(n_components=2, random_state=1)

In [19]:
pca.fit(train)

PCA(n_components=2, random_state=1)

In [20]:
pca.transform(test)

array([[ 0.04622258, -0.01591535],
       [ 0.08748861, -0.04885457],
       [ 0.0638456 ,  0.00112652],
       ...,
       [ 0.0022576 ,  0.00972273],
       [-0.00449785,  0.0032774 ],
       [-0.00109365,  0.00093077]])

In [21]:
candle_models = dict()

In [25]:
y_train = pd.concat([dataframes[k].drop(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], axis=1).iloc[:-500] for k in dataframes.keys()], axis=0)

In [26]:
y_test = pd.concat([dataframes[k].drop(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], axis=1).iloc[-500:] for k in dataframes.keys()], axis=0)

In [27]:
from catboost import CatBoostClassifier

In [115]:
for c in y_train.columns:
    print('Training model for pattern %s' % (c))
    model = CatBoostClassifier(n_estimators=100, random_state=1, auto_class_weights='SqrtBalanced')
    model.fit(train, y_train[c])
    candle_models[c] = model

Training model for pattern CDL3INSIDE
Learning rate set to 0.5
0:	learn: 0.6240574	total: 6.57ms	remaining: 651ms
1:	learn: 0.5029075	total: 11.7ms	remaining: 575ms
2:	learn: 0.4454711	total: 17.4ms	remaining: 562ms
3:	learn: 0.4164473	total: 22.7ms	remaining: 544ms
4:	learn: 0.3971233	total: 27.9ms	remaining: 530ms
5:	learn: 0.3889474	total: 33ms	remaining: 516ms
6:	learn: 0.3802485	total: 38.1ms	remaining: 506ms
7:	learn: 0.3703471	total: 43.6ms	remaining: 501ms
8:	learn: 0.3662617	total: 48.7ms	remaining: 492ms
9:	learn: 0.3565288	total: 54.4ms	remaining: 490ms
10:	learn: 0.3518440	total: 60ms	remaining: 485ms
11:	learn: 0.3483193	total: 65.3ms	remaining: 479ms
12:	learn: 0.3458583	total: 71ms	remaining: 475ms
13:	learn: 0.3426361	total: 76.8ms	remaining: 472ms
14:	learn: 0.3372704	total: 82.2ms	remaining: 466ms
15:	learn: 0.3350357	total: 87.6ms	remaining: 460ms
16:	learn: 0.3299605	total: 93ms	remaining: 454ms
17:	learn: 0.3271419	total: 98.1ms	remaining: 447ms
18:	learn: 0.325653

73:	learn: 0.0083602	total: 392ms	remaining: 138ms
74:	learn: 0.0082876	total: 397ms	remaining: 132ms
75:	learn: 0.0082394	total: 402ms	remaining: 127ms
76:	learn: 0.0081741	total: 408ms	remaining: 122ms
77:	learn: 0.0081316	total: 413ms	remaining: 116ms
78:	learn: 0.0079910	total: 418ms	remaining: 111ms
79:	learn: 0.0077480	total: 424ms	remaining: 106ms
80:	learn: 0.0076692	total: 429ms	remaining: 101ms
81:	learn: 0.0076170	total: 434ms	remaining: 95.2ms
82:	learn: 0.0075147	total: 438ms	remaining: 89.8ms
83:	learn: 0.0075049	total: 443ms	remaining: 84.4ms
84:	learn: 0.0073502	total: 449ms	remaining: 79.2ms
85:	learn: 0.0072948	total: 454ms	remaining: 73.9ms
86:	learn: 0.0072819	total: 459ms	remaining: 68.5ms
87:	learn: 0.0071447	total: 464ms	remaining: 63.3ms
88:	learn: 0.0070862	total: 469ms	remaining: 57.9ms
89:	learn: 0.0069975	total: 474ms	remaining: 52.6ms
90:	learn: 0.0069702	total: 479ms	remaining: 47.3ms
91:	learn: 0.0069252	total: 484ms	remaining: 42.1ms
92:	learn: 0.0068979

50:	learn: 0.0698948	total: 221ms	remaining: 212ms
51:	learn: 0.0689599	total: 226ms	remaining: 208ms
52:	learn: 0.0683502	total: 230ms	remaining: 204ms
53:	learn: 0.0680344	total: 234ms	remaining: 199ms
54:	learn: 0.0674139	total: 238ms	remaining: 195ms
55:	learn: 0.0661100	total: 242ms	remaining: 190ms
56:	learn: 0.0647045	total: 247ms	remaining: 186ms
57:	learn: 0.0644806	total: 251ms	remaining: 182ms
58:	learn: 0.0640558	total: 256ms	remaining: 178ms
59:	learn: 0.0638644	total: 260ms	remaining: 173ms
60:	learn: 0.0627673	total: 264ms	remaining: 169ms
61:	learn: 0.0612231	total: 268ms	remaining: 164ms
62:	learn: 0.0602918	total: 273ms	remaining: 160ms
63:	learn: 0.0601201	total: 278ms	remaining: 156ms
64:	learn: 0.0598000	total: 282ms	remaining: 152ms
65:	learn: 0.0587506	total: 287ms	remaining: 148ms
66:	learn: 0.0577451	total: 291ms	remaining: 144ms
67:	learn: 0.0576108	total: 295ms	remaining: 139ms
68:	learn: 0.0568892	total: 300ms	remaining: 135ms
69:	learn: 0.0564042	total: 304

27:	learn: 0.2338861	total: 148ms	remaining: 380ms
28:	learn: 0.2317502	total: 153ms	remaining: 375ms
29:	learn: 0.2300632	total: 159ms	remaining: 370ms
30:	learn: 0.2276029	total: 164ms	remaining: 366ms
31:	learn: 0.2244613	total: 170ms	remaining: 362ms
32:	learn: 0.2236470	total: 175ms	remaining: 356ms
33:	learn: 0.2221004	total: 180ms	remaining: 350ms
34:	learn: 0.2212763	total: 185ms	remaining: 344ms
35:	learn: 0.2195929	total: 191ms	remaining: 339ms
36:	learn: 0.2161371	total: 196ms	remaining: 334ms
37:	learn: 0.2138075	total: 202ms	remaining: 329ms
38:	learn: 0.2123722	total: 207ms	remaining: 323ms
39:	learn: 0.2101952	total: 212ms	remaining: 318ms
40:	learn: 0.2091442	total: 217ms	remaining: 312ms
41:	learn: 0.2078892	total: 222ms	remaining: 307ms
42:	learn: 0.2072793	total: 227ms	remaining: 301ms
43:	learn: 0.2057605	total: 233ms	remaining: 296ms
44:	learn: 0.2047738	total: 238ms	remaining: 291ms
45:	learn: 0.2038601	total: 243ms	remaining: 286ms
46:	learn: 0.2033594	total: 249

Learning rate set to 0.280216
0:	learn: 0.5431129	total: 5.36ms	remaining: 531ms
1:	learn: 0.4349567	total: 9.8ms	remaining: 480ms
2:	learn: 0.3730806	total: 14.1ms	remaining: 456ms
3:	learn: 0.3196975	total: 18.4ms	remaining: 442ms
4:	learn: 0.2836661	total: 22.6ms	remaining: 430ms
5:	learn: 0.2593660	total: 26.9ms	remaining: 421ms
6:	learn: 0.2391189	total: 31.2ms	remaining: 414ms
7:	learn: 0.2258887	total: 35.6ms	remaining: 409ms
8:	learn: 0.2119289	total: 39.9ms	remaining: 404ms
9:	learn: 0.2018204	total: 44.2ms	remaining: 398ms
10:	learn: 0.1922298	total: 48.3ms	remaining: 391ms
11:	learn: 0.1869464	total: 52.4ms	remaining: 384ms
12:	learn: 0.1804290	total: 56.5ms	remaining: 378ms
13:	learn: 0.1742038	total: 60.8ms	remaining: 374ms
14:	learn: 0.1702170	total: 65.7ms	remaining: 372ms
15:	learn: 0.1664502	total: 70.4ms	remaining: 369ms
16:	learn: 0.1631801	total: 74.8ms	remaining: 365ms
17:	learn: 0.1601825	total: 79.2ms	remaining: 361ms
18:	learn: 0.1567323	total: 83.4ms	remaining:

68:	learn: 0.0777313	total: 298ms	remaining: 134ms
69:	learn: 0.0769101	total: 302ms	remaining: 130ms
70:	learn: 0.0763225	total: 307ms	remaining: 125ms
71:	learn: 0.0757498	total: 311ms	remaining: 121ms
72:	learn: 0.0755311	total: 316ms	remaining: 117ms
73:	learn: 0.0752858	total: 320ms	remaining: 112ms
74:	learn: 0.0745767	total: 324ms	remaining: 108ms
75:	learn: 0.0743481	total: 329ms	remaining: 104ms
76:	learn: 0.0737739	total: 333ms	remaining: 99.5ms
77:	learn: 0.0736507	total: 337ms	remaining: 95.1ms
78:	learn: 0.0731243	total: 342ms	remaining: 90.8ms
79:	learn: 0.0728457	total: 346ms	remaining: 86.4ms
80:	learn: 0.0723638	total: 350ms	remaining: 82.2ms
81:	learn: 0.0718370	total: 355ms	remaining: 77.8ms
82:	learn: 0.0717352	total: 359ms	remaining: 73.5ms
83:	learn: 0.0713311	total: 363ms	remaining: 69.1ms
84:	learn: 0.0705894	total: 367ms	remaining: 64.8ms
85:	learn: 0.0703416	total: 371ms	remaining: 60.4ms
86:	learn: 0.0700698	total: 375ms	remaining: 56.1ms
87:	learn: 0.0698464

46:	learn: 0.0541941	total: 205ms	remaining: 231ms
47:	learn: 0.0530184	total: 210ms	remaining: 227ms
48:	learn: 0.0524208	total: 214ms	remaining: 223ms
49:	learn: 0.0516939	total: 219ms	remaining: 219ms
50:	learn: 0.0515506	total: 223ms	remaining: 215ms
51:	learn: 0.0506931	total: 228ms	remaining: 210ms
52:	learn: 0.0502043	total: 232ms	remaining: 206ms
53:	learn: 0.0494654	total: 236ms	remaining: 201ms
54:	learn: 0.0489559	total: 240ms	remaining: 197ms
55:	learn: 0.0484885	total: 245ms	remaining: 192ms
56:	learn: 0.0483398	total: 250ms	remaining: 188ms
57:	learn: 0.0477320	total: 254ms	remaining: 184ms
58:	learn: 0.0467601	total: 258ms	remaining: 179ms
59:	learn: 0.0456368	total: 262ms	remaining: 175ms
60:	learn: 0.0455198	total: 266ms	remaining: 170ms
61:	learn: 0.0451880	total: 270ms	remaining: 166ms
62:	learn: 0.0447679	total: 274ms	remaining: 161ms
63:	learn: 0.0443154	total: 278ms	remaining: 157ms
64:	learn: 0.0441270	total: 282ms	remaining: 152ms
65:	learn: 0.0434899	total: 287

4:	learn: 0.6332334	total: 30.2ms	remaining: 573ms
5:	learn: 0.6240474	total: 36.2ms	remaining: 568ms
6:	learn: 0.6160210	total: 41.9ms	remaining: 556ms
7:	learn: 0.6107688	total: 47.5ms	remaining: 546ms
8:	learn: 0.6068184	total: 53ms	remaining: 536ms
9:	learn: 0.5996245	total: 58.4ms	remaining: 526ms
10:	learn: 0.5959091	total: 63.9ms	remaining: 517ms
11:	learn: 0.5923746	total: 69.5ms	remaining: 510ms
12:	learn: 0.5897835	total: 74.9ms	remaining: 501ms
13:	learn: 0.5877120	total: 80.2ms	remaining: 493ms
14:	learn: 0.5851100	total: 85.7ms	remaining: 485ms
15:	learn: 0.5832022	total: 90.7ms	remaining: 476ms
16:	learn: 0.5781802	total: 96.3ms	remaining: 470ms
17:	learn: 0.5759441	total: 102ms	remaining: 465ms
18:	learn: 0.5732797	total: 107ms	remaining: 458ms
19:	learn: 0.5705879	total: 113ms	remaining: 453ms
20:	learn: 0.5686051	total: 119ms	remaining: 446ms
21:	learn: 0.5665973	total: 124ms	remaining: 439ms
22:	learn: 0.5628459	total: 129ms	remaining: 432ms
23:	learn: 0.5600560	total

93:	learn: 0.0681285	total: 405ms	remaining: 25.8ms
94:	learn: 0.0676029	total: 409ms	remaining: 21.5ms
95:	learn: 0.0670413	total: 414ms	remaining: 17.2ms
96:	learn: 0.0665350	total: 418ms	remaining: 12.9ms
97:	learn: 0.0657249	total: 422ms	remaining: 8.61ms
98:	learn: 0.0646688	total: 426ms	remaining: 4.3ms
99:	learn: 0.0637956	total: 431ms	remaining: 0us
Training model for pattern CDLINVERTEDHAMMER
Learning rate set to 0.280216
0:	learn: 0.4998706	total: 4.25ms	remaining: 421ms
1:	learn: 0.3843543	total: 8.55ms	remaining: 419ms
2:	learn: 0.3135879	total: 12.8ms	remaining: 413ms
3:	learn: 0.2621878	total: 16.7ms	remaining: 402ms
4:	learn: 0.2270514	total: 20.9ms	remaining: 398ms
5:	learn: 0.2035618	total: 25.5ms	remaining: 399ms
6:	learn: 0.1814628	total: 29.5ms	remaining: 392ms
7:	learn: 0.1665537	total: 33.6ms	remaining: 386ms
8:	learn: 0.1534905	total: 37.8ms	remaining: 382ms
9:	learn: 0.1448773	total: 41.8ms	remaining: 376ms
10:	learn: 0.1354944	total: 45.7ms	remaining: 370ms
11:

54:	learn: 0.0340526	total: 236ms	remaining: 193ms
55:	learn: 0.0335121	total: 240ms	remaining: 189ms
56:	learn: 0.0331841	total: 244ms	remaining: 184ms
57:	learn: 0.0321670	total: 249ms	remaining: 181ms
58:	learn: 0.0309956	total: 254ms	remaining: 177ms
59:	learn: 0.0304892	total: 258ms	remaining: 172ms
60:	learn: 0.0301281	total: 262ms	remaining: 168ms
61:	learn: 0.0293537	total: 267ms	remaining: 163ms
62:	learn: 0.0291530	total: 270ms	remaining: 159ms
63:	learn: 0.0286553	total: 274ms	remaining: 154ms
64:	learn: 0.0279694	total: 279ms	remaining: 150ms
65:	learn: 0.0272764	total: 283ms	remaining: 146ms
66:	learn: 0.0269764	total: 288ms	remaining: 142ms
67:	learn: 0.0267342	total: 292ms	remaining: 138ms
68:	learn: 0.0261030	total: 297ms	remaining: 133ms
69:	learn: 0.0257959	total: 301ms	remaining: 129ms
70:	learn: 0.0252706	total: 305ms	remaining: 125ms
71:	learn: 0.0244913	total: 310ms	remaining: 121ms
72:	learn: 0.0238599	total: 314ms	remaining: 116ms
73:	learn: 0.0234986	total: 318

46:	learn: 0.2256539	total: 251ms	remaining: 282ms
47:	learn: 0.2245258	total: 257ms	remaining: 279ms
48:	learn: 0.2236410	total: 263ms	remaining: 274ms
49:	learn: 0.2226032	total: 269ms	remaining: 269ms
50:	learn: 0.2216688	total: 274ms	remaining: 263ms
51:	learn: 0.2207107	total: 279ms	remaining: 258ms
52:	learn: 0.2197303	total: 285ms	remaining: 253ms
53:	learn: 0.2195275	total: 290ms	remaining: 247ms
54:	learn: 0.2186922	total: 295ms	remaining: 242ms
55:	learn: 0.2177205	total: 301ms	remaining: 236ms
56:	learn: 0.2171941	total: 306ms	remaining: 231ms
57:	learn: 0.2165379	total: 311ms	remaining: 225ms
58:	learn: 0.2163691	total: 316ms	remaining: 220ms
59:	learn: 0.2156181	total: 322ms	remaining: 214ms
60:	learn: 0.2153333	total: 327ms	remaining: 209ms
61:	learn: 0.2146185	total: 332ms	remaining: 204ms
62:	learn: 0.2140818	total: 337ms	remaining: 198ms
63:	learn: 0.2137556	total: 343ms	remaining: 193ms
64:	learn: 0.2132540	total: 348ms	remaining: 187ms
65:	learn: 0.2126738	total: 353

17:	learn: 0.1967389	total: 78.9ms	remaining: 359ms
18:	learn: 0.1941586	total: 83ms	remaining: 354ms
19:	learn: 0.1915523	total: 87.2ms	remaining: 349ms
20:	learn: 0.1892294	total: 92ms	remaining: 346ms
21:	learn: 0.1872072	total: 97.8ms	remaining: 347ms
22:	learn: 0.1858194	total: 103ms	remaining: 343ms
23:	learn: 0.1850037	total: 108ms	remaining: 341ms
24:	learn: 0.1841012	total: 112ms	remaining: 336ms
25:	learn: 0.1822829	total: 117ms	remaining: 332ms
26:	learn: 0.1810607	total: 121ms	remaining: 327ms
27:	learn: 0.1800232	total: 126ms	remaining: 324ms
28:	learn: 0.1787203	total: 131ms	remaining: 320ms
29:	learn: 0.1771610	total: 135ms	remaining: 315ms
30:	learn: 0.1765749	total: 139ms	remaining: 310ms
31:	learn: 0.1762122	total: 143ms	remaining: 305ms
32:	learn: 0.1756460	total: 148ms	remaining: 301ms
33:	learn: 0.1734414	total: 153ms	remaining: 298ms
34:	learn: 0.1714457	total: 158ms	remaining: 294ms
35:	learn: 0.1698303	total: 163ms	remaining: 289ms
36:	learn: 0.1684341	total: 16

76:	learn: 0.0223631	total: 344ms	remaining: 103ms
77:	learn: 0.0219634	total: 349ms	remaining: 98.5ms
78:	learn: 0.0213898	total: 355ms	remaining: 94.3ms
79:	learn: 0.0210801	total: 359ms	remaining: 89.8ms
80:	learn: 0.0209434	total: 364ms	remaining: 85.3ms
81:	learn: 0.0207223	total: 368ms	remaining: 80.9ms
82:	learn: 0.0203193	total: 374ms	remaining: 76.5ms
83:	learn: 0.0202006	total: 378ms	remaining: 71.9ms
84:	learn: 0.0199579	total: 382ms	remaining: 67.5ms
85:	learn: 0.0196037	total: 386ms	remaining: 62.9ms
86:	learn: 0.0191134	total: 391ms	remaining: 58.4ms
87:	learn: 0.0186186	total: 396ms	remaining: 54ms
88:	learn: 0.0182228	total: 400ms	remaining: 49.4ms
89:	learn: 0.0180895	total: 405ms	remaining: 45ms
90:	learn: 0.0178071	total: 409ms	remaining: 40.5ms
91:	learn: 0.0175596	total: 414ms	remaining: 36ms
92:	learn: 0.0171742	total: 419ms	remaining: 31.5ms
93:	learn: 0.0170874	total: 423ms	remaining: 27ms
94:	learn: 0.0169030	total: 427ms	remaining: 22.5ms
95:	learn: 0.0166892	

72:	learn: 0.0447382	total: 331ms	remaining: 123ms
73:	learn: 0.0444878	total: 336ms	remaining: 118ms
74:	learn: 0.0440208	total: 340ms	remaining: 113ms
75:	learn: 0.0434666	total: 344ms	remaining: 109ms
76:	learn: 0.0431686	total: 348ms	remaining: 104ms
77:	learn: 0.0425840	total: 353ms	remaining: 99.7ms
78:	learn: 0.0421689	total: 358ms	remaining: 95.1ms
79:	learn: 0.0416829	total: 362ms	remaining: 90.6ms
80:	learn: 0.0413473	total: 366ms	remaining: 85.9ms
81:	learn: 0.0406214	total: 371ms	remaining: 81.4ms
82:	learn: 0.0396899	total: 375ms	remaining: 76.9ms
83:	learn: 0.0392302	total: 380ms	remaining: 72.3ms
84:	learn: 0.0389126	total: 384ms	remaining: 67.8ms
85:	learn: 0.0382951	total: 388ms	remaining: 63.2ms
86:	learn: 0.0380771	total: 393ms	remaining: 58.7ms
87:	learn: 0.0377786	total: 397ms	remaining: 54.1ms
88:	learn: 0.0373765	total: 401ms	remaining: 49.5ms
89:	learn: 0.0370027	total: 406ms	remaining: 45.1ms
90:	learn: 0.0368705	total: 410ms	remaining: 40.5ms
91:	learn: 0.0363

50:	learn: 0.0008553	total: 300ms	remaining: 288ms
51:	learn: 0.0008168	total: 306ms	remaining: 283ms
52:	learn: 0.0007977	total: 312ms	remaining: 277ms
53:	learn: 0.0007779	total: 319ms	remaining: 272ms
54:	learn: 0.0007269	total: 326ms	remaining: 267ms
55:	learn: 0.0007149	total: 333ms	remaining: 262ms
56:	learn: 0.0006780	total: 339ms	remaining: 256ms
57:	learn: 0.0006453	total: 345ms	remaining: 250ms
58:	learn: 0.0005976	total: 351ms	remaining: 244ms
59:	learn: 0.0005838	total: 356ms	remaining: 238ms
60:	learn: 0.0005679	total: 363ms	remaining: 232ms
61:	learn: 0.0005227	total: 369ms	remaining: 226ms
62:	learn: 0.0005019	total: 375ms	remaining: 220ms
63:	learn: 0.0004865	total: 382ms	remaining: 215ms
64:	learn: 0.0004719	total: 388ms	remaining: 209ms
65:	learn: 0.0004583	total: 393ms	remaining: 203ms
66:	learn: 0.0004414	total: 399ms	remaining: 196ms
67:	learn: 0.0004325	total: 404ms	remaining: 190ms
68:	learn: 0.0004167	total: 410ms	remaining: 184ms
69:	learn: 0.0004034	total: 416

9:	learn: 0.4983070	total: 55.4ms	remaining: 498ms
10:	learn: 0.4964338	total: 60.9ms	remaining: 493ms
11:	learn: 0.4930848	total: 66.3ms	remaining: 486ms
12:	learn: 0.4914272	total: 71.3ms	remaining: 477ms
13:	learn: 0.4901029	total: 76.4ms	remaining: 470ms
14:	learn: 0.4879201	total: 82ms	remaining: 465ms
15:	learn: 0.4861674	total: 87.2ms	remaining: 458ms
16:	learn: 0.4848499	total: 92.4ms	remaining: 451ms
17:	learn: 0.4826903	total: 98.1ms	remaining: 447ms
18:	learn: 0.4802729	total: 104ms	remaining: 441ms
19:	learn: 0.4779144	total: 109ms	remaining: 436ms
20:	learn: 0.4769962	total: 114ms	remaining: 430ms
21:	learn: 0.4755351	total: 119ms	remaining: 423ms
22:	learn: 0.4734857	total: 125ms	remaining: 420ms
23:	learn: 0.4721809	total: 130ms	remaining: 413ms
24:	learn: 0.4710615	total: 136ms	remaining: 408ms
25:	learn: 0.4685287	total: 142ms	remaining: 404ms
26:	learn: 0.4664189	total: 147ms	remaining: 398ms
27:	learn: 0.4648338	total: 152ms	remaining: 392ms
28:	learn: 0.4627619	tota

96:	learn: 0.0033305	total: 438ms	remaining: 13.5ms
97:	learn: 0.0032778	total: 442ms	remaining: 9.01ms
98:	learn: 0.0032108	total: 446ms	remaining: 4.5ms
99:	learn: 0.0031750	total: 450ms	remaining: 0us
Training model for pattern CDLTAKURI
Learning rate set to 0.280216
0:	learn: 0.5001572	total: 4.67ms	remaining: 462ms
1:	learn: 0.3833436	total: 9.36ms	remaining: 459ms
2:	learn: 0.3076990	total: 13.7ms	remaining: 443ms
3:	learn: 0.2544268	total: 18ms	remaining: 432ms
4:	learn: 0.2192361	total: 22.3ms	remaining: 424ms
5:	learn: 0.1905049	total: 26.6ms	remaining: 417ms
6:	learn: 0.1682786	total: 31.2ms	remaining: 415ms
7:	learn: 0.1527654	total: 35.7ms	remaining: 410ms
8:	learn: 0.1415514	total: 40.2ms	remaining: 406ms
9:	learn: 0.1321481	total: 44.4ms	remaining: 400ms
10:	learn: 0.1263583	total: 49.3ms	remaining: 399ms
11:	learn: 0.1207662	total: 53.6ms	remaining: 393ms
12:	learn: 0.1170683	total: 58.6ms	remaining: 392ms
13:	learn: 0.1130520	total: 63.4ms	remaining: 390ms
14:	learn: 0.

78:	learn: 0.0441787	total: 434ms	remaining: 115ms
79:	learn: 0.0434673	total: 440ms	remaining: 110ms
80:	learn: 0.0429325	total: 446ms	remaining: 105ms
81:	learn: 0.0426129	total: 451ms	remaining: 99ms
82:	learn: 0.0420072	total: 456ms	remaining: 93.5ms
83:	learn: 0.0409492	total: 461ms	remaining: 87.9ms
84:	learn: 0.0402906	total: 467ms	remaining: 82.4ms
85:	learn: 0.0399110	total: 473ms	remaining: 77ms
86:	learn: 0.0394481	total: 478ms	remaining: 71.5ms
87:	learn: 0.0390739	total: 483ms	remaining: 65.9ms
88:	learn: 0.0386852	total: 489ms	remaining: 60.4ms
89:	learn: 0.0383097	total: 494ms	remaining: 54.9ms
90:	learn: 0.0381712	total: 499ms	remaining: 49.4ms
91:	learn: 0.0374377	total: 505ms	remaining: 43.9ms
92:	learn: 0.0369071	total: 510ms	remaining: 38.4ms
93:	learn: 0.0363095	total: 515ms	remaining: 32.9ms
94:	learn: 0.0358009	total: 521ms	remaining: 27.4ms
95:	learn: 0.0353963	total: 527ms	remaining: 22ms
96:	learn: 0.0350742	total: 533ms	remaining: 16.5ms
97:	learn: 0.0342998	

In [116]:
from sklearn.metrics import f1_score, classification_report

In [117]:
score = 0
for c in y_train.columns:
    score += f1_score(y_test[c], candle_models[c].predict(test), average='macro')

In [118]:
score

18.74358556380481

In [119]:
import joblib

In [120]:
joblib.dump(candle_models, 'ML-TA/candle_models.joblib')

['ML-TA/candle_models.joblib']

In [125]:
for k in candle_models.keys():
    print(candle_models[k].classes_)

[-100    0  100]
[-100    0  100]
[-100    0  100]
[  0 100]
[-100    0  100]
[-100    0  100]
[-100    0  100]
[  0 100]
[  0 100]
[-100    0  100]
[  0 100]
[  0 100]
[-100    0  100]
[  0 100]
[  0 100]
[  0 100]
[  0 100]
[-100    0  100]
[-100    0  100]
[  0 100]
[  0 100]
[  0 100]
[  0 100]
[  0 100]
[-100    0  100]
[-100    0  100]
[-100    0  100]
[  0 100]
[  0 100]
[-100    0  100]
[  0 100]


In [124]:
candle_models['CDLDOJI'].classes_

array([  0, 100], dtype=int64)

In [126]:
candle_models.keys()

dict_keys(['CDL3INSIDE', 'CDL3LINESTRIKE', 'CDL3OUTSIDE', 'CDL3WHITESOLDIERS', 'CDLBELTHOLD', 'CDLCLOSINGMARUBOZU', 'CDLCOUNTERATTACK', 'CDLDOJI', 'CDLDRAGONFLYDOJI', 'CDLGAPSIDESIDEWHITE', 'CDLGRAVESTONEDOJI', 'CDLHAMMER', 'CDLHARAMI', 'CDLHOMINGPIGEON', 'CDLINVERTEDHAMMER', 'CDLLADDERBOTTOM', 'CDLLONGLEGGEDDOJI', 'CDLLONGLINE', 'CDLMARUBOZU', 'CDLMATCHINGLOW', 'CDLMORNINGDOJISTAR', 'CDLMORNINGSTAR', 'CDLPIERCING', 'CDLRICKSHAWMAN', 'CDLRISEFALL3METHODS', 'CDLSEPARATINGLINES', 'CDLSHORTLINE', 'CDLSTICKSANDWICH', 'CDLTAKURI', 'CDLTASUKIGAP', 'CDLUNIQUE3RIVER'])