In [170]:
import numpy as np
import pandas as pd
import time
import chainer
from chainer import cuda, Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList, cuda
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions
from chaineripy.extensions import PrintReport, ProgressBar
from database.TradeHistory import get_session, History
from database.db_utils import get_recent_hist15_df

session = get_session()

In [2]:
from matplotlib import pyplot as plt
%matplotlib inline

In [58]:
def zs(p, n, shift=0):
    return (p.shift(shift) - p.rolling(n).mean()) / p.rolling(n).std().replace(0, 1)


def avg(p, n):
    return p.rolling(n).mean()


def std(p, n):
    return p.rolling(n).std()

In [177]:
start = time.time()
print('loading from db')
df = pd.read_sql_query('select * from history15min', session.bind)
print('loaded from db: %fs' % (time.time() - start))
df.exec_date = pd.to_datetime(df.exec_date)
df = df.set_index('exec_date')
df = df.loc['2016-03':]

loading from db
loaded from db: 1.647327s


In [178]:
df

Unnamed: 0_level_0,price,size
exec_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-03-01 00:00:00,4.994609e+04,25.987227
2016-03-01 00:15:00,4.998313e+04,15.549000
2016-03-01 00:30:00,5.001167e+04,51.568395
2016-03-01 00:45:00,5.011319e+04,4.485000
2016-03-01 01:00:00,4.989262e+04,30.907859
2016-03-01 01:15:00,4.992312e+04,16.026564
2016-03-01 01:30:00,4.991145e+04,6.549000
2016-03-01 01:45:00,4.989500e+04,5.048000
2016-03-01 02:00:00,4.989188e+04,34.441220
2016-03-01 02:15:00,4.973135e+04,70.924000


In [179]:
# bench_price = df.price
# bench_size = df['size']
dfb = df

p = dfb.price
v = dfb['size']

dfb['pma12'] = zs(p / avg(p, 12) - 1, 96)
dfb['pma96'] = zs(p / avg(p, 96) - 1, 96)
dfb['pma672'] = zs(p / avg(p, 672) - 1, 96)

dfb['ma4_36'] = zs(avg(p, 4) / avg(p, 36) - 1, 96)
dfb['ma12_96'] = zs(avg(p, 12) / avg(p, 96) - 1, 96)
dfb['ac12_12'] = zs((p / avg(p, 12)) / avg(p / avg(p, 12), 12), 96)
dfb['ac96_96'] = zs((p / avg(p, 96)) / avg(p / avg(p, 96), 12), 96)

dfb['vma12'] = zs(v / avg(v, 12) - 1, 96)
dfb['vma96'] = zs(v / avg(v, 96) - 1, 96)
dfb['vma672'] = zs(v / avg(v, 672) - 1, 96)

dfb['vZ12'] = zs(v, 12)
dfb['vZ96'] = zs(v, 96)
dfb['vZ672'] = zs(v, 672)

r_label_list = [
    'r', 'r_1', 'r_2',
]
for r_label in r_label_list:
    dfb[r_label] = 0
dfb['state'] = 0

dfb['pZ12'] = zs(p, 12)
dfb['pZ96'] = zs(p, 96)
dfb['vol12'] = zs(std(p, 12), 96)
dfb['vol96'] = zs(std(p, 96), 96)
dfb['vol672'] = zs(std(p, 672), 96)
dfb['dv12_96'] = zs(std(p, 12) / avg(std(p, 12), 96), 96)
dfb['dv96_672'] = zs(std(p, 96) / avg(std(p, 96), 672), 96)

for i in range(96):
    dfb['pZ96_s%02d' % i] = zs(p, 96, shift=i)
    
dfb['pre_diff'] = p / p.shift(1) - 1

dfb['max_diff12'] = p / p.rolling(12).max() - 1
dfb['max_diff96'] = p / p.rolling(96).max() - 1
dfb['max_diff672'] = p / p.rolling(672).max() - 1

dfb['min_diff12'] = p / p.rolling(12).min() - 1
dfb['min_diff96'] = p / p.rolling(96).min() - 1
dfb['min_diff672'] = p / p.rolling(672).min() - 1

dfb['utctime'] = (dfb.index.hour * 4 + dfb.index.minute / 15) / 96

dfz = dfb

indicator = dfz.reset_index().loc[:, 'pma12':'utctime']
price_history = dfz.reset_index().price

indicator['vma12'] = indicator.vma12.fillna(0)
indicator['vZ12'] = indicator.vZ12.fillna(0)

In [180]:
answer = price_history.shift(-1) / price_history - 1
answer = pd.qcut(answer, 3, labels=list(range(3))).fillna(1)
answer

0        2
1        2
2        2
3        0
4        2
5        1
6        1
7        1
8        0
9        2
10       1
11       2
12       0
13       1
14       1
15       0
16       1
17       1
18       1
19       0
20       2
21       1
22       0
23       2
24       0
25       2
26       1
27       2
28       0
29       0
        ..
65614    2
65615    2
65616    2
65617    2
65618    2
65619    2
65620    2
65621    2
65622    0
65623    1
65624    0
65625    0
65626    2
65627    0
65628    2
65629    2
65630    0
65631    2
65632    2
65633    2
65634    0
65635    0
65636    0
65637    2
65638    2
65639    2
65640    0
65641    2
65642    0
65643    1
Name: price, Length: 65644, dtype: category
Categories (3, int64): [0 < 1 < 2]

In [120]:
price_history.shift(-1) / price_history - 1

0       -0.001019
1        0.002447
2       -0.001928
3       -0.000754
4       -0.002298
5       -0.004137
6       -0.000281
7        0.002951
8       -0.003377
9       -0.001126
10      -0.001242
11       0.004284
12      -0.000676
13      -0.000015
14       0.003209
15       0.001605
16       0.001228
17      -0.000661
18      -0.000980
19       0.000950
20       0.001359
21      -0.000203
22      -0.001154
23       0.000880
24       0.001716
25       0.000111
26       0.000089
27       0.000010
28      -0.001486
29      -0.000839
           ...   
50823    0.007900
50824    0.013196
50825   -0.000741
50826   -0.000780
50827    0.000450
50828    0.007736
50829   -0.002217
50830   -0.000228
50831   -0.007786
50832   -0.000784
50833    0.000139
50834    0.004287
50835   -0.000509
50836   -0.002169
50837    0.002543
50838   -0.007026
50839   -0.003900
50840   -0.003537
50841    0.001525
50842    0.003101
50843   -0.002957
50844   -0.005194
50845   -0.005393
50846    0.000735
50847    0

In [181]:
pd.qcut(price_history.shift(-1) / price_history - 1, 3).value_counts()

(0.000538, 0.104]        21881
(-0.000407, 0.000538]    21881
(-0.118, -0.000407]      21881
Name: price, dtype: int64

In [182]:
indexer = indicator.dv96_672.notnull()

In [183]:
xp = cuda.cupy

d_exp = indicator.loc[indexer].reset_index(drop=True)
d_obj = answer[indexer].reset_index(drop=True)

border = d_exp.index[-600]
# last = d_exp.index[-200]
test_exp = d_exp[border:]
test_obj = d_obj[border:]
d_exp = d_exp[:border]
d_obj = d_obj[:border]

data = xp.array(d_exp, dtype=xp.float32)
t_data = xp.array(d_obj, dtype=xp.int32)
data_test = xp.array(test_exp, dtype=xp.float32)
t_data_test = xp.array(test_obj, dtype=xp.int32)

In [67]:
d_exp.describe().to_csv('tmp.csv')

In [13]:
df.isnull().apply(pd.value_counts)

Unnamed: 0,price,size,pma12,pma96,pma672,ma4_36,ma12_96,ac12_12,ac96_96,vma12,...,pZ96_s87,pZ96_s88,pZ96_s89,pZ96_s90,pZ96_s91,pZ96_s92,pZ96_s93,pZ96_s94,pZ96_s95,utctime
False,65506.0,65506.0,65400,65316,64740,65376,65316,65389,65305,65400,...,65411,65411,65411,65411,65411,65411,65411,65411,65411,65506.0
True,,,106,190,766,130,190,117,201,106,...,95,95,95,95,95,95,95,95,95,


In [188]:
row, col = d_exp.shape

ls_1 = 400
ls_2 = 800
# ls_3 = 500
ls_4 = 800
ls_5 = 200
out_size = 3
class MyChain(Chain):
    def __init__(self):
        super().__init__(
            l1=L.Linear(col, ls_1),
            l2=L.Linear(ls_1, ls_2),
#             l3=L.Linear(ls_2, ls_3),
            l4=L.Linear(ls_2, ls_4),
            l5=L.Linear(ls_4, ls_5),
            l6=L.Linear(ls_5, out_size)
        )
    
    def __call__(self, x):
        if chainer.config.train:
            h = F.sigmoid(self.l1(x))
            h = F.dropout(F.sigmoid(self.l2(h)), ratio=0.05)
#             h = F.dropout(F.sigmoid(self.l3(h)), ratio=0.1)
            h = F.dropout(F.leaky_relu(self.l4(h)), ratio=0.1)
            h = F.sigmoid(self.l5(h))
        else:
            h = F.sigmoid(self.l1(x))
            h = F.sigmoid(self.l2(h))
#             h = F.sigmoid(self.l3(h))
            h = F.leaky_relu(self.l4(h))
            h = F.sigmoid(self.l5(h))
        o = self.l6(h)
        return o

In [189]:
train = datasets.tuple_dataset.TupleDataset(data, t_data)
train = iterators.SerialIterator(train, batch_size=100, shuffle=True, repeat=True)

testset = datasets.tuple_dataset.TupleDataset(data_test, t_data_test)
testset = iterators.SerialIterator(testset, batch_size=100, shuffle=False, repeat=False)

In [190]:
model = L.Classifier(MyChain())

gpu_device = 0
cuda.get_device_from_id(gpu_device).use()
model.to_gpu(gpu_device)

optimizer = optimizers.SGD()
optimizer.setup(model)
updater = training.StandardUpdater(train, optimizer, device=gpu_device)

In [191]:
interval = 25
times = 80
for i in range(times):
    trainer = training.Trainer(updater, (interval * (i + 1), 'epoch'), out='result/%02d' % i)
    trainer.extend(extensions.Evaluator(testset, model, device=gpu_device))
    trainer.extend(extensions.LogReport())
    trainer.extend(PrintReport(['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(ProgressBar(update_interval=10))
    trainer.run()
    model.to_cpu()
    serializers.save_npz('agent/snapshot_%02d.npz' % i, model.predictor)
    model.to_gpu(gpu_device)

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

In [37]:
indicator

Unnamed: 0,pma12,pma96,pma672,ma4_36,ma12_96,ac12_12,ac96_96,vma12,vma96,vma672,...,pZ96_s87,pZ96_s88,pZ96_s89,pZ96_s90,pZ96_s91,pZ96_s92,pZ96_s93,pZ96_s94,pZ96_s95,utctime
0,,,,,,,,0.000000,,,...,,,,,,,,,,0.000000
1,,,,,,,,0.000000,,,...,,,,,,,,,,0.010417
2,,,,,,,,0.000000,,,...,,,,,,,,,,0.020833
3,,,,,,,,0.000000,,,...,,,,,,,,,,0.031250
4,,,,,,,,0.000000,,,...,,,,,,,,,,0.041667
5,,,,,,,,0.000000,,,...,,,,,,,,,,0.052083
6,,,,,,,,0.000000,,,...,,,,,,,,,,0.062500
7,,,,,,,,0.000000,,,...,,,,,,,,,,0.072917
8,,,,,,,,0.000000,,,...,,,,,,,,,,0.083333
9,,,,,,,,0.000000,,,...,,,,,,,,,,0.093750


In [40]:
exp_sample = d_exp.sample(1000)
obj_sample = d_obj[exp_sample.index]
obj_sample

9426     1
5677     2
29453    2
7496     2
20308    2
23312    0
21096    0
18413    1
14773    0
31009    2
6261     2
6200     0
3121     2
23137    2
11811    0
8834     1
2486     0
7471     2
6357     2
25975    2
22959    2
13425    0
19770    1
17360    0
15114    2
20501    2
8112     0
5673     0
28317    2
28665    2
        ..
8515     1
26043    0
17906    0
19316    0
10270    1
28766    2
32462    0
20929    0
13976    2
24342    0
9749     1
4874     0
19730    1
25349    2
25828    0
22238    1
28635    0
32426    2
32008    2
28249    2
1986     0
11870    0
19345    1
8892     1
11008    1
16594    2
3653     1
21583    2
18738    2
29247    0
Name: price, Length: 1000, dtype: category
Categories (3, int64): [0 < 1 < 2]

In [41]:
model.to_cpu()
exp_sample_data = np.array(exp_sample, dtype=np.float32)
res = model.predictor(exp_sample_data)

In [42]:
result = obj_sample.reset_index()
result['pred'] = res.data.argmax(axis=1)
result

Unnamed: 0,index,price,pred
0,9426,1,0
1,5677,2,1
2,29453,2,2
3,7496,2,2
4,20308,2,2
5,23312,0,2
6,21096,0,0
7,18413,1,0
8,14773,0,1
9,31009,2,2


In [43]:
result[result.price == result['pred']].shape

(570, 3)

In [44]:
result[(result.price - result['pred']).abs() == 2].shape

(106, 3)

In [45]:
model.predictor

<__main__.MyChain at 0x10dd108f5f8>

In [46]:
serializers.save_npz('min30p552_500_2-1000_200_3.npz', model.predictor)

In [47]:
result.groupby(['price', 'pred']).size()

price  pred
0      0       226
       1        62
       2        53
1      0        93
       1       114
       2       110
2      0        53
       1        59
       2       230
dtype: int64

In [61]:
diff = (price_history / price_history.shift() - 1).fillna(0)
pd.qcut(diff, 6).value_counts()

(0.00167, 0.104]         10869
(4.51e-05, 0.000536]     10869
(-0.00147, -0.000403]    10869
(-0.117, -0.00147]       10869
(0.000536, 0.00167]      10868
(-0.000403, 4.51e-05]    10868
Name: price, dtype: int64

In [64]:
pd.qcut(diff, 6, labels=list(range(6)))

0        2
1        4
2        4
3        5
4        0
5        4
6        2
7        2
8        2
9        0
10       4
11       3
12       4
13       1
14       2
15       2
16       1
17       3
18       2
19       2
20       0
21       5
22       2
23       1
24       4
25       1
26       4
27       2
28       4
29       1
        ..
65182    5
65183    5
65184    1
65185    3
65186    1
65187    5
65188    5
65189    2
65190    1
65191    5
65192    2
65193    2
65194    2
65195    5
65196    5
65197    5
65198    3
65199    0
65200    0
65201    5
65202    5
65203    5
65204    2
65205    0
65206    0
65207    1
65208    5
65209    5
65210    0
65211    1
Name: price, Length: 65212, dtype: category
Categories (6, int64): [0 < 1 < 2 < 3 < 4 < 5]