In [1]:
import numpy as np
import pandas as pd
import time
import chainer
from chainer import cuda, Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList, cuda
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions
from chaineripy.extensions import PrintReport, ProgressBar
from database.TradeHistory import get_session, History
from database.db_utils import get_recent_hist15_df

session = get_session()

In [2]:
from matplotlib import pyplot as plt
%matplotlib inline

In [3]:
def zs(p, n, shift=0):
    return (p.shift(shift) - p.rolling(n).mean()) / p.rolling(n).std().replace(0, 1)


def avg(p, n):
    return p.rolling(n).mean()


def std(p, n):
    return p.rolling(n).std()

In [5]:
start = time.time()
print('loading from db')
df = pd.read_sql_query('select * from history5min', session.bind)
print('loaded from db: %fs' % (time.time() - start))
df.exec_date = pd.to_datetime(df.exec_date)
df = df.set_index('exec_date')
df = df.loc['2016-03':]

loading from db
loaded from db: 4.399554s


In [6]:
df

Unnamed: 0_level_0,price,size
exec_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-03-01 00:00:00,4.991371e+04,5.581900
2016-03-01 00:05:00,4.993420e+04,5.040500
2016-03-01 00:10:00,4.997394e+04,15.364827
2016-03-01 00:15:00,5.001271e+04,6.554000
2016-03-01 00:20:00,4.995983e+04,5.598000
2016-03-01 00:25:00,4.995490e+04,3.397000
2016-03-01 00:30:00,4.997905e+04,33.716964
2016-03-01 00:35:00,5.001895e+04,13.828000
2016-03-01 00:40:00,5.005329e+04,4.023432
2016-03-01 00:45:00,5.014410e+04,0.984000


In [7]:
# bench_price = df.price
# bench_size = df['size']
dfb = df

p = dfb.price
v = dfb['size']

dfb['pma12'] = zs(p / avg(p, 12) - 1, 96)
dfb['pma96'] = zs(p / avg(p, 96) - 1, 96)
dfb['pma672'] = zs(p / avg(p, 672) - 1, 96)

dfb['ma4_36'] = zs(avg(p, 4) / avg(p, 36) - 1, 96)
dfb['ma12_96'] = zs(avg(p, 12) / avg(p, 96) - 1, 96)
dfb['ac12_12'] = zs((p / avg(p, 12)) / avg(p / avg(p, 12), 12), 96)
dfb['ac96_96'] = zs((p / avg(p, 96)) / avg(p / avg(p, 96), 12), 96)

dfb['vma12'] = zs(v / avg(v, 12) - 1, 96)
dfb['vma96'] = zs(v / avg(v, 96) - 1, 96)
dfb['vma672'] = zs(v / avg(v, 672) - 1, 96)

dfb['vZ12'] = zs(v, 12)
dfb['vZ96'] = zs(v, 96)
dfb['vZ672'] = zs(v, 672)

r_label_list = [
    'r', 'r_1', 'r_2',
]
for r_label in r_label_list:
    dfb[r_label] = 0
dfb['state'] = 0

dfb['pZ12'] = zs(p, 12)
dfb['pZ96'] = zs(p, 96)
dfb['vol12'] = zs(std(p, 12), 96)
dfb['vol96'] = zs(std(p, 96), 96)
dfb['vol672'] = zs(std(p, 672), 96)
dfb['dv12_96'] = zs(std(p, 12) / avg(std(p, 12), 96), 96)
dfb['dv96_672'] = zs(std(p, 96) / avg(std(p, 96), 672), 96)

for i in range(96):
    dfb['pZ96_s%02d' % i] = zs(p, 96, shift=i)

for i in range(96):
    dfb['pre_diff%02d' % i] = p.shift(i) / p.shift(i + 1) - 1

dfb['max_diff12'] = p / p.rolling(12).max() - 1
dfb['max_diff96'] = p / p.rolling(96).max() - 1
dfb['max_diff672'] = p / p.rolling(672).max() - 1

dfb['min_diff12'] = p / p.rolling(12).min() - 1
dfb['min_diff96'] = p / p.rolling(96).min() - 1
dfb['min_diff672'] = p / p.rolling(672).min() - 1

dfb['utctime'] = (dfb.index.hour * 4 + dfb.index.minute / 15) / 96

dfz = dfb

indicator = dfz.reset_index().loc[:, 'pma12':'utctime']
price_history = dfz.reset_index().price

indicator['vma12'] = indicator.vma12.fillna(0)
indicator['vZ12'] = indicator.vZ12.fillna(0)

In [8]:
answer = price_history.shift(-3) / price_history - 1
answer = pd.qcut(answer, 3, labels=list(range(3))).fillna(1)
answer

0         2
1         1
2         1
3         0
4         2
5         2
6         2
7         2
8         1
9         0
10        0
11        0
12        0
13        2
14        2
15        2
16        0
17        0
18        0
19        0
20        2
21        1
22        1
23        1
24        0
25        0
26        0
27        0
28        2
29        2
         ..
197555    0
197556    0
197557    0
197558    0
197559    0
197560    2
197561    0
197562    0
197563    0
197564    2
197565    2
197566    0
197567    0
197568    0
197569    0
197570    0
197571    0
197572    0
197573    0
197574    2
197575    2
197576    2
197577    0
197578    1
197579    2
197580    2
197581    2
197582    1
197583    1
197584    1
Name: price, Length: 197585, dtype: category
Categories (3, int64): [0 < 1 < 2]

In [9]:
indicator.shape

(197585, 223)

In [10]:
pd.qcut(price_history.shift(-3) / price_history - 1, 3).value_counts()

(0.000588, 0.129]        65861
(-0.14, -0.000465]       65861
(-0.000465, 0.000588]    65860
Name: price, dtype: int64

In [14]:
indexer = indicator.dv96_672.notnull()
for label in indicator.columns:
    indexer = indexer & indicator[label].notnull()

In [15]:
xp = cuda.cupy

d_exp = indicator.loc[indexer].reset_index(drop=True)
d_obj = answer[indexer].reset_index(drop=True)

border = d_exp.index[-1800]
# last = d_exp.index[-200]
test_exp = d_exp[border:]
test_obj = d_obj[border:]
d_exp = d_exp[:border]
d_obj = d_obj[:border]

data = xp.array(d_exp, dtype=xp.float32)
t_data = xp.array(d_obj, dtype=xp.int32)
data_test = xp.array(test_exp, dtype=xp.float32)
t_data_test = xp.array(test_obj, dtype=xp.int32)

In [67]:
d_exp.describe().to_csv('tmp.csv')

In [17]:
indicator[indexer].isnull().apply(pd.value_counts)

Unnamed: 0,pma12,pma96,pma672,ma4_36,ma12_96,ac12_12,ac96_96,vma12,vma96,vma672,...,pre_diff93,pre_diff94,pre_diff95,max_diff12,max_diff96,max_diff672,min_diff12,min_diff96,min_diff672,utctime
False,196724,196724,196724,196724,196724,196724,196724,196724,196724,196724,...,196724,196724,196724,196724,196724,196724,196724,196724,196724,196724


In [18]:
row, col = d_exp.shape

ls_1 = 400
ls_2 = 800
ls_3 = 800
ls_4 = 200
# ls_5 = 200
out_size = 3
class MyChain(Chain):
    def __init__(self):
        super().__init__(
            l1=L.Linear(col, ls_1),
            l2=L.Linear(ls_1, ls_2),
            l3=L.Linear(ls_2, ls_3),
            l4=L.Linear(ls_3, ls_4),
#             l5=L.Linear(ls_4, ls_5),
            l5=L.Linear(ls_4, out_size)
        )
    
    def __call__(self, x):
        if chainer.config.train:
            h = F.sigmoid(self.l1(x))
#             h = F.dropout(F.sigmoid(self.l2(h)), ratio=0.01)
            h = F.dropout(F.sigmoid(self.l2(h)), ratio=0.05)
            h = F.dropout(F.leaky_relu(self.l3(h)), ratio=0.1)
            h = F.sigmoid(self.l4(h))
        else:
            h = F.sigmoid(self.l1(x))
#             h = F.sigmoid(self.l2(h))
            h = F.sigmoid(self.l2(h))
            h = F.leaky_relu(self.l3(h))
            h = F.sigmoid(self.l4(h))
        o = self.l5(h)
        return o

In [19]:
train = datasets.tuple_dataset.TupleDataset(data, t_data)
train = iterators.SerialIterator(train, batch_size=100, shuffle=True, repeat=True)

testset = datasets.tuple_dataset.TupleDataset(data_test, t_data_test)
testset = iterators.SerialIterator(testset, batch_size=100, shuffle=False, repeat=False)

In [20]:
model = L.Classifier(MyChain())

gpu_device = 0
cuda.get_device_from_id(gpu_device).use()
model.to_gpu(gpu_device)

optimizer = optimizers.SGD()
optimizer.setup(model)
updater = training.StandardUpdater(train, optimizer, device=gpu_device)

In [21]:
interval = 25
times = 80
for i in range(times):
    trainer = training.Trainer(updater, (interval * (i + 1), 'epoch'), out='result/%02d' % i)
    trainer.extend(extensions.Evaluator(testset, model, device=gpu_device))
    trainer.extend(extensions.LogReport())
    trainer.extend(PrintReport(['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(ProgressBar(update_interval=10))
    trainer.run()
    model.to_cpu()
    serializers.save_npz('agent/snapshot_%02d.npz' % i, model.predictor)
    model.to_gpu(gpu_device)

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

In [37]:
indicator

Unnamed: 0,pma12,pma96,pma672,ma4_36,ma12_96,ac12_12,ac96_96,vma12,vma96,vma672,...,pZ96_s87,pZ96_s88,pZ96_s89,pZ96_s90,pZ96_s91,pZ96_s92,pZ96_s93,pZ96_s94,pZ96_s95,utctime
0,,,,,,,,0.000000,,,...,,,,,,,,,,0.000000
1,,,,,,,,0.000000,,,...,,,,,,,,,,0.010417
2,,,,,,,,0.000000,,,...,,,,,,,,,,0.020833
3,,,,,,,,0.000000,,,...,,,,,,,,,,0.031250
4,,,,,,,,0.000000,,,...,,,,,,,,,,0.041667
5,,,,,,,,0.000000,,,...,,,,,,,,,,0.052083
6,,,,,,,,0.000000,,,...,,,,,,,,,,0.062500
7,,,,,,,,0.000000,,,...,,,,,,,,,,0.072917
8,,,,,,,,0.000000,,,...,,,,,,,,,,0.083333
9,,,,,,,,0.000000,,,...,,,,,,,,,,0.093750


In [40]:
exp_sample = d_exp.sample(1000)
obj_sample = d_obj[exp_sample.index]
obj_sample

9426     1
5677     2
29453    2
7496     2
20308    2
23312    0
21096    0
18413    1
14773    0
31009    2
6261     2
6200     0
3121     2
23137    2
11811    0
8834     1
2486     0
7471     2
6357     2
25975    2
22959    2
13425    0
19770    1
17360    0
15114    2
20501    2
8112     0
5673     0
28317    2
28665    2
        ..
8515     1
26043    0
17906    0
19316    0
10270    1
28766    2
32462    0
20929    0
13976    2
24342    0
9749     1
4874     0
19730    1
25349    2
25828    0
22238    1
28635    0
32426    2
32008    2
28249    2
1986     0
11870    0
19345    1
8892     1
11008    1
16594    2
3653     1
21583    2
18738    2
29247    0
Name: price, Length: 1000, dtype: category
Categories (3, int64): [0 < 1 < 2]

In [41]:
model.to_cpu()
exp_sample_data = np.array(exp_sample, dtype=np.float32)
res = model.predictor(exp_sample_data)

In [42]:
result = obj_sample.reset_index()
result['pred'] = res.data.argmax(axis=1)
result

Unnamed: 0,index,price,pred
0,9426,1,0
1,5677,2,1
2,29453,2,2
3,7496,2,2
4,20308,2,2
5,23312,0,2
6,21096,0,0
7,18413,1,0
8,14773,0,1
9,31009,2,2


In [43]:
result[result.price == result['pred']].shape

(570, 3)

In [44]:
result[(result.price - result['pred']).abs() == 2].shape

(106, 3)

In [45]:
model.predictor

<__main__.MyChain at 0x10dd108f5f8>

In [46]:
serializers.save_npz('min30p552_500_2-1000_200_3.npz', model.predictor)

In [47]:
result.groupby(['price', 'pred']).size()

price  pred
0      0       226
       1        62
       2        53
1      0        93
       1       114
       2       110
2      0        53
       1        59
       2       230
dtype: int64

In [61]:
diff = (price_history / price_history.shift() - 1).fillna(0)
pd.qcut(diff, 6).value_counts()

(0.00167, 0.104]         10869
(4.51e-05, 0.000536]     10869
(-0.00147, -0.000403]    10869
(-0.117, -0.00147]       10869
(0.000536, 0.00167]      10868
(-0.000403, 4.51e-05]    10868
Name: price, dtype: int64

In [64]:
pd.qcut(diff, 6, labels=list(range(6)))

0        2
1        4
2        4
3        5
4        0
5        4
6        2
7        2
8        2
9        0
10       4
11       3
12       4
13       1
14       2
15       2
16       1
17       3
18       2
19       2
20       0
21       5
22       2
23       1
24       4
25       1
26       4
27       2
28       4
29       1
        ..
65182    5
65183    5
65184    1
65185    3
65186    1
65187    5
65188    5
65189    2
65190    1
65191    5
65192    2
65193    2
65194    2
65195    5
65196    5
65197    5
65198    3
65199    0
65200    0
65201    5
65202    5
65203    5
65204    2
65205    0
65206    0
65207    1
65208    5
65209    5
65210    0
65211    1
Name: price, Length: 65212, dtype: category
Categories (6, int64): [0 < 1 < 2 < 3 < 4 < 5]