In [56]:
import pandas as pd
import numpy as np
import tushare as ts
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import auc, roc_curve, confusion_matrix, precision_recall_curve, average_precision_score
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.utils import to_categorical

In [512]:
# Download the k-line history for stock code 600313 over the study period.
tick_ori = ts.get_k_data(
    '600313',
    start='2010-01-01',
    end='2018-01-23',
)

In [513]:
# Work on a copy so the downloaded frame (tick_ori) stays untouched.
tick = tick_ori.copy()
# Drop rows whose high equals low (zero price range) and renumber the rows
# 0..n-1. reset_index(drop=True) discards the old index directly instead of
# materialising it as a column and dropping it again, which would raise
# KeyError whenever the index carries a name other than 'index'.
tick = tick[tick['high'] != tick['low']].reset_index(drop=True)

In [514]:
def get_box(p_open, p_close, p_high, p_low):
    """Describe one candle as ``[move, top, middle, bottom]``.

    Parameters
    ----------
    p_open, p_close, p_high, p_low : number
        Open, close, high and low prices of the candle.

    Returns
    -------
    list
        ``move``   -- signed price change, ``p_close - p_open``.
        ``top``    -- distance from the high to the nearer of open/close,
                      as a fraction of the high-low range.
        ``middle`` -- absolute open-close distance as a fraction of the range.
        ``bottom`` -- distance from the low to the nearer of open/close,
                      as a fraction of the range.

        For a completely flat candle (range of zero even after widening) the
        three fractions are returned as 0.0 instead of dividing by zero.
    """
    if p_high == p_low:
        # High/low carry no range; widen them to cover open and close.
        p_high = max(p_open, p_close, p_high, p_low)
        p_low = min(p_open, p_close, p_high, p_low)
    max_range = (p_high - p_low)
    move = p_close - p_open
    if max_range == 0:
        # All four prices are equal: there is no range to normalise by.
        return [move, 0.0, 0.0, 0.0]
    top = min(p_high - p_open, p_high - p_close) / max_range
    middle = abs(p_open - p_close) / max_range
    bottom = min(p_close - p_low, p_open - p_low) / max_range
    return [move, top, middle, bottom]

In [515]:
# Compute the candle-shape features with a single get_box call per row
# (previously get_box was evaluated four times per row, once per column).
ohlc_rows = zip(tick['open'], tick['close'], tick['high'], tick['low'])
box_features = pd.DataFrame(
    [get_box(p_open, p_close, p_high, p_low)
     for p_open, p_close, p_high, p_low in ohlc_rows],
    columns=['move', 'top', 'middle', 'bottom'],
    index=tick.index,
)
for feature_name in box_features.columns:
    tick[feature_name] = box_features[feature_name]
# NOTE(review): 'median' is the midpoint of high and *close*; confirm that
# high/low was not intended.
tick['median'] = (tick['high'] + tick['close']) / 2

In [516]:
# Min-max scale three columns into [0, 1]. The same scaler object is re-fitted
# for each column in turn, so afterwards it only holds the fit of the last
# one ('volume'). 'move' is written to a new column; 'median' and 'volume'
# are overwritten with their scaled values.
scaler = MinMaxScaler((0, 1))
for source_col, target_col in (('move', 'move_scaled'),
                               ('median', 'median'),
                               ('volume', 'volume')):
    column_values = tick[source_col].values.reshape(-1, 1)
    tick[target_col] = scaler.fit_transform(column_values)

In [517]:
def get_frame(df, x_cols, y_cols, duration=10, forward=1, thread=0.01):
    """Cut a price frame into look-back windows with binary look-ahead labels.

    One sample is produced for every row position ``i`` that has ``duration``
    rows before it and ``forward`` rows starting at it:

    * features: the ``x_cols`` values of rows ``i - duration .. i - 1``;
    * label: 1 when the maximum of ``y_cols[0]`` over rows
      ``i .. i + forward - 1`` exceeds the ``y_cols[1]`` value of row ``i`` by
      at least the fraction ``thread``, else 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is not modified.
    x_cols : list of str
        Feature columns.
    y_cols : sequence of two str
        ``y_cols[0]`` is the column whose forward maximum is taken,
        ``y_cols[1]`` the column used as the reference price.
    duration : int
        Number of look-back rows per sample.
    forward : int
        Number of look-ahead rows per label (must be >= 1).
    thread : float
        Relative-gain threshold for a positive label.

    Returns
    -------
    list
        ``[Xs, ys]`` where ``Xs`` stacks the feature windows and ``ys`` is the
        two-class one-hot encoding of the labels from ``to_categorical``.

    Raises
    ------
    ValueError
        If ``forward`` is smaller than 1.
    """
    if forward < 1:
        raise ValueError("forward must be at least 1")
    # Work positionally on a re-indexed copy so the window arithmetic does not
    # depend on the caller's index labels being exactly 0..n-1.
    frame = df.reset_index(drop=True)
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    features = frame[x_cols].values
    highs = frame[y_cols[0]].values
    base = frame[y_cols[1]].values
    Xs = []
    ys = []
    for i in range(duration, len(frame) - forward + 1):
        Xs.append(features[i - duration:i])
        # NOTE(review): the look-ahead window starts at row i itself, so row
        # i's own high is compared against row i's reference price -- confirm.
        forward_high = highs[i:i + forward].max()
        gain = (forward_high - base[i]) / base[i]
        ys.append(1 if gain >= thread else 0)
    Xs = np.array(Xs)
    ys = to_categorical(np.array(ys), 2)
    return [Xs, ys]

In [521]:
# Feature columns fed to the network. Smaller alternatives that were tried:
#   ['move_scaled', 'middle']
#   ['move_scaled', 'top', 'middle', 'bottom']
x_cols = [
    'median', 'volume', 'move_scaled',
    'top', 'middle', 'bottom',
]
# Label columns: forward maximum is taken over 'high', reference is 'close'.
y_cols = ['high', 'close']

# thread: relative-gain threshold; duration: look-back rows; forward: look-ahead rows.
thread, duration, forward = 0.09, 20, 20

X, y = get_frame(
    tick,
    x_cols,
    y_cols,
    duration=duration,
    forward=forward,
    thread=thread,
)
# Share of samples carrying the positive label (class balance check).
sum(y[:, 1]) / len(y)

0.50474254742547431

In [522]:
# Chronological split: consecutive samples are heavily overlapping windows of
# the same price series, so a shuffled split would place near-duplicates of
# the test windows (and their future prices) in the training set. Keeping the
# order also makes the split deterministic across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False)

In [523]:
# Two stacked LSTM layers with dropout in between, followed by a small dense
# head that outputs a probability for each of the two classes.
model = Sequential()
# NOTE(review): relu inside LSTM cells is unbounded and can be numerically
# unstable; consider the Keras default (tanh) if the loss turns NaN.
model.add(LSTM(16, input_shape=(duration, len(x_cols)), activation='relu', return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(16, activation='relu'))
model.add(Dense(8, activation='relu'))
# The targets are one-hot and mutually exclusive, so the output layer must be
# a softmax (a probability distribution) to pair with categorical_crossentropy;
# independent sigmoids do not form one.
model.add(Dense(2, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [524]:
# Train for 200 epochs in mini-batches of 32, holding out the last 20% of the
# training samples for validation.
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=200,
    validation_split=0.2,
)

Train on 885 samples, validate on 222 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
E

<keras.callbacks.History at 0x1b0386d1cf8>

In [525]:
# Turn predicted class scores and one-hot targets into class indices.
y_test_predicted = np.argmax(model.predict(X_test), axis=1)
y_test_actual = np.argmax(y_test, axis=1)
# labels=[0, 1] forces a 2x2 matrix even when one class never occurs, so the
# four-way unpacking below cannot fail.
tn, fp, fn, tp = confusion_matrix(y_test_actual, y_test_predicted, labels=[0, 1]).ravel()
print(tn, fp, fn, tp)
# Precision/recall are undefined when their denominator is zero; report NaN
# explicitly instead of triggering a 0/0 RuntimeWarning.
precision = tp / (tp + fp) if (tp + fp) > 0 else float('nan')
recall = tp / (tp + fn) if (tp + fn) > 0 else float('nan')
print("precision: %.4f" % precision)
print("recall: %.4f" % recall)

171 0 198 0
percision: nan
recall: 0.0000


