In [14]:
import numpy as np
import pandas as pd
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from recomm.classifier import ClassifierNN, ClassifierDNN

In [2]:
# Switch plotly into offline notebook mode before any iplot() call below.
# NOTE(review): connected=True presumably makes plotly.js load from a CDN
# instead of being embedded in the notebook -- confirm against the plotly.offline docs.
init_notebook_mode(connected=True)

In [3]:
# Load the raw trade records; the cells below read its create_time, price
# and volume columns.
tx_data = pd.read_csv("data/tx_data.csv")

In [4]:
# Build 1-minute k-lines (high / low / volume / open / close) from the raw trades.
#
# A stable sort keeps trades that share the same `create_time` in their
# original file order.  The default quicksort is not stable and may shuffle
# such ties, which would make a minute's open/close pick an arbitrary trade.
tx_data = tx_data.sort_values(by="create_time", kind="mergesort")

# Drop the last two characters of `create_time` and parse the remainder as a
# minute-resolution timestamp: this is the bar every trade belongs to.
# NOTE(review): assumes `create_time` renders as YYYYmmddHHMMSS digits -- confirm.
minute_text = tx_data["create_time"].astype("str").str[:-2]
tx_data.loc[:, "trade_time"] = pd.to_datetime(minute_text, format="%Y%m%d%H%M")

# Copies of `price` so that one dict-style agg can reduce them independently.
tx_data.loc[:, "high"] = tx_data.price
tx_data.loc[:, "low"] = tx_data.price

trades_by_minute = tx_data.groupby(["trade_time"])
# String reducer names select the built-in groupby reductions directly; passing
# pd.Series.max / min / sum is deprecated in recent pandas releases.
tx_1min_kline = trades_by_minute.agg({"high": "max", "low": "min", "volume": "sum"})

# Open/close are the first/last trade price of each minute.  Assigning the
# grouped Series aligns on the `trade_time` index rather than on row position,
# so the bars cannot silently shift against each other.  Column order stays
# high, low, volume, open, close.
tx_1min_kline.loc[:, "open"] = trades_by_minute["price"].first()
tx_1min_kline.loc[:, "close"] = trades_by_minute["price"].last()

# Data Pre-process
The idea of the model is to take a window of the time series, using open, high, low, close, and volume as features. In more detail, we use the data from the n-th k-line to the (n+300)-th k-line as features and the growth rate of the (n+301)-th k-line as the label.

In [5]:
# Number of consecutive 1-minute k-lines that make up one sample's feature window.
feature_range = 300

The histogram below shows that the relative change between open and close, (close - open) / open, is distributed symmetrically around zero; it is therefore grouped into 6 levels with boundaries 180e-6, 90e-6, 0, -90e-6, and -180e-6.

In [1]:
# Histogram of each k-line's relative open-to-close change, (close - open) / open.
iplot([
    go.Histogram(
        x=tx_1min_kline["close"].sub(tx_1min_kline["open"]).div(tx_1min_kline["open"])
    )
])

In [6]:
# Build one (features, label) pair per sliding window over the 1-minute k-lines.
#
# Growth-rate thresholds, tested from highest to lowest.  The class of a
# sample is the position of the first threshold its growth rate exceeds; a
# rate that exceeds none of them falls into the last (sixth) class.
level_bounds = (180e-6, 90e-6, 0, -90e-6, -180e-6)

train_data = []
label_data = []
for idx in range(feature_range, tx_1min_kline.shape[0] - 1):
    # Features: the `feature_range` rows just before position idx, flattened
    # row by row into a single vector.
    window = tx_1min_kline.iloc[idx - feature_range:idx]
    train_data.append(window.values.reshape(-1))

    # Label source: relative open-to-close change of the row at idx + 1.
    # NOTE(review): the window ends at row idx - 1 while the label comes from
    # row idx + 1, so row idx is neither a feature nor the label -- confirm
    # this one-bar gap is intended.
    target = tx_1min_kline.iloc[idx + 1]
    growth = (target["close"] - target["open"]) / target["open"]

    # Locate the first threshold that the growth rate exceeds (5 if none).
    level = len(level_bounds)
    for position, bound in enumerate(level_bounds):
        if growth > bound:
            level = position
            break

    # One-hot encode the class over the six levels.
    one_hot = [0] * (len(level_bounds) + 1)
    one_hot[level] = 1
    label_data.append(one_hot)

train_features = np.array(train_data)
train_labels = np.array(label_data)

In [7]:
# Min-max rescale every sample (row) into the [0, 1] range.
# The minimum and span are taken over the whole flattened row, i.e. across
# all k-line columns of the window at once, not per column.
row_low = train_features.min(axis=1, keepdims=True)
row_span = train_features.max(axis=1, keepdims=True) - row_low
train_features = (train_features - row_low) / row_span

In [13]:
# Hold-out split by position: the first 20000 samples are kept for training
# and every later sample is used for testing.  Each right-hand side is fully
# evaluated before the names are rebound, so the test slice still sees the
# complete arrays.
# NOTE: re-running this cell alone slices the already-truncated arrays and
# leaves the test set empty; re-run the preceding cells first.
train_features, test_features = train_features[:20000], train_features[20000:]
train_labels, test_labels = train_labels[:20000], train_labels[20000:]