In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter
import sys 
sys.path.append('..')
from renderer import render

In [None]:
# Column positions of the fields these constants are named after.
# NOTE(review): presumably positions within a row of an episode array — confirm against callers.
(TIME_COLUMN_INDEX,
 OPEN_COLUMN_INDEX,
 CLOSE_COLUMN_INDEX,
 VOLUME_COLUMN_INDEX) = (0, 1, 4, 5)

EPISODE_SIZE = 2880   # rows taken per episode
LOOK_AHEAD = 15       # rows between a sample and the close it is labelled against
TRAIN_SPLIT = 0.85    # leading fraction of rows used for training

# Print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

In [None]:
def read_data(filename="../data/bitfinex-BTCUSD-1m.csv", date_columns="DateTime"):
    """Load the candle CSV, drop rows without a close price and standardise volume.

    Parameters
    ----------
    filename : str or file-like
        Path or buffer handed to ``pd.read_csv``.
    date_columns : str
        Name of the single column that is parsed as datetimes and used to
        build the index.

    Returns
    -------
    pd.DataFrame
        Only the rows whose ``Close`` is not missing, indexed by a
        ``DatetimeIndex`` built from ``date_columns`` (the column itself is
        kept in the frame). ``Volume`` is replaced by
        ``(Volume - Volume.mean()) / Volume.std()`` computed over the
        returned rows.
    """
    df = pd.read_csv(filename, parse_dates=[date_columns])
    # Filter first, then build the index from the *filtered* frame. Building
    # the index from the unfiltered date column produces an index longer than
    # the frame whenever a row is dropped, and set_index then raises a
    # length-mismatch ValueError.
    df = df[df['Close'].notna()]
    df = df.set_index(pd.DatetimeIndex(df[date_columns]))
    volume = df['Volume']
    df['Volume'] = (volume - volume.mean()) / volume.std()
    return df

def normalize_episode(df_episode):
    """Return a copy of the episode with its price columns rescaled by the first open.

    ``Open``, ``High``, ``Low`` and ``Close`` are each divided by the
    ``Open`` value of the episode's first row; every other column is copied
    unchanged. The input frame is not modified.
    """
    first_open = df_episode.iloc[0]['Open']
    scaled = df_episode.copy()
    for price_column in ('Open', 'High', 'Low', 'Close'):
        scaled[price_column] = scaled[price_column] / first_open
    return scaled

def get_train_test_sets(df, train_split):
    """Cut ``df`` by row position into a leading training part and a trailing test part.

    The cut point is ``int(train_split * number_of_rows)``. The shapes of
    both parts and the total row count are printed before returning.

    Returns
    -------
    tuple
        ``(train_part, test_part)`` as slices of ``df``.
    """
    total_rows = df.shape[0]
    cut = int(total_rows * train_split)
    head, tail = df[:cut], df[cut:]
    print("Train state: ", head.shape, "test state: ", tail.shape, "total:", total_rows)
    return head, tail

def get_state_episode_count(state, episode_size, look_ahead):
    """Count the start positions at which ``episode_size + look_ahead`` rows fit in ``state``.

    Computed as ``state.shape[0] - episode_size - look_ahead + 1``; the
    result is zero or negative when ``state`` has too few rows.
    """
    rows_available = state.shape[0]
    rows_needed = episode_size + look_ahead
    return rows_available - rows_needed + 1

def get_features_number(df):
    """Return the number of columns of ``df`` minus one.

    NOTE(review): presumably the excluded column is the timestamp — confirm
    against the code that consumes this value.
    """
    column_count = df.shape[1]
    return column_count - 1

def get_y_label(episode_data, look_ahead, smooth=True, normalize=True,
                smooth_window=9, smooth_polyorder=3):
    """Build regression and binary labels from the change in close over ``look_ahead`` rows.

    Parameters
    ----------
    episode_data : pd.DataFrame
        Frame with a ``Close`` column.
    look_ahead : int
        Number of rows between a sample and the later close it is compared to.
    smooth : bool
        If true, pass the differences through ``savgol_filter``.
    normalize : bool
        If true, divide the differences by their standard deviation.
    smooth_window, smooth_polyorder : int
        Window length and polynomial order handed to ``savgol_filter``.
        The defaults (9 and 3) are the values that were previously hard-coded.

    Returns
    -------
    diff
        ``Close`` shifted by ``look_ahead`` rows minus the current ``Close``,
        with rows lacking a later close dropped. A NumPy array when
        ``smooth`` is true, otherwise a pandas Series.
    y_category
        Same container type as ``diff``: 1 where ``diff`` is positive,
        0 where it is zero or negative.

    Raises
    ------
    ValueError
        Propagated from ``savgol_filter`` when smoothing is requested and it
        rejects the input (for example, fewer samples than ``smooth_window``).
    """
    close = episode_data['Close']
    # Shift only the column that is needed instead of the whole frame.
    diff = (close.shift(-look_ahead) - close).dropna()
    if normalize:
        scale = diff.std()
        # A zero (constant differences) or NaN (fewer than two samples) scale
        # would turn every label into NaN/inf and break the integer cast
        # below, so the differences are left unscaled in that case.
        if scale > 0:
            diff = diff / scale
    if smooth:
        diff = savgol_filter(diff, smooth_window, smooth_polyorder)
    # sign is -1/0/+1, so (sign + 1) / 2 is 0/0.5/1; the int cast truncates
    # 0.5 to 0, which puts "no change" in the same class as "down".
    y_category = ((np.sign(diff) + 1) / 2).astype(int)
    return diff, y_category

def get_episode_data(state, start_index, episode_size, look_ahead):
    """Slice one episode out of ``state`` and return its features and labels.

    Parameters
    ----------
    state : pd.DataFrame
        Source rows; sliced by position as
        ``state[start_index : start_index + episode_size]``.
    start_index : int
        Position of the episode's first row.
    episode_size : int
        Number of rows taken for the episode.
    look_ahead : int
        Look-ahead passed to ``get_y_label``; the same number of trailing
        rows is removed from the feature array.

    Returns
    -------
    tuple of np.ndarray
        ``(X_episode, Y_diff, Y_category)``: the normalized episode rows
        converted with ``np.array`` and trimmed at the end, and the two
        outputs of ``get_y_label`` converted with ``np.array``.
    """
    episode_state = state[start_index : start_index + episode_size].copy()
    # Keep only the rows that still have a close `look_ahead` rows later.
    # An explicit count is used because `[:-look_ahead]` returns an empty
    # array when look_ahead == 0 (since -0 == 0).
    usable_rows = max(len(episode_state) - look_ahead, 0)
    X_episode = np.array(normalize_episode(episode_state))[:usable_rows, :]
    Y_diff, Y_category = get_y_label(episode_state, look_ahead)
    return X_episode, np.array(Y_diff), np.array(Y_category)

In [None]:
# Load the full candle table using read_data's default CSV path.
df = read_data()
# Split by row position: the first TRAIN_SPLIT fraction of rows is the training part, the rest the test part.
train_state, test_state = get_train_test_sets(df, TRAIN_SPLIT)
# Column count of df minus one (see get_features_number).
features_number = get_features_number(df)
# Episode counts for each part, as computed by get_state_episode_count.
train_length = get_state_episode_count(train_state, EPISODE_SIZE, LOOK_AHEAD)
test_length = get_state_episode_count(test_state, EPISODE_SIZE, LOOK_AHEAD)
# Bare last expression: shown as the cell's output.
train_length, test_length, features_number

In [None]:
# Extract a single episode from the training rows to inspect what get_episode_data returns.
# NOTE(review): start_index=675000 is a hard-coded row offset that is not checked against
# train_length here — confirm the training part is long enough for the dataset in use.
X_episode, Y_diff, Y_category = get_episode_data(train_state, start_index=675000, episode_size=EPISODE_SIZE, look_ahead=LOOK_AHEAD)
# Show the first two feature rows and the first two label values, followed by both array shapes.
print(X_episode[0:2], Y_diff[0:2], X_episode.shape, Y_diff.shape)

In [None]:
def render_X(X_set, start=0, end=500):
    """Render rows ``start:end`` of ``X_set`` with two extra all-zero columns appended.

    The selected rows are copied, two zero-filled columns are added after the
    existing ones, the resulting shape is printed, and the array is passed to
    ``render`` with ``title="X"`` and ``scale=1``.

    NOTE(review): the two zero columns are named "trades" in this code;
    presumably ``render`` expects trade markers there — confirm against the
    renderer module.
    """
    window_by_column = X_set[start:end].copy().T
    row_count = window_by_column.shape[1]
    empty_trades = np.zeros((2, row_count))
    stacked = np.concatenate((window_by_column, empty_trades), axis=0)
    # Transpose back so that each row is one step of the window again.
    frame = stacked.T
    print(frame.shape)
    render(frame, title="X", scale=1)

def render_y(y, start=0, end=500):
    """Plot ``y[start:end]`` as a line chart on a 20x5 figure and show it.

    The figure size is applied by assigning the ``figure.figsize`` entry of
    ``plt.rcParams``, so the setting stays in place after this call returns.
    """
    plt.rcParams["figure.figsize"] = (20, 5)
    segment = y[start:end]
    plt.plot(segment)
    plt.show()

In [None]:
# Plot the first 500 values of the look-ahead close difference returned by get_episode_data.
render_y(Y_diff, start=0, end=500)

In [None]:
# Plot the first 500 binary labels (1 where the difference is positive, otherwise 0).
render_y(Y_category, start=0, end=500)

In [None]:
# Render the first 500 rows of the normalized episode features through the project renderer.
render_X(X_episode, start=0, end=500)