In [None]:
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from pckgs.headline_preprocess import HeadlinePreprocess
from pckgs.helper import timeseries_to_supervised2, custom_split
from pckgs.models import get_model_price, train_model, get_model_both_sent, get_model_both_emb
from pckgs.price_preprocess import PricePreprocess
from sklearn.model_selection import train_test_split
from pckgs.evaluator import Evaluator
import numpy as np
# Load the autoreload extension and set mode 2 so that imported modules
# (e.g. the local `pckgs` helpers) are re-imported before each cell executes.
%load_ext autoreload
%autoreload 2
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
# Let pandas print up to 400 items of a sequence before truncating it.
pd.options.display.max_seq_items = 400

In [None]:
# Trading pairs to load; each one maps to a feather file under
# ./datasets/coinbase_hour_candles/.
coin_names = ['BTC_USD', 'ETH_USD', 'LTC_USD', 'BCH_USD']  # currently excluded: 'XLM_USD', 'XRP_USD'
coin_dirs = ['./datasets/coinbase_hour_candles/' + name + '.feather' for name in coin_names]
coins = dict(zip(coin_names, coin_dirs))
coins


# `problem` picks the branch taken by the training cell:
#   'p'  -> get_model_price
#   'pp' -> get_model_both_sent
#   'pe' -> get_model_both_emb (that branch is currently commented out)
# problem = 'p'
problem = 'pp'
# problem = 'pe'

# Boundaries handed to custom_split: start, train/test split point, and end.
start_timestamp = '2017-01-01 00:00:00'
split_timestamp = '2019-06-30 00:00:00'
end_timestamp = '2019-12-31 23:00:00'

# Arguments for PricePreprocess.
# NOTE(review): presumably `lag` is the look-back window length and `threshold`
# the price-move cut-off for the up/same/down labels — confirm in pckgs.price_preprocess.
lag = 21
# threshold = 0.0025
threshold = 0.1
pp = PricePreprocess(lag, threshold)

In [None]:
# Overlay the closing price of every configured coin on one shared figure.
plt.figure(figsize=(10,8))
# plt.ylim(0,5000)
for coin_name, feather_path in coins.items():
    prices = pd.read_feather(feather_path).set_index('time')
    # Drop the timezone so the time index is tz-naive.
    prices.index = prices.index.tz_localize(None)
    sb.lineplot(data=prices, x=prices.index, y='close', label=coin_name)

In [None]:
# Single-coin alternative, kept for reference:
# x_train, y_train, x_test, y_test = custom_split(pp, [coins['ETH_USD']], problem, start_timestamp, split_timestamp, end_timestamp)

# Active choice: build the train/test split from the files of every configured coin.
x_train, y_train, x_test, y_test = custom_split(
    pp,
    coins.values(),
    problem,
    start_timestamp,
    split_timestamp,
    end_timestamp,
)

In [None]:
# Build and train the network that matches `problem`, then plot its learning curves.
# NOTE(review): the data tuple is ordered (x_train, x_test, y_train, y_test); presumably
# train_model uses the test split as validation data — confirm in pckgs.models.
if problem == 'p':
    model = get_model_price()
    model, history = train_model(model, (x_train, x_test, y_train, y_test), '../models/model_price.h5', epochs=200, verbose=1)
elif problem == 'pp':
    model = get_model_both_sent()
    model, history = train_model(model, (x_train, x_test, y_train, y_test), '../models/model_both_sent.h5', epochs=200)
# elif problem =='pe':
#     model = get_model_both_emb()
#     model, history = train_model(model, (x_train, x_test, y_train, y_test), '../models/model_both_sent.h5', epochs=2)
else:
    # Fail fast: otherwise `model` / `history` would be undefined below, or worse,
    # silently left over from an earlier run of this cell with a different `problem`.
    raise ValueError("Unsupported problem {!r}; expected 'p' or 'pp'".format(problem))


# Training vs. validation accuracy, one point per epoch.
ac = {'accuracy': history.history['accuracy'], 'val_accuracy': history.history['val_accuracy']}
plt.figure()
sb.lineplot(data=ac)

# Training vs. validation loss, one point per epoch.
loss = {'loss': history.history['loss'], 'val_loss': history.history['val_loss']}
plt.figure()
sb.lineplot(data=loss)

In [None]:
# Evaluate the trained model on the test period of a single coin.
coin = 'BTC_USD'
# NOTE(review): this rebinds x_train / y_train / x_test / y_test from the multi-coin
# split to a single-coin split; re-running the training cell after this one would
# therefore fit on this coin only.
x_train, y_train, x_test, y_test = custom_split(pp, [coins[coin]], problem, start_timestamp, split_timestamp, end_timestamp)
candle = pd.read_feather(coins[coin])
candle.set_index('time', inplace=True)
candle.index = candle.index.tz_localize(None)

test_index = y_test.index
train_index = y_train.index


# Turn the one-hot targets into a single signed label per row (for pnl, confusion matrix):
# rename the columns to -1 / 0 / 1, then pick the column holding the row maximum.
label_codes = {'down': -1, 'same': 0, 'up': 1}
y_test_coded = y_test.rename(columns=label_codes)
y_test_labeled = y_test_coded.idxmax(axis=1)

y_train_labeled = y_train.rename(columns=label_codes).idxmax(axis=1)


# Candle rows aligned to the timestamps of each split.
test_candle = candle.reindex(test_index)
train_candle = candle.reindex(train_index)

# Label the predictions the same way (for pnl, confusion matrix).
y_pred = model.predict(x_test)
# Take the column labels from the target frame rather than hard-coding [-1, 0, 1], so
# predictions and targets share one mapping even if the targets are not ordered
# down/same/up. Assumes the model's outputs follow the target column order.
y_pred_labeled = pd.DataFrame(y_pred, columns=y_test_coded.columns, index=test_index)
y_pred_labeled = y_pred_labeled.idxmax(axis=1)

Evaluator.evaluate(y_pred_labeled, y_test_labeled, test_candle)
print(classification_report(y_test_labeled, y_pred_labeled))

In [None]:
# Repeat the evaluation on the training period of the same coin, to compare
# against the test-period results.
y_predt = model.predict(x_train)
# Take the column labels from the training targets rather than hard-coding [-1, 0, 1],
# so the mapping stays correct even if the targets are not ordered down/same/up.
# Assumes the model's outputs follow the target column order.
train_label_columns = y_train.rename(columns={'down': -1, 'same': 0, 'up': 1}).columns
y_predt_labeled = pd.DataFrame(y_predt, columns=train_label_columns, index=train_index)
y_predt_labeled = y_predt_labeled.idxmax(axis=1)

Evaluator.evaluate(y_predt_labeled, y_train_labeled, train_candle)
print(classification_report(y_train_labeled, y_predt_labeled))