In [1]:
from pckgs.helper import PnlCallback
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as  np
import matplotlib.pyplot as plt
import seaborn as sb
from pckgs.models import get_model_price
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l1, l2
from pckgs.evaluator import Evaluator
from sklearn.metrics import classification_report

from tensorflow.keras.layers import Dense, Reshape, LSTM, Dropout, Bidirectional
from tensorflow.keras.optimizers import Adam, SGD
%load_ext autoreload
%autoreload 2

In [9]:
# Load the hourly position dataset, indexed by its parsed 'Timestamp' column.
# (Daily variant of the same data: './datasets/bitcoin_position_day.csv'.)
df = pd.read_csv(
    './datasets/bitcoin_position_hour.csv',
    header=0,
    parse_dates=['Timestamp'],
    index_col='Timestamp',
)

# The three direction columns are the targets; every other column is a feature.
y = df[['down', 'same', 'up']]
x = df.drop(columns=['down', 'same', 'up'])

# Ordered (unshuffled) split: the first 80% of the rows train, the last 20% test.
x_train, x_test, y_train, y_test = train_test_split(x, y, shuffle=False, test_size=0.2)

# Row indexes of each split, reused later to label predictions and align candles.
# Note: despite its name, x_index is the index of the *training* split.
x_index = y_train.index
y_index = y_test.index

In [11]:
# Collapse the down/same/up indicator columns of each split into a single
# label per row (-1, 0 or 1): the columns are renamed to their numeric label
# and idxmax picks the column holding the row maximum. These labelled series
# are used for the PnL evaluation and the confusion matrix.
y_test_labeled, y_train_labeled = (
    targets.rename(columns={'down': -1, 'same': 0, 'up': 1}).idxmax(axis=1)
    for targets in (y_test, y_train)
)

In [12]:
# Load the hourly candles, lower-case the OHLC column names and index by 'date'.
# (Daily variant of the same data: '../Price/datasets/bitcoin_candle_day.csv'.)
df_candle = (
    pd.read_csv('../Price/datasets/bitcoin_candle_hour.csv', header=0, parse_dates=['Timestamp'])
    .rename(columns={
        'Timestamp': 'date',
        'Open': 'open',
        'Low': 'low',
        'High': 'high',
        'Close': 'close',
    })
    .set_index('date')
)

# Align the candles with the timestamps of each split; timestamps that have no
# candle come back from reindex as all-NaN rows.
df_candle_x = df_candle.reindex(x_index)
df_candle_y = df_candle.reindex(y_index)

In [13]:
# Count how many training rows carry each (down, same, up) label combination.
print(y_train.value_counts())

down  same  up
0     0     1     13888
1     0     0     12829
0     1     0      8307
dtype: int64


In [14]:
# Build a fresh model through the project helper pckgs.models.get_model_price.
# NOTE(review): the architecture and compile settings live in that helper and
# are not visible from this notebook.
model = get_model_price()

In [None]:
# Hold out the most recent 20% of the training rows as a validation set.
# Early stopping and checkpoint selection must not look at x_test / y_test:
# picking the best epoch on the test set makes the test metrics reported
# further down optimistically biased. shuffle=False keeps the hold-out
# chronologically after the rows the model is fitted on.
x_fit, x_val, y_fit, y_val = train_test_split(x_train, y_train, test_size=0.2, shuffle=False)

# Stop once val_loss has failed to improve by at least 0.001 for 60 epochs.
es = EarlyStopping(monitor='val_loss', mode='min', min_delta=0.001, verbose=0, patience=60)
# Only overwrite the checkpoint when val_loss improves, so the file on disk
# always holds the best epoch seen so far.
mc = ModelCheckpoint('model_price.h5', monitor='val_loss', mode='min', verbose=0, save_best_only=True)

history = model.fit(x=x_fit, y=y_fit, validation_data=(x_val, y_val), batch_size=32,
                    epochs=300, verbose=0, callbacks=[es, mc])

# Continue with the best checkpoint rather than the weights of the final epoch.
model = load_model('model_price.h5')

In [None]:
# Learning curves of the last fit: one figure for accuracy, one for loss, each
# showing the training curve against the validation curve per epoch.
ac = {'accuracy': history.history['accuracy'], 'val_accuracy': history.history['val_accuracy']}
fig_acc, ax_acc = plt.subplots()
sb.lineplot(data=ac, ax=ax_acc)
ax_acc.set(title='Accuracy per epoch', xlabel='epoch', ylabel='accuracy')

loss = {'loss': history.history['loss'], 'val_loss': history.history['val_loss']}
fig_loss, ax_loss = plt.subplots()
sb.lineplot(data=loss, ax=ax_loss)
# Trailing semicolon keeps the notebook from echoing the return value of set().
ax_loss.set(title='Loss per epoch', xlabel='epoch', ylabel='loss');

In [None]:
# Score the held-out rows. The model emits one column per class, in the same
# down/same/up order as the targets, so the columns are named -1/0/1 and the
# column with the largest output becomes the predicted label for that row.
y_pred = model.predict(x_test)
y_pred_labeled = pd.DataFrame(y_pred, index=y_index, columns=[-1, 0, 1]).idxmax(axis=1)

# Compare predictions with the true test labels over the aligned test candles.
Evaluator.evaluate(y_pred_labeled, y_test_labeled, df_candle_y, force=False)

In [None]:
# Same evaluation on the training rows, to compare in-sample behaviour with
# the test results (x_index is the training-split index).
y_predt = model.predict(x_train)
y_predt_labeled = pd.DataFrame(y_predt, index=x_index, columns=[-1, 0, 1]).idxmax(axis=1)

Evaluator.evaluate(y_predt_labeled, y_train_labeled, df_candle_x, force=False)

In [None]:
# Per-class precision / recall / F1 of the test predictions (labels -1, 0, 1).
print(classification_report(y_test_labeled, y_pred_labeled))

In [None]:
# Repeat the whole train/evaluate cycle 10 times from freshly built models and
# collect accuracy and weighted F1 on both splits, so the figures can be
# averaged over runs instead of relying on a single training run.
temp = {'train_accuracy': [], 'train_f1': [], 'test_accuracy': [], 'test_f1': []}

# Validation data for early stopping / checkpointing is the chronological tail
# of the training set. Using x_test here would select the best epoch on the
# very data the test metrics are computed from and bias them upwards.
x_fit, x_val, y_fit, y_val = train_test_split(x_train, y_train, test_size=0.2, shuffle=False)

for _ in range(10):
    model = get_model_price()
    es = EarlyStopping(monitor='val_loss', mode='min', min_delta=0.001, verbose=0, patience=60)
    mc = ModelCheckpoint('model_price.h5', monitor='val_loss', mode='min', verbose=0, save_best_only=True)

    history = model.fit(x=x_fit, y=y_fit, validation_data=(x_val, y_val), batch_size=32,
                        epochs=300, verbose=0, callbacks=[es, mc])
    # Evaluate the best checkpoint of this run, not the last-epoch weights.
    model = load_model('model_price.h5')

    # Predicted label = class column (-1 / 0 / 1) with the largest model output.
    y_pred = model.predict(x_test)
    y_pred_labeled = pd.DataFrame(y_pred, columns=[-1, 0, 1], index=y_index).idxmax(axis=1)
    y_predt = model.predict(x_train)
    y_predt_labeled = pd.DataFrame(y_predt, columns=[-1, 0, 1], index=x_index).idxmax(axis=1)

    results_test = classification_report(y_test_labeled, y_pred_labeled, output_dict=True)
    temp['test_accuracy'].append(results_test['accuracy'])
    temp['test_f1'].append(results_test['weighted avg']['f1-score'])

    results_train = classification_report(y_train_labeled, y_predt_labeled, output_dict=True)
    temp['train_accuracy'].append(results_train['accuracy'])
    temp['train_f1'].append(results_train['weighted avg']['f1-score'])
temp

In [None]:
import statistics

# Average each metric over the repeated runs; one value per line, in the
# order train F1, test F1, train accuracy, test accuracy.
print(
    *(
        statistics.mean(temp[metric])
        for metric in ('train_f1', 'test_f1', 'train_accuracy', 'test_accuracy')
    ),
    sep='\n',
)

In [None]:
# Train 5 independent models and overlay the PnL curve each one produces on
# the test period, to show how much the trading result varies between runs.

# Validation data for early stopping / checkpointing is the chronological tail
# of the training set; validating on x_test would pick each run's best epoch
# on the same period whose PnL is plotted below.
x_fit, x_val, y_fit, y_val = train_test_split(x_train, y_train, test_size=0.2, shuffle=False)

plt.figure(figsize=(10,5))
for i in range(5):
    model = get_model_price()
    es = EarlyStopping(monitor='val_loss', mode='min', min_delta=0.001, verbose=0, patience=60)
    mc = ModelCheckpoint('model_price.h5', monitor='val_loss', mode='min', verbose=0, save_best_only=True)
    history = model.fit(x=x_fit, y=y_fit, validation_data=(x_val, y_val), batch_size=32,
                        epochs=300, callbacks=[mc, es], verbose=0)

    # Score with the best checkpoint of this run.
    model = load_model('model_price.h5')
    y_pred = model.predict(x_test)
    y_pred_labeled = pd.DataFrame(y_pred, columns=[-1, 0, 1], index=y_index)
    y_pred_labeled = y_pred_labeled.idxmax(axis=1)
    pnl = Evaluator.get_pnl(y_pred_labeled, df_candle_y)
    # Label each curve so the runs can be told apart in the legend.
    sb.lineplot(x=pnl.index, y=pnl, label=f'run {i + 1}')
plt.title('Test-period PnL across independent training runs');