In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import tensorflow as tf
from alpha_vantage.cryptocurrencies import CryptoCurrencies
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from tensorflow.keras.layers import Input, Dense, LSTM
from tensorflow.keras.models import Model

In [None]:
# Take the key from the ALPHAVANTAGE_API_KEY environment variable so a real
# key never has to be typed into (and committed with) the notebook. The
# original placeholder is kept as the fallback when the variable is not set.
AlphaVantage_api_key = os.environ.get("ALPHAVANTAGE_API_KEY", "INSERT-YOUR-API-KEY-HERE")
cc = CryptoCurrencies(key=AlphaVantage_api_key, output_format='pandas')

# get data: daily BTC quotes in USD, returned as (frame, metadata)
df, meta_data = cc.get_digital_currency_daily(symbol='BTC', market='USD')

# prep column names, index etc.
# Keep only the USD OHLC columns and the volume column from the response.
cols = ['1a. open (USD)', '2a. high (USD)', '3a. low (USD)', '4a. close (USD)', '5. volume']
# Datetime index sorted ascending, so positional slices and pct_change()
# further down run in chronological order.
df.index = pd.to_datetime(df.index)
df = df[cols].sort_index()
df.columns = ['open', 'high', 'low', 'close', 'vol']

In [None]:
# PLOT: the four price series on the primary y-axis, volume on a secondary one
ohlc_frame = df[['open', 'high', 'low', 'close']]
ohlc_frame.plot()
# The Series plot is drawn on a secondary y-axis; the returned axes is the one labelled.
ax = df['vol'].plot(secondary_y=True, legend=True)
ax.set_ylabel('vol')

In [None]:
# PLOT - a single data sample (the one row at position 100), drawn as markers
single_row = slice(100, 101)
df[['open', 'high', 'low', 'close']].iloc[single_row].plot(style='o')
ax = df['vol'].iloc[single_row].plot(secondary_y=True, style='o', legend=True)
ax.set_ylabel('vol')

In [None]:
# ROLLING WINDOW
# Draw the same window_size-row window at several start offsets and save each
# frame as a PNG, to illustrate how the window slides over the series.
window_size = 30
price_cols = ['open', 'high', 'low', 'close']
window_starts = range(0, 25, 5)

# One shared price range for all frames keeps them visually comparable.
# It is derived from the rows the frames actually show (plus a 5% margin)
# instead of a fixed [5750, 8000], which only fits one particular price
# level and leaves the plots empty for any other data.
shown_prices = df[price_cols].iloc[window_starts[0]:window_starts[-1] + window_size]
price_lo, price_hi = shown_prices.min().min(), shown_prices.max().max()
price_pad = 0.05 * (price_hi - price_lo)
price_ylim = [price_lo - price_pad, price_hi + price_pad]

for i in window_starts:
    df[price_cols].iloc[i:i+window_size].plot(ylim=price_ylim, title='start date:' + str(df.index[i].date()))
    ax = df['vol'].iloc[i:i+window_size].plot(secondary_y=True, legend=True)
    ax.set_ylabel('vol')
    ax.figure.savefig('rolling_window_' + str(i) + '.png')

In [None]:
# Show the (rows, columns) dimensions of the prepared price frame.
df.shape

In [None]:
# Number of most recent samples held out for testing.
test_size = 100

# (1) stationary data: row-over-row percentage changes of every column.
# The first row has no predecessor, so it is dropped.
df_pct = df.pct_change()[1:]

# (2) Y: the percentage change of 'close' on the NEXT row.
# shift(-1) leaves the last row without a value; it is filled with 0.
y = df['close'].pct_change().shift(periods=-1).fillna(0)[1:]

# (3) Normalisation
scaler_df = StandardScaler()

# Build the scaler on TRAIN rows only, so no test-set statistics leak in.
# fit() is sufficient here: only the learned mean/scale are needed, the
# transformed training rows were never used.
scaler_df.fit(df_pct[:-test_size])

# Apply the fitted scaler to the whole data set.
x_normalized = scaler_df.transform(df_pct)

In [None]:
# (4) create rolling window
# Turn the (rows, features) matrix into overlapping windows of window_size
# consecutive rows: window k covers rows k+1 .. k+window_size.
n, feat = x_normalized.shape
n_windows = n - window_size
# declare new x
x_ts = np.zeros((n_windows, window_size, feat))
# create time series
for k in range(n_windows):
    first_row = k + 1
    x_ts[k] = x_normalized[first_row:first_row + window_size]
x_ts.shape

In [None]:
# plot of the last data sample (the final rolling window in x_ts)
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
# The first four features go on the left axis, the fifth on the right one.
ax1.plot(x_ts[-1,:,:4])
ax2.plot(x_ts[-1,:,4])
ax1.set_ylabel('$')
ax2.set_ylabel('vol')
ax1.legend(df.columns[:4], loc=2)
# legend() takes a sequence of labels. A bare string is iterated character
# by character, which labelled the line 'v'; wrap the name in a list.
ax2.legend([df.columns[4]], loc=1)
plt.show()

In [None]:
# (5) TRAIN/TEST split
# The last test_size windows are the test set; everything before is training.
x_train, x_test = x_ts[:-test_size], x_ts[-test_size:]

# The first window_size targets have no window in x_ts, so they are skipped.
y_train, y_test = y.iloc[window_size:-test_size], y.iloc[-test_size:]

assert len(x_train) == len(y_train)
assert len(x_test) == len(y_test)

# (6) Shuffle the TRAIN samples (fixed random_state, features and targets together)
x_train, y_train = shuffle(x_train, y_train, random_state=0)

In [None]:
# (7a) model definition
def my_model(feat, window_size):
    """Build the (uncompiled) Keras network used for the predictions.

    The network takes inputs of shape ``(window_size, feat)`` and stacks an
    LSTM layer with 20 units, a Dense layer with 20 units and a single-unit
    Dense output with linear activation.

    Args:
        feat: number of features per time step.
        window_size: number of time steps per sample.

    Returns:
        The assembled ``Model``; compiling and fitting is left to the caller.
    """
    inputs = Input(shape=(window_size, feat))

    hidden = LSTM(20, name='lstm')(inputs)
    # NOTE(review): no activation is passed to this layer - confirm that a
    # purely linear hidden Dense layer is intended.
    hidden = Dense(20, name='dense')(hidden)
    outputs = Dense(1, activation='linear', name='out')(hidden)

    return Model(inputs=inputs, outputs=outputs, name='way2becomefcknrich')

In [None]:
# (7) Train the network on the TRAIN data
# Read the window length and the number of features back from the tensor.
window_size, feat = x_train.shape[1:]

model = my_model(feat=feat, window_size=window_size)
model.compile(loss='mean_squared_error')
model.summary()

model.fit(x=x_train, y=y_train, epochs=20)

In [None]:
# (8) Prediction for TEST
y_predicted = model.predict(x_test)

# Actual next-row change ('close') next to the model output ('prediction').
df_pred = pd.concat([y_test, pd.DataFrame(y_predicted, index=y_test.index, columns=['prediction'])], axis=1)
df_pred[df_pred.index>'2021-01-01'].plot(grid=True)

# (9) Cashing out the earned money
# The target of the very last row is not a real outcome: the next-row change
# is not known yet and was filled with 0 when y was built. Scoring it would
# count one fabricated sample (almost always as a miss), so it is left out.
df_scored = df_pred.iloc[:-1]

# A prediction is a hit when its sign matches the sign of the actual change.
mask = np.sign(df_scored['close']) == np.sign(df_scored['prediction'])
acc = np.sum(mask) / len(df_scored)
display(acc)

# Absolute changes captured on hits / lost on misses.
trans_positive = df_scored['close'][mask].abs()
trans_loss = df_scored['close'][~mask].abs()

print('accuracy:%.2f%% trans_positive:%.2f%% trans_loss:%.2f%%' %(100*acc, 100*trans_positive.sum(), 100*trans_loss.sum()))
print('zarobek netto:%.2f%%' %(100*trans_positive.sum() - 100*trans_loss.sum()))

In [None]:
# The same for CDPROJEKT. Data from https://www.gpw.pl/archiwum-notowan
df = pd.read_excel('cdprojekt.xls', index_col=0)
df = df[['Kurs otwarcia', 'Kurs max', 'Kurs min', 'Kurs zamknięcia', 'Wolumen']]
# Prepare the index the same way as for the BTC frame: datetime values in
# ascending order. pct_change() and the date filter applied later both rely
# on a chronological datetime index.
# NOTE(review): assumes the first spreadsheet column holds the quote dates - confirm.
df.index = pd.to_datetime(df.index)
df = df.sort_index()
df.columns = ['open', 'high', 'low', 'close', 'vol']

In [None]:
# all the steps in a single function
def do_magic(df_in, window_size = 30, test_size = 100, n_epochs = 100, plot_from = '2021-01-01'):
    """Train the LSTM on one price frame and print a sign-based trading summary.

    Steps: percentage changes -> StandardScaler fitted on the training rows
    only -> rolling windows of ``window_size`` rows -> train/test split
    (last ``test_size`` windows are the test set) -> fit ``my_model`` ->
    predict the test windows -> plot predictions and print the statistics.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input frame; must contain a 'close' column. Every column is used as
        a feature after ``pct_change()``. Presumably rows are in
        chronological order with a date-like index - verify against caller.
    window_size : int
        Number of consecutive rows in each input sample.
    test_size : int
        Number of most recent samples held out for testing.
    n_epochs : int
        Number of training epochs.
    plot_from : str or None
        Only test rows whose index is greater than this value are plotted.
        ``None`` plots every test row. The default keeps the cut-off that
        used to be hard-coded.

    Returns
    -------
    None
        Results are plotted and printed.
    """
    # (1) stationary data: row-over-row percentage changes, first row dropped
    df_pct = df_in.pct_change()[1:]

    # (2) Y: percentage change of 'close' on the NEXT row. The last row has
    # no next row; its target is filled with 0 and excluded from scoring below.
    y = df_in['close'].pct_change().shift(periods=-1).fillna(0)[1:]

    # (3) Normalisation
    scaler_df = StandardScaler()

    # Build the scaler on TRAIN rows only. fit() is enough: the transformed
    # training rows returned by fit_transform() were never used.
    scaler_df.fit(df_pct[:-test_size])

    # Apply the fitted scaler to the whole data set.
    x_normalized = scaler_df.transform(df_pct)


    # (4) create rolling window
    n, feat = np.shape(x_normalized)
    # declare new x
    x_ts = np.zeros((n-window_size,window_size,feat))
    # create time series: sample i-window_size ends on row i
    for i in range(window_size,n):
        x_ts[i-window_size] = x_normalized[i-window_size+1:i+1]


    # (5) TRAIN/TEST split
    x_train = x_ts[:-test_size]
    x_test = x_ts[-test_size:]

    # The first window_size targets have no window in x_ts.
    y_train = y[window_size:-test_size]
    y_test = y[-test_size:]

    assert(len(x_train)==len(y_train))
    assert(len(x_test)==len(y_test))


    # (6) Shuffle the TRAIN samples
    x_train, y_train = shuffle(x_train, y_train, random_state=0)


    # (7) Train the network on the TRAIN data
    model = my_model(feat, window_size)
    model.compile(loss='mean_squared_error')

    model.fit(x_train, y_train, epochs=n_epochs, verbose=False)


    # (8) Prediction for TEST
    y_predicted = model.predict(x_test)

    df_pred = pd.concat([y_test, pd.DataFrame(y_predicted, index=y_test.index, columns=['prediction'])], axis=1)
    df_plot = df_pred if plot_from is None else df_pred[df_pred.index > plot_from]
    df_plot.plot(grid=True)


    # (9) Cashing out the earned money
    # The last row's target is the 0 placeholder for a change that is not
    # known yet, so it is not a real outcome and is left out of the scoring.
    df_scored = df_pred.iloc[:-1]

    # A prediction is a hit when its sign matches the sign of the actual change.
    mask = np.sign(df_scored['close']) == np.sign(df_scored['prediction'])
    acc = np.sum(mask) / len(df_scored)
    # winning / losing transactions
    trans_positive = df_scored['close'][mask].abs()
    trans_loss = df_scored['close'][~mask].abs()
    # long / short transactions: long where the prediction is positive
    mask_long = np.sign(df_scored['prediction']) > 0
    gain_long = df_scored['close'][mask_long].sum()
    gain_short = df_scored['close'][~mask_long].sum()

    print('accuracy:%.2f%% trans_positive:%.2f%% trans_loss:%.2f%%' %(100*acc, 100*trans_positive.sum(), 100*trans_loss.sum()))
    print('zarobek netto:%.2f%%' %(100*trans_positive.sum() - 100*trans_loss.sum()))
    print('\tz czego long_gain:%.2f%% short_gain:%.2f%%' %(100*gain_long, -100*gain_short))

In [None]:
# train and predict in a single step
do_magic(
    df,
    window_size=30,
    test_size=100,
    n_epochs=20,
)