# Imports

In [153]:
import math
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler 
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from pycoingecko import CoinGeckoAPI
from sklearn.model_selection import train_test_split
import requests
import pandas as pd
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import plotly.express as px
from mplfinance.original_flavor import candlestick_ohlc
import matplotlib.dates as mpl_dates
import warnings
from sklearn.model_selection import TimeSeriesSplit
warnings.filterwarnings("ignore")

# Import data

In [154]:


def get_crypto_price(symbol, exchange, start_date = None):
    """
    Download the daily price history of a digital currency from Alpha Vantage.

    Parameters
    ----------
    symbol : str
        Sent as the `symbol` query parameter (e.g. 'SHIB').
    exchange : str
        Sent as the `market` query parameter (e.g. 'USD').
    start_date : str or datetime-like, optional
        When given, rows whose index is earlier than this value are dropped.

    Returns
    -------
    pandas.DataFrame
        Float columns indexed by date in ascending order. The USD price
        columns are renamed to open / high / low / close / volume and the
        duplicated "b" price columns and the market-cap column are removed.

    Raises
    ------
    requests.HTTPError
        If the HTTP request returns an error status.
    KeyError
        If the response holds no daily series (for example an invalid key or
        a rate-limit notice); the message carries the API's explanation.

    Notes
    -----
    The API key is read from the ALPHAVANTAGE_API_KEY environment variable;
    the original placeholder string is used when the variable is not set.
    """
    import os  # local import so the function does not depend on an edit to the import cell

    series_key = 'Time Series (Digital Currency Daily)'
    api_key = os.environ.get('ALPHAVANTAGE_API_KEY', 'YOUR API KEY')

    # Let requests build and escape the query string; never wait forever on the network.
    response = requests.get(
        'https://www.alphavantage.co/query',
        params = {
            'function': 'DIGITAL_CURRENCY_DAILY',
            'symbol': symbol,
            'market': exchange,
            'apikey': api_key,
        },
        timeout = 30,
    )
    response.raise_for_status()
    raw_df = response.json()

    if series_key not in raw_df:
        # Error payloads carry a human-readable reason under one of these keys.
        reason = (raw_df.get('Error Message') or raw_df.get('Note')
                  or raw_df.get('Information') or 'no explanation in response')
        raise KeyError(f'{series_key} missing from Alpha Vantage response for {symbol}/{exchange}: {reason}')

    df = pd.DataFrame(raw_df[series_key]).T
    df = df.rename(columns = {'1a. open (USD)': 'open', '2a. high (USD)': 'high', '3a. low (USD)': 'low', '4a. close (USD)': 'close', '5. volume': 'volume'})
    # errors='ignore': a response without the duplicated columns should not abort the download.
    df = df.drop(columns = ['1b. open (USD)', '2b. high (USD)', '3b. low (USD)', '4b. close (USD)', '6. market cap (USD)'], errors = 'ignore')
    df = df.astype(float)
    df.index = pd.to_datetime(df.index)
    # Sort explicitly instead of assuming the API lists the newest day first.
    df = df.sort_index()
    if start_date:
        df = df[df.index >= start_date]
    return df

# Fetch the SHIB/USD daily history from 2021 onwards; the bare name renders the frame.
df = get_crypto_price('SHIB', 'USD', start_date = '2021-1-1')
df

Unnamed: 0,open,high,low,close,volume
2021-05-10,0.000014,0.000050,0.000014,0.000035,9.530231e+13
2021-05-11,0.000035,0.000038,0.000028,0.000030,6.451279e+13
2021-05-12,0.000030,0.000031,0.000018,0.000018,8.690914e+13
2021-05-13,0.000018,0.000024,0.000015,0.000021,9.125488e+13
2021-05-14,0.000021,0.000022,0.000018,0.000018,4.092785e+13
...,...,...,...,...,...
2022-02-03,0.000021,0.000021,0.000020,0.000021,3.687629e+12
2022-02-04,0.000021,0.000022,0.000021,0.000022,6.004844e+12
2022-02-05,0.000022,0.000024,0.000022,0.000023,7.828548e+12
2022-02-06,0.000022,0.000029,0.000022,0.000028,1.812868e+13


# Create normalised data

In [155]:
# Min-max scale every column of the price frame. Only the column labels are
# passed on, so norm_df gets a fresh positional index instead of the dates.
# NOTE(review): the scaler is fitted on the full history before the
# train/validation split further down - confirm that this is intended.
scaler = MinMaxScaler()
norm_df = pd.DataFrame(
    data = scaler.fit_transform(df),
    columns = df.columns,
)
norm_df.head()

Unnamed: 0,open,high,low,close,volume
0,0.112478,0.531713,0.146906,0.393194,0.384932
1,0.393467,0.385784,0.369707,0.335247,0.259352
2,0.334973,0.306926,0.20228,0.168375,0.350699
3,0.167828,0.210571,0.166124,0.206779,0.368424
4,0.206915,0.194411,0.212704,0.170972,0.163157


# Time series splitting

In [156]:
def TimeSeriesSplitter(norm_input_data, n_features = 4, sequence_length = 5, output_length = 1):
    '''
    Cut a price frame into chronological sliding windows with up/down labels.

    Every sample pairs `sequence_length` consecutive rows (the first
    `n_features` columns) with labels for the `output_length` rows that follow
    them. A label is 1 when the close price (column 3) of a target row is
    strictly higher than the close of the row directly before it, otherwise 0.
    Samples are emitted in time order and must not be shuffled before a
    train/validation split.

    Parameters
    ----------
    norm_input_data : pandas.DataFrame
        Rows in chronological order; column 3 is read as the close price.
    n_features : int
        Number of leading columns copied into each input window.
    sequence_length : int
        Number of rows in each input window.
    output_length : int
        Number of target rows per sample; also the step between samples.

    Returns
    -------
    X : list
        One entry per sample: `sequence_length` rows of `n_features` values.
    y : list
        One entry per sample: `output_length` labels, each nested as [[label]].

    Raises
    ------
    ValueError
        If a length argument is below 1 or the frame is too short to yield a
        single full window.
    '''
    close_column = 3  # positional index of the close price, as used for the targets

    if sequence_length < 1 or output_length < 1:
        raise ValueError('sequence_length and output_length must both be at least 1')

    n_samples = len(norm_input_data)

    # Same target layout as before: the targets are the trailing blocks of
    # `output_length` rows, one block per sample.
    n_windows = n_samples // output_length - sequence_length // output_length
    first_target = n_samples - n_windows * output_length
    # Skip targets that do not have a full window of history in front of them,
    # so every X entry has exactly `sequence_length` rows.
    target_starts = [start for start in range(first_target, n_samples, output_length)
                     if start >= sequence_length]
    if not target_starts:
        raise ValueError(f'{n_samples} rows are too few for sequence_length={sequence_length} '
                         f'and output_length={output_length}')

    # Pull the values out once instead of one .iloc lookup per cell.
    features = norm_input_data.iloc[:, :n_features].to_numpy()
    closes = norm_input_data.iloc[:, close_column].to_numpy()

    X, y = [], []
    for start in target_starts:
        X.append(features[start - sequence_length:start].tolist())
        # Compare with the real previous close; start >= sequence_length >= 1
        # guarantees that row i - 1 exists.
        y.append([[[int(closes[i] > closes[i - 1])]] for i in range(start, start + output_length)])

    return X, y

In [157]:
# Window parameters for this run
norm_input_data = norm_df
sequence_length = 6
output_length = 1
n_features = 4
X,y = [],[]
batch_size = len(norm_input_data)

# create sequence splits: each split pairs the `sequence_length` rows before a target with that target
tscv = TimeSeriesSplit(gap = 0, max_train_size = sequence_length, n_splits = int(batch_size / output_length) - int(sequence_length / output_length), test_size = output_length)

# close prices (column 3) as a plain array, used to build the up/down labels
closes = norm_input_data.iloc[:, 3].to_numpy()

# create sequences
for X_index, y_index in tscv.split(norm_input_data.close):
    X.append(norm_input_data.iloc[X_index, :n_features].to_numpy().tolist())
    # target: 1 if the close is higher than on the previous row, otherwise 0 (classification).
    # Every target index is >= sequence_length here, so row i - 1 always exists.
    y.append([[[int(closes[i] > closes[i - 1])]] for i in y_index])


# RNN Classifier

In [158]:
# Binary up/down classifier: one recurrent layer over the input window,
# one hidden dense layer, and a single sigmoid output unit.
# Layer classes come from the `layers` module imported in the first cell,
# so no star import is needed and only tensorflow.keras objects are mixed.
model = keras.Sequential() # initialize sequential model

# input layer: the window shape follows the parameters used to build X
model.add(layers.SimpleRNN(64, input_shape = (sequence_length, n_features), activation = 'sigmoid', return_sequences = False))
model.add(layers.Dense(64, activation="sigmoid")) # hidden dense layer with 64 units
model.add(layers.Dense(1, activation="sigmoid")) # output layer (sigmoid activation as we're interested in the probability)

opt = tf.keras.optimizers.Adam(learning_rate = 0.01) # define optimizer
model.compile(loss='binary_crossentropy', optimizer = opt, metrics = ['accuracy']) # compile model

model.summary() # print model summary

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_14 (SimpleRNN)   (None, 64)                4416      
                                                                 
 dense_26 (Dense)            (None, 64)                4160      
                                                                 
 dense_27 (Dense)            (None, 1)                 65        
                                                                 
Total params: 8,641
Trainable params: 8,641
Non-trainable params: 0
_________________________________________________________________


# Training process

In [159]:
def TrainValidSplit(X, y, train_fraction = 0.7):
    """
    Split chronologically ordered samples into a training and a validation part.

    The first `train_fraction` of the samples (rounded down) become the
    training set and all remaining samples become the validation set, so no
    sample is dropped and the order is never shuffled.

    Parameters
    ----------
    X : sequence
        Input windows, one entry per sample.
    y : sequence
        Targets, one entry per sample, aligned with `X`.
    train_fraction : float
        Share of the samples used for training (default 0.7).

    Returns
    -------
    X_train, X_valid, y_train, y_valid : numpy.ndarray
        The targets are reshaped to (samples, outputs, 1) and cast to float32.

    Raises
    ------
    ValueError
        If `X` and `y` differ in length or either part would be empty.
    """
    total_sequences = len(X)
    if len(y) != total_sequences:
        raise ValueError(f'X has {total_sequences} samples but y has {len(y)}')

    train_size = int(total_sequences * train_fraction)
    valid_size = total_sequences - train_size
    if train_size < 1 or valid_size < 1:
        raise ValueError(f'cannot split {total_sequences} samples with train_fraction={train_fraction}')

    # Everything after the training part is validation data; slicing to the
    # end avoids losing trailing samples to a second rounding step.
    X_train, y_train = np.array(X[:train_size]), np.array(y[:train_size])
    X_valid, y_valid = np.array(X[train_size:]), np.array(y[train_size:])

    # reshape the target variable to 3 dimensions -> (batch x outputs x 1)
    y_train = y_train.reshape(train_size, -1, 1).astype(np.float32)
    y_valid = y_valid.reshape(valid_size, -1, 1).astype(np.float32)

    return X_train, X_valid, y_train, y_valid


In [160]:
# Chronological train/validation split of the windowed samples.
X_train, X_valid, y_train, y_valid = TrainValidSplit(X = X, y = y)

In [161]:
def trainModel(X_train, X_valid, y_train, Y_valid, n_features = 5, sequence_length = 1):
    """
    Fit the notebook-level `model` and evaluate it on the validation data.

    Parameters
    ----------
    X_train, y_train
        Training inputs and targets passed to `model.fit`.
    X_valid, Y_valid
        Validation inputs and targets, used both as `validation_data` while
        fitting and for the final `model.evaluate` call.
    n_features, sequence_length
        Not used by the function; kept so existing calls keep working.

    Returns
    -------
    The value returned by `model.evaluate(X_valid, Y_valid)`.
    """
    # Use the Y_valid argument, not a global that merely happens to share a similar name.
    model.fit(X_train, y_train, epochs = 10, validation_data = (X_valid, Y_valid), verbose = 0) # train model

    return model.evaluate(X_valid, Y_valid) # evaluate model
    
    

In [162]:
# Fit the classifier and evaluate it on the validation windows.
trainModel(X_train, X_valid, y_train, Y_valid = y_valid)



In [163]:
# Does the model give the first validation window a probability above 0.5?
model.predict(X_valid)[0, 0] > 0.5

True