In [1]:
# Initial imports
import numpy as np
import pandas as pd


# API import
import requests
import json

# Data cleaning
from datetime import datetime


# Plot import
%matplotlib inline

In [2]:
# API
ticker = "BTC"
limit ="2000"

price_url = f"https://min-api.cryptocompare.com/data/v2/histoday?fsym={ticker}&tsym=USD&limit=2000"

price_response_data = requests.get(price_url).json()
data_price = pd.DataFrame(price_response_data["Data"]["Data"])

data_price.head()


Unnamed: 0,time,high,low,open,volumefrom,volumeto,close,conversionType,conversionSymbol
0,1469923200,654.92,621.42,654.74,62887.74,39918066.14,621.87,direct,
1,1470009600,627.89,603.55,621.87,66660.27,41229298.83,607.0,direct,
2,1470096000,613.75,471.36,607.0,130913.66,75338595.54,513.43,direct,
3,1470182400,573.06,514.09,513.43,85930.35,47170751.56,566.44,direct,
4,1470268800,584.89,559.5,566.44,44823.44,25725471.16,576.22,direct,


In [3]:
# Transform to DataFrame
data_price.drop(columns = ["volumefrom" , "volumeto" , "conversionType" , "conversionSymbol"], inplace =True)

data_price.columns =['Date', 'Highest price',  'Lowest price' ,  'Opening price' , 'Closing price']
data_price['Date'] = pd.to_datetime(data_price['Date'], unit='s')

data_price.set_index('Date', inplace=True)

data_price.head()

Unnamed: 0_level_0,Highest price,Lowest price,Opening price,Closing price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-07-31,654.92,621.42,654.74,621.87
2016-08-01,627.89,603.55,621.87,607.0
2016-08-02,613.75,471.36,607.0,513.43
2016-08-03,573.06,514.09,513.43,566.44
2016-08-04,584.89,559.5,566.44,576.22


In [4]:
# Select only dates starting end 2019, because the price curve is distincly different
df = data_price.loc["2019-12-1" : ]
df.head(10)

Unnamed: 0_level_0,Highest price,Lowest price,Opening price,Closing price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-12-01,7575.5,7261.95,7571.77,7420.53
2019-12-02,7442.62,7193.09,7420.53,7320.94
2019-12-03,7420.34,7260.93,7320.94,7313.64
2019-12-04,7764.68,7110.76,7313.64,7207.78
2019-12-05,7492.96,7175.85,7207.78,7406.75
2019-12-06,7607.79,7323.86,7406.75,7558.58
2019-12-07,7640.51,7503.1,7558.58,7514.86
2019-12-08,7594.8,7414.51,7514.86,7539.7
2019-12-09,7667.04,7296.42,7539.7,7350.62
2019-12-10,7403.64,7176.42,7350.62,7233.84


In [5]:
def window_data(df, window, feature_col_number, target_col_number):
    """
    This function accepts the column number for the features (X) and the target (y).
    It chunks the data up with a rolling window of Xt - window to predict Xt.
    It returns two numpy arrays of X and y.
    """
    X = []
    y = []
    for i in range(len(df) - window -1):
        features = df.iloc[i : (i + window), feature_col_number]
        target = df.iloc[(i + window + 1), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

# Split Data and create predictive set

In [6]:
# Define the window size
window_size = 10


# Set the index of the feature and target columns
feature_column = 3
target_column = 3

# Create the features (X) and target (y) data using the window_data() function.
X, y = window_data(df, window_size, feature_column, target_column)


# Manually splitting the data
split = int(0.7 * len(X))

X_train = X[: split]
X_test = X[split:]

y_train = y[: split]
y_test = y[split:]

In [8]:
# Importing the MinMaxScaler from sklearn
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScaler object
X_scaler = MinMaxScaler()
Y_scaler = MinMaxScaler()

# Fit the MinMaxScaler object with the features data X
X_scaler.fit(X_train)

# Scale the features training and testing sets
X_train = X_scaler.transform(X_train)
X_test = X_scaler.transform(X_test)

# Fit the MinMaxScaler object with the target data Y
Y_scaler.fit(y_train)

# Scale the target training and testing sets
y_train = Y_scaler.transform(y_train)
y_test = Y_scaler.transform(y_test)

In [10]:
# Reshape the features for the model
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Build and Train the LSTM RNN

In [11]:
# Import required Keras modules
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [12]:
model = Sequential()

# Initial model setup
number_units = 10
dropout_fraction = 0.2

# Layer 1
model.add(LSTM(
    units=number_units,
    return_sequences=True,
    input_shape=(X_train.shape[1], 1))
    )
model.add(Dropout(dropout_fraction))

# Layer 2
model.add(LSTM(units=number_units, return_sequences=True))
model.add(Dropout(dropout_fraction))


# Output layer
model.add(Dense(1))

In [13]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 10, 10)            480       
                                                                 
 dropout (Dropout)           (None, 10, 10)            0         
                                                                 
 lstm_1 (LSTM)               (None, 10, 10)            840       
                                                                 
 dropout_1 (Dropout)         (None, 10, 10)            0         
                                                                 
 dense (Dense)               (None, 10, 1)             11        
                                                                 
Total params: 1,331
Trainable params: 1,331
Non-trainable params: 0
_________________________________________________________________


In [32]:
# Compile the model
model.compile(optimizer="adam", loss="mean_squared_error" , metrics = ["mse", "accuracy"])

In [33]:
# Train the model
batch = 1
epoch = 10

model.fit(X_train, y_train , validation_split=0.2, epochs=epoch, shuffle=False, batch_size=batch, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x285be150188>

In [34]:
# Evaluate the model
model.evaluate(X_test, y_test, return_dict=True)



{'loss': 0.008362788707017899, 'mse': 0.008362788707017899, 'accuracy': 0.0}

In [36]:
# Make some predictions
predicted = model.predict(X_test)

print(predicted)

[[[0.6051835 ]
  [0.6841973 ]
  [0.6953402 ]
  ...
  [0.6277708 ]
  [0.62514496]
  [0.636617  ]]

 [[0.59828806]
  [0.6960156 ]
  [0.6870638 ]
  ...
  [0.62531334]
  [0.63648844]
  [0.6610455 ]]

 [[0.6121728 ]
  [0.6873703 ]
  [0.64638937]
  ...
  [0.63664234]
  [0.66091776]
  [0.63237745]]

 ...

 [[0.64849436]
  [0.73908913]
  [0.73759896]
  ...
  [0.72038996]
  [0.72041124]
  [0.70857686]]

 [[0.6512548 ]
  [0.73892033]
  [0.7498632 ]
  ...
  [0.72065544]
  [0.7085407 ]
  [0.7087021 ]]

 [[0.6506965 ]
  [0.7515569 ]
  [0.7675472 ]
  ...
  [0.7087946 ]
  [0.7086604 ]
  [0.6994872 ]]]


In [None]:
# Recover the original prices instead of the scaled version
predicted_prices = Y_scaler.inverse_transform(predicted)
real_prices = Y_scaler.inverse_transform(y_test.reshape(-1, 1))

In [None]:
# Create a DataFrame of Real and Predicted values
stocks = pd.DataFrame({
    "Actual": real_prices.ravel(),
    "Predicted": predicted_prices.ravel()
}, index = data_price.index[-len(real_prices): ]) 

# Show the DataFrame's head
stocks.tail()

In [None]:
stocks.plot(title=f"Actual Vs. Predicted {ticker} Prices");