In [54]:
import numpy as np
import pandas as pd
import hvplot.pandas
import tensorflow as tf
from pathlib import Path

# API import
import requests
import json


In [118]:
ticker = "ETH"
frequency = "1hour"
epoch = "1620360000"
response_data = requests.get(f'https://api.kucoin.com/api/v1/market/candles?type={frequency}&symbol={ticker}-USDT&startAt={epoch}&endAt=0&limit=10000').json()

In [119]:
df = pd.DataFrame(response_data['data'])

df[0] = pd.to_datetime(df[0], unit='s')
df.columns =['Date', 'Opening price',  'Closing price' , 'Highest price' , 'Lowest price' , 'Transaction volume' ,  'Transaction amount']
df.sort_values(by=['Date'], inplace=True)
df.set_index('Date', inplace=True)
df = df.astype(float)

df.tail()


Unnamed: 0_level_0,Opening price,Closing price,Highest price,Lowest price,Transaction volume,Transaction amount
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-20 20:00:00,3215.74,3195.28,3219.04,3184.0,3806.383624,12177060.0
2022-01-20 21:00:00,3195.29,3080.74,3197.26,3080.04,8957.291509,28069910.0
2022-01-20 22:00:00,3080.77,3061.41,3106.49,3022.74,8042.983697,24703890.0
2022-01-20 23:00:00,3061.41,3000.95,3061.81,2986.97,8846.349723,26698460.0
2022-01-21 00:00:00,3000.41,2986.14,3008.73,2981.42,677.058938,2028140.0


In [55]:
# API
ticker = "ETH"
limit ="2000"

price_url = f"https://min-api.cryptocompare.com/data/v2/histoday?fsym={ticker}&tsym=USD&limit=2000"

price_response_data = requests.get(price_url).json()
data_price = pd.DataFrame(price_response_data["Data"]["Data"])

data_price.head()

Unnamed: 0,time,high,low,open,volumefrom,volumeto,close,conversionType,conversionSymbol
0,1469836800,657.55,652.06,655.43,19354.41,12721755.54,654.74,direct,
1,1469923200,654.92,621.42,654.74,62887.74,39918066.14,621.87,direct,
2,1470009600,627.89,603.55,621.87,66660.27,41229298.83,607.0,direct,
3,1470096000,613.75,471.36,607.0,130913.66,75338595.54,513.43,direct,
4,1470182400,573.06,514.09,513.43,85930.35,47170751.56,566.44,direct,


In [56]:
# Transform to DataFrame
data_price.drop(columns = ["volumefrom" , "volumeto" , "conversionType" , "conversionSymbol"], inplace =True)

data_price.columns =['Date', 'Highest price',  'Lowest price' ,  'Opening price' , 'Closing price']
data_price['Date'] = pd.to_datetime(data_price['Date'], unit='s')

data_price.set_index('Date', inplace=True)

data_price.head()

Unnamed: 0_level_0,Highest price,Lowest price,Opening price,Closing price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-07-30,657.55,652.06,655.43,654.74
2016-07-31,654.92,621.42,654.74,621.87
2016-08-01,627.89,603.55,621.87,607.0
2016-08-02,613.75,471.36,607.0,513.43
2016-08-03,573.06,514.09,513.43,566.44


In [74]:
# Select only dates starting end 2019, because the price curve is distincly different
df = data_price.loc["2018-1-1" : "2020-02-27"].drop(columns = ["Highest price","Lowest price", "Opening price"])
df.tail()

Unnamed: 0_level_0,Closing price
Date,Unnamed: 1_level_1
2020-02-23,9977.39
2020-02-24,9664.21
2020-02-25,9316.18
2020-02-26,8793.5
2020-02-27,8821.5


In [115]:
# This function accepts the column number for the features (X) and the target (y)
# It chunks the data up with a rolling window of Xt-n to predict Xt
# It returns a numpy array of X any y
def window_data(df, window, feature_col_number, target_col_number):
    X = []
    y = []
    for i in range(len(df) - window - 1):
        features = df.iloc[i:(i + window), feature_col_number]
        target = df.iloc[(i + window), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

In [134]:
# Predict Closing Prices using a 10 day window of previous closing prices
# Then, experiment with window sizes anywhere from 1 to 10 and see how the model performance changes
window_size = 10

# Column index 3 is the `Close` column
feature_column = 1
target_column = 1
X, y = window_data(df, window_size, feature_column, target_column)

In [135]:
# Use 70% of the data for training and the remaineder for testing
split = int(0.7 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]

In [136]:
# Use the MinMaxScaler to scale data between 0 and 1.
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScaler object
X_scaler = MinMaxScaler()
Y_scaler = MinMaxScaler()
# Fit the MinMaxScaler object with the training feature data X_train
X_scaler.fit(X_train)

# Scale the features training and testing sets
X_train = X_scaler.transform(X_train)
X_test = X_scaler.transform(X_test)

# Fit the MinMaxScaler object with the training target data y_train
Y_scaler.fit(y_train)

# Scale the target training and testing sets
y_train = Y_scaler.transform(y_train)
y_test = Y_scaler.transform(y_test)


In [137]:
# Reshape the features for the model
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

---

## Build and Train the LSTM RNN

In this section, you will design a custom LSTM RNN and fit (train) it using the training data.

You will need to:
1. Define the model architecture
2. Compile the model
3. Fit the model to the training data

### Hints:
You will want to use the same model architecture and random seed for both notebooks. This is necessary to accurately compare the performance of the FNG model vs the closing price model. 

In [138]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [139]:
# Build the LSTM model. 
# The return sequences need to be set to True if you are adding additional LSTM layers, but 
# You don't have to do this for the final layer. 
# Note: The dropouts help prevent overfitting
# Note: The input shape is the number of time steps and the number of indicators
# Note: Batching inputs has a different input shape of Samples/TimeSteps/Features

model = Sequential()

number_units = window_size
dropout_fraction = 0.2

# Layer 1
model.add(LSTM(
    units=3*number_units,
    return_sequences=True,
    input_shape=(X_train.shape[1], 1))
    )
model.add(Dropout(dropout_fraction))

# Layer 2
model.add(LSTM(units=number_units, return_sequences=True))


# Layer 3
model.add(LSTM(units=number_units))
model.add(Dropout(dropout_fraction))

# Output layer
model.add(Dense(1))


In [140]:
# Compile the model
model.compile(optimizer="adam", loss="mean_squared_error")

In [141]:
# Summarize the model
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_18 (LSTM)              (None, 10, 30)            3840      
                                                                 
 dropout_12 (Dropout)        (None, 10, 30)            0         
                                                                 
 lstm_19 (LSTM)              (None, 10, 10)            1640      
                                                                 
 lstm_20 (LSTM)              (None, 10)                840       
                                                                 
 dropout_13 (Dropout)        (None, 10)                0         
                                                                 
 dense_6 (Dense)             (None, 1)                 11        
                                                                 
Total params: 6,331
Trainable params: 6,331
Non-traina

In [142]:
# Train the model
# Use at least 10 epochs
# Do not shuffle the data
# Experiement with the batch size, but a smaller batch size is recommended
batch = 1 
epoch = 10

model.fit(X_train, y_train , validation_split=0.2, epochs=epoch, shuffle=False, batch_size=batch, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x23995f9a3c8>

---

## Model Performance

In this section, you will evaluate the model using the test data. 

You will need to:
1. Evaluate the model using the `X_test` and `y_test` data.
2. Use the X_test data to make predictions
3. Create a DataFrame of Real (y_test) vs predicted values. 
4. Plot the Real vs predicted values as a line chart

### Hints
Remember to apply the `inverse_transform` function to the predicted and y_test values to recover the actual closing prices.

In [143]:
# Evaluate the model
model.evaluate(X_test, y_test, return_dict=True)



{'loss': 0.32202884554862976}

In [144]:
# Make some predictions
prediction =  model.predict(X_test)

In [145]:
# Recover the original prices instead of the scaled version
predicted_prices = Y_scaler.inverse_transform(prediction)
real_prices = Y_scaler.inverse_transform(y_test.reshape(-1, 1))

In [146]:
# Create a DataFrame of Real and Predicted values
stocks = pd.DataFrame({
    "Real": real_prices.ravel(),
    "Predicted": predicted_prices.ravel()
}, index = df.index[-len(real_prices): ]) 
stocks.head()

Unnamed: 0_level_0,Real,Predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-01-02 10:00:00,3753.94,3975.753418
2022-01-02 11:00:00,3743.55,3976.384277
2022-01-02 12:00:00,3743.67,3976.854248
2022-01-02 13:00:00,3754.23,3976.972412
2022-01-02 14:00:00,3744.96,3977.106445


In [147]:
# Plot the real vs predicted values as a line chart
stocks.hvplot()