In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Set the random seed
from numpy.random import seed
seed(1)
from tensorflow import random
random.set_seed(2)

In [3]:
# Load the fear and greed sentiment data for Bitcoin
df = pd.read_csv('btc_sentiment.csv', index_col="date", infer_datetime_format=True, parse_dates=True)
df = df.drop(columns="fng_classification")

# Load the historical closing prices for bitcoin
df2 = pd.read_csv('btc_historic.csv', index_col="Date", infer_datetime_format=True, parse_dates=True)['Close']
df2 = df2.sort_index()

# Concatenate data 
df = df.join(df2, how='inner')
df.head()

Unnamed: 0,fng_value,Close
2018-02-01,30,9114.719727
2018-02-02,15,8870.820313
2018-02-03,40,9251.269531
2018-02-04,24,8218.049805
2018-02-05,11,6937.080078


In [4]:
# This function accepts the column number for the features (X) and the target (y)
# It chunks the data up with a rolling window of Xt-n to predict Xt
# It returns a numpy array of X any y
def window_data(df, window, feature_col_number, target_col_number):
    X = []
    y = []
    for i in range(len(df) - window - 1):
        features = df.iloc[i:(i + window), feature_col_number]
        target = df.iloc[(i + window), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

In [5]:
# Predict closing prices using provided window size
def set_X_y(df, window, feature_col, target_col):
    
    X, y = window_data(df, window, feature_col, target_col)
    return X, y

In [6]:
# Use 70% of the data for training and the remaineder for testing
def split_X_y(X, y):
    
    split = int(.7 * len(X))
    X_train = X[:split-1]
    X_test = X[split:]
    y_train = y[:split-1]
    y_test = y[split:]
    
    return X_train, X_test, y_train, y_test

In [7]:
# Use MinMaxScaler to scale the data between 0 and 1
def scale_X_y(X, y, X_train, X_test, y_train, y_test):
    
    mmscaler = MinMaxScaler()
    mmscaler.fit(X)
    X_train = mmscaler.transform(X_train)
    X_test = mmscaler.transform(X_test)
    mmscaler.fit(y)
    y_train = mmscaler.transform(y_train)
    y_test = mmscaler.transform(y_test)
    
    # Reshape the features for the model
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
    
    return X_train, X_test, y_train, y_test

In [8]:
# Build the LSTM model
def build_compile_model(X_train, number_units):
    
    model = Sequential()
    
    dropout_fraction = 0.2
    
    # Layer 1
    model.add(LSTM(
        units=number_units,
        return_sequences=True,
        input_shape=(X_train.shape[1], 1))
            )
    model.add(Dropout(dropout_fraction))
    
    # Layer 2
    model.add(LSTM(
        units=number_units,
        return_sequences=True)
             )
    model.add(Dropout(dropout_fraction))
    
    # Layer 3 
    model.add(LSTM(
        units=number_units)
             )
    model.add(Dropout(dropout_fraction))
    
    # Output layer
    model.add(Dense(1))
    
    # Compiple the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    return model    

In [9]:
def find_best_model(predictor):
    
    # Logic to use closing price as predictor
    if predictor == 'close':
        feature_col = 1
    
    # Logic to use Fear and Greed score as predictor
    if predictor == 'fng':
        feature_col = 0
    
    target_col = 1
    
    # Try a window size from 1 to 10 and see how performace changes
    for window in range(1, 11):
        
        # Set X and y
        X, y = set_X_y(df, window, feature_col, target_col)
        
        # Split into train and test data
        X_train, X_test, y_train, y_test = split_X_y(X, y)
        
        # Scale the data between 1 and 0 and reshape features
        X_train, X_test, y_train, y_test = scale_X_y(X, y, X_train, X_test, y_train, y_test)
        
        # Build and compiple the LSTM model with various number of units 
        for number_units in range(3, 9):
            
            model = build_compile_model(X_train, number_units)
            
            # Train the model with various batch size
            for batch_size in range(1, 3):
                
                model.fit(X_train, y_train, epochs=10, shuffle=False, batch_size=1, verbose=1)
                
                # Evaluate the model performance
                best_score = 1
                
                score = model.evaluate(X_test, y_test)
                
                # Logic to determine best score
                if score < best_score:
                    best_score = score
                    best_model = model 
                    best_window = window
                    best_number_units = number_units
                    
    return best_score, best_model, best_window, best_number_units

In [10]:
close_best_score, close_best_model, close_best_window, close_best_number_units = find_best_model('close')

Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


In [11]:
fng_best_score, fng_best_model, fng_best_window, fng_best_number_units = find_best_model('fng')

Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train on 377 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


In [15]:
# Print summary for FNG as predictor
print(f"FNG best score: {fng_best_score}, best window size: {fng_best_window}, best number of units: {fng_best_number_units} \n")

# Print summary for Close as predictor
print(f"Close best score: {close_best_score}, best window size: {close_best_window}, best number of units: {close_best_number_units}")

FNG best score: 0.10816842541098595, best window size: 10, best number of units: 8 

Close best score: 0.02607108948286623, best window size: 10, best number of units: 8
