In [7]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)  # ignore warnings
import os
import pandas as pd
import yfinance as yf
import numpy as np
import pickle
import errno
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam

%run helper_functions.ipynb

# Get Data

### Historical Data and Relevant Features
The data is always set up as a DataFrame in which the first column is the price, the second is the return, and the remaining columns are explanatory variables.
From data exploration we know that the most relevant features for predicting BTC returns are the distances between the BTC price and its moving averages.

In [8]:
# Pull BTC-USD history via the helper and keep only the close, under a clearer name.
hist = (
    get_yfinance_data('BTC-USD')[['Close']]
    .rename(columns={'Close': 'Btc Close'})
)

# Period-over-period return of the close (first row is NaN by construction).
hist['Btc Returns'] = hist['Btc Close'].pct_change()

# One feature per window: current close minus the lagged moving average of the close.
windows = [3, 5, 7, 10, 20, 50, 100, 200]  # moving-average window lengths
for window in windows:
    lagged_avg = moving_average(hist['Btc Close'], window=window, lag=1)
    hist[f'Btc Close - Lagged {window}d Avg'] = hist['Btc Close'] - lagged_avg

### Data cleaning and transformation
- Remove NA's and INF's
- Normalize (TODO: not applied in this notebook yet)

In [9]:
# Drop every row that cannot be used for modelling.
# dropna() alone only removes NaN; +/-inf values (e.g. a return computed from a
# zero previous close) would survive it. Convert them to NaN first so that both
# kinds of invalid value are removed in one pass.
hist = hist.replace([np.inf, -np.inf], np.nan).dropna()

# Model

### Train/Test split

In [10]:
# Features: every column after the price (column 0) and the return (column 1).
x = hist[hist.columns[2:]].values
# Target: the return column.
y = hist[hist.columns[1]].values

# Warn if there's any data point in the features or the target with NA or INF.
# np.isfinite is False for NaN as well as for +/-inf, so a single check per
# array covers both cases; the warning fires if EITHER array is affected.
if not (np.all(np.isfinite(x)) and np.all(np.isfinite(y))):
    print('Your data has NAs or INFs, could fail with some models')

# Divide data between training (80%) and testing (20%); fixed seed for reproducibility.
# NOTE(review): train_test_split shuffles rows by default, so test rows are
# interleaved in time with training rows — confirm this is intended for a time series.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)

### Train Model

We train an ordinary least-squares linear regression as the model. A neural network could later be swapped in to take advantage of nonlinear interactions between features.

In [11]:
# Fit an ordinary least-squares linear regression on the training split.
model = LinearRegression()
model.fit(x_train, y_train)

### Model metrics
Evaluate the fitted model on the held-out test set: directional (up/down) accuracy, MAE, MSE and R^2.

In [12]:
# Predict returns for the held-out rows, then print the helper's evaluation report.
pred = model.predict(x_test)
# NOTE(review): the meaning of the 0.001 argument is defined in helper_functions.ipynb — confirm.
model_metrics(pred, y_test, 0.001)

Testing set size: 429
Model guessed up or down movement correctly 0.6107226107226107%
mae: 0.02008956663892932
mse: 0.0009767436074974122
R^2: 0.31595344345533405


In [13]:
# Persist the fitted estimator (the whole model object, not just its weights).
# The context manager guarantees the file is flushed and closed even if
# pickling raises, instead of leaking the handle returned by open().
with open('model_weights.sav', 'wb') as model_file:
    pickle.dump(model, model_file)