In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
import pandas as pd
import json
from datetime import datetime
import numpy as np
from fbprophet import Prophet
import pickle
import math

# Read Bitstamp Data

First, let's read in the data from Kaggle.

In [4]:
# Load the Bitstamp 1-minute CSV (path is relative to the notebook's working directory).
df = pd.read_csv(
    filepath_or_buffer='bitcoin-historical-data/bitstampUSD_1-min_data_2012-01-01_to_2019-03-13.csv'
)

In [5]:
# Peek at the last five rows to see where the data ends.
df.tail(5)

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
3778812,1552434960,,,,,,,
3778813,1552435020,,,,,,,
3778814,1552435080,3860.09,3861.05,3860.09,3861.05,0.378637,1461.770077,3860.606378
3778815,1552435140,3860.18,3860.18,3859.74,3859.74,1.056403,4077.863045,3860.13908
3778816,1552435200,3861.37,3862.01,3861.37,3861.95,0.198124,765.149137,3861.967464


In [6]:
# Convert the epoch-second `Timestamp` column to datetimes in one vectorized
# call. Unlike a per-row datetime.fromtimestamp, this does not depend on the
# machine's local timezone, so the result is the same on every machine and no
# wall-clock times repeat at daylight-saving transitions.
df['ds'] = pd.to_datetime(df['Timestamp'], unit='s')

# Index by time while keeping `ds` as a regular column for later selection.
df = df.set_index('ds', drop=False)

# Train Model Using Prophet

Next, let's take the log of weighted price and train our model using that feature.

In [7]:
# Target column: natural log of the weighted price.
# Rows with a missing Weighted_Price stay NaN in `y`.
df['y'] = df['Weighted_Price'].pipe(np.log)

In [8]:
# Keep only rows from 2016-01-01 onward for training.
df_subset = df.loc[df['ds'] >= pd.Timestamp('2016-01-01')]

In [9]:
# Create the Prophet model; no arguments are passed, so library defaults apply.
m = Prophet()

In [10]:
%time m.fit(df_subset[['ds','y']]);

  elif np.issubdtype(np.asarray(v).dtype, float):


CPU times: user 1h 23min 40s, sys: 4min 24s, total: 1h 28min 5s
Wall time: 1h 47min 25s


<fbprophet.forecaster.Prophet at 0x11a416f60>

Only run the cell below if you want to save a **NEW** model; it overwrites any existing `btc_model.pickle`.

In [None]:
# Persist the fitted model `m` to disk. Opening with 'wb' replaces any existing
# btc_model.pickle. The `with` block closes the file on exit, so no explicit
# close() call is needed.
with open('btc_model.pickle', 'wb') as pickle_file:
    pickle.dump(m, pickle_file)

Run the cell below instead if you have already trained and saved the model.

In [None]:
# SECURITY: unpickling can execute arbitrary code. Only load a
# btc_model.pickle that you created yourself or otherwise trust.
with open('btc_model.pickle', 'rb') as pickle_file:
    # Bind `m` only once loading has succeeded, so a failed load cannot
    # replace an in-memory model with an intermediate Unpickler object.
    m = pickle.load(pickle_file)

# Generate Prediction

Now, let's predict future prices using the model we just trained.

In [None]:
# Build the frame of timestamps to forecast: 150000 steps at one-minute
# frequency, with the training-history rows left out.
future = m.make_future_dataframe(
    periods=150000,
    freq='1min',
    include_history=False,
)
future.tail(5)

In [None]:
# Run the model over the `future` frame, timing the call; the forecast is kept in `fcst`.
%time fcst = m.predict(future)

We can use our forecast (`fcst`) to plot the prediction we just made:

In [None]:
# Plot the forecast; the trailing semicolon suppresses the returned object's repr.
m.plot(fcst);

Now let's convert our `log(price)` forecast back to a price and label each minute as a buy or a sell minute.

In [None]:
# Undo the log transform applied to the training target. np.exp works on the
# whole column at once instead of calling math.exp row by row from Python.
fcst['yhat_exp'] = np.exp(fcst['yhat'])

In [None]:
# Select the rows after 2018-01-01 and the two columns of interest in a single
# .loc call, then take an explicit copy so that adding columns to `output`
# later does not raise SettingWithCopyWarning or touch `fcst`.
# NOTE(review): the future frame was built with include_history=False, so this
# date filter presumably keeps every forecast row; confirm it is still wanted.
output = fcst.loc[fcst['ds'] > '2018-01-01', ['ds', 'yhat_exp']].copy()

In [None]:
# Minute-over-minute change in predicted price (the first row has no
# predecessor, so its delta is NaN).
output['delta'] = output['yhat_exp'].diff(periods=1)

In [None]:
# Copy the filtered rows so that the column added to `output` afterwards is
# written to an independent frame rather than to a slice
# (avoids SettingWithCopyWarning).
# NOTE(review): `output` was already restricted to ds > '2018-01-01' when it
# was built, so this earlier cutoff removes no further rows when the cells run
# in order; confirm whether it is still needed.
output = output.loc[output['ds'] > '2017-12-25'].copy()

In [None]:
def choose_side(x):
    """Label a price change as a trade side.

    Parameters
    ----------
    x : number
        Change in predicted price relative to the previous row.

    Returns
    -------
    str
        'buy' when ``x`` is strictly positive; 'sell' for everything else,
        including zero and NaN (``NaN > 0`` evaluates to False).
    """
    return 'buy' if x > 0 else 'sell'

In [None]:
# Classify every minute from its price delta; choose_side is passed directly
# as the element-wise callable.
output['side'] = output['delta'].apply(choose_side)

We can count the number of buy vs. sell minutes with:

In [None]:
# Number of rows (minutes) labelled with each side.
output.groupby(by='side')['ds'].count()

# Store Output

Save file to CSV

In [None]:
# Write only the timestamp and the buy/sell label, without the index column.
output.to_csv(
    '2018_full_trade_forecast.csv',
    columns=['ds', 'side'],
    index=False,
)

# Analyze Output

Generate a components plot

In [None]:
# Forecast rows strictly after midnight on 2018-01-01, used for the components plot.
fcst_sampled = fcst.loc[fcst['ds'] > '2018-01-01 00:00:00']

In [None]:
# Draw the component plots for the sampled forecast; keep the returned object in `a`
# so it can be saved to a file afterwards.
a = m.plot_components(fcst_sampled)

You can save the components plot to a file if you'd like

In [None]:
# Write the components plot held in `a` to components_plot.png at 200 dpi.
a.savefig('components_plot.png', dpi=200)