In [1]:
%matplotlib inline
import sys
import errno
from fbprophet import Prophet
from fbprophet.plot import add_changepoints_to_plot
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import logging
logging.getLogger('fbprophet').setLevel(logging.ERROR)
import warnings
warnings.filterwarnings("ignore")

Global variables that sets the type of coin evaluated, prediction distance, the range the volatility is calculated from, and the total number of the total 

In [2]:
_coin_type = "bitcoin"
_forecast_distance=5
_volatility_range=21
_subset_period = 15
_number_subsets = 3
_change_flex = 0.5

Pulls the data from the appropriate csv file and saves it as a data frame

In [3]:
import requests

POLONIEX_OHLCV_BASEURL = 'https://poloniex.com/public?command=returnChartData&currencyPair='

def get_ohlcv_poloniex(pair='BTC_ETH', start=1435699200, end=9999999999, period=900):
    """
    returns ohlcv data for poloniex as pandas dataframe
    convert to unix timestamp using https://coderstoolbox.net/unixtimestamp/
    :param pair: str pair on poloniex
    :param start: int unix timestamp of beginning time
    :param end: int unix timestamp of ending time
    :param period: int candle width in seconds
    :return: pandas df of ohlcv data from poloniex for specified pair, times, and period
    """
    query = POLONIEX_OHLCV_BASEURL + pair + '&start=' + str(start) + '&end=' + str(end) + '&period=' + str(period)
    resp = requests.get(query)

    if resp.status_code != 200:
        raise requests.ApiError('GET /tasks/ {}'.format(resp.status_code))

    return pd.DataFrame(resp.json())

In [4]:
all_data = get_ohlcv_poloniex(pair='USDT_BTC', start=1483600958, end=9999999999, period=900)

In [5]:
# all_data = pd.read_csv("DataSources/" + _coin_type + "_hist.csv")

Prints frame data

In [6]:
all_data.describe()

Unnamed: 0,close,date,high,low,open,quoteVolume,volume,weightedAverage
count,59196.0,59196.0,59196.0,59196.0,59196.0,59196.0,59196.0,59196.0
mean,5824.77289,1510239000.0,5847.629526,5800.683728,5825.14758,71.02851,354622.5,5824.271555
std,3969.995954,15379700.0,3991.888425,3946.715239,3970.453571,111.719318,628424.8,3969.47445
min,756.0,1483601000.0,759.754,751.0,756.722973,0.0,0.0,755.019113
25%,2317.000833,1496920000.0,2327.0,2305.515729,2317.287125,10.287996,42871.37,2315.70375
50%,6175.69572,1510239000.0,6189.0,6160.0,6176.721392,35.60017,153119.1,6176.034566
75%,8183.475172,1523558000.0,8200.133759,8163.923119,8183.832008,86.317418,399464.6,8184.431215
max,19896.6873,1536877000.0,19903.440214,19780.6,19896.6873,2702.802346,17301620.0,19854.840682


Selects the appropriate amount of data from the full frame

In [7]:
_evaluated = [all_data[x*_subset_period:(x+1)*_subset_period] for x in range(_number_subsets)]

Pulls the dates and prices from the data and creates the log and volatility columns

In [8]:
dfs = [pd.DataFrame(zip(x.date , x.close), columns=['ds','price']) for x in _evaluated]
for x in range(_number_subsets):
    dfs[x]['log'] = np.log(dfs[x]['price'])
    dfs[x]['vol'] = dfs[x]['log'].rolling(_volatility_range).mean()

Shows statistics for the three columns

In [9]:
for df in dfs:
    print df.describe()

                 ds        price        log       vol
count  1.500000e+01    15.000000  15.000000  6.000000
mean   1.483608e+09  1109.255494   7.011156  7.008853
std    4.024922e+03    27.598496   0.024836  0.008731
min    1.483601e+09  1067.000000   6.972606  6.997553
25%    1.483605e+09  1086.900000   6.991085  7.002994
50%    1.483608e+09  1093.851000   6.997460  7.008392
75%    1.483611e+09  1136.696185   7.035881  7.014643
max    1.483614e+09  1146.628895   7.044582  7.020858
                 ds        price        log       vol
count  1.500000e+01    15.000000  15.000000  6.000000
mean   1.483621e+09  1000.989186   6.906807  6.897254
std    4.024922e+03    64.973126   0.064187  0.024667
min    1.483615e+09   921.120000   6.825590  6.866260
25%    1.483618e+09   948.700000   6.855085  6.880073
50%    1.483621e+09   974.597589   6.882025  6.895586
75%    1.483624e+09  1071.080706   6.976423  6.913642
max    1.483628e+09  1099.593509   7.002696  6.931528


Displays kurtosis for the three data sources

Kurtosis for log and vol is relatively low indicating that there aren't many extrema.

In [10]:
for df in dfs:
    print df.kurtosis()

ds      -1.200000
price   -1.646138
log     -1.635911
vol     -1.233750
dtype: float64
ds      -1.200000
price   -1.632971
log     -1.647914
vol     -1.262494
dtype: float64


Displays skew for the three data sources

The skew is positive meaning that that the the graph is asymmetric with a left weight

In [11]:
for df in dfs:
    print df.skew()

ds       0.000000
price    0.194915
log      0.176651
vol      0.134015
dtype: float64
ds       0.000000
price    0.468037
log      0.434948
vol      0.201862
dtype: float64


Defines a method to extract and forecast the specified data stream

In [None]:
def gen_prophets(field):
    to_return = []
    for df in dfs:
        m = Prophet(changepoint_prior_scale=_change_flex)
        df['y'] = df[field]
        m.fit(df)
        future = m.make_future_dataframe(periods=_forecast_distance)
        forecast = m.predict(future)
        forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
        to_return.append({"m":m, "f":forecast})
    return to_return

In [None]:
prices = gen_prophets('price')

In [None]:
for price in prices:
    fig = price['m'].plot(price['f'])
    a = add_changepoints_to_plot(fig.gca(), price['m'], price['f'])
    fig.show()

In [None]:
for price in prices:
    fig = price['m'].plot_components(price['f'],uncertainty=True)
    fig.show()

In [None]:
logs = gen_prophets('log')

In [None]:
for log in logs:
    fig = log['m'].plot(log['f'])
    a = add_changepoints_to_plot(fig.gca(), log['m'], log['f'])
    fig.show()

In [None]:
for log in logs:
    fig = log['m'].plot_components(log['f'],uncertainty=True)
    fig.show()

In [None]:
vols = gen_prophets('vol')

In [None]:
for vol in vols:
    fig = vol['m'].plot(vol['f'])
    a = add_changepoints_to_plot(fig.gca(), vol['m'], vol['f'])
    fig.show()

In [None]:
for vol in vols:
    fig = vol['m'].plot_components(vol['f'],uncertainty=True)
    fig.show()

Defines method to plot the data with a red line and blue points overlayed

In [None]:
def all_df(type):
    to_return = []
    for df in dfs:
        to_return.extend(df[type])
    return to_return

def gen_line(type):
    plt.plot(range(_subset_period*_number_subsets),list(all_df(type)),color='red',linewidth=2, zorder=1)
    plt.scatter(range(_subset_period*_number_subsets), list(all_df(type)), s=40, zorder=2)

In [None]:
plt.title(r'$Price\ vs.\ Date$', fontsize=20)
plt.xlabel('time (d)')
plt.ylabel('price ($)')
gen_line('price')
plt.show()

In [None]:
plt.title(r'$Log\  vs.\ Date$', fontsize=20)
plt.xlabel('time (d)')
plt.ylabel('log ($)')
gen_line('log')
plt.show()

In [None]:
plt.title(r'$Volatility\  vs.\ Date$', fontsize=20)
plt.xlabel('time (d)')
plt.ylabel('volatility ($)')
gen_line('vol')
plt.show()