Install and import packages

In [1]:
!pip install fbprophet

import datetime
import io
import requests

from fbprophet import Prophet
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from statsmodels.tsa.arima_model import ARIMA
from tqdm.notebook import tqdm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


  import pandas.util.testing as tm


Read prepared dataframe of hourly btc data from polygon (api)

In [2]:
df = pd.read_csv('btc.csv', index_col=0)
df

Unnamed: 0,volume,volume weighted,open price,close price,high price,low price,timestamp,number
0,1853.726683,58263.4399,58049.000000,58632.00,58710.000000,57800.00,2021-03-20 00:00:00,31574
1,671.137374,58658.3421,58636.000000,58735.64,58887.410000,58491.92,2021-03-20 01:00:00,21873
2,1171.039574,58668.4997,58751.000000,58551.42,58916.010000,58421.60,2021-03-20 02:00:00,20182
3,828.101230,58385.3884,58551.420000,58390.00,58615.000000,58187.37,2021-03-20 03:00:00,19970
4,943.265974,58396.8700,58373.211671,58231.97,58631.940000,58153.00,2021-03-20 04:00:00,18637
...,...,...,...,...,...,...,...,...
2340,639.819480,32778.6041,32725.920000,32861.00,32989.000000,32576.90,2021-06-27 19:00:00,13085
2341,630.101561,32788.5058,32823.500000,32633.23,32983.000000,32620.00,2021-06-27 20:00:00,12818
2342,1153.339324,32609.5134,32635.440000,32862.40,32885.218314,32425.77,2021-06-27 21:00:00,16810
2343,5395.979243,34002.6519,32869.000000,34422.00,34639.360000,32820.25,2021-06-27 22:00:00,56107


Plot close price

In [3]:
fig = px.line(df, x="timestamp", y="close price", title='Close price change over time')
fig.show()

Divivde data into train (80%) and test (20%)

In [4]:
# test is 20 percent of the data
train_n = int(df.shape[0] * 0.8)
train = df[:train_n].copy()
test = df[train_n:].copy()

Enter models' paramaters to be later passed to models' instances

In [5]:
models = {
    'ARMA': {'order': (5,0,2)}, 
    'ARIMA': {'order': (6,1,3)},
    'Prophet': {'growth': 'linear', 'changepoint_prior_scale': 0.7}}

Forecast close price using Prophet; Determine SMA/LMA

In [6]:
def ph_method(mdl_params, train, test):

  print('Time Series Forecasting: Prophet')
  ph = Prophet(**mdl_params)

  phdf = train[['timestamp', 'close price']]
  phdf.columns = ['ds', 'y']

  ph.fit(phdf)

  future = ph.make_future_dataframe(periods=len(test), freq='H', include_history=False)

  pred = ph.predict(future)

  wind_df = pd.DataFrame(list(train['close price']) + list(pred['yhat']), columns=['pred'])

  wind_df['SMA'] = wind_df['pred'].rolling(window=10,center=False).mean()
  wind_df['LMA'] = wind_df['pred'].rolling(window=50,center=False).mean()

  return [list(wind_df['SMA'][-len(test):]), list(wind_df['LMA'][-len(test):]), list(pred['yhat'])]

Forecast close price using ARMA/ARIMA; Determine close price on some interval and LMA

In [7]:
def arma_method(mdl_params, train, test, interval_size=5, name='Unknown'):

  obs = []
  pred = []
  train_ext = train.copy()

  for k in tqdm(range(0, len(test), interval_size), desc=f'Time Series Forecasting: {name}'):

    mdl = ARIMA(train_ext['close price'], **mdl_params)
    fmdl = mdl.fit()

    if k + interval_size > len(test):
      output = fmdl.forecast(len(test) - k)
      addit = test[k:].copy()
    else:
      addit = test[k:k+interval_size].copy()
      output = fmdl.forecast(interval_size)

    obs.append(list(output[0]))
    pred += list(output[0])
    train_ext = train_ext.append(addit).reset_index(drop=True)
  if len(obs) == 1:
    obs = obs[0]
  wind_df = pd.DataFrame(list(train['close price']) + list(pred), columns=['pred'])
  wind_df['LMA'] = wind_df['pred'].rolling(window=90,center=False).mean()
  return obs, list(wind_df['LMA'][-len(test):])

Use above methods and plot predictions with their LMAs/SMAs

In [8]:
results = {}
for key, mdl_params in models.items():
  if 'AR' in key and 'MA' in key:
    rst, lr = arma_method(mdl_params, train, test, name=key, interval_size=24)
    res = {'intervals': rst, 
           'long run': lr}
  else:
    res = ph_method(mdl_params, train, test)

  

  pls = []
  rls = []
  nm = None
  if type(res) == dict:
    nm = 'Prediction_interval '
    rps = res['long run']
    for k in res['intervals']:
      pls += k
    rls = list(res['long run'].copy())
  else:
    nm = 'Prediction '
    pls = res[-1]
    rls = res[-2]
  fig = go.Figure()
  fig.add_traces(go.Scatter(x=df['timestamp'], y=df['close price'], name='Actual'))
  fig.add_traces(go.Scatter(x=test['timestamp'], y=pls, name=nm))

  if type(res) != dict:
    fig.add_traces(go.Scatter(x=test['timestamp'], y=res[0], name='SMA'))

  fig.add_traces(go.Scatter(x=test['timestamp'], y=rls, name='LMA'))
  fig.update_layout(
    title=f"{key}: Actual VS Prediction",
    xaxis_title="timestamp",
    yaxis_title="close price",
)
  fig.show()
  results[key] = res

Time Series Forecasting: ARMA:   0%|          | 0/20 [00:00<?, ?it/s]

Time Series Forecasting: ARIMA:   0%|          | 0/20 [00:00<?, ?it/s]

Time Series Forecasting: Prophet


INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.


Determine max profit for a time series (buy/sell 1 stock/option)

In [9]:
def get_profit(prices, ac_prices=None):
    if not ac_prices:
      ac_prices = prices[:]
    profit = 0
    for i in range(1, len(prices)):
        if prices[i] > prices[i-1]:
            profit += ac_prices[i] - ac_prices[i-1]
    return profit

Determine buy/sell signals and the profit from sending these signals using intervals determined by ARMA/ARIMA and LMA

In [10]:
def sr_lr_forecast_2(sr_vals, lr_vals, ac_prices):
  buys = []
  sells = []
  rev = 0
  exp = 0
  for k in range(len(sr_vals)):
    if sr_vals[k][-1] > lr_vals[k * len(sr_vals[0]) + len(sr_vals[k]) - 1] and sr_vals[k][0] < lr_vals[k * len(sr_vals[0])]:
        buys.append(k * len(sr_vals[k]))
    elif sr_vals[k][-1] < lr_vals[k * len(sr_vals[0]) + len(sr_vals[k]) - 1] and sr_vals[k][0] > lr_vals[k * len(sr_vals[0])] and len(buys) > len(sells):
        sells.append(k * len(sr_vals[k]))
  for k in range(len(sells)):
    rev += ac_prices[sells[k]]
    exp += ac_prices[buys[k]]
  profit = rev - exp
  return [profit, buys, sells]

Determine buy/sell signals and the profit from sending these signals using SMA/LMA crossover strategy for Prophet

In [11]:
def sr_lr_forecast(sr_vals, lr_vals, ac_prices):
  buys = []
  sells = []
  rev = 0
  exp = 0
  for k in range(1, len(sr_vals)):
    if sr_vals[k] > lr_vals[k] and sr_vals[k-1] < lr_vals[k-1]:
        buys.append(k)
    elif sr_vals[k] < lr_vals[k] and sr_vals[k-1] > lr_vals[k-1] and len(buys) > len(sells):
        sells.append(k)
  for k in range(len(sells)):
    rev += ac_prices[sells[k]]
    exp += ac_prices[buys[k]]
  profit = rev - exp

  return [profit, buys, sells]

Use above methods and evaluate the given metrics
Draw corresponding graphs

In [12]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
def smape(A, F):
  return 100/len(A) * np.sum(np.abs(F - A) / (np.abs(A) + np.abs(F)))

mp = get_profit(list(test['close price']))

print(f'Max profit:{mp}\n')

metrics = {'sMAPE': smape, 'MAE': mean_absolute_error, 'MSE': mean_squared_error, 'MAPE': mean_absolute_percentage_error}
closes = list(test['close price'])
for key, vals in results.items():  
  
  if 'Proph' not in key:
    prof, buys, sells = sr_lr_forecast_2(vals['intervals'], vals['long run'], list(test['close price']))
  else:
    prof, buys, sells = sr_lr_forecast(vals[0], vals[1], list(test['close price']))
  fig = go.Figure()
  fig.add_traces(go.Scatter(
      x=df['timestamp'],
      y=df['close price'],
      name='close price'
))

  fig.add_trace(go.Scatter(
  x=[list(test['timestamp'])[k] for k in buys],
  y=[closes[k] for k in buys],
  marker=dict(color="green"),
  mode="markers",
  marker_symbol='triangle-up',
  marker_size=10,
  name='buy'
))
  
  fig.add_trace(go.Scatter(
  x=[list(test['timestamp'])[k] for k in sells],
  y=[closes[k] for k in sells],
  marker=dict(color="red"),
  mode="markers",
  marker_symbol='triangle-down',
  marker_size=10,
  name='sell'
))
  
  pls = []
  if type(vals) == dict:
    for k in vals['intervals']:
      pls += k
  else:
    pls = vals[0]
  
  fig.add_traces(go.Scatter(x=test['timestamp'], y=pls, name='Prediction', marker=dict(color="orange"),))
  fig.show()
  for name, func in metrics.items():
    metric = func(test['close price'], np.array(pls))
    if 'MAPE' in name:
      print(f'{key} - {name}:{metric:.2f}%')
    else:
      print(f'{key} - {name}:{metric:.2f}')
  metric = smape(test['close price'], np.array(pls))
  print(f'{key} - PROFIT:{prof}')

Max profit:64070.18475861003



ARMA - sMAPE:1.49%
ARMA - MAE:1053.02
ARMA - MSE:1811089.85
ARMA - MAPE:0.03%
ARMA - PROFIT:4139.800000000003


ARIMA - sMAPE:1.52%
ARIMA - MAE:1067.16
ARIMA - MSE:1865992.19
ARIMA - MAPE:0.03%
ARIMA - PROFIT:4139.800000000003


Prophet - sMAPE:2.82%
Prophet - MAE:2012.16
Prophet - MSE:6023289.41
Prophet - MAPE:0.06%
Prophet - PROFIT:5332.720000000001


Show metrics in a dataframe format

In [13]:
metrics_df = pd.DataFrame(index=list(results.keys()), columns=list(metrics.keys()) + ['PROFIT'])

for key, vals in results.items():    
  pls = []
  if 'Proph' not in key:
    prof, buys, sells = sr_lr_forecast_2(vals['intervals'], vals['long run'], list(test['close price']))
    for k in vals['intervals']:
      pls += k
  else:
    prof, buys, sells = sr_lr_forecast(vals[0], vals[1], list(test['close price']))
    pls = vals[0][:]
  
  
  entry = dict()
  for name, func in metrics.items():
    metric = func(test['close price'], np.array(pls))
    entry[name] = f'{metric:.3f}'

  metric = smape(test['close price'], np.array(pls))
  entry['PROFIT'] = f'{(prof):.3f}'
  metrics_df.loc[key] = entry

metrics_df

Unnamed: 0,sMAPE,MAE,MSE,MAPE,PROFIT
ARMA,1.486,1053.02,1811089.849,0.03,4139.8
ARIMA,1.517,1067.157,1865992.187,0.03,4139.8
Prophet,2.824,2012.159,6023289.414,0.057,5332.72
