<a href="https://colab.research.google.com/github/sevaroy/Prophet-playbook/blob/main/Prophet_ETC_20220430.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install tqdm
!pip install pandas
!pip install prophet
!pip install yfinance
!pip install plotly
!pip install datetime
!pip install numpy

Collecting prophet
  Downloading prophet-1.0.1.tar.gz (65 kB)
[K     |████████████████████████████████| 65 kB 2.4 MB/s 
Collecting cmdstanpy==0.9.68
  Downloading cmdstanpy-0.9.68-py3-none-any.whl (49 kB)
[K     |████████████████████████████████| 49 kB 2.8 MB/s 
Collecting ujson
  Downloading ujson-5.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (45 kB)
[K     |████████████████████████████████| 45 kB 1.6 MB/s 
Building wheels for collected packages: prophet
  Building wheel for prophet (setup.py) ... [?25l[?25hdone
  Created wheel for prophet: filename=prophet-1.0.1-py3-none-any.whl size=6640491 sha256=99d7744f0b85383d159ebb61609d280dafd8c2ff61810889293a4118b9fb0dbd
  Stored in directory: /root/.cache/pip/wheels/4e/a0/1a/02c9ec9e3e9de6bdbb3d769d11992a6926889d71567d6b9b67
Successfully built prophet
Installing collected packages: ujson, cmdstanpy, prophet
  Attempting uninstall: cmdstanpy
    Found existing installation: cmdstanpy 0.9.5
    Uninstalling cmdstanpy-0.9

In [2]:
# Libraries
from tqdm import tqdm
import pandas as pd
from prophet import Prophet
import yfinance as yf
from datetime import datetime, timedelta
import plotly.express as px
import numpy as np

In [3]:
# Time periods
now = datetime.now()

# How far back to retrieve
ago = now - timedelta(days=730)

# Designating the Ticker
crypto = yf.Ticker("ETC-USD")

# Getting price history
df = crypto.history(start=ago.strftime("%Y-%m-%d"), end=now.strftime("%Y-%m-%d"), interval="1d")

# Prophet does not accept timezone-aware dates; drop the timezone if the
# downloaded index carries one.
if getattr(df.index, "tz", None) is not None:
    df.index = df.index.tz_localize(None)

# Handling missing data from yahoo finance: rebuild a gap-free daily index that
# spans the first to the last returned date, then forward-fill the gaps.
# (Sizing the range by the number of rows would cut off the newest rows
# whenever some days are missing.)
if not df.empty:
    df = df.reindex(
        pd.date_range(df.index.min(), df.index.max(), freq="D", name=df.index.name)
    ).ffill()

In [4]:
# 5-day moving average of the opening price, rounded to two decimals
df['MA'] = df['Open'].rolling(window=5).mean().round(2)

# Drop every row that still holds a NaN (e.g. the rows before the first
# complete 5-day window)
df = df.dropna()

In [5]:
# Prophet expects the date column to be named 'ds' and the target 'y'
prophet_names = {"Date": "ds", "MA": "y"}
df = df.reset_index().rename(columns=prophet_names)
df

Unnamed: 0,ds,Open,High,Low,Close,Volume,Dividends,Stock Splits,y
0,2020-05-03,7.479534,7.603361,7.117666,7.226432,2920876669,0,0,6.72
1,2020-05-04,7.226930,7.320296,6.771815,7.242700,2925477275,0,0,6.93
2,2020-05-05,7.249679,7.481677,7.125103,7.191884,2657879225,0,0,7.02
3,2020-05-06,7.195827,7.305091,6.949167,6.949167,2229841906,0,0,7.17
4,2020-05-07,6.983987,7.186053,6.847743,7.097957,2463492782,0,0,7.23
...,...,...,...,...,...,...,...,...,...
722,2022-04-25,33.638069,33.989685,31.596489,33.648376,608579157,0,0,35.04
723,2022-04-26,33.645489,33.874214,30.118757,30.632242,499384644,0,0,34.46
724,2022-04-27,30.633402,31.624762,30.405497,31.257376,440634931,0,0,33.61
725,2022-04-28,31.258797,31.660122,30.517565,31.138372,447937007,0,0,32.75


In [6]:
# Prophet model with the daily, weekly and yearly seasonalities switched on
seasonalities = dict(
    daily_seasonality=True,
    weekly_seasonality=True,
    yearly_seasonality=True,
)
m = Prophet(**seasonalities)

# Train on the date / moving-average pair
m.fit(df[['ds', 'y']])

<prophet.forecaster.Prophet at 0x7f4cbce50210>

In [7]:
# Dates to predict: extend the fitted model's dates by 90 days (about 3 months)
horizon_days = 90
future = m.make_future_dataframe(periods=horizon_days)

In [8]:
# Predict every date in `future` and show the point forecast with its bounds
forecast = m.predict(future)
forecast_cols = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
forecast[forecast_cols]

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2020-05-03,-4.730631,-13.168359,2.771148
1,2020-05-04,-1.579059,-9.403337,6.281924
2,2020-05-05,1.438089,-6.906636,9.891153
3,2020-05-06,4.281912,-3.986338,12.534429
4,2020-05-07,6.972620,-1.114483,14.516097
...,...,...,...,...
812,2022-07-24,56.993105,48.728619,65.375548
813,2022-07-25,57.029104,48.510360,65.182439
814,2022-07-26,57.019594,49.048785,65.192404
815,2022-07-27,56.959924,49.064801,65.588868


In [9]:
# Visual DF: the actual opens stacked above the forecast rows.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
vis_df = pd.concat(
    [df[['ds', 'Open']], forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]]
).rename(columns={'yhat': 'Prediction'})

# Visualizing results
fig = px.line(
    vis_df,
    x='ds',
    y=['Open', 'Prediction', 'yhat_lower', 'yhat_upper'],
    title='Crypto Forecast',
    labels={'value': 'Price',
            'ds': 'Date'}
)

# Adding range-selector buttons to the date axis
fig.update_xaxes(
    rangeselector=dict(
        buttons=[
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=3, label="3m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="YTD", step="year", stepmode="todate"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all")
        ]
    )
)

fig.show()

In [10]:
def getData(ticker, window, ma_period):
    """
    Downloads daily price history for a ticker and prepares it for FB Prophet.

    Parameters
    ----------
    ticker : str
        Yahoo Finance symbol, e.g. "BTC-USD".
    window : int
        Number of days of history to request, counted back from now.
    ma_period : int
        Number of rows (days) in the moving average applied to 'Open'.

    Returns
    -------
    pandas.DataFrame
        The downloaded columns plus 'y', the rounded moving average of 'Open',
        with the former index exposed as a column. Rows containing any NaN are
        dropped, which includes the first ma_period - 1 rows where the moving
        average is undefined.
        NOTE: the date column is renamed to 'ds' only if the downloaded index
        is named "Date" -- this is assumed, not checked.
    """
    # Time periods
    now = datetime.now()

    # How far back to retrieve prices
    ago = now - timedelta(days=window)

    # Getting price history
    prices = yf.Ticker(ticker).history(
        start=ago.strftime("%Y-%m-%d"),
        end=now.strftime("%Y-%m-%d"),
        interval="1d",
    )

    # Prophet does not accept timezone-aware dates; drop the timezone if the
    # downloaded index carries one.
    if getattr(prices.index, "tz", None) is not None:
        prices.index = prices.index.tz_localize(None)

    # Handling missing data from yahoo finance: rebuild a gap-free daily index
    # from the first to the last returned date, then forward-fill the gaps.
    # (Sizing the range by the number of rows would cut off the newest rows
    # whenever some days are missing.)
    if not prices.empty:
        every_day = pd.date_range(
            prices.index.min(),
            prices.index.max(),
            freq="D",
            name=prices.index.name,  # keep the index name so reset_index() yields the same column
        )
        prices = prices.reindex(every_day).ffill()

    # Getting the N Day Moving Average and rounding the values
    prices['MA'] = prices['Open'].rolling(window=ma_period).mean().round(2)

    # Dropping the NaNs
    prices = prices.dropna()

    # Formatted for FB Prophet
    return prices.reset_index().rename(columns={"Date": "ds", "MA": "y"})

In [11]:
def fbpTrainPredict(df, forecast_period):
    """
    Fits FB Prophet to an appropriately formatted DF (columns 'ds' and 'y') and
    predicts over the frame built by make_future_dataframe, which extends the
    fitted dates by forecast_period periods.

    Returns a DF with the columns 'ds', 'yhat', 'yhat_lower' and 'yhat_upper'.
    """
    # Model with daily, weekly and yearly seasonality enabled
    model = Prophet(
        daily_seasonality=True,
        weekly_seasonality=True,
        yearly_seasonality=True,
    )

    # Train on the date/target pair only
    training_frame = df[['ds', 'y']]
    model.fit(training_frame)

    # Dates to predict, then the prediction itself
    horizon = model.make_future_dataframe(periods=forecast_period)
    predicted = model.predict(horizon)

    # Keep just the date, the point forecast and its bounds
    wanted = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
    return predicted[wanted]

In [12]:
def visFBP(df, forecast):
    """
    Charts the actual opening prices together with the forecast and its bounds.

    df is the pre-training DF (needs 'ds' and 'Open'); forecast is the DF of
    predicted values (needs 'ds', 'yhat', 'yhat_lower' and 'yhat_upper').
    Shows the figure and returns whatever fig.show() returns.
    """
    # Visual DF: the actual rows stacked above the forecast rows.
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    vis_df = pd.concat([df[['ds', 'Open']], forecast]).rename(
        columns={'yhat': 'Prediction',
                 'yhat_upper': "Predicted High",
                 'yhat_lower': "Predicted Low"}
    )

    # Visualizing results
    fig = px.line(
        vis_df,
        x='ds',
        y=['Open', 'Prediction', 'Predicted High', 'Predicted Low'],
        title='Crypto Forecast',
        labels={'value': 'Price',
                'ds': 'Date'}
    )

    # Adding range-selector buttons to the date axis
    fig.update_xaxes(
        rangeselector=dict(
            buttons=[
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=3, label="3m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(step="all")
            ]
        )
    )

    return fig.show()

In [13]:
# One year of BTC prices, smoothed with a 3-day moving average
df = getData(ticker="BTC-USD", window=365, ma_period=3)

# Fit Prophet and forecast 21 days ahead
forecast = fbpTrainPredict(df=df, forecast_period=21)

# Chart the actual opens against the forecast
visFBP(df=df, forecast=forecast)

In [14]:
def runningFBP(ticker, window=730, ma_period=5, days_to_train=365, forecast_period=10):
    """
    Runs the facebook prophet model over the provided ticker in a walk-forward
    fashion.  Each step trains on the days_to_train rows that precede position
    i, forecasts forecast_period days ahead, and then advances i by
    forecast_period.  A moving average of ma_period is applied to the dataset.

    Returns a DF indexed by 'ds' holding the actual 'Open' next to the
    predicted 'yhat', 'yhat_lower' and 'yhat_upper' for the same day (outer
    join, so days without a forecast or without a price hold NaN).

    Raises ValueError when the settings leave no room for a single training
    step.
    """

    # Getting and Formatting Data
    df = getData(ticker, window=window, ma_period=ma_period)

    forecast_cols = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']

    # Forecast chunks are collected here and concatenated once after the loop;
    # growing a DataFrame row-block by row-block is quadratic, and
    # DataFrame.append was removed in pandas 2.0.
    forecasts = []

    # Running the model once per forecast_period-sized step
    for i in tqdm(range(days_to_train, window-forecast_period, forecast_period)):

        # Training on the rows just before position i
        train_df = df.iloc[i-days_to_train:i]
        forecast = fbpTrainPredict(train_df, forecast_period=forecast_period)

        # Keeping only the newly forecast days, not the fitted history
        forecasts.append(forecast.tail(forecast_period)[forecast_cols])

    if not forecasts:
        raise ValueError(
            f"No forecasts were produced: days_to_train ({days_to_train}) must be "
            f"smaller than window - forecast_period ({window - forecast_period})."
        )

    pred_df = pd.concat(forecasts, ignore_index=True)

    # Combining the predicted df and original df
    comb_df = df[['ds', 'Open']].merge(pred_df,
                                       on='ds',
                                       how='outer').sort_values(by='ds')

    # Setting the index to the dates
    return comb_df.set_index('ds')

In [15]:
def get_prophet_positions(df, short=True):
    """
    Turns a single row into a position signal.

    Returns 1 when 'Open' is at or above 'yhat_upper', -1 when 'Open' is at or
    below 'yhat_lower' and shorting is enabled, and 0 in every other case.
    """
    open_price = df['Open']

    # Price at or above the upper bound -> go long
    if open_price >= df['yhat_upper']:
        return 1

    # Price at or below the lower bound -> go short, if allowed
    if open_price <= df['yhat_lower'] and short:
        return -1

    # Inside the band (or shorting disabled) -> stay flat
    return 0

In [16]:
def fbpBacktest(df, short=True):
    """
    Backtests the band strategy on a DF holding 'Open' and the forecast bounds.

    The passed DF is modified in place: 'positions' and 'log_returns' columns
    are added and every row containing a NaN is dropped.

    Returns the cumulative performance: the exponential of the running sum of
    position * log return.
    """
    # Signal for each row, delayed by one row to compensate for lookahead bias
    signals = df.apply(lambda row: get_prophet_positions(row, short=short), axis=1)
    df['positions'] = signals.shift(1)

    # Row-over-row log return of the opening price
    df['log_returns'] = np.log(df['Open']).diff()

    # Rows without a position, a return or a forecast cannot be traded
    df.dropna(inplace=True)

    # Return earned by the position held on each row
    strategy_returns = df['positions'] * df['log_returns']

    # Summing the log returns and exponentiating gives the running balance
    return np.exp(strategy_returns.cumsum())

In [17]:
# Walk-forward forecast of ETH with the settings below
backtest_settings = dict(
    window=730,
    ma_period=5,
    days_to_train=370,
    forecast_period=10,
)
bt_df = runningFBP("ETH-USD", **backtest_settings)

# Backtest without short positions
performance = fbpBacktest(bt_df, short=False)

# Plot the portfolio balance over time
axis_labels = {"y": "Portfolio Balance",
               "ds": "Date"}
px.line(
    performance,
    x=performance.index,
    y=performance,
    title='Portfolio Performance',
    labels=axis_labels,
)

100%|██████████| 35/35 [01:50<00:00,  3.17s/it]


In [18]:
# Inspect the backtest frame; fbpBacktest added the 'positions' and
# 'log_returns' columns to it in place.
bt_df


Unnamed: 0_level_0,Open,yhat,yhat_lower,yhat_upper,positions,log_returns
ds,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-05-08,3481.988037,2965.146245,2873.290942,3061.332742,0.0,-0.002328
2021-05-09,3911.463135,2984.955221,2899.129381,3082.702978,1.0,0.116308
2021-05-10,3924.413330,2997.769207,2908.435129,3085.882798,1.0,0.003305
2021-05-11,3948.271973,3009.090475,2924.577170,3099.496434,1.0,0.006061
2021-05-12,4174.635742,3018.281798,2919.445895,3113.324867,1.0,0.055749
...,...,...,...,...,...,...
2022-04-18,2993.483887,3271.018111,3072.892042,3501.167272,0.0,-0.022592
2022-04-19,3057.570312,3286.296571,3077.147767,3499.075802,0.0,0.021183
2022-04-20,3103.935059,3315.838721,3112.943146,3527.396454,0.0,0.015050
2022-04-21,3077.829346,3358.867869,3134.228072,3557.842691,0.0,-0.008446
