In [None]:
# Download Bitcoin historical daily data
# and perform a TA (Technical Analysis) based trading strategy
# which is SMA (Simple Moving Average) crossover

import datetime
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Interactive charting
import altair as alt
alt.data_transformers.disable_max_rows()

# Google colab interactive table
%load_ext google.colab.data_table 
%matplotlib inline

# Download crypto historical price data from the web
def CryptoData(symbol, frequency, start=0):
    """Fetch historical chart data for one currency pair from Poloniex.

    Params:
        symbol: currency pair as a string, e.g. 'USDT_BTC'.
        frequency: candle period in seconds; one of
            300, 900, 1800, 7200, 14400, 86400.
        start: value sent as the `start` query parameter; the default 0
            returns data from the first available date.

    Returns:
        DataFrame built from the JSON response, indexed by its 'date' column.
    """
    # Assemble the query string piece by piece; `end` is pinned far in the
    # future so the response always runs up to the latest candle.
    query = '&'.join((
        'command=returnChartData',
        'currencyPair=' + symbol,
        'end=9999999999',
        'period=' + str(frequency),
        'start=' + str(start),
    ))
    url = 'https://poloniex.com/public?' + query

    candles = pd.read_json(url)
    return candles.set_index('date')

# Default figure width and height, passed as `figsize` to the matplotlib-based plots below
SCREEN_X, SCREEN_Y = 12, 8

In [None]:
# Download the daily bitcoin historical data.
# startDate is forwarded as the `start` query parameter; 0 requests the
# history from the first available date.
startDate = 0 
# Alternative: start from a given day, converted to a Unix timestamp
#startDate = int((datetime.datetime.strptime('1/1/2018', "%d/%m/%Y").timestamp()))
testSymbol = 'USDT_BTC' # Bitcoin (presumably quoted in USDT -- confirm against the data source)

# frequency = 86400s which is 1 day
df = CryptoData(testSymbol, 86400, startDate)
df

In [None]:
# Save the dataframe to csv for downloading.
# The relative path means the file lands in the current working directory;
# the 'date' index is written out as the first column.
df.to_csv('bitcoin.csv')

In [None]:
# Plot the closing price against the 'date' index
df['close'].plot(figsize = (SCREEN_X, SCREEN_Y))

In [None]:
# Look at price volatility: summary statistics of the row-over-row fractional
# change in the closing price, followed by a histogram of the same changes
print(df['close'].pct_change().describe())
df['close'].pct_change().hist(bins=100)

In [None]:
# Look-back windows of the two simple moving averages, in rows
# (days for the daily data): `lead` is the shorter, faster-reacting average
# and `lag` the longer, slower one
lead = 45 
lag = 125

# Each average is NaN until its window has filled.
# NOTE: the two columns are added to df itself, not to a copy.
df['lead'] = df['close'].rolling(lead).mean()
df['lag'] = df['close'].rolling(lag).mean()

# Columns shown here and plotted in the interactive chart
COLUMNS = ['close', 'lead', 'lag']
df[COLUMNS]

In [None]:
# Rearrange the data into the long format altair expects:
# one row per (date, variable, value), where `variable` is the original column name.
# altair plots interactive charts with zooming and panning
plot_data = df[COLUMNS].reset_index().melt('date')

# One line per variable, coloured by variable name; 'date:T' marks the x field as temporal
alt.Chart(plot_data).mark_line().encode(
  x='date:T',
  y='value',
  color='variable'
).properties(
  width=800,
  height=600
).interactive()

In [None]:
"""
  SMA crossover strategy consists of a leading and a lagging simple moving average. 
  Leading SMA has a shorter look-back period than lagging moving average. 
  Hence, by definition, leading SMA will be more sensitive to most recent price moves; lagging SMA will be slower to react

Regime (1, -1, 0)
   1 LONG: if Leading SMA is above Lagging SMA by some threshold -> hold 1 long position of bitcoin
  -1 SHORT: if Leading SMA is below Lagging SMA by some threshold -> short 1 bitcoin
   0 FLAT: if the difference of Leading and Lagging SMA is less than the threshold -> flatten the bitcoin position
  
   # threshold is applied to filter out weak signals 
"""

def test_ma(df, lead, lag, pc_thresh = 0.025):
    ma_df = df.copy()

    # build the lead/lag indicator
    ma_df['lead'] = ma_df['close'].rolling(lead).mean()
    ma_df['lag'] = ma_df['close'].rolling(lag).mean()

    # Remove the NaN rows
    ma_df.dropna(inplace = True)

    # calculate the lead and lag difference
    ma_df['lead-lag'] = ma_df['lead'] - ma_df['lag']
    ma_df['pc_diff'] = ma_df['lead-lag'] / ma_df['close']

    # regime column will govern the buying and selling logic as described above; 
    # 1 means that we are long, -1 means that we are short and 0 means no position
    ma_df['regime'] = np.where(ma_df['pc_diff'] > pc_thresh, 1, 0)
    ma_df['regime'] = np.where(ma_df['pc_diff'] < -pc_thresh, -1, ma_df['regime'])
    
    # Market column as log returns of price series - which is the market performance
    ma_df['Market'] = np.log(ma_df['close'] / ma_df['close'].shift(1))
    
    # compute our Strategy returns by multiplying regime (shifted forward to match the Market column
    ma_df['Strategy'] = ma_df['regime'].shift(1) * ma_df['Market']
    
    # perform a cumulative sum operation as well as apply an exponent on Market and Strategy log returns 
    # in order to recover the original normalised series
    ma_df[['Market','Strategy']] = ma_df[['Market','Strategy']].cumsum().apply(np.exp)
    return ma_df

In [None]:
# threshold controls the sensitivity of the strategy:
# the smaller the number, the more sensitive it is (which usually means more long/short positions)
threshold = 0.1
# The trailing dropna() removes any remaining rows that contain NaN, such as
# the first row, which has no previous close to compute a return from
ma_df = test_ma(df, lead, lag, threshold).dropna()

# Price, indicator, signal and performance columns to display
SMA_COLUMNS = ['close', 'lead', 'lag', 'lead-lag', 'pc_diff', 'regime', 'Market', 'Strategy']
ma_df[SMA_COLUMNS]

In [None]:
# Market: buy-and-hold, which benefits from a soaring bitcoin price
# Strategy: SMA crossover long/short, which can benefit from both soaring and dropping bitcoin prices
# (provided that the SMA predicted the price movement correctly)
# The last row holds the final cumulative growth factor of each

ma_df[['Market', 'Strategy']].iloc[-1]

In [None]:
# Plot the cumulative Market (buy-and-hold) vs Strategy performance over time
ma_df[['Market', 'Strategy']].plot(figsize = (SCREEN_X, SCREEN_Y))

In [None]:
# Plot the regime (left axis) together with `pc_diff`, the lead-lag difference
# as a fraction of the close (right axis via secondary_y)
ma_df.regime.plot(label='Regime', legend=True, figsize=(SCREEN_X, SCREEN_Y))
ma_df.pc_diff.plot(secondary_y=True, label='Lead-Lag Px Diff', legend=True, figsize=(SCREEN_X, SCREEN_Y))


In [None]:
# Plot the regime (left axis) together with the close price (right axis via secondary_y)
ma_df.regime.plot(label='regime', legend=True, figsize=(SCREEN_X, SCREEN_Y))
ma_df.close.plot(secondary_y=True, label='close', legend=True, figsize=(SCREEN_X, SCREEN_Y))


In [None]:
# Backtest the strategy over a grid of lead/lag windows to find the best-performing pair

# np.arange(start, end, step) excludes `end`: leads run 5..95 and lags 105..195
leads = np.arange(5, 100, 5) 
lags = np.arange(105, 200, 5)

# Every [lead, lag] pair in the grid
lead_lags = [[lead,lag] for lead in leads for lag in lags]
# Empty results table: one row per lag, one column per lead
pnls = pd.DataFrame(index=lags,columns = leads)
pnls

In [None]:
# Run the strategy for every lead/lag combination in the grid.
# PnL is the strategy's final cumulative value minus buy-and-hold's, so a
# positive number means the strategy beat the market.
# The loop variables are deliberately not named `lead`, `lag` or `ma_df`:
# reusing those names would overwrite the notebook-level values, and earlier
# cells re-run afterwards would silently use the last grid point.
for grid_lead, grid_lag in lead_lags:
  grid_df = test_ma(df, grid_lead, grid_lag, threshold)
  # .iloc[-1] takes the last row by position, whatever the index type
  pnl = grid_df['Strategy'].iloc[-1] - grid_df['Market'].iloc[-1]
  # A single .loc[row, column] write always lands in pnls itself, whereas
  # chained pnls[col][row] = ... may write to a temporary copy
  pnls.loc[grid_lag, grid_lead] = pnl
  print(grid_lead, grid_lag, pnl)

pnls

In [None]:
# Use a heatmap to visualize the backtest results (rows: lag, columns: lead).
# The values are cast to float before being passed to the heatmap.
PNLs = pnls[pnls.columns].astype(float)
# Create the figure at the notebook's standard size; the heatmap is drawn next
plt.subplots(figsize = (SCREEN_X, SCREEN_Y))
sns.heatmap(PNLs, cmap='coolwarm', annot=True, cbar_kws={'orientation': 'horizontal'})

In [None]:
""" Suggestions
1. Backtest with different threshold values
2. Try different time frequencies. You may have to shorten the time period of the download, as there are size limitations on each download
3. Try other crypto symbols: 'USDT_BTC','USDT_BCH','USDT_ETC','USDT_XMR','USDT_ETH','USDT_DASH','USDT_XRP','USDT_LTC','USDT_NXT','USDT_STR','USDT_REP','USDT_ZEC'
4. Use other crypto as correlations to form a better strategy
5. Separate the data into 2 portions. Optimize the lead/lag/threshold params from the 1st portion of the data and use it to measure the strategy performance on 2nd portion of the data
6. Try different data source: https://colab.research.google.com/github/kenwkliu/ideas/blob/master/colab/DataSource.ipynb
7. Try different TA signals such as RSI: https://github.com/PacktPublishing/Learn-Algorithmic-Trading---Fundamentals-of-Algorithmic-Trading/tree/master/chapter2
"""