In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

%load_ext Cython

In [None]:
%%cython
import numpy as np
cimport numpy as np

# rewrite without Pandas & class object for benchmark with (hopefully) Cython ver
cpdef get_current_zscore(series, rolling_window=None):
  """Standardise ``series`` and return ``(latest z-score, full z-score series)``.

  Parameters
  ----------
  series : numpy.ndarray or pandas.Series
      One-dimensional values to standardise.
  rolling_window : int, optional
      When given, every point is standardised against the mean / standard
      deviation of the trailing ``rolling_window`` observations. The first
      ``rolling_window - 1`` entries are NaN because no full window exists
      yet. When ``None`` the mean and standard deviation of the whole series
      are used.

  Returns
  -------
  tuple
      ``(z_score_series[-1], z_score_series)`` - the last z-score (by
      position) and the complete z-score series.

  Notes
  -----
  Without a window the input's own ``.std()`` is used, so its default degrees
  of freedom apply. The ndarray rolling path uses ``ddof=1``, the same default
  as pandas' rolling ``std``.
  """
  if rolling_window is None:
    z_score_series = (series-series.mean())/series.std()
  elif hasattr(series, "rolling"):
    # pandas-like input: keep using its native rolling implementation
    spread_mean = series.rolling(center=False, window=rolling_window).mean()
    spread_std = series.rolling(center=False, window=rolling_window).std()
    z_score_series = (series-spread_mean)/spread_std
  else:
    # plain ndarray input (what backtest_pair passes): ndarrays have no .rolling
    values = np.asarray(series, dtype="float64")
    window = int(rolling_window)
    spread_mean = np.full(values.shape, np.nan)
    spread_std = np.full(values.shape, np.nan)
    if 0 < window <= values.shape[0]:
      windows = np.lib.stride_tricks.sliding_window_view(values, window)
      # row i of `windows` is the trailing window ending at index i + window - 1
      spread_mean[window-1:] = windows.mean(axis=1)
      spread_std[window-1:] = windows.std(axis=1, ddof=1)
    z_score_series = (values-spread_mean)/spread_std

  # read the last element by position; on a pandas Series `[-1]` is a label lookup
  if hasattr(z_score_series, "iloc"):
    return z_score_series.iloc[-1], z_score_series
  return z_score_series[-1], z_score_series

cdef class MyKalmanFilter:
  """Two-state Kalman filter for the linear relation ``y1 = slope * y2 + intercept``.

  The state vector is ``(slope, intercept)``. Each call to ``step_forward``
  performs one time update followed by one measurement update and appends the
  new state estimate as an extra column of ``x``. The filter keeps its state
  between calls; it is never reset.

  Attributes
  ----------
  R : measurement noise variance.
  Q : (2, 2) process noise covariance, ``delta / (1 - delta) * I``.
  x : (2, n) history of state estimates; the last column is the current state.
  P : (2, 2) current state covariance.
  """

  # A cdef class has no instance __dict__, so every attribute assigned on
  # `self` has to be declared here; otherwise the assignments in __cinit__
  # raise AttributeError at construction time.
  cdef public object R
  cdef public object Q
  cdef public object x
  cdef public object P

  def __cinit__(self, delta=1e-4, R=1e-3):
    # measurement noise variance
    self.R = R

    # co-variance of process noise (2 dimensions)
    self.Q = delta / (1-delta) * np.eye(2)

    # state (slope, intercept) will be (2 x n), we initialize with just one column at first
    self.x = np.zeros((2, 1))

    # state covariance
    self.P = np.ones((2,2))

  cpdef step_forward(self, y1, y2):
    """Consume one observation pair and return the updated (2, 1) state.

    ``y2`` is the regressor and ``y1`` the measured value. ``x`` gains one
    column and ``P`` is replaced by the updated covariance.
    """
    # observation matrix H as a (1, 2) row: [y2, 1]
    H = np.array([y2, 1])[None]
    # measurement
    z = y1

    ## TIME UPDATE ##
    # predict the new state as the previous one (2x1)
    x_hat = self.x[:, -1][..., None]

    # covariance prediction: previous covariance plus process noise
    P_hat = self.P + self.Q

    ## MEASUREMENT UPDATE ##
    # Kalman gain (2x1)
    K = P_hat.dot(H.T)/(H.dot(P_hat.dot(H.T))+self.R)

    # predicted measurement
    z_hat = H.dot(x_hat)
    # correct the state with the gain-weighted innovation
    x = x_hat + K.dot(z-z_hat)

    # covariance update
    self.P = (np.eye(2)-K.dot(H)).dot(P_hat)

    # append the new state to the history
    self.x = np.concatenate([self.x, x], axis=1)

    return x

  cpdef regression(self, series1, series2):
    """Run ``step_forward`` over two aligned series.

    Returns an ``(n, 2)`` float64 array whose row ``t`` holds
    ``(slope, intercept)`` after observation ``t``, where
    ``n = series1.shape[0]``. The filter continues from its current state.
    """
    n_obs = series1.shape[0]
    # preallocate instead of re-stacking the whole result on every step
    state_means = np.zeros((n_obs, 2), dtype="float64")

    for t in range(n_obs):
        x = self.step_forward(series1[t], series2[t])
        state_means[t, 0] = x[0][0]
        state_means[t, 1] = x[1][0]

    return state_means
# Module-level filter instance used by backtest_pair. It is created once, so
# its state (x, P) carries over between successive regression() calls.
mkf = MyKalmanFilter(delta=1e-4, R=2)

cpdef backtest_pair(
    series1,
    series2,
    returns,
    inst1,
    inst2,
    window=400,
    z_long_perc=3,
    z_short_perc=97,
    tp=0.01,
    sl=-0.01,
    avg_kalman=False,
    rolling_z_window=None,
    # silent=False,
    start_date=None,
    tc=0.0006
    ):
    """Walk-forward backtest of a two-instrument spread strategy.

    For every step ``t`` from ``window`` to ``series1.shape[0] - 2`` the
    function takes the slice ``[t-window, t+2)`` of both series, runs the
    module-level Kalman filter ``mkf`` over it to get a hedge ratio, builds
    the spread, z-scores it and compares the latest z-score against
    percentiles of the z-score series.

    Parameters
    ----------
    series1, series2 : 1-D arrays of equal length, sliced and combined
        element-wise.
    returns : indexed as ``returns[0][t+1]`` (instrument 1) and
        ``returns[1][t+1]`` (instrument 2).
    inst1, inst2, avg_kalman, start_date : accepted for interface
        compatibility; not used by this function.
    window : look-back length; also the first step evaluated.
    z_long_perc, z_short_perc : percentiles of the z-score series used as
        entry thresholds. Below the first sets ``signal = 1``, above the
        second sets ``signal = -1``.
    tp, sl : take-profit / stop-loss levels on the compounded net return of
        the open trade; reaching either one sets ``signal = 0``.
    rolling_z_window : forwarded to ``get_current_zscore``.
    tc : cost charged per unit of absolute position change, per instrument.

    Returns
    -------
    numpy.ndarray of shape ``(n_steps, 11)`` and dtype float64 with columns
    position 1, position 2, signal, last spread value, current z-score,
    long threshold, short threshold, gross return, net return, cumulative
    gross, cumulative net.
    """
    # one tuple per step; converted to an array once at the end instead of
    # re-stacking nine arrays on every iteration
    rows = []

    signal = 0
    current_return = 0
    position1 = 0
    position2 = 0

    # moving through the sample
    for t in range(window, series1.shape[0]-1):
        # the slice ends at t+2, so it includes the observation at t+1
        sample_series1 = series1[t-window:t+2]
        sample_series2 = series2[t-window:t+2]

        old_signal = signal
        old_position1 = position1
        old_position2 = position2

        state_means = mkf.regression(sample_series1, sample_series2)

        # negative slope estimate, so that spread = series1 - slope * series2
        hedge_ratio = - state_means[:,0]
        spread = sample_series1 + (sample_series2 * hedge_ratio)

        current_z_score, z_score_series = get_current_zscore(spread, rolling_window=rolling_z_window)

        z_long, z_short = np.percentile(z_score_series, [z_long_perc, z_short_perc])

        if old_signal == 0:
            # flat: look for an entry (the short check runs second and wins if both hold)
            if current_z_score < z_long:
                signal = 1
            if current_z_score > z_short:
                signal = -1
        elif current_return >= tp or current_return <= sl:
            # in a trade: close on take-profit or stop-loss
            signal = 0

        position1 = -signal
        position2 = signal

        gross = position1*returns[0][t+1] + position2*returns[1][t+1]
        net = gross - tc*(abs(position1 - old_position1) + abs(position2 - old_position2))
        if signal == old_signal:
            # same state as the previous step: compound
            current_return = (1+current_return)*(1+net)-1
        else:
            # state changed: restart the running return
            current_return = net

        rows.append((
            position1,
            position2,
            signal,
            spread[-1],
            current_z_score,
            z_long,
            z_short,
            gross,
            net,
        ))

    # reshape keeps the (0, 9) shape when the loop never ran
    table = np.array(rows, dtype="float64").reshape(-1, 9)
    gross_cum = np.cumprod(1 + table[:, 7]).reshape(-1, 1)
    net_cum = np.cumprod(1 + table[:, 8]).reshape(-1, 1)

    return np.hstack((table, gross_cum, net_cum))
    

# self.result = data
# save_dir = None
# save_dir = self.save_dir
# self.get_backtest_charts(save_dir=save_dir, silent=silent)
# self.get_backtest_report(save_dir=save_dir, silent=silent)

# inst1 = "AUDIOUSDT_FUTURES"
# inst2 = "FTMUSDT_FUTURES"
# series1 = backtester.data[[inst1]].iloc[:,0:].values.flatten()
# series2 = backtester.data[[inst2]].iloc[:,0:].values.flatten()
# returns = backtester.data[["%s_returns"%inst1, "%s_returns"%inst2]].to_numpy()
inst2 = "FTMUSDT_FUTURES"
inst1 = "AUDIOUSDT_FUTURES"
ticker_interval = "15m"
raw_data_dir="../raw_data/Binance_Historical_15m_FUTURES_20_days_2022-07-14T12:00:43"
pair = [inst1, inst2]

# Remove duplicates while keeping the order of `pair`. A set has arbitrary
# iteration order, so entries[0] could otherwise be either instrument's file.
entries = list(dict.fromkeys(
  "%s_%s.csv" % (instrument, ticker_interval) for instrument in pair
))

# column 4 of each CSV - presumably the close price; TODO confirm against the files
x = np.genfromtxt('%s/%s' % (raw_data_dir, entries[0]), delimiter=",", usecols=(4))
y = np.genfromtxt('%s/%s' % (raw_data_dir, entries[1]), delimiter=",", usecols=(4))

# drop rows that did not parse as numbers (e.g. a header line)
x = x[~np.isnan(x)]
y = y[~np.isnan(y)]

# Forward simple returns: element t is the relative change from price t to
# price t+1, and the last element (no successor) is 0. The index alignment is
# the same as before, but the value is now later/earlier - 1 rather than the
# inverted ratio earlier/later, which flipped the sign of the P&L.
# Assumes rows are in chronological order - TODO confirm.
returns_x = np.hstack((x[1:] / x[:-1] - 1, [0]))
returns_x[~np.isfinite(returns_x)] = 0
returns_y = np.hstack((y[1:] / y[:-1] - 1, [0]))
returns_y[~np.isfinite(returns_y)] = 0
returns = np.vstack((returns_x, returns_y))


output = backtest_pair(
  series1=x,
  series2=y,
  returns=returns,
  inst1=inst1,
  inst2=inst2,
  window=300,
  z_long_perc=4,
  z_short_perc=97,
  tp=0.03,
  sl=-0.12
  )

print(output)

In [None]:
# Label the eleven columns of the backtest output array.
df = pd.DataFrame(
  output,
  columns=[
    "pos1", "pos2", "signals",
    "spread", "zscore", "z_long", "z_short",
    "gross", "net", "gross_cum", "net_cum",
  ],
)

In [None]:
result = df.copy()
fig, axs = plt.subplots(
  3, 1, gridspec_kw={'height_ratios': [2, 1, 2]}, 
  sharex=True, figsize=(18, 20))
fig.patch.set_facecolor('lavender')
fig.suptitle("PAIR: %s | INT: %s" % (inst1+"-"+inst2, "15m"), fontsize=24)

# Panel 1: cumulative net and gross performance
axs[0].plot(result["net_cum"], color="blue", label="NET")
axs[0].plot(result["gross_cum"], color="orange", label="GROSS")
axs[0].grid()
axs[0].legend()
# .iloc[-1] reads the last row by position; `[-1]` is a label lookup and
# raises KeyError on the default RangeIndex
final_net_pct = round((result.net_cum.iloc[-1] - 1)*100, 2)
axs[0].set_xlabel("PNL | NET: {}%".format(final_net_pct), fontsize=22)

# Panel 2: z-score against its long / short entry thresholds
axs[1].plot(result["zscore"], color="black", label="ZSCORE")
axs[1].plot(result["z_long"], color="green", label="ZLONG")
axs[1].plot(result["z_short"], color="red", label="ZSHORT")
axs[1].grid()
axs[1].legend()
axs[1].set_xlabel("ENTRIES", fontsize=22)

# Panel 3: spread, overlaid where a long (signal 1) or short (signal -1) is held
spread = result["spread"]
# where() keeps values on rows that satisfy the condition and yields NaN elsewhere
long = spread.where(result.signals == 1)
short = spread.where(result.signals == -1)
axs[2].plot(spread, color="slategrey", label="SPREAD")
axs[2].plot(long, color="green", label="LONG")
axs[2].plot(short, color="red", label="SHORT")
axs[2].grid()
axs[2].legend()
axs[2].set_xlabel("POSITIONS", fontsize=22)
plt.show()

In [None]:
inst2 = "FTMUSDT_FUTURES"
inst1 = "AUDIOUSDT_FUTURES"
ticker_interval = "15m"
raw_data_dir="../raw_data/Binance_Historical_15m_FUTURES_20_days_2022-07-14T12:00:43"
pair = [inst1, inst2]

# Remove duplicates while keeping the order of `pair`. A set has arbitrary
# iteration order, so entries[0] could otherwise be either instrument's file.
entries = list(dict.fromkeys(
  "%s_%s.csv" % (instrument, ticker_interval) for instrument in pair
))

# column 4 of each CSV - presumably the close price; TODO confirm against the files
x = np.genfromtxt('%s/%s' % (raw_data_dir, entries[0]), delimiter=",", usecols=(4))
y = np.genfromtxt('%s/%s' % (raw_data_dir, entries[1]), delimiter=",", usecols=(4))

# drop rows that did not parse as numbers (e.g. a header line)
x = x[~np.isnan(x)]
y = y[~np.isnan(y)]

In [None]:
# returns = backtester.data[["%s_returns"%inst1, "%s_returns"%inst2]].to_numpy()
# returns_x = 
# np.hstack((x[1:], [0])).shape
# Forward simple returns: element t is the relative change from price t to
# price t+1, and the last element (no successor) is 0. The index alignment is
# the same as before, but the value is now later/earlier - 1 rather than the
# inverted ratio earlier/later, which flipped the sign of the P&L.
# Assumes rows are in chronological order - TODO confirm.
returns_x = np.hstack((x[1:] / x[:-1] - 1, [0]))
returns_x[~np.isfinite(returns_x)] = 0
returns_y = np.hstack((y[1:] / y[:-1] - 1, [0]))
returns_y[~np.isfinite(returns_y)] = 0
# row 0: returns for x, row 1: returns for y
returns = np.vstack((returns_x, returns_y))

In [None]:
def _parse_observations(obs):
  """Safely convert observations to their expected format"""
  obs = np.ma.atleast_2d(obs)
  if obs.shape[0] == 1 and obs.shape[1] > 1:
      obs = obs.T
  return obs