# Kalman filter brute-force parameter search

In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import statsmodels.formula.api as sm
import statsmodels.tsa.stattools as ts
import statsmodels.tsa.vector_ar.vecm as vm

import time
from IPython.display import clear_output

In [2]:
# Load the two minute-bar CSVs; the first column becomes a parsed datetime index.
df1 = pd.read_csv(
    "../data/ZECUSDT-minute.csv",
    index_col=0,
    parse_dates=True,
)
df2 = pd.read_csv(
    "../data/XMRUSDT-minute.csv",
    index_col=0,
    parse_dates=True,
)
# Report the row count of each file, one per line.
print(len(df1), len(df2), sep="\n")

1212880
1221532


In [3]:
# Price table: A is the close from df1, B the close from df2 (aligned on df1's index).
df = df1.close.rename("A").to_frame()
df["B"] = df2.close
# Skip the first 4000 rows by position, then drop rows where either price is missing.
df = df.iloc[4000:]
df = df.dropna()
# OLS regression of B on A; the slope on A is used as the hedge ratio.
results = sm.ols(formula="B ~ A", data=df[['B', 'A']]).fit()
# Select the slope by its label: `params` is indexed by term name, and integer
# keys on a label-indexed Series are no longer treated as positions by pandas.
hedgeRatio = results.params["A"]

In [50]:
def get_result(l, thres, s_thres, fee=0.001):
    """Build the rolling z-score table for one lookback and backtest it.

    Reads the module-level `df`, `hedgeRatio`, `df1` and `df2`.

    l is the lookback (number of rows) for the rolling mean and standard
    deviation of the spread B - hedgeRatio * A.  `thres`, `s_thres` and `fee`
    are forwarded unchanged to `run_backtest`, whose return value is returned.
    """
    raw_spread = (df['B'] - hedgeRatio * df['A']).rename("spread")
    window = raw_spread.rolling(l)

    table = raw_spread.to_frame()
    table["mean"] = window.mean()
    table["std"] = window.std()
    table["zscore"] = (table["spread"] - table["mean"]) / table["std"]
    # The first rows have no full window yet; drop them before attaching prices.
    table = table.dropna()

    # Attach close/high/low of both instruments on the surviving timestamps.
    price_columns = (
        ("A", df1, "close"),
        ("Ah", df1, "high"),
        ("Al", df1, "low"),
        ("B", df2, "close"),
        ("Bh", df2, "high"),
        ("Bl", df2, "low"),
    )
    for target, source, field in price_columns:
        table[target] = source[field].reindex(table.index)

    return run_backtest(table, thres, s_thres, fee)
    
def run_backtest(spread, thres, s_thres, fee):
    """Replay the z-score pair-trading rules over `spread` and return the outcome.

    Starting from a balance of 100, the rows of `spread` are visited in order.
    While flat, a position is opened when the z-score is above `thres` or
    below `-thres`; half of the current balance is allocated to each leg.
    Fill prices come from `get_a_b` (deterministic, not random).

    Position labels and how their P&L is booked on exit:
      * "B" (opened on z > thres): closed when z < -s_thres; leg A is booked
        as exit - entry, leg B as entry - exit.
      * "A" (opened on z < -thres): closed when z > s_thres; leg A is booked
        as entry - exit, leg B as exit - entry.

    A position still open after the last row is never closed, so it does not
    affect the returned balance.

    Args:
        spread: DataFrame with columns zscore, A, Ah, Al, B, Bh, Bl.
        thres: entry threshold on the z-score.
        s_thres: exit threshold (see the position rules above).
        fee: proportional fee; reduces the quantity bought on entry and is
            charged on the exit notional of each leg.

    Returns:
        Tuple (total, trades): the final balance and the number of positions
        opened.
    """
    total = 100.
    entry_a = None    # [entry price, quantity] of leg A while a position is open
    entry_b = None    # [entry price, quantity] of leg B while a position is open
    position = None   # None (flat), "A" or "B"
    trades = 0

    # Read every column once as a plain array.  Indexing a Series with an
    # integer (`spread.zscore[i]`) is label-based unless pandas falls back to
    # positions, a fallback that is deprecated; arrays are unambiguous and
    # avoid the per-element Series lookup cost.
    zscores = spread["zscore"].to_numpy()
    a_low, a_close, a_high = (spread[c].to_numpy() for c in ("Al", "A", "Ah"))
    b_low, b_close, b_high = (spread[c].to_numpy() for c in ("Bl", "B", "Bh"))

    def fills(row):
        # Fill prices (a_lo, a_hi, b_lo, b_hi) for the given row.
        return get_a_b(a_low[row], a_close[row], a_high[row],
                       b_low[row], b_close[row], b_high[row])

    for i, z in enumerate(zscores):
        if position is None and (z > thres or z < -thres):  # looking to enter
            trades += 1
            al, ah, bl, bh = fills(i)
            if z > thres:
                entry_a = [ah, ((total / 2) / ah) * (1. - fee)]
                entry_b = [bl, ((total / 2) / bl) * (1. - fee)]
                position = "B"
            else:
                entry_a = [al, ((total / 2) / al) * (1. - fee)]
                entry_b = [bh, ((total / 2) / bh) * (1. - fee)]
                position = "A"

        # Deliberately a separate `if`: the exit test also runs on the bar
        # where the position was opened, as in the original logic.
        if position == "A" and z > s_thres:  # liquidate
            al, ah, bl, bh = fills(i)
            total += (entry_a[0] - ah) * entry_a[1] - (ah * entry_a[1] * fee)
            total += (bl - entry_b[0]) * entry_b[1] - (bl * entry_b[1] * fee)
            entry_a, entry_b, position = None, None, None
        elif position == "B" and z < -s_thres:  # liquidate
            al, ah, bl, bh = fills(i)
            total += (al - entry_a[0]) * entry_a[1] - (al * entry_a[1] * fee)
            total += (entry_b[0] - bh) * entry_b[1] - (bh * entry_b[1] * fee)
            entry_a, entry_b, position = None, None, None
    return total, trades
    
def get_a_b(al, ac, ah, bl, bc, bh):
    """Return fill prices half-way between the close and the low/high.

    Arguments are low, close, high for A followed by low, close, high for B.
    The result is the tuple (a_lo, a_hi, b_lo, b_hi), where each `*_lo` is the
    close minus half its absolute distance to the low and each `*_hi` is the
    close plus half its absolute distance to the high.
    """
    a_down = abs(ac - al) / 2
    a_up = abs(ac - ah) / 2
    b_down = abs(bc - bl) / 2
    b_up = abs(bc - bh) / 2
    return ac - a_down, ac + a_up, bc - b_down, bc + b_up

In [10]:
# Time a single backtest run.  perf_counter() is monotonic and high-resolution,
# so the difference is a reliable elapsed time even if the system clock changes.
start = time.perf_counter()
get_result(6000, 2, -1)
time.perf_counter() - start

14.755175828933716

In [21]:
# One independent, initially empty list per recorded column.
d = {column: [] for column in ("l", "thres", "sell_thres", "result")}

In [28]:
# Scratch data: add three sample rows, written column by column.
d["l"].extend([3, 5, 1])
d["thres"].extend([3, 5, 2])
d["sell_thres"].extend([3, 6, 1])

In [29]:
# Display the lists accumulated in d so far (notebook cell output).
d

{'l': [3, 3, 5, 3, 5, 1],
 'thres': [3, 3, 5, 3, 5, 2],
 'sell_thres': [3, 3, 6, 3, 6, 1]}

In [40]:
# Print the first row of the table after sorting by "l" in descending order.
print(
    pd.DataFrame(d)
    .sort_values(by="l", ascending=False)
    .iloc[0]
)

l             6000.000000
thres            2.000000
sell_thres      -1.000000
result         179.341959
Name: 1, dtype: float64


In [51]:
# Brute-force grid search over lookback, entry threshold and exit threshold.
# Every evaluated combination is recorded as one row across the lists in d.
d = {"l":[], "thres":[], "sell_thres":[], "trades":[], "result":[]}
for l in [500, 1000, 2000, 4000, 6000, 8000, 10000, 13000, 15000, 20000, 30000, 50000]:
    for thres in [0.4, 0.7, 1, 1.3, 1.7, 2., 2.3, 2.6, 2.9, 3.2]:
        for sell_thres in [-1.8, -1.5, -1.2, -0.9, -0.6, -0.3, 0., 0.3, 0.6, 0.9, 1.2, 1.5, 2.]:
            # Skip combinations whose exit level (-sell_thres) is at or
            # beyond the entry level (thres).
            if sell_thres <= -thres:
                continue
            print(f"Now doing l: {l}, thres: {thres}, sell_thres: {sell_thres}")
            result, trades = get_result(l, thres, sell_thres)
            # Record the row only after the backtest has returned: if the run
            # raises or is interrupted, all lists in d keep the same length
            # and pd.DataFrame(d) still works on the partial results.
            d["l"].append(l)
            d["thres"].append(thres)
            d["sell_thres"].append(sell_thres)
            d["trades"].append(trades)
            d["result"].append(result)
            clear_output()
            # Show the best combination found so far.
            print(pd.DataFrame(d).sort_values("result", ascending=False).iloc[0])

l             2000.000000
thres            0.700000
sell_thres       0.900000
trades        1080.000000
result         502.221957
Name: 243, dtype: float64


In [52]:
# Collect the grid-search records in d into a table.
# NOTE(review): this rebinds the global `df` that get_result reads its 'A' and
# 'B' price columns from; get_result will not find those columns after this
# cell runs until the price table is rebuilt.
df = pd.DataFrame(d)

In [55]:
# Show the 20 rows with the highest "result".
df.sort_values(by="result", ascending=False).iloc[:20]

Unnamed: 0,l,thres,sell_thres,trades,result
243,2000,0.7,0.9,1080,502.221957
234,2000,0.4,0.9,1088,494.449979
253,2000,1.0,0.9,1012,448.397588
348,4000,0.4,0.6,891,413.044896
242,2000,0.7,0.6,1505,399.001676
233,2000,0.4,0.6,1656,382.715078
426,4000,2.6,-0.3,341,359.575355
301,2000,2.3,0.6,604,348.545277
300,2000,2.3,0.3,650,348.121364
252,2000,1.0,0.6,1225,344.998788
