# Kalman filter brute force parameter search

In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import statsmodels.formula.api as sm
import statsmodels.tsa.stattools as ts
import statsmodels.tsa.vector_ar.vecm as vm

import time
import math
from IPython.display import clear_output

In [9]:
# Load the bar data for the two legs of the pair.  Column 0 of each CSV is
# used as the index and parsed as dates.
df1 = pd.read_csv(
    "../data/testing-hourly/CELRUSDT-minute.csv",
    index_col=0,
    parse_dates=True,
)
df2 = pd.read_csv(
    "../data/testing-hourly/FETUSDT-minute.csv",
    index_col=0,
    parse_dates=True,
)

# Other pairs (kept commented out):
# df1 = pd.read_csv("../data/ZECUSDT-minute.csv", index_col=0, parse_dates=True)
# df2 = pd.read_csv("../data/XMRUSDT-minute.csv", index_col=0, parse_dates=True)
# df1 = pd.read_csv("../data/testing-hourly/BTTUSDT-minute.csv", index_col=0, parse_dates=True)
# df2 = pd.read_csv("../data/testing-hourly/HOTUSDT-minute.csv", index_col=0, parse_dates=True)

# Row counts of both raw frames, one per line.
print(len(df1), len(df2), sep="\n")

1212637
1247928


In [10]:
# Pair frame used by the functions below: column "A" is df1's close and
# column "B" is df2's close aligned on df1's index.  The first 4000 rows are
# skipped, then every row with a missing value is dropped.
df = (
    df1["close"]
    .rename("A")
    .to_frame()
    .assign(B=df2["close"])
    .iloc[4000:]
    .dropna()
)
# results = sm.ols(formula="B ~ A", data=df[['B', 'A']]).fit()
# hedgeRatio = results.params[1]

In [18]:
def get_hr(lookback, ds=100):
    """Estimate a rolling OLS hedge ratio of ``B`` on ``A`` from the global ``df``.

    Every ``ds`` rows an OLS fit of ``B ~ A`` is run on the ``lookback`` rows
    starting at that position.  The slope is stamped on the first row *after*
    the fitting window, so an estimate never uses the row it is assigned to.

    Parameters
    ----------
    lookback : int
        Number of rows in each regression window.
    ds : int, default 100
        Step, in rows, between consecutive regression windows.

    Returns
    -------
    pandas.DataFrame
        One column, ``"hr"``, indexed like ``df``.  Estimates are forward
        filled between refits; rows before the first estimate are NaN.
    """
    # Same window count as slicing a full-length array with [:-lookback].
    usable_rows = len(df.index[:-lookback])
    l = math.floor(usable_rows / ds)

    ratios = []
    stamps = []
    for t in range(l):
        clear_output()
        print(f"{t} < {l}")
        start = t * ds
        stop = start + lookback
        fit = sm.ols(formula="B ~ A", data=df[start:stop]).fit()  # Note this can deal with NaN in top row
        # Look the slope up by name: params is indexed ["Intercept", "A"], and
        # integer keys on a label-indexed Series are deprecated in pandas.
        ratios.append(fit.params["A"])
        stamps.append(df.index[stop])

    # Keeping value and timestamp paired in one Series means a NaN estimate
    # can no longer shift the values against their timestamps.
    hr = pd.Series(ratios, index=stamps, name="hr", dtype=float)
    spread = hr.reindex(df.index).to_frame()
    spread.ffill(inplace=True)
    return spread
    
def get_spread(spread, l):
    """Build the z-scored spread frame from a hedge-ratio frame.

    ``spread`` must carry an ``"hr"`` column (as returned by ``get_hr``);
    ``l`` is the rolling window, in rows, for the spread's mean and std.

    Reads the globals ``df`` (columns ``A``/``B``), ``df1`` and ``df2``
    (columns ``close``/``high``/``low``).

    Returns a DataFrame with columns ``spread``, ``mean``, ``std``,
    ``zscore``, ``A``, ``Ah``, ``Al``, ``B``, ``Bh``, ``Bl``.  Rows where any
    of the first four is NaN are dropped before the price columns are added.
    """
    hedged = df["B"] - spread["hr"] * df["A"]
    out = hedged.rename("spread").to_frame()

    out["mean"] = out["spread"].rolling(l).mean()
    out["std"] = out["spread"].rolling(l).std()
    out["zscore"] = (out["spread"] - out["mean"]) / out["std"]
    out = out.dropna()

    # (output column, source frame, source column), in output column order.
    price_columns = (
        ("A", df1, "close"),
        ("Ah", df1, "high"),
        ("Al", df1, "low"),
        ("B", df2, "close"),
        ("Bh", df2, "high"),
        ("Bl", df2, "low"),
    )
    for name, source, field in price_columns:
        out[name] = source[field].reindex(out.index)
    return out

def get_result(spread, lm, ls, thres, s_thres, fee=0.003):
    """Backtest one parameter combination using constant z-score thresholds.

    The previous body called ``run_backtest(spread, thres, s_thres, fee)``,
    which does not match ``run_backtest(spread, fee)`` and always raised
    ``TypeError``.  ``run_backtest`` reads its thresholds per row from the
    ``thres`` / ``sell_thres`` columns, so they are supplied that way here.

    Parameters
    ----------
    spread : pandas.DataFrame
        Frame with a ``"spread"`` column plus the price columns that
        ``run_backtest`` needs (e.g. the output of ``get_spread``).  It is
        not modified; a copy is used.
    lm : int
        Rolling window, in rows, for the spread's mean.
    ls : int
        Rolling window, in rows, for the spread's standard deviation.
    thres : float
        Constant entry threshold written to the ``thres`` column.
    s_thres : float
        Constant exit threshold written to the ``sell_thres`` column.
    fee : float, default 0.003
        Passed through to ``run_backtest``.

    Returns
    -------
    tuple
        ``(total, trades)`` as returned by ``run_backtest``.

    NOTE(review): the roles of ``lm``/``ls`` are taken from the parameter
    names and the original docstring ("used for spread's mean and std") --
    confirm this matches the intended strategy.
    """
    frame = spread.copy()
    frame["mean"] = frame["spread"].rolling(lm).mean()
    frame["std"] = frame["spread"].rolling(ls).std()
    frame["zscore"] = (frame["spread"] - frame["mean"]) / frame["std"]
    frame["thres"] = thres
    frame["sell_thres"] = s_thres
    # Drop the warm-up rows whose rolling statistics are still NaN.
    return run_backtest(frame.dropna(), fee)
    
def run_backtest(spread, fee=0.003):
    """Run the z-score pair strategy over ``spread`` row by row.

    ``spread`` must provide the columns ``zscore``, ``thres``, ``sell_thres``,
    ``A``, ``Ah``, ``Al``, ``B``, ``Bh`` and ``Bl``.  Rows are always read by
    position, whatever the frame's index looks like.

    Rules, as implemented below:

    * While flat, a trade is opened when ``z > thres`` or ``z < -thres``.
      Half of the current ``total`` goes to each leg and ``fee`` is taken off
      each leg's quantity.
    * State ``"A"`` (opened on ``z < -thres``) is closed when
      ``z > sell_thres``; state ``"B"`` (opened on ``z > thres``) is closed
      when ``z < -sell_thres``.  ``fee`` is charged again on each leg's
      closing notional.
    * A trade may be closed on the same row it was opened.
    * Fill prices come from ``get_a_b``: halfway between the close and the
      bar's low or high.  They are not random.

    Parameters
    ----------
    spread : pandas.DataFrame
        Frame with the columns listed above and no NaNs in them.
    fee : float, default 0.003
        Proportional fee applied on entry and on exit.

    Returns
    -------
    tuple
        ``(total, trades)``: final equity (starting from 100.) and the number
        of trades opened.
    """
    total = 100.
    price_a = None    # [entry price, quantity] of the A leg while in a trade
    price_b = None    # [entry price, quantity] of the B leg while in a trade
    long = None       # Values: None, "A", "B"
    trades = 0

    # Extract each column once as a NumPy array.  Integer keys on a Series
    # (``spread.zscore[i]``) are label-based on an integer index and only a
    # deprecated positional fallback otherwise; arrays are always positional
    # and far cheaper to read inside a Python loop.
    zscore = spread["zscore"].to_numpy()
    entry_band = spread["thres"].to_numpy()
    exit_band = spread["sell_thres"].to_numpy()
    a_low, a_close, a_high = (spread[c].to_numpy() for c in ("Al", "A", "Ah"))
    b_low, b_close, b_high = (spread[c].to_numpy() for c in ("Bl", "B", "Bh"))

    for i, (z, thres, sell_thres) in enumerate(zip(zscore, entry_band, exit_band)):
        if long is None:  # Looking to open
            if z > thres or z < -thres:
                trades += 1
                al, ah, bl, bh = get_a_b(a_low[i], a_close[i], a_high[i],
                                         b_low[i], b_close[i], b_high[i])
                stake = total / 2
                if z > thres:
                    price_a = [ah, (stake / ah) * (1. - fee)]
                    price_b = [bl, (stake / bl) * (1. - fee)]
                    long = "B"
                elif z < -thres:
                    price_a = [al, (stake / al) * (1. - fee)]
                    price_b = [bh, (stake / bh) * (1. - fee)]
                    long = "A"

        # Deliberately not an ``else``: a trade opened above can be closed on
        # the same row.
        if long == "A" and z > sell_thres:  # Liquidate positions
            al, ah, bl, bh = get_a_b(a_low[i], a_close[i], a_high[i],
                                     b_low[i], b_close[i], b_high[i])
            # A leg settles as (entry - exit), B leg as (exit - entry).
            total += (price_a[0] - ah) * price_a[1] - (ah * price_a[1] * fee)
            total += (bl - price_b[0]) * price_b[1] - (bl * price_b[1] * fee)
            price_a, price_b, long = None, None, None
        elif long == "B" and z < -sell_thres:  # Liquidate positions
            al, ah, bl, bh = get_a_b(a_low[i], a_close[i], a_high[i],
                                     b_low[i], b_close[i], b_high[i])
            # A leg settles as (exit - entry), B leg as (entry - exit).
            total += (al - price_a[0]) * price_a[1] - (al * price_a[1] * fee)
            total += (price_b[0] - bh) * price_b[1] - (bh * price_b[1] * fee)
            price_a, price_b, long = None, None, None
    return total, trades
    
def get_a_b(al, ac, ah, bl, bc, bh):
    """Return fill prices halfway between each close and its bar extremes.

    Arguments are low, close, high for asset A, then low, close, high for
    asset B.  The result is the 4-tuple
    ``(A below close, A above close, B below close, B above close)``, where
    each value sits half the absolute close-to-low (or close-to-high)
    distance away from the close.
    """
    a_down = abs(ac - al) / 2
    a_up = abs(ac - ah) / 2
    b_down = abs(bc - bl) / 2
    b_up = abs(bc - bh) / 2
    return ac - a_down, ac + a_up, bc - b_down, bc + b_up

def bbands(price, thres, numsd, window):
    """Return a single rolling band of ``price``.

    Parameters
    ----------
    price : pandas.Series
        Series the band is computed on.
    thres : bool
        ``True`` selects the entry band ``mean + numsd * std``;
        ``False`` selects the exit band ``-(mean - numsd * std)``.
    numsd : float
        Band width in rolling standard deviations.  ``0`` short-circuits to
        the scalar ``0`` for either band.
    window : int
        Rolling window length, in rows, for the mean and the population
        standard deviation (``ddof=0``).

    Returns
    -------
    pandas.Series or int
        The requested band, NaN until ``window`` rows are available, or the
        scalar ``0`` when ``numsd == 0``.
    """
    # Decide the trivial case first so no rolling statistics are computed
    # only to be thrown away.
    if numsd == 0:
        return 0

    rolling = price.rolling(window)
    ave = rolling.mean()
    width = rolling.std(ddof=0) * numsd
    if thres:
        return ave + width
    return -(ave - width)

In [19]:
# l              1000.000000
# thres             1.300000
# sell_thres        1.500000
# trades         1732.000000
# result        10120.093557
# Name: 151, dtype: float64

In [34]:
# Brute-force grid search.  For each hedge-ratio lookback, z-score lookback
# ``l``, band window and pair of band widths, run the backtest and record the
# parameters and outcome in ``d`` (one list per column).
d = {"lookback":[], "l":[], "window":[], "thres":[], "sell_thres":[], "trades":[], "result":[]}
for lookback in [50_000, 100_000, 200_000]:
    hr = get_hr(lookback)
    for l in [1000, 2000, 4000, 8000]: 
        spread = get_spread(hr, l)
        for window in [10_000, 50_000, 100_000, 200_000]:
            for thres in [0.5, 1, 1.5]:
                spread['thres'] = bbands(spread.zscore, True, thres, window)
                for sell_thres in [-1, -0.5, 0, 0.5, 1, 1.5]:
                    # Reject the combination before computing its exit band:
                    # the band is a rolling pass over the whole series and
                    # would be discarded by the skip anyway.
                    if sell_thres <= -thres:
                        continue
                    spread['sell_thres'] = bbands(spread.zscore, False, sell_thres, window)
                    print(f"Now doing lookback {lookback} \nl: {l}, window: {window}, \nthres: {thres}, sell_thres: {sell_thres}, safe to kill kernel")
                    # dropna() removes the warm-up rows where a band is NaN.
                    result, trades = run_backtest(spread.dropna())
                    
                    # The appends below must all happen together, otherwise
                    # the columns of ``d`` end up with different lengths.
                    print(f"DON'T KILL KERNEL, ADDING TRADES")
                    time.sleep(1)
                    d["lookback"].append(lookback)
                    d["window"].append(window)
                    d["trades"].append(trades)
                    d["result"].append(result)
                    d["l"].append(l)
                    d["thres"].append(thres)
                    d["sell_thres"].append(sell_thres)
                    clear_output()
                    print("Best so far:")
                    print(pd.DataFrame(d).sort_values("result", ascending=False).iloc[0:3])

Best so far:
        l  thres  sell_thres  trades        result
331  2000    0.5         0.5    1527  51830.954325
316  2000    0.5         0.5    1569  49442.803308
542  2000    0.5         1.0     859  44280.875482


In [36]:
# Twenty best parameter combinations by final equity.
pd.DataFrame(d).sort_values("result", ascending=False).head(20)

Unnamed: 0,l,thres,sell_thres,trades,result
331,2000,0.5,0.5,1527,51830.954325
316,2000,0.5,0.5,1569,49442.803308
542,2000,0.5,1.0,859,44280.875482
6,1000,1.0,0.5,2008,40438.889188
487,1000,1.0,1.0,1382,34842.195393
547,2000,1.0,1.0,718,32739.726964
247,1000,1.0,1.0,1449,32462.04126
242,1000,0.5,1.0,1654,31061.199428
493,1000,1.5,1.0,1111,30440.464173
616,4000,0.5,0.5,762,29332.073276


In [None]:
# Finer grid search for a single hedge-ratio lookback.  Parameters and
# outcome of every run are recorded in ``d`` (one list per column).
d = {"lookback":[], "l":[], "window":[], "thres":[], "sell_thres":[], "trades":[], "result":[]}
for lookback in [100_000]:
    hr = get_hr(lookback)
    for l in [1000, 1500, 2000]: 
        spread = get_spread(hr, l)
        for window in [10_000, 50_000, 100_000, 200_000]:
            for thres in [0.5, 0.75, 1.]:
                spread['thres'] = bbands(spread.zscore, True, thres, window)
                for sell_thres in [-0.5, 0, 0.5, 1.]:
                    # Reject the combination before computing its exit band:
                    # the band is a rolling pass over the whole series and
                    # would be discarded by the skip anyway.
                    if sell_thres <= -thres:
                        continue
                    spread['sell_thres'] = bbands(spread.zscore, False, sell_thres, window)
                    print(f"Now doing lookback {lookback} \nl: {l}, window: {window}, \nthres: {thres}, sell_thres: {sell_thres}, safe to kill kernel")
                    # dropna() removes the warm-up rows where a band is NaN.
                    result, trades = run_backtest(spread.dropna())
                    
                    # The appends below must all happen together, otherwise
                    # the columns of ``d`` end up with different lengths.
                    print(f"DON'T KILL KERNEL, ADDING TRADES")
                    time.sleep(1)
                    d["lookback"].append(lookback)
                    d["window"].append(window)
                    d["trades"].append(trades)
                    d["result"].append(result)
                    d["l"].append(l)
                    d["thres"].append(thres)
                    d["sell_thres"].append(sell_thres)
                    clear_output()
                    print("Best so far:")
                    print(pd.DataFrame(d).sort_values("result", ascending=False).iloc[0:10])

Best so far:
    lookback     l  window  thres  sell_thres  trades        result
56    100000  1500   50000   0.50         0.5    2089  63402.421860
67    100000  1500  100000   0.50         0.5    2035  58583.437243
60    100000  1500   50000   0.75         0.5    1722  37098.847218
6     100000  1000   10000   0.75         1.0    1555  33072.064840
10    100000  1000   10000   1.00         1.0    1449  32462.041260
2     100000  1000   10000   0.50         1.0    1654  31061.199428
71    100000  1500  100000   0.75         0.5    1654  30953.446524
50    100000  1500   10000   0.75         1.0    1075  30927.512399
46    100000  1500   10000   0.50         1.0    1148  30489.351797
45    100000  1500   10000   0.50         0.5    2134  28240.086887
Now doing lookback 100000 
l: 1500, window: 100000, 
thres: 1.0, sell_thres: -0.5, safe to kill kernel


In [23]:
df = pd.DataFrame(d)

In [24]:
df.sort_values("result", ascending=False).head(20)

Unnamed: 0,l,thres,sell_thres,trades,result
135,1800,0.6,0.8,1950,3179701.0
141,1800,0.7,0.8,1919,3099943.0
140,1800,0.7,0.7,2188,2942029.0
134,1800,0.6,0.7,2252,2883720.0
181,2000,0.5,0.6,2498,2835383.0
129,1800,0.5,0.8,1960,2771383.0
128,1800,0.5,0.7,2298,2727635.0
175,2000,0.4,0.6,2548,2561568.0
123,1800,0.4,0.8,1972,2531659.0
117,1800,0.3,0.8,1981,2411582.0


In [29]:
# Grid search over separate lookbacks for the spread's mean (lm) and std (ls)
# with constant thresholds.  Parameters and outcome go into ``d``.
d = {"lm":[], "ls":[], "thres":[], "sell_thres":[], "trades":[], "result":[]}
for lm in [1600, 1700, 1800, 1900, 2000]:
    for ls in [1600, 1700, 1800, 1900, 2000]:
        for thres in [0.6, 0.7, 0.8]:
            for sell_thres in [0.6, 0.7, 0.8]:
                if sell_thres <= -thres:
                    continue
                print(f"Now doing lm: {lm}, ls: {ls}, thres: {thres}, sell_thres: {sell_thres}, safe to kill kernel")
                # get_result's first parameter is the spread frame; the call
                # used to omit it, which shifted every argument by one place.
                # NOTE(review): this uses the module-level ``spread`` left by
                # an earlier cell -- confirm it is the frame intended here.
                result, trades = get_result(spread, lm, ls, thres, sell_thres)
                print(f"DON'T KILL KERNEL, ADDING TRADES")
                time.sleep(1)
                d["trades"].append(trades)
                d["result"].append(result)
                d["lm"].append(lm)
                d["ls"].append(ls)
                d["thres"].append(thres)
                d["sell_thres"].append(sell_thres)
                clear_output()
                print(pd.DataFrame(d).sort_values("result", ascending=False).iloc[0])


lm            1.800000e+03
ls            1.900000e+03
thres         6.000000e-01
sell_thres    7.000000e-01
trades        2.187000e+03
result        3.226238e+06
Name: 118, dtype: float64


In [30]:
df = pd.DataFrame(d)

In [31]:
df.sort_values("result", ascending=False).head(20)

Unnamed: 0,lm,ls,thres,sell_thres,trades,result
118,1800,1900,0.6,0.7,2187,3226238.0
110,1800,1800,0.6,0.8,1950,3179701.0
113,1800,1800,0.7,0.8,1919,3099943.0
127,1800,2000,0.6,0.7,2110,2980127.0
112,1800,1800,0.7,0.7,2188,2942029.0
109,1800,1800,0.6,0.7,2252,2883720.0
95,1800,1600,0.7,0.8,2055,2877989.0
121,1800,1900,0.7,0.7,2113,2836844.0
74,1700,1900,0.6,0.8,1924,2686988.0
172,1900,2000,0.6,0.7,2061,2661213.0
