# Probabilistic Signal Estimator

The question we want to answer is the following: starting from a given moment, how long does it take for the price to gain +2%?

In [1]:
from hypecommons import *
from hypecommons import plot as hyplot

from datetime import datetime, timedelta

In [2]:
# Trading pair: base asset and quote currency, concatenated into the symbol
# that is passed to download_history_fast.
CRYPTO = 'BNB'
FIAT = 'BUSD'
SYMBOL = CRYPTO + FIAT

# Passed as `freq` to download_history_fast and used as the divisor of the
# per-minute volume/trade features (presumably the candle length in minutes — TODO confirm).
FREQ = 1
# Start of the downloaded history (looks like YYYYMMDDhhmmss — TODO confirm).
START = '20210101000000'
# Number of days of history requested.
DAYS = 150
# Price multiplier to reach: 1.02 means +2% over the starting close.
TARGET = 1.02

In [3]:
from IPython.display import display, HTML
display(HTML(f"<center><img style='display: flex;' src='../img/symbols/{CRYPTO}.png'></center>"))

In [4]:
df = download_history_fast(SYMBOL, START, freq=FREQ, days=DAYS)

OSError: [Errno 22] Invalid argument

In [None]:
print(str(df.iloc[0]) + '\n\n' + str(df.iloc[-1]))

As the goal is to reach a given price, the `high` value of each candle is used. Binance allows users to place orders with a limit price in advance, so this behaviour can also be reproduced in a real-time setting.

In [None]:
def minutes_to_target(df, start, target):
    """Find the first later row whose `high` reaches `target` times the starting close.

    Args:
        df: frame with `close` and `high` columns and a datetime-like index.
        start: positional index of the starting row.
        target: multiplier applied to the starting close (e.g. 1.02 for +2%).

    Returns:
        `(timestamp, minutes)` of the first hit strictly after the starting
        row, with minutes rounded to the nearest integer, or `(None, None)`
        when the target is never reached.
    """
    t0 = df.index[start]
    goal = target * df['close'].iloc[start]
    reached = (df.index > t0) & (df['high'] >= goal)
    hits = df[reached].index
    if len(hits) == 0:
        # it never achieves the target
        return None, None
    t1 = hits[0]
    elapsed_minutes = (t1 - t0).total_seconds() / 60.0
    return t1, round(elapsed_minutes)

In [None]:
# Sanity check on the very first row: when is the target reached, and what is
# the actual high/close ratio at that moment?
x0 = minutes_to_target(df, 0, target=TARGET)
print(x0)
# Look the hit candle up by its timestamp. Converting the elapsed minutes into
# a row position (minutes / FREQ) is only right when the history has no gaps,
# and it fails with a TypeError when the target is never reached (x0 == (None, None)).
df.loc[x0[0], 'high'] / df.iloc[0]['close'] if x0[0] is not None else None

In [None]:
# Sample every 100th row and record how many minutes it takes to reach the
# target; rows that never reach it are left out.
rng = range(0, len(df), 100)

mins2tgt = []
for i in tqdm(rng, desc='Testing minutes to target...'):
    t, y = minutes_to_target(df, i, target=TARGET)
    if not y:
        continue
    mins2tgt.append(y)

In [None]:
# Share of sampled moments that reach the target within 600 minutes (10 hours).
df_m = pd.DataFrame(mins2tgt)
within_10h = df_m[0] <= 600
pos = int(within_10h.sum())
total = len(df_m)
print(f"In {pos} cases out of {total} (~{pos/total*100:.0f}%), 2% gain is reached after less than 10 hours.")

In [None]:
import matplotlib.pyplot as plt

# Default size and resolution for the figures drawn from here on.
plt.rcParams.update({'figure.figsize': (7,5), 'figure.dpi': 100})

n_bins = 200
# plt.hist returns (counts per bin, bin edges, patch objects). Despite their
# names, hist_x holds the counts, hist_y the bin edges and hist_z the patches.
hist_x, hist_y, hist_z = plt.hist(mins2tgt, bins=n_bins)

print(f"First 10 bins: {hist_x[:10]}")

In [None]:
# hist_y holds the bin edges returned by plt.hist, so the real bin width and
# the upper bound of each bin are read from it instead of assuming that the
# histogram spans exactly 60000 minutes.
bin_minutes = hist_y[1] - hist_y[0]
print(f"Each bin represents around {bin_minutes:.0f} minutes = {bin_minutes/60:.1f} hours of time,\n"
      f"which means {sum(hist_x[:1])/len(mins2tgt)*100:.1f}% of the time we reach +2% gain within {hist_y[1]/60:.1f} hours,\n"
      f"{sum(hist_x[:2])/len(mins2tgt)*100:.1f}% of the time within {hist_y[2]/60:.1f} hours, and so on.")


In [None]:
def ema(data, n):
    """Exponential moving average of `data` with smoothing factor 2 / (1 + n).

    Uses the recursive (non-adjusted) form of pandas' `ewm`.
    """
    smoothing = 2 / (1 + n)
    weighted = data.ewm(alpha=smoothing, adjust=False)
    return weighted.mean()

def normalise(df_orig, freq=None):
    """Return a copy of `df_orig` enriched with close-relative features.

    For every column present in the input:
      * `volume` and `trades` get rolling means over 1, 3 and 9 rows, divided
        by `freq` (columns `<field>_pm_ma<window>`);
      * every other column except `close` is divided by `close`
        (columns `<field>_norm`).
    In addition, the 50/200-row simple moving averages and the 12/26 EMAs of
    `close` are added, each divided by `close`.

    Args:
        df_orig: frame with at least a `close` column; it is not modified.
        freq: divisor for the volume/trades rolling means. Defaults to the
            notebook-level `FREQ`, which is what the function always used;
            pass it explicitly when the data does not match the current
            global setting.

    Returns:
        The enriched copy. The original columns are kept.
    """
    if freq is None:
        freq = FREQ

    df = df_orig.copy()

    # Snapshot the column list first: the loop adds columns to `df`.
    for field in list(df):
        if field in ('volume', 'trades'):
            for ma in (1, 3, 9):
                df[f"{field}_pm_ma{ma}"] = df[field].rolling(window=ma).mean() / freq
        elif field != 'close':
            df[f"{field}_norm"] = df[field] / df['close']

    for x in (50, 200):
        df[f"close_ma{x}_norm"] = df['close'].rolling(window=x).mean() / df['close']

    for x in (12, 26):
        df[f"close_ema{x}_norm"] = ema(df['close'], x) / df['close']

    return df

In [None]:
df_n = normalise(df)
df_n

In [None]:
from joblib import Parallel, delayed  

def compute_mins(i):
    """Minutes needed to reach TARGET from row `i`, or 120000 when it is never reached."""
    _hit, minutes = minutes_to_target(df, i, target=TARGET)
    return minutes if minutes else 120000

# One job per row of the history, spread over 16 workers.
row_positions = tqdm(range(len(df)))
mins2tgt = Parallel(n_jobs=16)(delayed(compute_mins)(i) for i in row_positions)

df_n['mins2tgt'] = mins2tgt

In [None]:
df.to_csv(f"{SYMBOL}-{START}-{FREQ}-{DAYS}-probabilistic-raw-{TARGET}.csv")
df_n.to_csv(f"{SYMBOL}-{START}-{FREQ}-{DAYS}-probabilistic-norm-{TARGET}.csv")

In [None]:
df_n.dropna(inplace=True)
df_n

In [None]:
df_n[df_n['mins2tgt'] == 120000]

In [None]:
signal_cat = ['VERY HIGH', 'HIGH', 'NORMAL', 'LOW', 'VERY LOW']
signal_thr = [0.2, 0.4, 0.6, 0.8, 1.0]

def find_quantiles(df_q):
    """Pair each signal category with its quantile threshold of `mins2tgt`.

    Returns one dict per entry of `signal_cat` / `signal_thr`, with keys
    `cat`, `quantile` and `value` (the `mins2tgt` value at that quantile).
    """
    return [
        {'cat': c, 'quantile': q, 'value': df_q['mins2tgt'].quantile(q)}
        for c, q in zip(signal_cat, signal_thr)
    ]

quantiles = find_quantiles(df_n)
quantiles

In [None]:
print(f"Everything higher than {quantiles[-2]['value']} will be treated as VERY LOW, therefore we can remove\n"
      f"the last rows within such interval.")

In [None]:
df_n[df_n.index >= df_n.index[-1]]

In [None]:
remove_after_this = df_n.index[-1] - timedelta(minutes=quantiles[-2]['value'])
df_ml = df_n[df_n.index < remove_after_this].copy()
df_ml

In [None]:
df_ml.to_csv(f"{SYMBOL}-{START}-{FREQ}-{DAYS}-probabilistic-ml-{TARGET}.csv")

## Train model

A few data visualisations...

In [None]:
plot(plt.plot, df_n.iloc[2000:4000], ['mins2tgt'])

In [None]:
plot(plt.plot, df.dropna().iloc[2000:4000], ['high'])

In [None]:
# p = 3009
# print(df.dropna().iloc[p])

# print(f"\ntarget: {df.dropna().iloc[p]['close'] * 1.02}")

# df[df.index == df.dropna().index[p] + timedelta(minutes=df.dropna().iloc[p]['mins2tgt'])]

In [None]:
df['mins2tgt'] = df_n['mins2tgt']

In [None]:
df_n

## Load from file

In [None]:
from hypecommons import *
from hypecommons import plot as hyplot

from datetime import datetime, timedelta

In [None]:
import pandas as pd

SYMBOL = 'BNBBUSD'
FREQ = 1
START = '20210101000000'
TARGET = 1.02
DAYS = 150

df = download_history_fast(SYMBOL, START, freq=FREQ, days=DAYS)

# Read the saved index from the first CSV column and parse it as datetimes in
# one step. Selecting the column by position works whether or not the saved
# index had a name; looking it up as 'Unnamed: 0' only works for an unnamed index.
df_ml = pd.read_csv(f"{SYMBOL}-{START}-{FREQ}-{DAYS}-probabilistic-ml-{TARGET}.csv",
                    index_col=0, parse_dates=True)
df_ml

### TODO this is where it should be

In [None]:
signal_cat = ['VERY HIGH', 'HIGH', 'NORMAL', 'LOW', 'VERY LOW']
signal_thr = [0.2, 0.4, 0.6, 0.8, 1.0]

def find_quantiles(df_q):
    """Compute the `mins2tgt` value at each configured quantile.

    Each returned dict holds the category name (`cat`), the quantile level
    (`quantile`) and the corresponding `mins2tgt` value (`value`), following
    the order of `signal_cat` / `signal_thr`.
    """
    levels = zip(signal_cat, signal_thr)
    return [{'cat': name, 'quantile': level, 'value': df_q['mins2tgt'].quantile(level)}
            for name, level in levels]

quantiles = find_quantiles(df_ml)
quantiles

In [None]:
df_ml['cat'] = None

def to_cat(x, bins=None):
    """Map a minutes-to-target value to its signal category.

    Args:
        x: minutes-to-target value.
        bins: list of dicts with `value` and `cat` keys, as produced by
            `find_quantiles`. Defaults to the notebook-level `quantiles`.

    Returns:
        The `cat` of the first bin whose `value` is >= `x`, or None when `x`
        matches no bin (NaN, or larger than every threshold). Previously this
        case raised UnboundLocalError.
    """
    if bins is None:
        bins = quantiles
    for q in bins:
        if x <= q['value']:
            return q['cat']
    return None

df_ml['cat'] = df_ml['mins2tgt'].map(to_cat)
df_ml

In [None]:
str(list(df_ml.columns))

In [None]:
feat_cols = ['open_norm', 'high_norm', 'low_norm', 'volume_pm_ma1', 'volume_pm_ma3', 
             'volume_pm_ma9', 'trades_pm_ma1', 'trades_pm_ma3', 'trades_pm_ma9', 
             'close_ma50_norm', 'close_ma200_norm', 'close_ema12_norm', 'close_ema26_norm']
df_ml_feats = df_ml[feat_cols]

## Dimensionality reduction and visualisation

In [None]:
# # apply dimensionality reduction

# # from sklearn.decomposition import PCA
# # pca = PCA(n_components=2)
# # dimred_result = pca.fit_transform(df_ml_feats)

# from sklearn.manifold import TSNE
# tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=250)
# dimred_result = tsne.fit_transform(df_ml_feats)

# import numpy as np
# import math
# import matplotlib.pyplot as plt

# plt.figure(figsize=(16,10))

# to_colour = {}
# for q in quantiles:
#     to_colour[q['cat']] = len(to_colour)

# colours = [to_colour[x] for x in list(df_ml['cat'])]

# plt.scatter(dimred_result[:,0], dimred_result[:,1], c=colours)
# plt.legend(loc="upper left")

In [None]:
len(df_ml_feats)

## Multiclass model training

In [None]:
# import numpy as np
# from sklearn.pipeline import make_pipeline
# from sklearn.preprocessing import StandardScaler
# from sklearn.svm import SVC

# TRAINING_SIZE = 100000

# clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
# clf.fit(df_ml_feats[:TRAINING_SIZE], df_ml['cat'][:TRAINING_SIZE])

In [None]:
# from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

# def evaluate():
#     # evaluate on last N examples
#     N = 20000
#     labels = list(reversed(signal_cat))
#     test = df_ml_feats.tail(N)
#     actual = df_ml['cat'].tail(N)
#     pred = clf.predict(test)
#     cm = confusion_matrix(actual, pred, labels=labels)
#     prf = precision_recall_fscore_support(actual, pred, labels=labels)
#     print(f"{cm}\n")
#     T = 10
#     print("          \t" + "\t".join(labels).expandtabs(T))
#     print("Precision:\t" + "\t".join([f"{x:.3f}" for x in prf[0]]).expandtabs(T))
#     print("Recall:   \t" + "\t".join([f"{x:.3f}" for x in prf[1]]).expandtabs(T))
#     print("F-Score:  \t" + "\t".join([f"{x:.3f}" for x in prf[2]]).expandtabs(T))

# evaluate()

In [None]:
# df_ml_feats

In [None]:
# df_ml

In [None]:
# df_plot

## Signal detection

Each dot is a moment between January 1st and June 1st, 2021. The blue dots are moments with a buy signal, i.e. moments after which the price went up by 2% in less than 30 hours (`VERY HIGH`, `HIGH`, `NORMAL`, and `LOW` categories). My aim is to detect them automatically.

In [None]:
# filter out y=120k (which means 'never reaches +2%') and y>40k (exceptionally bad points)
# df_plot = df_ml[df_ml['mins2tgt'] < 40000].copy()
df_plot = df_ml.copy()

good_signals = ['VERY HIGH', 'HIGH', 'NORMAL', 'LOW']

In [None]:
import matplotlib.pyplot as plt

# The buy/idle split does not depend on the feature, so compute it once.
is_buy = df_plot['cat'].isin(good_signals)

for fc in feat_cols:
    print(fc)
    # Colours are set explicitly so that they match the description (red = idle,
    # blue = buy) instead of depending on the active colour cycle.
    # red dots = idle signals
    plt.scatter(df_plot.loc[~is_buy, fc], df_plot.loc[~is_buy, 'mins2tgt'], c='red')
    # blue dots = buy signals
    plt.scatter(df_plot.loc[is_buy, fc], df_plot.loc[is_buy, 'mins2tgt'], c='blue')
    plt.show()

In [None]:
# Bring in the raw close price only when it is missing. The frames produced by
# normalise() keep 'close', and joining an already-present column raises
# "columns overlap"; the guard also makes this cell safe to re-run.
if 'close' not in df_plot.columns:
    df_plot = df_plot.join(df[['close']])

In [None]:
def rsi(data, n):
    """Relative Strength Index of `data`, smoothed with an EWMA of span `n`.

    Gains and losses are the positive and negative parts of the row-to-row
    difference; the result is 100 - 100 / (1 + mean_gain / mean_loss).
    """
    change = data.diff()

    # Split the changes into gains (negatives zeroed) and loss magnitudes (positives zeroed).
    gains = change.clip(lower=0)
    losses = change.clip(upper=0).abs()

    # Exponentially weighted averages of both sides.
    mean_gain = gains.ewm(span=n).mean()
    mean_loss = losses.ewm(span=n).mean()

    relative_strength = mean_gain / mean_loss
    return 100.0 - (100.0 / (1.0 + relative_strength))

df_plot['rsi14'] = rsi(df_plot['close'], 14)

# The buy/idle split does not depend on the feature, so compute it once.
is_buy = df_plot['cat'].isin(good_signals)

for fc in ['rsi14']:
    print(fc)
    # Colours are set explicitly so that they match the description (red = idle,
    # blue = buy) instead of depending on the active colour cycle.
    # red dots = idle signals
    plt.scatter(df_plot.loc[~is_buy, fc], df_plot.loc[~is_buy, 'mins2tgt'], c='red')
    # blue dots = buy signals
    plt.scatter(df_plot.loc[is_buy, fc], df_plot.loc[is_buy, 'mins2tgt'], c='blue')
    plt.show()

Check what happened right before the rightmost blue moments in the chart.

In [None]:
# for t0 in df_ml[df_ml['close_ema26_norm'] > 1.05].index:
#     print(f"{df.loc[t0]}\n")
#     plot(plt.plot, df[df.index <= t0].tail(26), ['close', 'high'])
# plt.show()

## Threshold study

In [None]:
# relevants = len(df_ml['cat'].isin(good_signals))

def threshold_study(field, val_min, val_max, target, step=0.005):
    """Plot precision or daily support of the rule `field >= threshold`.

    Args:
        field: column of `df_ml` to threshold.
        val_min, val_max: threshold range (`val_max` excluded, as in np.arange).
        target: 'p' plots the precision (good / (good + bad), 0 when nothing
            is selected); any other value plots the number of good signals
            per day (good / DAYS). With 's' the y axis is logarithmic.
        step: distance between consecutive thresholds (previously fixed at 0.005).
    """
    # Loop-invariant pieces, computed once instead of once per threshold.
    is_good = df_ml['cat'].isin(good_signals)
    values = df_ml[field]

    thrs = np.arange(val_min, val_max, step)
    scores = []
    for thr in thrs:
        selected = values >= thr
        goods = int((selected & is_good).sum())
        bads = int((selected & ~is_good).sum())
        if target == 'p':
            scores.append(goods / (goods + bads) if goods + bads > 0 else 0)
        else:
            scores.append(goods / DAYS)

    plt.plot(thrs, scores, label=field)
    if target == 's':
        plt.yscale('log')

In [None]:
threshold_study('close_ema26_norm', 1.01, 1.2, 'p')
threshold_study('close_ma200_norm', 1.01, 1.2, 'p')
plt.legend(loc="best")
plt.title(f"Precision for each field")
plt.show()

threshold_study('close_ema26_norm', 1.01, 1.2, 's')
threshold_study('close_ma200_norm', 1.01, 1.2, 's')
plt.legend(loc="best")
plt.title(f"Support per day for each field")
plt.show()

## Binary model training

In [None]:
df_ml_feats_subset = df_ml_feats[['close_ema26_norm', 'close_ma200_norm']].copy()
df_ml_feats_binary = pd.DataFrame(df_ml['cat'].isin(good_signals))

In [None]:
# import numpy as np
# from sklearn.pipeline import make_pipeline
# from sklearn.preprocessing import StandardScaler
# from sklearn.svm import SVC

# N = 100000
# clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
# clf.fit(df_ml_feats_subset.head(N), df_ml_feats_binary.head(N))

In [None]:
# x_min, x_max = 0.5, 1.5
# y_min, y_max = 0.5, 1.5
# h = .005  # step size in the mesh
# xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
#                      np.arange(y_min, y_max, h))

# # Plot the decision boundary. For that, we will assign a color to each
# # point in the mesh [x_min, x_max]x[y_min, y_max].
# Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

# # Put the result into a color plot
# Z = Z.reshape(xx.shape)
# plt.figure(figsize=(16,10))
# plt.contourf(xx, yy, 1-Z, cmap=plt.cm.coolwarm)

# #     # Plot also the training points
# #     plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm)
# plt.xlabel('close_ema26_norm')
# plt.ylabel('close_ma200_norm')
# plt.xlim(xx.min(), xx.max())
# plt.ylim(yy.min(), yy.max())
# plt.title('SVC')

# plt.show()

In [None]:
from collections import Counter

# manually set thresholds
thr_x, thr_y = 1.02, 1.14

# A row is predicted positive when either feature reaches its threshold. The
# comparison with the ground truth is positional (both frames derive from
# df_ml) and vectorised instead of a Python-level iterrows loop.
predicted = ((df_ml_feats_subset['close_ema26_norm'] >= thr_x)
             | (df_ml_feats_subset['close_ma200_norm'] >= thr_y)).to_numpy()
actual = df_ml_feats_binary['cat'].to_numpy(dtype=bool)

true_positive = predicted & actual
tp = int(true_positive.sum())
fp = int((predicted & ~actual).sum())

df_ml_feats_subset['tp'] = true_positive
df_tp = df_ml_feats_subset[df_ml_feats_subset['tp']].copy()
df_tp['date'] = pd.to_datetime(df_tp.index)

# Precision is undefined when nothing is predicted positive; report 0 instead
# of failing with ZeroDivisionError.
evaluation = {'pre': tp / (tp + fp) if tp + fp > 0 else 0.0, 'tp': tp, 'fp': fp, 'tp/day': tp/DAYS}

print(evaluation)

# True positives per calendar day, keyed by (year, month, day).
c = Counter((d.year, d.month, d.day) for d in df_tp['date'])

print(f"Unique days with prospect of +2%: {len(c)}")
print(f"Total predicted change: {(1.02 ** len(c) - 1) * evaluation['pre']:+.1%}")

start = pd.to_datetime(df_ml_feats_subset.index[0])
end = pd.to_datetime(df_ml_feats_subset.index[-1])

# One record per calendar day from the first to the last evaluated day, both
# included (stepping in 24h increments from `start` could skip the final day).
datedist = [{'date': datetime(t.year, t.month, t.day), 'tp': c[(t.year, t.month, t.day)]}
            for t in pd.date_range(start.normalize(), end.normalize(), freq='D')]

df_datedist = pd.DataFrame.from_records(datedist, index='date')

plot(plt.bar, df_datedist, ['tp'], bar_size=.9)
plt.show()

plot(plt.plot, df[::60], ['close'])

In [None]:
# def is_good_signal(X, Y, decfun=False):
#     Tx = -0.809
#     Ty = -0.234
#     F = (12*(X+Tx) - 3*(Y+Ty))**2 + (X+Tx) + (Y+Ty) - 1
#     return F if decfun else F >= 0

from math import cos, sin, pi

def is_good_signal(X, Y, decfun=False):
    """Decide whether the point (X, Y) lies on or outside a rotated ellipse.

    The ellipse is centred at (0.998, 0.995), has semi-axes 0.012 and 0.046
    and is rotated by -pi/14.

    Returns:
        The decision value F (negative inside the ellipse, zero on it) when
        `decfun` is true, otherwise the boolean `F >= 0`.
    """
    semi_a, semi_b = .012, .046
    angle = -pi/14
    dx = X - .998
    dy = Y - .995
    # Coordinates in the rotated frame of the ellipse.
    along = dx * cos(angle) + dy * sin(angle)
    across = dx * sin(angle) - dy * cos(angle)
    F = along ** 2 / semi_a ** 2 + across ** 2 / semi_b ** 2 - 1
    if decfun:
        return F
    return F >= 0

plt.figure(figsize=(12, 12))

bad_signals = ['VERY LOW']

palette = plt.rcParams['axes.prop_cycle'].by_key()['color']  
to_col = {1: good_signals, 0: bad_signals}

delta = 0.001
xy_min, xy_max = 0.9, 1.1
xrange = np.arange(xy_min, xy_max, delta)
yrange = np.arange(xy_min, xy_max, delta)
X, Y = np.meshgrid(xrange,yrange)

F = is_good_signal(X, Y, decfun=True)
plt.contour(X, Y, F, [0])

for c, cl in to_col.items():
    selection = df_ml['cat'].isin(cl)
    plt.scatter(df_ml_feats[selection]['close_ema26_norm'], 
                df_ml_feats[selection]['close_ma200_norm'], 
                s=3, c=palette[c], label=cl)

plt.legend(loc="upper left")
plt.title("close_ema26_norm vs close_ma200_norm")
plt.gca().set_xlim([xy_min, xy_max])
plt.gca().set_ylim([xy_min, xy_max])
plt.show()

In [None]:
# Predictions of the ellipse rule for every row. is_good_signal only uses
# arithmetic on its inputs, so it is applied to the whole columns at once
# instead of row by row. The comparison with the ground truth is positional
# (both frames derive from df_ml).
predicted = is_good_signal(df_ml_feats_subset['close_ema26_norm'],
                           df_ml_feats_subset['close_ma200_norm']).to_numpy()
actual = df_ml_feats_binary['cat'].to_numpy(dtype=bool)

true_positive = predicted & actual
tp = int(true_positive.sum())
fp = int((predicted & ~actual).sum())

df_ml_feats_subset['tp'] = true_positive
df_tp = df_ml_feats_subset[df_ml_feats_subset['tp']].copy()
df_tp['date'] = pd.to_datetime(df_tp.index)

# Precision is undefined when nothing is predicted positive; report 0 instead
# of failing with ZeroDivisionError.
evaluation = {'pre': tp / (tp + fp) if tp + fp > 0 else 0.0, 'tp': tp, 'fp': fp, 'tp/day': tp/DAYS}

print(evaluation)

# True positives per calendar day, keyed by (year, month, day).
c = Counter((d.year, d.month, d.day) for d in df_tp['date'])

print(f"Unique days with prospect of +2%: {len(c)}")
# print(f"Total predicted change: {(1.02 ** len(c) - 1) * evaluation['pre']:+.1%}")

start = pd.to_datetime(df_ml_feats_subset.index[0])
end = pd.to_datetime(df_ml_feats_subset.index[-1])

# One record per calendar day from the first to the last evaluated day, both
# included (stepping in 24h increments from `start` could skip the final day).
datedist = [{'date': datetime(t.year, t.month, t.day), 'tp': c[(t.year, t.month, t.day)]}
            for t in pd.date_range(start.normalize(), end.normalize(), freq='D')]

df_datedist = pd.DataFrame.from_records(datedist, index='date')

plot(plt.bar, df_datedist, ['tp'], bar_size=.9)
plt.show()

plot(plt.plot, df[::60], ['close'])

## Simulation & stop loss optimisation

In [None]:
import pandas as pd
from math import cos, sin, pi

# def is_good_signal(X, Y, decfun=False):
#     Tx = -0.809
#     Ty = -0.234
#     F = (12*(X+Tx) - 3*(Y+Ty))**2 + (X+Tx) + (Y+Ty) - 1
#     return F if decfun else F >= 0

def is_good_signal(X, Y, decfun=False):
    """Evaluate the rotated-ellipse decision rule at (X, Y).

    F is the implicit equation of an ellipse centred at (0.998, 0.995) with
    semi-axes 0.012 and 0.046, rotated by -pi/14: F < 0 inside, F >= 0 on or
    outside. Returns F itself when `decfun` is true, else `F >= 0`.
    """
    A, B = .012, .046
    alpha = -pi/14
    cos_a, sin_a = cos(alpha), sin(alpha)
    x, y = X - .998, Y - .995
    first_term = (x * cos_a + y * sin_a) ** 2 / A ** 2
    second_term = (x * sin_a - y * cos_a) ** 2 / B ** 2
    F = first_term + second_term - 1
    return F if decfun else F >= 0

SYMBOL = 'BNBBUSD'
FREQ = 1
START = '20210601000000'
TARGET = 1.02
DAYS = 153
# START = '20211001000000'
# TARGET = 1.02
# DAYS = 23

df_sim = download_history_fast(SYMBOL, START, freq=FREQ, days=DAYS)

In [None]:
df_sim.dropna(inplace=True)

df_sim = normalise(df_sim)

df_sim['pred'] = np.vectorize(is_good_signal)(df_sim['close_ema26_norm'], df_sim['close_ma200_norm'])

df_sim = df_sim.set_index(pd.to_datetime(df_sim.index))

# ------- simulation -------

STOP = 0.97
HRS_FROZEN = 0 # 0

fees = 0.001
stop_loss = 1 * STOP

In [None]:
# Pass the parameters configured in the previous cell explicitly; without
# them simulate() silently falls back to its own defaults (e.g. stop=0.94
# instead of STOP).
# NOTE: simulate() is defined in the "Test of time" section further down, so
# that cell has to be run before this one.
df_sim = simulate(df_sim, target=TARGET, stop=STOP, hrs_frozen=HRS_FROZEN, fees=fees)

In [None]:
df_sim[df_sim['action'].isin(['PROFIT', 'LOSS'])][
    ['close', 'close_ema26_norm', 'close_ma200_norm', 'stake', 'action']]

In [None]:
df_sim[df_sim['action'].isin(['PROFIT', 'LOSS'])]['action'].str.get_dummies().sum().plot(
    kind='pie', label='action', autopct='%1.0f%%', figsize=[5, 5])

In [None]:
plot(plt.plot, df_sim, ['stake'], fig_size=(16, 8))

In [None]:
plot(plt.plot, df_sim, ['close'], fig_size=(16, 8))

## Test of time

In [None]:
from math import cos, sin, pi
from datetime import timedelta

def ema(data, n):
    """Recursive (non-adjusted) exponential moving average with alpha = 2 / (1 + n)."""
    return data.ewm(alpha=2 / (1 + n), adjust=False).mean()

def normalise(df_orig, freq=None):
    """Return a copy of `df_orig` with close-relative feature columns added.

    * `volume` / `trades`: rolling means over 1, 3 and 9 rows divided by
      `freq` (`<field>_pm_ma<window>`).
    * every other input column except `close`: value divided by `close`
      (`<field>_norm`).
    * `close`: 50/200-row moving averages and 12/26 EMAs, each divided by
      `close` (`close_ma<x>_norm`, `close_ema<x>_norm`).

    Args:
        df_orig: frame with at least a `close` column; left untouched.
        freq: divisor for the volume/trades rolling means. Defaults to the
            notebook-level `FREQ`, which is what the function always used;
            pass it explicitly when the data does not match the current
            global setting.

    Returns:
        The enriched copy, original columns included.
    """
    if freq is None:
        freq = FREQ

    df = df_orig.copy()

    # Snapshot the column list first: the loop adds columns to `df`.
    for field in list(df):
        if field in ('volume', 'trades'):
            for ma in (1, 3, 9):
                df[f"{field}_pm_ma{ma}"] = df[field].rolling(window=ma).mean() / freq
        elif field != 'close':
            df[f"{field}_norm"] = df[field] / df['close']

    for x in (50, 200):
        df[f"close_ma{x}_norm"] = df['close'].rolling(window=x).mean() / df['close']

    for x in (12, 26):
        df[f"close_ema{x}_norm"] = ema(df['close'], x) / df['close']

    return df

def simulate(df_sim, target=1.02, stop=0.94, hrs_frozen=0, fees=0.001):
    """Replay a take-profit / stop-loss strategy row by row.

    Starting from a stake of 1, the simulation buys on the first row whose
    `pred` is truthy while not invested, then exits on the first later row
    whose `high` exceeds the take-profit price or whose `low` falls below
    the stop-loss price. After a loss, no trade is opened for `hrs_frozen`
    hours.

    Args:
        df_sim: frame with `pred`, `close`, `high` and `low` columns; its
            index values are compared with and added to `timedelta`s.
        target: take-profit multiplier applied to the entry close.
        stop: stop-loss multiplier applied to the entry close.
        hrs_frozen: hours to stay out of the market after a loss.
        fees: fraction of the stake deducted when buying.

    Returns:
        The same `df_sim` object, with two columns added in place: `stake`
        (stake value after each row) and `action` (one of 'BUY', 'PROFIT',
        'LOSS', 'FROZEN', '----').
    """
    # stake[0] is the initial capital; one value is appended per row.
    stake = [1]
    action = []
    invested = False
    frozen = False
    for i, row in tqdm(df_sim.iterrows(), desc='Simulating...', ncols=100, total=len(df_sim)):
        # Lift the freeze as soon as the cool-down after the last loss has elapsed.
        if frozen and i >= last_loss + timedelta(hours=hrs_frozen):
            frozen = False
        if frozen and i < last_loss + timedelta(hours=hrs_frozen):
            stake.append(stake[-1])
            action.append('FROZEN')
        elif row['pred'] and not invested:
            # Enter at this row's close.
            # NOTE(review): the fee is charged here only; no fee is deducted
            # on exit — confirm this is intended.
            stake.append(stake[-1] * (1-fees))
            invested = True
            take_profit = row['close'] * target
            stop_loss = row['close'] * stop
            action.append('BUY')
        elif invested:
            # While invested the stake is marked to market: it is rescaled from
            # the previous row's close to the exit price or to the current close.
            # NOTE(review): take-profit is tested before stop-loss, so a row
            # that touches both levels is counted as a PROFIT.
            if row['high'] > take_profit:
                invested = False
                action.append('PROFIT')
                stake.append(stake[-1] / prev_close * take_profit)
            elif row['low'] < stop_loss:
                invested = False
                action.append('LOSS')
                stake.append(stake[-1] / prev_close * stop_loss)
                # Start the cool-down period.
                last_loss = i
                frozen = True
            else:
                stake.append(stake[-1] / prev_close * row['close'])
                action.append('----')
        else:
            # Not invested and no signal: the stake is unchanged.
            stake.append(stake[-1])
            action.append('----')
        prev_close = row['close']
    # Drop the initial capital so that the list lines up with the rows.
    df_sim['stake'] = stake[1:]
    df_sim['action'] = action
    return df_sim

In [None]:
from hypecommons import *

def soru_viga_ellipse(X, Y, decfun=False):
    """Signal function: is (X, Y) on or outside the rotated ellipse?

    The ellipse is centred at (0.998, 0.995) with semi-axes 0.012 and 0.046,
    rotated by -pi/14. With `decfun` true the raw decision value F is
    returned (negative inside the ellipse); otherwise the boolean `F >= 0`.
    """
    centre_x, centre_y = .998, .995
    semi_a, semi_b = .012, .046
    alpha = -pi/14
    x = X - centre_x
    y = Y - centre_y
    # Position of the point in the rotated frame of the ellipse.
    u = x * cos(alpha) + y * sin(alpha)
    v = x * sin(alpha) - y * cos(alpha)
    F = u ** 2 / semi_a ** 2 + v ** 2 / semi_b ** 2 - 1
    if decfun:
        return F
    return F >= 0

def run_simulation_on_data(df_sim, signal_function, target=1.02, stop=0.94, hrs_frozen=0, fees=0.001):
    """Normalise the data, predict signals, simulate the strategy and report.

    `signal_function` is called element-wise on the `close_ema26_norm` and
    `close_ma200_norm` columns to fill `pred`. The closed trades are
    displayed as a table and a PROFIT/LOSS pie chart, followed by the stake
    and close price curves. Returns the simulated frame.
    """
    df_sim = normalise(df_sim)
    df_sim.dropna(inplace=True)
    predict = np.vectorize(signal_function)
    df_sim['pred'] = predict(df_sim['close_ema26_norm'], df_sim['close_ma200_norm'])
    df_sim = df_sim.set_index(pd.to_datetime(df_sim.index))

    # ------- simulation -------
    df_sim = simulate(df_sim, target=target, stop=stop, hrs_frozen=hrs_frozen, fees=fees)

    # Rows on which a trade was closed, shared by the table and the pie chart.
    closed_trades = df_sim[df_sim['action'].isin(['PROFIT', 'LOSS'])]
    display(closed_trades[['close', 'close_ema26_norm', 'close_ma200_norm', 'stake', 'action']])

    outcome_counts = closed_trades['action'].str.get_dummies().sum()
    outcome_counts.plot(kind='pie', label='action', autopct='%1.0f%%', figsize=[5, 5])
    plt.show()

    for column in ('stake', 'close'):
        plot(plt.plot, df_sim, [column], fig_size=(16, 6))

    return df_sim

def run_simulation(symbol, start, days, signal_function, freq=1, target=1.02, stop=0.94, hrs_frozen=0, fees=0.001):
    """Download the history of `symbol` and run `run_simulation_on_data` on it."""
    history = download_history_fast(symbol, start, freq=freq, days=days)
    simulation_options = dict(target=target, stop=stop, hrs_frozen=hrs_frozen, fees=fees)
    return run_simulation_on_data(history, signal_function, **simulation_options)

In [None]:
_ = run_simulation('BNBBUSD', '20210601000000', 153, soru_viga_ellipse, freq=1, 
                   target=1.02, stop=0.94, hrs_frozen=0, fees=0.001)

In [None]:
_ = run_simulation('BNBUSDT', '20171225000000', 365+7, soru_viga_ellipse, freq=1, 
                   target=1.02, stop=0.94, hrs_frozen=0, fees=0.001)

In [None]:
df_sim = download_history_fast('BNBUSDT', '20171225000000', freq=1, days=365+7)

df_sim.drop(df_sim[df_sim.index < datetime(2018, 8, 1)].index, inplace=True)

run_simulation_on_data(df_sim, soru_viga_ellipse, target=1.01, stop=0.94, hrs_frozen=0, fees=0.001)

In [None]:
df_sim = download_history_fast('BNBUSDT', '20171225000000', freq=1, days=365+7)

df_sim = run_simulation_on_data(df_sim, soru_viga_ellipse)

df_sim['close_stdev_norm'] = df_sim['close'].rolling(window=200).std() / df_sim['close'] * 100
df_sim['close_stdev_ema200_norm'] = ema(df_sim['close_stdev_norm'], 20000)

df_sim.dropna(inplace=True)

In [None]:
plot(plt.plot, df_sim, ['close_stdev_ema200_norm'], fig_size=(16, 6), baseline=0.6)

In [None]:
df_sim['stake_diff_norm'] = df_sim[::60*24]['stake'].diff()
df_sim[df_sim['stake_diff_norm'].notna()][['stake_diff_norm']]

In [None]:
plot(plt.plot, df_sim[::60*24], ['stake_diff_norm'], fig_size=(16, 6))