# Backtesting: Bet Sizing

### Loading Libraries

In [1]:
# Randomness
import random

# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd
from pandas import Timestamp

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt

import plotly.graph_objects as go
import plotly.io as pio
%matplotlib inline

# Date & Time
from datetime import datetime, timedelta

# Typing
from typing import Tuple, List, Dict, Union, Optional, Any, Generator

# Scikit-Learn
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, plot_roc_curve

# Scientific Statistical Python
from scipy.stats import jarque_bera
from scipy.stats import rv_continuous, kstest

### Averaging Active Bets
#### Bets Are Averaged as Long as They Are Still Active

In [2]:
def avg_active_signals_(signals: pd.DataFrame, molecule: np.ndarray) -> pd.Series:
    """Average the signals that are active at each requested time point.

    A row of ``signals`` is active at time ``loc`` when its index value is
    ``<= loc`` and its ``'t1'`` value is either strictly greater than ``loc``
    or missing (a missing ``'t1'`` never deactivates the row).

    Parameters
    ----------
    signals : pd.DataFrame
        Frame with columns ``'t1'`` and ``'signal'``; the index is compared
        against each time point as the row's start.
    molecule : np.ndarray
        Time points to evaluate (any iterable of values comparable with the
        index and with ``'t1'`` works).

    Returns
    -------
    pd.Series
        float64 series indexed by the time points, holding the mean of the
        active ``'signal'`` values, or 0 where no row is active.
    """
    # Loop-invariant pieces are extracted once instead of on every iteration.
    starts = signals.index.values
    ends = signals['t1']
    open_ended = pd.isnull(ends).values
    bets = signals['signal']

    averages = []
    for loc in molecule:
        active = (starts <= loc) & ((loc < ends).values | open_ended)
        # Select by boolean mask (not by re-looking-up index labels) so rows
        # sharing an index label with an active row are not pulled in.
        averages.append(bets[active].mean() if active.any() else 0.0)

    # Build the result in one shot with an explicit dtype: an empty
    # ``pd.Series()`` has no float dtype and growing it per item is quadratic.
    return pd.Series(averages, index=list(molecule), dtype='float64')

In [3]:
def avg_active_signals(signals: pd.DataFrame) -> pd.Series:
    """Average the active signals at every time a signal starts or ends.

    The evaluation points are the distinct values found in the frame's index
    together with the non-null ``'t1'`` values, in ascending order. The actual
    averaging is delegated to ``avg_active_signals_``.

    Parameters
    ----------
    signals : pd.DataFrame
        Frame with columns ``'t1'`` and ``'signal'``.

    Returns
    -------
    pd.Series
        Whatever ``avg_active_signals_`` returns for those time points.
    """
    end_times = signals['t1'].dropna().values
    start_times = signals.index.values
    # De-duplicate through a set, then order chronologically.
    time_points = sorted(set(end_times) | set(start_times))
    return avg_active_signals_(signals=signals, molecule=time_points)

### Size Discretization
#### To Prevent Overtrading

In [4]:
def discrete_signal(signal0: pd.Series, stepSize: float) -> pd.Series:
    """Round a signal to multiples of ``stepSize`` and bound it to [-1, 1].

    Parameters
    ----------
    signal0 : pd.Series
        Raw signal values.
    stepSize : float
        Grid spacing; each value is rounded to the nearest multiple of it.

    Returns
    -------
    pd.Series
        New series with the discretized values, capped at 1 and floored at -1.
        The input series is not modified.
    """
    n_steps = (signal0 / stepSize).round()    # nearest whole number of steps
    snapped = n_steps * stepSize
    return snapped.clip(lower=-1, upper=1)    # cap at 1, floor at -1

#### Dynamic Position Size & Limit Price

In [5]:
def bet_size(x: float, w: float) -> float:
    """Sigmoid bet size ``x / sqrt(w + x**2)``.

    Parameters
    ----------
    x : float
        Divergence the size responds to; the result has the same sign as ``x``.
    w : float
        Width coefficient of the sigmoid; for ``w > 0`` the result lies
        strictly inside (-1, 1).

    Returns
    -------
    float
        The bet size.
    """
    return x * (w + x ** 2) ** (-0.5)


def get_target_pos(w: float, f: float, mP: float, maxPos: float) -> float:
    """Target position: the bet size for divergence ``f - mP`` scaled by ``maxPos``.

    Parameters
    ----------
    w : float
        Width coefficient forwarded to ``bet_size``.
    f : float
        Presumably the forecast price - confirm with callers.
    mP : float
        Presumably the current market price - confirm with callers.
    maxPos : float
        Scale applied to the bet size (the largest absolute position).

    Returns
    -------
    int
        ``bet_size * maxPos`` truncated toward zero by ``int``.
    """
    # bet_size is declared as (x, w): the divergence must go in as ``x`` and
    # the width as ``w``. Keyword arguments keep the two from being swapped.
    return int(bet_size(x=f - mP, w=w) * maxPos)


def inv_price(f: float, w: float, m: float) -> float:
    """Price implied by a bet size: ``f - m * sqrt(w / (1 - m**2))``.

    Parameters
    ----------
    f : float
        Reference price the offset is subtracted from.
    w : float
        Width coefficient.
    m : float
        Bet size; must satisfy ``|m| < 1`` because ``1 - m**2`` is a divisor.

    Returns
    -------
    float
        ``f`` when ``m == 0``, below ``f`` for positive ``m`` and above ``f``
        for negative ``m`` (given ``w > 0``).
    """
    return f - m * (w / (1 - m**2)) ** 0.5


def limit_price(tPos: float, pos: float, f: float, w: float, maxPos: float) -> float:
    """Average ``inv_price`` over every unit step from ``pos`` to ``tPos``.

    The positions visited are ``pos + sgn, pos + 2*sgn, ..., tPos`` where
    ``sgn`` is +1 when the position grows and -1 when it shrinks; each one is
    converted to a bet size ``j / maxPos`` and priced with ``inv_price``.

    Parameters
    ----------
    tPos : float
        Target position; coerced with ``int`` (truncation toward zero).
    pos : float
        Current position; coerced with ``int`` (truncation toward zero).
    f : float
        Reference price forwarded to ``inv_price``.
    w : float
        Width coefficient forwarded to ``inv_price``.
    maxPos : float
        Position scale; every visited ``|j|`` must be below it.

    Returns
    -------
    float
        Mean of the per-unit prices, or NaN when ``tPos == pos`` (there is no
        order to price).
    """
    # range() needs integers even though the parameters are annotated as float.
    tPos, pos = int(tPos), int(pos)
    if tPos == pos:
        return float('nan')    # nothing to trade; avoids dividing by zero

    sgn = 1 if tPos > pos else -1
    # Walk the signed positions themselves. Taking abs() of the bounds loses
    # the direction, which empties the range when reducing a position and
    # gives positive sizes for short positions.
    steps = range(pos + sgn, tPos + sgn, sgn)
    total = sum(inv_price(f, w, j / float(maxPos)) for j in steps)
    # len(steps) == |tPos - pos|, so the average keeps the sign of the prices.
    return total / len(steps)


def get_w(x: float, m: float):
    """Width coefficient ``x**2 * (m**-2 - 1)``.

    Solving ``m = x / sqrt(w + x**2)`` for ``w`` gives this expression, so the
    result is the width at which a divergence ``x`` maps to a bet size ``m``.

    Parameters
    ----------
    x : float
        Divergence.
    m : float
        Bet size; must be non-zero because it is raised to the power -2.

    Returns
    -------
    float
        The width coefficient.
    """
    inverse_size_term = m ** (-2) - 1
    return x ** 2 * inverse_size_term

In [6]:
def get_num_conc_bets_by_date(date: Timestamp, signals: pd.DataFrame,
                              lookback_days: int = 25) -> Tuple[int, int]:
    """Count the long and short bets that are still open on ``date``.

    A row is counted when its index value lies between the window start and
    ``date`` (both inclusive) and its ``'t1'`` is ``>= date``. The window
    start is the later of the first index value and
    ``date - lookback_days``. Rows with a missing ``'t1'`` are not counted,
    because a comparison against a missing value is False.

    Parameters
    ----------
    date : Timestamp
        Day for which concurrent bets are counted.
    signals : pd.DataFrame
        Frame with columns ``'t1'`` and ``'signal'``; its first index value is
        used as the earliest possible window start.
    lookback_days : int, default 25
        How many days before ``date`` a bet may have started and still be
        considered.

    Returns
    -------
    Tuple[int, int]
        ``(long, short)``: rows with ``signal >= 0`` count as long, every other
        counted row as short.
    """
    if len(signals) == 0:
        return 0, 0    # no bets at all; signals.index[0] would raise

    start = max(signals.index[0], date - timedelta(days=lookback_days))
    # Mask the rows that exist instead of looking up one label per calendar
    # day: days absent from the index (weekends, holidays) no longer raise
    # KeyError and repeated index labels are handled row by row.
    in_window = (signals.index >= start) & (signals.index <= date)
    still_open = (signals['t1'] >= date).to_numpy()
    active = signals.loc[in_window & still_open]

    n_long = int((active['signal'] >= 0).sum())
    return n_long, len(active) - n_long