In [None]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor

In [None]:
# Shared palette for every figure in this notebook (light lines/text on a dark page).
line_color = '#f1f5f9'
font_color = '#f1f5f9'
background_color = '#032137'

# Base plotly layout: transparent paper/plot backgrounds, themed axes,
# legend pinned to the top-left corner, and no right/left/bottom margin.
layout_dict = {
    'font_color': font_color,
    'paper_bgcolor': 'rgba(0,0,0,0)',
    'plot_bgcolor': 'rgba(0,0,0,0)',
    # One independent dict per axis so that editing one never aliases the other.
    **{
        axis: dict.fromkeys(('gridcolor', 'linecolor', 'zerolinecolor'), line_color)
        for axis in ('xaxis', 'yaxis')
    },
    'legend': {'x': 0, 'y': 1, 'bgcolor': background_color},
    'margin': {'r': 0, 'l': 0, 'b': 0},
}

# Default figure sizes per device class.
# NOTE(review): not referenced in the visible cells - presumably consumed by an export helper; confirm.
tablet_args = {'default_width': '700px', 'default_height': '300px'}
mobile_args = {'default_width': '400px', 'default_height': '175px'}

In [None]:
def bound_array(a, bounds=[0, 1]):
    """Fold values into ``[bounds[0], bounds[1]]`` by reflecting them at the edges.

    A value that leaves the interval is mirrored back at the edge it crossed,
    as many times as needed, so a cumulative path stays continuous instead of
    being clipped or wrapped around.

    Parameters
    ----------
    a : numpy.ndarray
        Values to fold. The input is copied and left untouched.
    bounds : sequence
        ``bounds[0]`` is the lower edge and ``bounds[1]`` the upper edge.

    Returns
    -------
    numpy.ndarray
        Array shaped like ``a`` with every value reflected into the interval.
    """
    lower, upper = bounds[0], bounds[1]
    width = upper - lower
    folded = a.copy()

    # Signed number of whole interval widths separating each value from the
    # lower edge; its magnitude is the number of reflections to apply.
    signed_crosses = np.floor((folded - lower) / width).astype(int)
    crosses = np.abs(signed_crosses)
    odd = crosses % 2  # 1 where the value ends up mirrored, 0 otherwise

    # An odd number of reflections reverses the direction of travel.
    folded *= (-1) ** crosses

    # Shift back by the whole widths that were crossed (sign follows the flip) ...
    folded += width * signed_crosses * (-1) ** (crosses + 1)
    # ... and mirrored values additionally need the (upper + lower) offset.
    folded += (upper + lower) * odd

    return folded

def non_stationary_bounded_random_walk(initial=1/2, bounds=[0, 1], size=[1000, 1], drift=0, std=0.001, std_bounds=[0, 0.05]):
    """Simulate a reflected random walk whose step volatility is itself a random walk.

    Parameters
    ----------
    initial : float
        Offset added to the cumulative sum of the walk increments.
    bounds : sequence
        Interval the walk is folded into via ``bound_array``.
    size : int or sequence of int
        Shape of the Gaussian draws; sums are accumulated along axis 0.
    drift : float
        Mean of the Gaussian increments of both the volatility path and the walk.
    std : float
        Scale of the volatility-path increments, also added to that path as an offset.
    std_bounds : sequence
        Interval the volatility path is folded into via ``bound_array``.

    Returns
    -------
    numpy.ndarray
        The folded walk, with the shape given by ``size``.
    """
    # Volatility path: absolute value of a cumulative Gaussian walk offset by
    # `std`, then folded into std_bounds.
    std_increments = np.random.normal(loc=drift, scale=std, size=size)
    std_path = bound_array(abs(std_increments.cumsum(0) + std), std_bounds)

    # Walk: Gaussian steps whose scale follows the volatility path element-wise,
    # accumulated, offset by `initial`, and folded into bounds.
    walk_increments = np.random.normal(loc=drift, scale=std_path, size=size)
    return bound_array(walk_increments.cumsum(0) + initial, bounds)

# Example figure: a single 1000-step walk with a small positive drift.
walks = non_stationary_bounded_random_walk(drift=0.001, size=1000)

fig = make_subplots(specs=[[dict(secondary_y=True)]])

fig.add_trace(
    go.Scatter(y=walks, name='Random Walk', opacity=0.5),
    secondary_y=False,
)
# Disabled: std_array is a local variable of non_stationary_bounded_random_walk
# and is not returned, so it is not available in this cell as written.
# fig.add_trace(go.Scatter(y=std_array, opacity=0.5, name='Std'), secondary_y=True)

fig.update_layout(title='Non Stationary Random Walk', **layout_dict)
# Primary axis is padded around the default [0, 1] walk bounds; the secondary
# range is presumably sized for the disabled std trace.
fig.update_yaxes(range=[-0.2, 1.2], secondary_y=False)
fig.update_yaxes(range=[-0.01, 0.06], secondary_y=True)
fig.show()

# Approximating Standard Deviation

In [None]:
def apply_rolling_function(array, window, func):
    """Apply ``func`` over a sliding block of rows around every row of ``array``.

    For row ``i`` the block is ``array[i - window : i + window]`` clipped to the
    array limits, i.e. up to ``window`` rows before ``i`` and up to
    ``window - 1`` rows after it. Note that the block therefore includes rows
    that come after ``i``.

    Parameters
    ----------
    array : numpy.ndarray
        Input whose first axis is the one being scanned.
    window : int
        Half-width of the block, in rows.
    func : callable
        Reducer called as ``func(block, axis=0)`` (e.g. ``np.mean``, ``np.std``).

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``array``.
    """
    n_rows = array.shape[0]
    out = np.zeros(array.shape)

    for row in range(n_rows):
        start = max(0, row - window)
        stop = min(n_rows, row + window)
        out[row] = func(array[start:stop], axis=0)

    return out

def step_mean(array, alpha, init=0.5):
    """Exponentially weighted running mean along the first axis.

    The recursion is ``m[i] = alpha * m[i - 1] + (1 - alpha) * array[i]`` with
    the first row computed as if the previous mean were ``init``.

    Parameters
    ----------
    array : numpy.ndarray
        Observations; axis 0 is the step axis. Must have at least one row.
    alpha : float
        Weight kept from the previous mean at every step.
    init : float
        Value standing in for the mean before the first observation.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``array``.
    """
    shape = array.shape
    keep, take = alpha, 1 - alpha

    # Every row starts as the first-step value; rows after the first are
    # overwritten by the recursion below.
    smoothed = keep * np.ones(shape) * init + take * array[0]

    for step in range(1, shape[0]):
        smoothed[step] = keep * smoothed[step - 1] + take * array[step]

    return smoothed


In [None]:
def generate_X(games_results: np.ndarray, windows: list = [5, 10, 20, 50, 100], alphas: list = [0.85, 0.9, 0.95, 0.975, 0.99]):
    """Build the feature matrix for a batch of game-result series.

    Every feature is laid out run-major: all steps of run 0, then all steps of
    run 1, and so on. This is the same order ``data_generator`` uses for the
    targets (``p.T.flatten()``).

    Feature rows, in order:

    0. ``log(1 + step index)``
    1. the raw game results
    2. one rolling mean per entry of ``windows``
    3. one rolling std per entry of ``windows``
    4. one exponentially weighted step mean per entry of ``alphas``
    5. one rolling std (half-width 10) of each step mean

    Parameters
    ----------
    games_results : numpy.ndarray
        2-D array of shape ``(depth, n_runs)``; axis 0 is the step axis.
    windows : list
        Half-widths passed to ``apply_rolling_function``.
    alphas : list
        Smoothing factors passed to ``step_mean``.

    Returns
    -------
    numpy.ndarray
        Float array of shape
        ``(2 + 2 * len(windows) + 2 * len(alphas), depth * n_runs)``.

    Raises
    ------
    ValueError
        If ``games_results`` is not 2-D.
    """
    games_results = np.asarray(games_results)
    depth, n_runs = games_results.shape

    def run_major(per_step):
        # (depth, n_runs) -> 1-D vector ordered run by run.
        return per_step.T.flatten()

    # Step index 0..depth-1 repeated once per run (run-major as well).
    step_index = np.log(1 + np.tile(np.arange(depth), n_runs))

    # The raw results must be transposed before flattening like every other
    # feature; flattening them directly yields step-major order and misaligns
    # this row with the rest of the matrix whenever n_runs > 1.
    rows = [step_index, run_major(games_results)]

    # Rolling statistics of the raw results.
    rows += [run_major(apply_rolling_function(games_results, w, np.mean)) for w in windows]
    rows += [run_major(apply_rolling_function(games_results, w, np.std)) for w in windows]

    # Exponentially weighted means and their local variability.
    smoothed = [step_mean(games_results, alpha) for alpha in alphas]
    rows += [run_major(m) for m in smoothed]
    rows += [run_major(apply_rolling_function(m, 10, np.std)) for m in smoothed]

    return np.array(rows, dtype=float)



def data_generator(n_par: int, n_runs: int, depth: int, windows: list = [5, 10, 20, 50, 100], alphas: list = [0.85, 0.9, 0.95, 0.975, 0.99]):
    """Simulate labelled training data from random bounded walks.

    For each of ``n_par`` random parameter sets, a probability path ``p`` of
    shape ``(depth, n_runs)`` is drawn, Bernoulli game results are sampled from
    it, and features are derived from those results with ``generate_X``.

    Parameters
    ----------
    n_par : int
        Number of random (initial, drift, std) parameter sets.
    n_runs : int
        Number of independent walks per parameter set.
    depth : int
        Number of steps per walk.
    windows, alphas : list
        Forwarded to ``generate_X``.

    Returns
    -------
    X : numpy.ndarray
        Shape ``(n_par * n_runs * depth, 2 + 2 * len(windows) + 2 * len(alphas))``.
    y : numpy.ndarray
        Shape ``(n_par * n_runs * depth, 1)``; the true probability behind each row of ``X``.
    """
    size = [depth, n_runs]
    n_features = 2 + 2 * len(windows) + 2 * len(alphas)

    # Walk parameters: initial in [0, 1), drift in [0, 0.001), std in [0, 0.1).
    # Drawn in this order so the global RNG stream is consumed consistently.
    initials = np.random.rand(n_par)
    drifts = np.random.rand(n_par) * 0.001
    stds = np.random.rand(n_par) * 0.1

    # Seed both chunk lists with empty arrays so n_par == 0 still yields
    # correctly shaped (empty) outputs.
    feature_chunks = [np.empty([n_features, 0])]
    target_chunks = [np.empty(0)]

    for initial, drift, std in zip(initials, drifts, stds):
        # Target: the hidden probability path, flattened run by run.
        p = non_stationary_bounded_random_walk(initial=initial, drift=drift, std=std, size=size)
        target_chunks.append(p.T.flatten())

        # Observed outcomes: 1 with probability p at each step.
        games_results = (np.random.random(size) <= p).astype(int)

        feature_chunks.append(generate_X(games_results, windows=windows, alphas=alphas))

    X = np.hstack(feature_chunks).T
    y = np.concatenate(target_chunks).reshape([-1, 1])
    return X, y

## Models

I want to try 3 different models: Linear Regression, Support Vector Regression and Neural Networks.

In [None]:
def fit_and_predict(model, flatten=False):
    """Fit ``model`` on simulated data and return its absolute errors on a fresh set.

    Training data comes from ``data_generator(100, 10, 500)`` and evaluation
    data from ``data_generator(100, 5, 3000)``.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` (returning the fitted estimator) and ``predict(X)``.
    flatten : bool
        If True the training target is passed as a 1-D array instead of a
        column vector, for estimators that expect that shape.

    Returns
    -------
    numpy.ndarray
        1-D array of ``|y - prediction|`` with predictions clipped to ``[0, 1]``,
        one entry per evaluation sample.
    """
    X_train, y_train = data_generator(100, 10, 500)
    if flatten:
        y_train = y_train.flatten()

    reg = model.fit(X_train, y_train)

    X_test, y_test = data_generator(100, 5, 3000)

    # np.clip returns a new array; the result must be kept, otherwise the
    # predictions are left unclipped.
    prediction = np.clip(reg.predict(X_test), 0, 1)

    # Flatten both sides before subtracting so a column target and a 1-D
    # prediction cannot broadcast into an (n, n) matrix.
    return abs(y_test.flatten() - prediction.flatten())

def plot_histogram(diff):
    """Print summary quantiles of ``diff`` and show its histogram.

    Prints the median and the width of the 10%-90% quantile interval
    (labelled "80%CI"), then renders a plotly histogram using the shared
    notebook layout.

    Parameters
    ----------
    diff : array-like
        Absolute prediction errors.
    """
    q10, q50, q90 = np.quantile(diff, [0.1, 0.5, 0.9])

    print(f'Median: {q50}\n80%CI: {q90 - q10}')

    go.Figure(layout=layout_dict).add_trace(go.Histogram(x=diff)).show()

def plot_quantiles(diff, depth: int=3000):
    """Plot the per-step median error with a 10%-90% quantile band.

    ``diff`` is reshaped into consecutive series of ``depth`` steps, and the
    quantiles are taken across series at each step.

    Parameters
    ----------
    diff : numpy.ndarray
        Flat array of absolute errors whose length is a multiple of ``depth``.
    depth : int
        Number of steps per series.
    """
    per_step = diff.reshape([-1, depth]).T  # (depth, n_series)
    low, median, high = np.quantile(per_step, [0.1, 0.5, 0.9], axis=1)

    # Closed polygon for the band: upper edge left-to-right, then the lower
    # edge right-to-left.
    steps = list(range(depth))
    band_x = steps + steps[::-1]
    band_y = list(high) + list(low[::-1])

    fig = go.Figure(layout=layout_dict)

    fig.add_trace(go.Scatter(y=median, name='Median'))
    fig.add_trace(go.Scatter(
        x=band_x,
        y=band_y,
        fill='toself',
        mode='none',
        hoveron='points',
        fillcolor='lightblue',
        name='80% CI',
        opacity=0.5,
    ))

    fig.show()

## Step mean

In [None]:
# Baseline without a fitted model: use the alpha=0.9 step mean directly as the
# prediction. With windows=[] the feature columns are
# [log step index, game result, step mean, step-mean std].
X, y = data_generator(100, 5, 3000, windows=[], alphas=[0.9])

y = y.flatten()
# np.clip returns a new array, so its result has to be assigned to take effect.
prediction = np.clip(X.T[2], 0, 1)
diff = abs(y - prediction).flatten()

plot_histogram(diff)

In [None]:
# Per-step error quantiles of the baseline; the default depth=3000 of
# plot_quantiles matches the 3000-step series generated in the previous cell.
plot_quantiles(diff)

### Linear Regression

In [None]:
# Ordinary least squares on all generated features; the target is kept as a
# column vector (flatten defaults to False).
diff = fit_and_predict(LinearRegression())

plot_histogram(diff)

In [None]:
# Per-step error quantiles of the linear model; relies on the default
# depth=3000 matching the evaluation set built inside fit_and_predict.
plot_quantiles(diff)

## Support Vector Regression

In [None]:
# Support vector regression on standardized features. flatten=True hands the
# estimator a 1-D target; max_iter=1000 caps the solver iterations.
diff = fit_and_predict(make_pipeline(StandardScaler(), SVR(max_iter=1000, epsilon=0.01, C=1)), flatten=True)

plot_histogram(diff)

In [None]:
# Per-step error quantiles of the SVR model; relies on the default depth=3000
# matching the evaluation set built inside fit_and_predict.
plot_quantiles(diff)

## Neural Network

In [None]:
# Small multi-layer perceptron: three hidden layers with 4, 4 and 2 units.
# flatten=True hands the estimator a 1-D target.
layers = (4, 4, 2)
diff = fit_and_predict(MLPRegressor(batch_size=1000, max_iter=10000, hidden_layer_sizes=layers), flatten=True)

plot_histogram(diff)

In [None]:
# Per-step error quantiles of the neural network; relies on the default
# depth=3000 matching the evaluation set built inside fit_and_predict.
plot_quantiles(diff)