In [None]:
import numpy as np
import pickle
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor

import utils

In [None]:
# Example plot: draw one simulated walk to illustrate the kind of series
# the rest of the notebook works with.
walks = utils.non_stationary_bounded_random_walk(size=1000, drift=0.001)

# The figure is created with a secondary y-axis available.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# The walk itself is drawn against the primary (left) y-axis.
fig.add_trace(go.Scatter(y=walks, opacity=0.5, name='Random Walk'), secondary_y=False)

# Shared notebook styling from utils.layout_dict plus a title for this figure.
fig.update_layout(**utils.layout_dict, title='Non Stationary Random Walk')
# Fixed axis ranges; the primary range leaves a margin around [0, 1].
fig.update_yaxes(range=[-0.2, 1.2], secondary_y=False)
# NOTE(review): no trace is attached to the secondary axis in this cell, so
# this range only matters if a secondary series is added later.
fig.update_yaxes(range=[-0.01, 0.06], secondary_y=True)
fig.show()

# Approximating Standard Deviation

In [None]:
def data_generator(n_par: int, n_runs: int, depth: int, windows: 'list | None' = None, alphas: 'list | None' = None):
    """Simulate walks and game outcomes and assemble a regression dataset.

    For each of ``n_par`` randomly drawn parameter sets (``initial``,
    ``drift``, ``std``) a walk ``p`` is requested from
    ``utils.non_stationary_bounded_random_walk`` with ``size=[depth, n_runs]``.
    Binary game results are then sampled by comparing uniform random numbers
    against ``p``, and ``utils.generate_X`` turns those results into features.

    Args:
        n_par: Number of random parameter sets to draw.
        n_runs: Second entry of the ``size`` passed to the walk generator.
        depth: First entry of the ``size`` passed to the walk generator.
        windows: Forwarded to ``utils.generate_X``. Defaults to
            ``[5, 10, 20, 50, 100]``.
        alphas: Forwarded to ``utils.generate_X``. Defaults to
            ``[0.85, 0.9, 0.95, 0.975, 0.99]``.

    Returns:
        Tuple ``(X, y)``. ``X`` is the transposed feature matrix, built from
        ``2 + 2 * len(windows) + 2 * len(alphas)`` feature rows (so features
        end up as columns). ``y`` is a column vector holding the flattened
        walk values in the same sample order.
    """
    # Build fresh lists on every call instead of sharing one mutable default
    # object between calls (the lists are handed to utils.generate_X).
    if windows is None:
        windows = [5, 10, 20, 50, 100]
    if alphas is None:
        alphas = [0.85, 0.9, 0.95, 0.975, 0.99]

    # Controlling variables. The three draws stay in this order so the global
    # NumPy random stream is consumed as before.
    N = [depth, n_runs]
    initials = np.random.rand(n_par)
    drifts = np.random.rand(n_par) * 0.001
    stds = np.random.rand(n_par) * 0.1
    params = [
        {'initial': initial, 'drift': drift, 'std': std}
        for initial, drift, std in zip(initials, drifts, stds)
    ]

    # One accumulator list per feature row expected from utils.generate_X.
    n_features = 2 + 2 * len(windows) + 2 * len(alphas)
    y = []
    X = [[] for _ in range(n_features)]
    for param in params:
        # Generate Y: transpose before flattening so the values along the
        # first axis of `p` stay contiguous in the flattened target.
        p = utils.non_stationary_bounded_random_walk(**param, size=N)
        y.extend(p.T.flatten().tolist())

        # Calculate games results: 1 where the uniform draw is <= p, else 0.
        games_results = (np.random.random(N) <= p).astype(int)

        # Fill X: append this parameter set's samples to every feature row.
        _X = utils.generate_X(games_results, windows=windows, alphas=alphas).tolist()
        for i in range(n_features):
            X[i].extend(_X[i])

    X = np.array(X).T
    y = np.array(y).reshape([-1, 1])
    return X, y

## Models

I want to try 3 different models: Linear Regression, Support Vector Regression and Neural Networks.

In [None]:
def fit_and_predict(model, flatten=False, save_model=False, model_name='model.pkl',
                    train_args=(100, 10, 500), test_args=(100, 5, 3000)):
    """Fit ``model`` on simulated data and evaluate it on a fresh simulation.

    Args:
        model: Estimator exposing ``fit(X, y)`` (returning the fitted
            estimator) and ``predict(X)``.
        flatten: If True, the targets are flattened to 1-D before fitting and
            before computing the error.
        save_model: If True, the fitted estimator is pickled to ``model_name``.
        model_name: Destination path used when ``save_model`` is True.
        train_args: Positional arguments forwarded to ``data_generator`` for
            the training set.
        test_args: Positional arguments forwarded to ``data_generator`` for
            the evaluation set. ``plot_quantiles`` defaults to ``depth=3000``,
            matching the default depth here.

    Returns:
        Tuple ``(diff, X, y, prediction)`` for the evaluation set, where
        ``prediction`` is clipped to ``[0, 1]`` and ``diff`` is the flat
        array of element-wise absolute errors.
    """
    X_train, y_train = data_generator(*train_args)
    if flatten:
        y_train = y_train.flatten()

    reg = model.fit(X_train, y_train)

    if save_model:
        # Context manager guarantees the file is flushed and closed.
        with open(model_name, 'wb') as model_file:
            pickle.dump(reg, model_file)

    X, y = data_generator(*test_args)
    if flatten:
        y = y.flatten()

    # np.clip returns a new array; the result must be kept, otherwise the
    # predictions stay unclipped.
    prediction = np.clip(reg.predict(X), 0, 1)

    # Compare element-wise on flattened views so an (n, 1) target and an
    # (n,) prediction cannot silently broadcast into an (n, n) matrix.
    diff = np.abs(np.ravel(y) - np.ravel(prediction))

    return diff, X, y, prediction

def plot_histogram(diff):
    """Print the median and 10%-90% spread of ``diff`` and show its histogram.

    Args:
        diff: Array-like of error values.
    """
    q10, q50, q90 = np.quantile(diff, [0.1, 0.5, 0.9])

    # The "80%CI" figure is the width between the 10% and 90% quantiles.
    print(f'Median: {q50}\n80%CI: {q90 - q10}')

    histogram = go.Figure(layout=utils.layout_dict)
    histogram.add_trace(go.Histogram(x=diff))
    histogram.show()

def plot_quantiles(diff, depth: int=3000):
    """Plot the median and the 10%-90% band of ``diff`` at each position.

    ``diff`` is cut into consecutive chunks of ``depth`` values; the
    quantiles are then taken across chunks for every position in a chunk.

    Args:
        diff: Flat array whose length is a multiple of ``depth``.
        depth: Number of values per chunk.
    """
    # After the transpose each row holds one position across all chunks.
    per_position = diff.reshape([-1, depth]).T
    lower, median, upper = np.quantile(per_position, [0.1, 0.5, 0.9], axis=1)

    # Closed polygon for the band: upper edge left-to-right, then the lower
    # edge right-to-left.
    positions = list(range(depth))
    band_x = positions + positions[::-1]
    band_y = list(upper) + list(lower[::-1])

    fig = go.Figure(layout=utils.layout_dict)
    fig.add_trace(go.Scatter(y=median, name='Median'))
    fig.add_trace(go.Scatter(
        x=band_x, y=band_y, fill='toself', mode='none',
        hoveron='points', fillcolor='lightblue', name='80% CI', opacity=0.5,
    ))
    fig.show()

## Step median (baseline)

In [None]:
# Baseline without a fitted model: one feature column is used directly as
# the prediction. Only a single alpha and no windows are requested, so the
# feature matrix has four columns.
X, y = data_generator(100, 5, 3000, windows=[], alphas=[0.9])

y = y.flatten()
# NOTE(review): column 2 is presumably the alpha=0.9 estimate produced by
# utils.generate_X - confirm against that helper.
# np.clip returns a new array, so the clipped values must be assigned back.
prediction = np.clip(X.T[2], 0, 1)
diff = abs(y - prediction).flatten()

plot_histogram(diff)

In [None]:
# Median and 10%-90% band of the baseline's absolute error at each position.
plot_quantiles(diff)

## Linear Regression

In [None]:
# Fit a linear regression on simulated data, pickle the fitted model to
# 'LR.pkl', and evaluate it on a freshly generated dataset.
diff, X, y, prediction = fit_and_predict(LinearRegression(), save_model=True, model_name='LR.pkl')

# Distribution of the absolute errors on the evaluation data.
plot_histogram(diff)

In [None]:
# Median and 10%-90% band of the linear model's absolute error at each position.
plot_quantiles(diff)

In [None]:
# Split the flat evaluation arrays into rows of 3000 values; 3000 is the
# depth of the evaluation data generated inside fit_and_predict.
y = y.reshape([-1, 3000])
prediction = prediction.reshape([-1, 3000])

fig = go.Figure(layout=utils.layout_dict)

# Index of the single row to display.
i = 3
fig.add_trace(go.Scatter(y=y[i], name='Original Probabilities'))
fig.add_trace(go.Scatter(y=prediction[i], name='Predicted Probabilities'))

fig.show()


## Neural Network

In [None]:
# Hidden layer sizes for the multi-layer perceptron regressor.
layers = (4, 4, 2)
# flatten=True makes fit_and_predict pass the targets as a 1-D array.
diff, X, y, prediction = fit_and_predict(MLPRegressor(batch_size=1000, max_iter=10000, hidden_layer_sizes=layers), flatten=True)

# Distribution of the absolute errors on the evaluation data.
plot_histogram(diff)

In [None]:
# Median and 10%-90% band of the network's absolute error at each position.
plot_quantiles(diff)

In [None]:
# Split the flat evaluation arrays into rows of 3000 values; 3000 is the
# depth of the evaluation data generated inside fit_and_predict.
y = y.reshape([-1, 3000])
prediction = prediction.reshape([-1, 3000])

fig = go.Figure(layout=utils.layout_dict)

# Index of the single row to display.
i = 3
fig.add_trace(go.Scatter(y=y[i], name='Original Probabilities'))
fig.add_trace(go.Scatter(y=prediction[i], name='Predicted Probabilities'))

fig.show()