In [None]:
import numpy as np
import pandas as pd
from scipy.misc import central_diff_weights
from sklearn.gaussian_process import GaussianProcessRegressor

from sklearn.gaussian_process.kernels import RBF, ConstantKernel, Matern

In [None]:
def get_grad_mat(local_pts, total_pts, n_grad, epsilon=1):
    """Build the matrix applying a central finite-difference stencil per point.

    The input vector is expected to hold ``total_pts`` consecutive groups of
    ``local_pts`` samples each (the layout produced by ``get_local_points``).
    Row ``ii`` of the result holds the stencil weights in columns
    ``ii * local_pts`` to ``(ii + 1) * local_pts - 1`` and zeros elsewhere.

    The stencil weights are computed here with NumPy instead of
    ``scipy.misc.central_diff_weights``, which was deprecated in SciPy 1.10
    and removed in later releases.

    Parameters
    ----------
    local_pts : int
        Number of samples in each stencil; must be odd and larger than
        ``n_grad``.
    total_pts : int
        Number of stencils (rows of the result).
    n_grad : int
        Order of the derivative to approximate.
    epsilon : float, optional
        Spacing between adjacent samples of a stencil. The unit-spacing
        weights are divided by ``epsilon ** n_grad``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(total_pts, local_pts * total_pts)``.

    Raises
    ------
    ValueError
        If ``local_pts`` is even, ``n_grad`` is negative, or ``local_pts`` is
        smaller than ``n_grad + 1``.
    """
    if n_grad < 0:
        raise ValueError("The derivative order must be non-negative.")
    if local_pts < n_grad + 1:
        raise ValueError(
            "The number of points must be at least the derivative order + 1."
        )
    if local_pts % 2 != 1:
        raise ValueError("The number of points must be odd.")

    # Unit-spacing weights w solve  sum_i w_i * o_i**k = n_grad! * [k == n_grad]
    # for k = 0 .. local_pts - 1, where o_i are the integer stencil offsets.
    half = local_pts // 2
    offsets = np.arange(-half, half + 1, dtype=float)
    powers = np.vander(offsets, local_pts, increasing=True).T
    rhs = np.zeros(local_pts)
    rhs[n_grad] = np.prod(np.arange(1, n_grad + 1, dtype=float))
    w = np.linalg.solve(powers, rhs)

    w_total = np.zeros((total_pts, local_pts * total_pts))
    for ii in range(total_pts):
        start = ii * local_pts
        w_total[ii, start:start + local_pts] = w
    return w_total / (epsilon ** n_grad)


def get_local_points(x, local_pts, epsilon):
    """Surround every entry of ``x`` with a symmetric stencil of sample points.

    For each value ``xx`` in ``x`` the points ``xx + k * epsilon`` are
    generated for ``k = -(local_pts // 2), ..., local_pts // 2``. The stencils
    are laid out one after another in a flat float array of length
    ``local_pts * len(x)``.

    ``local_pts`` must be odd, and the flattened result must already be in
    non-decreasing order (so ``x`` has to be ascending with stencils that do
    not overlap); both conditions are enforced with ``assert``.
    """
    assert local_pts % 2 == 1
    # integer offsets -half .. +half, scaled to the requested spacing
    half = local_pts // 2
    pad = epsilon * np.arange(-half, half + 1)
    assert(len(pad) == local_pts)

    # one row per centre point; rows are views, so writing fills `stencils`
    stencils = np.zeros((len(x), local_pts))
    for row, centre in zip(stencils, x):
        row[:] = centre + pad
    x_total = stencils.reshape(local_pts * len(x))

    assert list(x_total) == sorted(x_total)
    return x_total

In [None]:
def get_grad_moments(gp, x, local_pts, epsilon, n_grad, prepend=()):
    """Mean and covariance of a finite-difference derivative of a GP prediction.

    The regressor is queried on a stencil of ``local_pts`` points around every
    entry of ``x``; the joint predictive mean and covariance are then mapped
    through the linear finite-difference operator ``W``.

    Parameters
    ----------
    gp : object
        Fitted regressor exposing ``predict(X, return_cov=True)`` that returns
        ``(mean, covariance)``.
    x : numpy.ndarray
        1-D array of locations at which the derivative is evaluated.
    local_pts : int
        Number of stencil points per location (passed to
        ``get_local_points`` / ``get_grad_mat``).
    epsilon : float
        Spacing between stencil points.
    n_grad : int
        Order of the derivative.
    prepend : sequence of numbers, optional
        Fixed values for the leading feature columns of the query matrix; the
        differentiated variable always occupies the last column.

    Returns
    -------
    mu_grad : numpy.ndarray
        ``W @ mean``.
    cov_grad : numpy.ndarray
        ``W @ covariance @ W.T``.
    W : numpy.ndarray
        The finite-difference operator returned by ``get_grad_mat``.
    """
    total_pts, = x.shape

    x_padded = get_local_points(x, local_pts, epsilon)

    # Build the query matrix as float explicitly. With integer `prepend`
    # values (or none at all) an int array would be created, and writing the
    # stencil points into it would silently truncate them to whole numbers.
    query_row = np.asarray(list(prepend) + [0.0], dtype=float)
    X_test = np.tile(query_row, (len(x_padded), 1))
    X_test[:, -1] = x_padded

    mu, cov = gp.predict(X_test, return_cov=True)

    W = get_grad_mat(local_pts, total_pts, n_grad, epsilon)
    mu_grad = np.dot(W, mu)
    cov_grad = np.dot(np.dot(W, cov), W.T)

    return mu_grad, cov_grad, W

In [None]:
def get_price(calls_df, tte, price_grid, local_pts=5, epsilon=1e-4, n_samples=100):
    """Sample normalised second-derivative curves of a GP fit to call quotes.

    A ``GaussianProcessRegressor`` (``normalize_y=True``) is fit on the
    ``['TTE', 'Strike']`` columns of ``calls_df`` against ``'Mid'``, skipping
    rows whose mid is NaN. ``get_grad_moments`` then gives the mean and
    covariance of the second finite-difference derivative along the last
    feature at ``price_grid``, with the first feature fixed to ``tte``.
    ``n_samples`` draws are taken from that Gaussian, clipped at zero and
    rescaled so that each row sums to one.

    Returns ``(samples, mu_grad, cov_grad)``.
    """
    features = calls_df[['TTE', 'Strike']].values
    targets = calls_df['Mid'].values

    # drop observations without a usable mid price
    has_mid = ~np.isnan(targets)
    features, targets = features[has_mid, :], targets[has_mid]

    gp = GaussianProcessRegressor(normalize_y=True)
    gp.fit(features, targets)

    mu_grad, cov_grad, _ = get_grad_moments(
        gp, price_grid, local_pts, epsilon, n_grad=2, prepend=[tte]
    )

    draws = np.random.multivariate_normal(mu_grad, cov_grad, size=n_samples)
    clipped = np.maximum(draws, 0)
    row_totals = np.sum(clipped, axis=-1, keepdims=True)
    return clipped / row_totals, mu_grad, cov_grad  # rows normalised to sum to 1

In [None]:
def _load_option_chain(path, expiry):
    """Read one tab-separated option chain and tag every row with its expiry.

    '-' cells are parsed as NaN and lines starting with '#' are skipped.
    """
    chain = pd.read_csv(path, delimiter='\t', header=0, index_col=False, na_values=['-'], comment='#')
    chain['expiry'] = expiry
    return chain


# `pd.datetime` was deprecated in pandas 1.0 and removed in 2.0; `pd.Timestamp`
# is a `datetime` subclass, so the date arithmetic below is unaffected.
now = pd.Timestamp(2019, 1, 30)
target_date = pd.Timestamp(2020, 1, 17)

df_20191101 = _load_option_chain('20191101.tsv', pd.Timestamp(2019, 11, 1))

df_20200117 = _load_option_chain('20200117.tsv', pd.Timestamp(2020, 1, 17))

df_20210115 = _load_option_chain('20210115.tsv', pd.Timestamp(2021, 1, 15))

In [None]:
# Stack the three expiries, then derive:
#   TTE - whole days from `now` to the row's expiry
#   Mid - midpoint of the bid/ask quote
calls_df = (
    pd.concat([df_20191101, df_20200117, df_20210115], axis=0)
    .assign(
        TTE=lambda chain: (chain['expiry'] - now).dt.days,
        Mid=lambda chain: (chain['Bid'] + chain['Ask']) / 2,
    )
)

In [None]:
tte = int((target_date - now).days)
price_grid = np.linspace(20, 60, 100)

In [None]:
X, mu_grad, cov_grad = get_price(calls_df, tte, price_grid, epsilon=1e-1)

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.plot(df_20200117['Strike'].values, df_20200117['Ask'].values, '.-')
plt.plot(df_20200117['Strike'].values, df_20200117['Bid'].values, '.-')

In [None]:
X = df_20200117['Strike'].values
y = 0.5 * (df_20200117['Ask'].values + df_20200117['Bid'].values)
spread = (df_20200117['Ask'].values - df_20200117['Bid'].values)
alpha_var = (spread**2) / 12  # moment match to uniform

price_grid = np.linspace(20, 60, 100)

In [None]:
base_kernel = Matern(
    nu=5.0 / 2.0,
    length_scale=10,
    length_scale_bounds=(1, 1000),
)

k1 = ConstantKernel(
    constant_value=1.0, constant_value_bounds=(0.01, 10000.0)
)

kernel = k1 * base_kernel

In [None]:
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, normalize_y=False, alpha=alpha_var)
gp.fit(X[:, None], y)
mu, cov = gp.predict(price_grid[:, None], return_cov=True)
gp.kernel_.get_params()

In [None]:
S = np.random.multivariate_normal(mu, cov, size=20)

In [None]:
plt.plot(X, y, '.-')
plt.plot(price_grid, mu, '-')
plt.plot(price_grid, S.T, '-')

In [None]:
plt.plot(price_grid, S[0,:], '-')

In [None]:
plt.plot(price_grid, S[1,:], '-')

In [None]:
dx = 0.1
x_padded = get_local_points(price_grid, 3, dx)

In [None]:
mu, cov = gp.predict(x_padded[:, None], return_cov=True)

In [None]:
plt.plot(x_padded, mu)

In [None]:
W = get_grad_mat(3, len(price_grid), 1, epsilon=dx)
mu_grad = np.dot(W, mu)
cov_grad = np.dot(np.dot(W, cov), W.T)

In [None]:
plt.plot(price_grid, mu_grad)

In [None]:
W = get_grad_mat(3, len(price_grid), 2, epsilon=dx)
mu_grad = np.dot(W, mu)
cov_grad = np.dot(np.dot(W, cov), W.T)

In [None]:
plt.plot(price_grid, mu_grad)

In [None]:
S = np.random.multivariate_normal(mu_grad, cov_grad, size=20)

In [None]:
plt.plot(price_grid, S[0,:], '-')

In [None]:
plt.plot(price_grid, S[3,:], '-')

In [None]:
pp = np.maximum(mu_grad, 0)
pp = pp / np.sum(pp)
mean_price = np.sum(price_grid*pp)

In [None]:
plt.figure(figsize=(5,3), dpi=300)
plt.plot(price_grid, pp)
plt.plot([mean_price, mean_price], [0, max(pp)], 'r--')
plt.xlabel('price (USD)')
plt.ylabel('PDF')
plt.title('Jan 15 2021')
plt.grid()

In [None]:
np.sum(price_grid*pp)

In [None]:
plt.figure(figsize=(5,3), dpi=300)
plt.plot(price_grid, np.cumsum(pp))
plt.grid()