In [1]:
import numpy as np
import pandas as pd
from numpy import linalg as nplin
import volpriceinference as vl
import matplotlib as mpl
import seaborn as sns
import logging
import tqdm
from multiprocessing import Pool

In [83]:
%matplotlib inline
mpl.style.use('seaborn-talk')
np.set_printoptions(precision=3)

In [3]:
logging.basicConfig(filename='convergence.tmp.log', level=logging.INFO)
logging.captureWarnings(True)

## Setup the True Parameters /  Simulation Parameters

## Setup the linking functions.

In [4]:
total_true_params = true_params.copy()
total_true_params.update(
    {'gamma': vl.gamma(rho=true_params['rho'], scale=true_params['scale'], delta=true_params['delta'], 
                       phi=true_params['phi'], pi=true_params['vol_price'], theta=true_params['equity_price']),
     'beta':vl.beta(rho=true_params['rho'], scale=true_params['scale'], phi=true_params['phi'], 
                    pi=true_params['vol_price'], theta=true_params['equity_price']),
     'psi':vl.psi(true_params['rho'], true_params['scale'], true_params['phi'], 
                  theta=true_params['equity_price']), 
     'phi_squared': true_params['phi']**2})

NameError: name 'true_params' is not defined

## We start by examining the distributions of simulated data

In [None]:
data = vl.simulate_data(time_dim=time_dim, **true_params)

In [None]:
fig1, ax1 = mpl.pyplot.subplots()
ax1.plot(data.rtn, label='Return')
ax1.plot(data.vol, label='Volatility')
ax1.legend()


In [None]:
sns.jointplot(x='vol', y='rtn', data=data, kind='reg')

In [None]:
data.describe()

## I now estimate all of the parameters

In [5]:
true_params = {'equity_price': 0.50, 
               'vol_price': -7,
               'phi':-.17,
               'scale': .1,
               'delta': 1,
               'rho': 0,
              }
time_dim = 10000
sim_dim = 200

In [6]:
results = np.arange(sim_dim)

In [7]:
data = vl.simulate_data(time_dim=time_dim, **true_params)


In [8]:
vol_data = vl.simulate_autoregressive_gamma(delta=1, rho=0, scale=1,time_dim=time_dim)


In [9]:
def get_data_and_results(x):
    data = vl.simulate_data(time_dim=time_dim, **true_params)
    init_constants = vl.compute_init_constants(data.vol)
    vol_est = vl.compute_vol_gmm(data.vol, init_constants=init_constants)
    return data, vol_est

In [10]:
# with Pool(8) as pool:
data_arr, est_arr = zip(*[result  for result in tqdm.tqdm_notebook(
    map(get_data_and_results, results), total=len(results))])

HBox(children=(IntProgress(value=0, max=200), HTML(value='')))




In [11]:
def t_stats(x):
    mean = x.mean()
    cov = x.cov()
        
    return mean   / np.sqrt(np.diag(cov) / data.vol.size)

In [12]:
moment_data = pd.DataFrame([t_stats(vl.vol_moments(data.vol, delta=true_params['delta'], rho=true_params['rho'], 
                scale=true_params['scale'])) for data in tqdm.tqdm_notebook(data_arr)])

HBox(children=(IntProgress(value=0, max=200), HTML(value='')))




In [13]:
est_arr[0][1]

Unnamed: 0,delta,rho,scale
delta,0.000122,0.000101,0.00102
rho,0.000101,0.000206,0.001028
scale,0.00102,0.001028,0.010632


In [15]:
from scipy import linalg as scilin

In [16]:
est_arr[0][1]

Unnamed: 0,delta,rho,scale
delta,0.000122,0.000101,0.00102
rho,0.000101,0.000206,0.001028
scale,0.00102,0.001028,0.010632


In [36]:
G = vl.vol_moments_grad(data_arr[40].vol, delta=true_params['delta'], rho=true_params['rho'], 
                scale=true_params['scale'])

In [94]:
G.T @ G

Unnamed: 0,delta,rho,scale
delta,0.011013,0.011619,0.113161
rho,0.011619,0.01249,0.120346
scale,0.113161,0.120346,1.172044


In [93]:
G.T @ thing.cov() @ G

Unnamed: 0,delta,rho,scale
delta,0.000138,0.000147,0.001432
rho,0.000147,0.000157,0.001528
scale,0.001432,0.001528,0.014879


In [50]:
mean = data_arr[40].vol.mean()

In [53]:
data_arr[40].vol

2000-01-01    0.059948
2000-01-02    0.076651
2000-01-03    0.070169
2000-01-04    0.363460
2000-01-05    0.079381
2000-01-06    0.095994
2000-01-07    0.070769
2000-01-08    0.019691
2000-01-09    0.086584
2000-01-10    0.047018
2000-01-11    0.144535
2000-01-12    0.035183
2000-01-13    0.150649
2000-01-14    0.075087
2000-01-15    0.031039
2000-01-16    0.016946
2000-01-17    0.075618
2000-01-18    0.014752
2000-01-19    0.146969
2000-01-20    0.027518
2000-01-21    0.013961
2000-01-22    0.113601
2000-01-23    0.086447
2000-01-24    0.144424
2000-01-25    0.011112
2000-01-26    0.010471
2000-01-27    0.019567
2000-01-28    0.173040
2000-01-29    0.021515
2000-01-30    0.034526
                ...   
2027-04-19    0.161265
2027-04-20    0.027642
2027-04-21    0.130014
2027-04-22    0.114610
2027-04-23    0.046414
2027-04-24    0.043016
2027-04-25    0.004082
2027-04-26    0.105323
2027-04-27    0.187715
2027-04-28    0.078656
2027-04-29    0.023509
2027-04-30    0.059556
2027-05-01 

In [66]:
thing = vl.vol_moments(data_arr[40].vol, delta=true_params['delta'], rho=true_params['rho'], 
                       scale=true_params['scale'])

In [67]:
G

Unnamed: 0,delta,rho,scale
0,-0.1,-0.101601,-1.0
1,-0.01016,-0.020931,-0.101601
2,-0.03,-0.04064,-0.4
3,-0.003048,-0.008372,-0.04064
4,-0.000628,-0.002642,-0.008372


Unnamed: 0,delta,rho,scale
delta,11477.292442,27.888497,-1111.001184
rho,27.888497,7554.388223,-778.380759
scale,-1111.001184,-778.380759,188.045358


In [98]:
GprimeG = pd.DataFrame(scilin.inv(G.T @ G), index=G.columns, columns=G.columns)

In [99]:
GprimeG

Unnamed: 0,delta,rho,scale
delta,11477.292442,27.888497,-1111.001184
rho,27.888497,7554.388223,-778.380759
scale,-1111.001184,-778.380759,188.045358


In [69]:
pd.DataFrame(scilin.inv(thing.cov()))

Unnamed: 0,0,1,2,3,4
0,957.76134,-5026.146665,-1804.926182,9252.863789,1841.997827
1,-5026.146665,50166.372575,9428.331701,-90076.049753,-25972.269375
2,-1804.926182,9428.331701,4715.098091,-29988.499957,15639.549822
3,9252.863789,-90076.049753,-29988.499957,355523.231288,-304448.328241
4,1841.997827,-25972.269375,15639.549822,-304448.328241,849884.249474


In [103]:
np.linalg.inv(G.T @ np.linalg.inv(thing.cov()) @ G)

array([[ 4.993,  0.03 , -0.525],
       [ 0.03 ,  0.953, -0.1  ],
       [-0.525, -0.1  ,  0.076]])

In [101]:
GprimeG @ G.T @ thing.cov() @ G @ GprimeG

Unnamed: 0,delta,rho,scale
delta,5.01492,0.042765,-0.529402
rho,0.042765,0.965328,-0.102876
scale,-0.529402,-0.102876,0.076412


In [None]:
moment_data.mean()

In [None]:
moment_fig, moment_ax = mpl.pyplot.subplots()
sns.distplot(moment_data.iloc[:,0], ax=moment_ax)
moment_ax.plot(np.linspace(-5,5,100), stats.norm.pdf(np.linspace(-5,5,100)), color='black')

In [None]:
vl.vol_moments_grad(data_arr[0].vol, true_params['delta'], true_params['rho'], true_params['scale'])

In [None]:
delta_fig, delta_ax = mpl.pyplot.subplots()
sns.distplot([(est['delta'] - true_params['delta']) / np.sqrt(cov.loc['delta','delta']) 
              for est, cov in est_arr], ax=delta_ax)
delta_ax.plot(np.linspace(-5,5,100), stats.norm.pdf(np.linspace(-5,5,100)), color='black')
delta_ax.set_xlabel('delta')

In [None]:
scale_fig, scale_ax = mpl.pyplot.subplots()
sns.distplot([(est['scale'] - true_params['scale']) / np.sqrt(cov.loc['scale', 'scale']) 
              for est, cov in est_arr], ax=scale_ax)
scale_ax.plot(np.linspace(-5,5,100), stats.norm.pdf(np.linspace(-5,5,100)), color='black')
scale_ax.set_xlabel('scale')

In [None]:
rho_fig, rho_ax = mpl.pyplot.subplots()
sns.distplot([(est['rho'] - true_params['rho']) / np.sqrt(cov.loc['rho', 'rho']) 
              for est, cov in est_arr], ax=rho_ax)
rho_ax.plot(np.linspace(-5,5,100), stats.norm.pdf(np.linspace(-5,5,100)), color='black')
rho_ax.set_xlabel('rho')

In [None]:
vol_tables = [vl.create_est_table(estimates=est, truth=total_true_params, cov=cov)
                 for est,cov in vol_est_arr]

In [None]:
pd.DataFrame([frame.truth - frame.estimate for frame in vol_tables]).mean(axis=0)

In [None]:
stage2_estimates, stage2_cov = vl.estimate_params(data)


In [None]:
vl.create_est_table(estimates=stage2_estimates, truth=total_true_params, cov=stage2_cov)

In [None]:
vl.cov_to_corr(stage2_cov)