In [1]:
import logging
from multiprocessing import Pool

import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
import tqdm
from numpy import linalg as nplin
from scipy import stats

import volpriceinference as vl

In [2]:
%matplotlib inline
mpl.style.use('seaborn-talk')

In [3]:
# Write log records of level INFO and above to a scratch log file instead of the notebook output.
logging.basicConfig(filename='convergence.tmp.log', level=logging.INFO)
# Redirect messages issued through the `warnings` module into the logging system as well.
logging.captureWarnings(True)

## Set up the True Parameters / Simulation Parameters

## Set up the linking functions.

In [None]:
# NOTE(review): `true_params` is assigned in a later cell (under "I now estimate all of
# the parameters"); on a fresh kernel that cell has to be run before this one.
# Start from the structural parameters and add the values returned by the vl linking
# functions (gamma, beta, psi) plus phi squared.
total_true_params = {
    **true_params,
    'gamma': vl.gamma(
        rho=true_params['rho'],
        scale=true_params['scale'],
        delta=true_params['delta'],
        phi=true_params['phi'],
        pi=true_params['vol_price'],
        theta=true_params['equity_price'],
    ),
    'beta': vl.beta(
        rho=true_params['rho'],
        scale=true_params['scale'],
        phi=true_params['phi'],
        pi=true_params['vol_price'],
        theta=true_params['equity_price'],
    ),
    'psi': vl.psi(
        true_params['rho'],
        true_params['scale'],
        true_params['phi'],
        theta=true_params['equity_price'],
    ),
    'phi_squared': true_params['phi'] ** 2,
}

## We start by examining the distributions of simulated data

In [None]:
data = vl.simulate_data(time_dim=time_dim, **true_params)

In [None]:
# Overlay the simulated return and volatility series on a single set of axes.
fig1, ax1 = mpl.pyplot.subplots()
for series, series_label in ((data.rtn, 'Return'), (data.vol, 'Volatility')):
    ax1.plot(series, label=series_label)
ax1.legend()


In [None]:
sns.jointplot(x='vol', y='rtn', data=data, kind='reg')

In [None]:
data.describe()

## I now estimate all of the parameters

In [4]:
# Parameter values handed to the vl simulators (and used as the "truth" in later comparisons).
true_params = dict(
    equity_price=0.50,
    vol_price=-7,
    phi=-.17,
    scale=.1,
    delta=1,
    rho=0,
)
# time_dim is forwarded to the simulators as `time_dim`; sim_dim sets how many
# replication indices are generated for the Monte Carlo loop.
time_dim, sim_dim = 3000, 1000

In [5]:
results = np.arange(sim_dim)

In [8]:
data = vl.simulate_data(time_dim=time_dim, **true_params)


In [9]:
# NOTE(review): the arguments here are hard-coded (scale=1) rather than taken from
# true_params (scale=.1), and vol_data is not referenced by any other cell shown in this
# notebook — confirm whether this cell is still needed.
vol_data = vl.simulate_autoregressive_gamma(delta=1, rho=0, scale=1,time_dim=time_dim)


In [10]:
def get_data_and_results(x):
    """Simulate one sample and run the volatility GMM estimator on it.

    Written to be mapped over a ``multiprocessing.Pool``. Forked workers start with
    a copy of the parent's global NumPy random state, so without reseeding, different
    workers can return identical "independent" samples. The global state is
    therefore reseeded from ``x`` before simulating, which also makes replication
    ``x`` reproducible.

    Parameters
    ----------
    x : int
        Replication index. Used only to derive the random seed
        (``int(x) % 2**32``, the range accepted by ``np.random.seed``).

    Returns
    -------
    tuple
        ``(data, vol_est)``: the object returned by ``vl.simulate_data`` and the
        result of ``vl.compute_vol_gmm`` applied to its ``vol`` attribute.
    """
    # NOTE(review): assumes the vl simulators draw from NumPy's global RNG (the
    # default for scipy.stats ``rvs``) — confirm against the vl implementation.
    np.random.seed(int(x) % 2**32)

    data = vl.simulate_data(time_dim=time_dim, **true_params)
    init_constants = vl.compute_init_constants(data.vol)
    vol_est = vl.compute_vol_gmm(data.vol, init_constants=init_constants)
    return data, vol_est

In [11]:
# Run every replication on a pool of 8 worker processes. imap_unordered yields results
# in completion order, so data_arr / est_arr are not ordered by replication index.
with Pool(8) as pool:
    progress = tqdm.tqdm_notebook(
        pool.imap_unordered(get_data_and_results, results), total=len(results))
    # Each result is a (data, estimate) pair; transpose them into two parallel tuples.
    data_arr, est_arr = zip(*progress)

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

Process ForkPoolWorker-4:
Process ForkPoolWorker-8:
Process ForkPoolWorker-7:
Process ForkPoolWorker-1:
Process ForkPoolWorker-2:
Process ForkPoolWorker-3:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkPoolWorker-6:
Process ForkPoolWorker-5:
  File "/home/sangrey/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/sangrey/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/sangrey/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/sangrey/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/sangrey/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/sangrey/an

  File "/home/sangrey/anaconda3/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py", line 2809, in rvs
    return super(rv_discrete, self).rvs(*args, **kwargs)
  File "/home/sangrey/anaconda3/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py", line 937, in rvs
    args, loc, scale, size = self._parse_args_rvs(*args, **kwds)
  File "/home/sangrey/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
KeyboardInterrupt
  File "<string>", line 6, in _parse_args_rvs
  File "/home/sangrey/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/sangrey/anaconda3/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/home/sangrey/anaconda3/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py", line 849, in _argcheck_rvs
    for (bcdim, szdim) in zip(bcast_shape, size_)])
  File "<ipython-in

KeyboardInterrupt: 

In [None]:
vl.simulate_data(time_dim=time_dim, **true_params).head()

In [None]:
data_arr[1].head()

In [None]:
data_arr[0].head()

In [None]:
true_params['rho']

In [None]:
# Sample average of (vol - delta * scale) * vol, with delta and scale taken from true_params.
np.mean((data.vol - true_params['delta'] * true_params['scale']) * data.vol)

In [None]:
# Sample average of vol**2 (with the [1:] slice applied to data.vol) minus
# delta * scale**2 + delta**2 * scale**2, evaluated at the values in true_params.
np.mean((data.vol[1:]**2 - true_params['delta'] * true_params['scale']**2 
         - true_params['delta']**2 * true_params['scale']**2))

In [None]:
vl.vol_moments(data.vol, delta=true_params['delta'], rho=0, scale=true_params['scale']).mean()

In [None]:
def t_stats(x):
    """Compute t-statistics for the column means of ``x``.

    Each column mean is divided by ``diag(pinv(cov)) ** -0.5 / sqrt(n)``, where
    ``cov`` is the sample covariance matrix of the columns of ``x`` and ``n`` is the
    number of rows of ``x``.

    The sample size is taken from ``x`` itself. It was previously read from the
    notebook-level ``data`` variable, which made the result depend on whichever
    ``data`` happened to be in the kernel (and fail when none was defined).

    Parameters
    ----------
    x : pandas.DataFrame
        One row per observation, one column per moment; must provide ``mean()``
        and ``cov()``.

    Returns
    -------
    pandas.Series
        One t-statistic per column of ``x``.
    """
    n_obs = len(x)
    column_means = x.mean()

    # The pseudo-inverse keeps this usable when the covariance matrix is singular.
    precision = nplin.pinv(x.cov())
    std_errors = np.diag(precision) ** (-.5) / np.sqrt(n_obs)

    return column_means / std_errors
    
    

In [None]:
t_stats(vl.vol_moments(data.vol, true_params['delta'], true_params['rho'], true_params['scale']))

In [None]:
data.vol.std()

In [None]:
data_arr[1] - data_arr[0]

In [None]:
data_arr[0].head()

In [None]:
[(data.vol.mean() - .1) / np.sqrt(data.vol.var() / data.vol.size) for data in data_arr]

In [None]:
# Distribution across simulated samples of the standardized mean volatility, with the
# standard normal density drawn on top for comparison.
# NOTE(review): the centring constant .1 is hard-coded; it coincides with
# true_params['scale'] (and with delta * scale) — confirm which is intended.
fig1, ax1 = mpl.pyplot.subplots()
mean_vol_zscores = [(sim.vol.mean() - .1) / np.sqrt(sim.vol.var() / sim.vol.size)
                    for sim in data_arr]
sns.distplot(mean_vol_zscores, ax=ax1)
z_grid = np.linspace(-5, 5, 100)
ax1.plot(z_grid, stats.norm.pdf(z_grid), color='black')


In [None]:
# One row of t-statistics per simulated sample, with the volatility moments evaluated at
# the delta, rho and scale values in true_params.
vol_moments = pd.DataFrame([
    t_stats(vl.vol_moments(sim.vol, true_params['delta'], true_params['rho'],
                           true_params['scale']))
    for sim in data_arr
])

In [None]:
# Distribution of the first column of vol_moments, with the standard normal density overlaid.
delta_fig, delta_ax = mpl.pyplot.subplots()
first_moment_tstats = vol_moments.iloc[:, 0]
sns.distplot(first_moment_tstats, ax=delta_ax)
z_grid = np.linspace(-5, 5, 100)
delta_ax.plot(z_grid, stats.norm.pdf(z_grid), color='black')
delta_ax.set_xlabel('delta')

In [None]:
sns.distplot(vol_moments.iloc[:,0], fit=stats.norm)

In [None]:
vol_moments[0]

In [None]:
# Standardized estimation error of delta across simulated samples, with the standard
# normal density overlaid.
delta_fig, delta_ax = mpl.pyplot.subplots()
delta_zscores = [
    (point['delta'] - true_params['delta']) / np.sqrt(cov_est.loc['delta', 'delta'])
    for point, cov_est in est_arr
]
sns.distplot(delta_zscores, ax=delta_ax)
z_grid = np.linspace(-5, 5, 100)
delta_ax.plot(z_grid, stats.norm.pdf(z_grid), color='black')
delta_ax.set_xlabel('delta')

In [None]:
# Standardized estimation error of scale across simulated samples, with the standard
# normal density overlaid.
scale_fig, scale_ax = mpl.pyplot.subplots()
scale_zscores = [
    (point['scale'] - true_params['scale']) / np.sqrt(cov_est.loc['scale', 'scale'])
    for point, cov_est in est_arr
]
sns.distplot(scale_zscores, ax=scale_ax)
z_grid = np.linspace(-5, 5, 100)
scale_ax.plot(z_grid, stats.norm.pdf(z_grid), color='black')
scale_ax.set_xlabel('scale')

In [None]:
# Standardized estimation error of rho across simulated samples, with the standard
# normal density overlaid.
rho_fig, rho_ax = mpl.pyplot.subplots()
rho_zscores = [
    (point['rho'] - true_params['rho']) / np.sqrt(cov_est.loc['rho', 'rho'])
    for point, cov_est in est_arr
]
sns.distplot(rho_zscores, ax=rho_ax)
z_grid = np.linspace(-5, 5, 100)
rho_ax.plot(z_grid, stats.norm.pdf(z_grid), color='black')
rho_ax.set_xlabel('rho')

In [None]:
# Build one estimate-vs-truth table per simulated sample.
# The (estimate, covariance) pairs come from est_arr, which the pool cell fills in; the
# name used here before, vol_est_arr, is never assigned in this notebook and raised a
# NameError on a fresh kernel.
vol_tables = [vl.create_est_table(estimates=est, truth=total_true_params, cov=cov)
              for est, cov in est_arr]

In [None]:
pd.DataFrame([frame.truth - frame.estimate for frame in vol_tables]).mean(axis=0)

In [None]:
stage2_estimates, stage2_cov = vl.estimate_params(data)


In [None]:
vl.create_est_table(estimates=stage2_estimates, truth=total_true_params, cov=stage2_cov)

In [None]:
vl.cov_to_corr(stage2_cov)