In [7]:
%matplotlib inline
import io
import requests
import pymc3 as pm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%config InlineBackend.figure_formats = ['retina']
plt.rc('font', size=12)
plt.style.use('seaborn-darkgrid')


print('pycm3:', pm.__version__)

pycm3: 3.7


In [11]:
# 1. Load the stock returns data.
#
# The CSV is fetched over HTTP. A timeout keeps a stalled connection from
# hanging the kernel, and raise_for_status() makes a 4xx/5xx response fail
# here instead of letting an error page be parsed as CSV further down.

url='https://raw.githubusercontent.com/martin-fabbri/colab-notebooks/master/pymc3/stock_returns.csv'
response = requests.get(url, timeout=30)
response.raise_for_status()
s = response.content

series = pd.read_csv(io.StringIO(s.decode('utf-8')))
# Keep at most the first 1000 rows; `.values` on the DataFrame yields a 2-D
# array (one row per observation), which is what the models below observe.
returns = series.values[:1000]
returns[:3]

array([[ 0.        ],
       [ 0.        ],
       [-0.02702703]])

In [0]:
# Print pandas' summary of the loaded DataFrame as a quick sanity check
# on what was parsed from the CSV.
series.info()

In [0]:
# Histogram of the observed returns; the trailing semicolon suppresses the
# textual repr of the value returned by plt.hist.
plt.hist(returns, edgecolor='white');

In [0]:
# 2. Baseline: check whether a Gaussian likelihood is a sensible fit.
model1 = pm.Model()

with model1:
    # Priors on the scale (half-normal) and location (normal) of the returns.
    stdev = pm.HalfNormal('stdev', sd=0.1)
    mu = pm.Normal('mu', mu=0.0, sd=1.0)

    # Gaussian likelihood conditioned on the observed returns.
    pm.Normal('returns', mu=mu, sd=stdev, observed=returns)

In [0]:
with model1:
    # 500 draws after 1000 tuning iterations. The fixed seed makes the
    # sampler output repeatable when the notebook is re-run from scratch.
    trace = pm.sample(500, tune=1000, random_seed=42)

In [0]:
# Posterior predictive simulation for the Gaussian model.
# `pm.sample_ppc` is deprecated; `pm.sample_posterior_predictive` is the
# current name and takes the same arguments. The seed keeps the simulated
# data repeatable across runs.
preds = pm.sample_posterior_predictive(
    trace, samples=500, model=model1, random_seed=42
)

# Average the simulated 'returns' over axis 0 and flatten to 1-D for plotting.
# NOTE(review): axis 0 is presumably the draw axis, so this plots the
# per-observation mean of the simulations rather than individual simulated
# returns — confirm that is the intended comparison.
y = np.reshape(np.mean(preds['returns'], axis=0), [-1])

In [0]:
# Side-by-side histograms: Gaussian-model predictions (left) against the
# observed returns (right).
fig, axes = plt.subplots(1, 2)

panels = [
    (y, 'Normal distribution returns'),
    (returns, 'Real returns'),
]
for ax, (values, title) in zip(axes, panels):
    ax.hist(values, ec='white')
    ax.set_title(title)

plt.show()

In [0]:
# 3. Now relax the normality assumption and fit a Cauchy distribution.
with pm.Model() as model2:

    # Half-normal prior on the Cauchy scale parameter.
    beta = pm.HalfNormal('beta', sd=10.)

    # Cauchy likelihood with its location fixed at zero.
    pm.Cauchy('returns', alpha=0.0, beta=beta, observed=returns)

    # Fit an ADVI approximation with Adam. ADVI is a stochastic optimisation,
    # so a fixed seed is passed to make the fit repeatable on re-run.
    mean_field = pm.fit(
        n=150000,
        method='advi',
        obj_optimizer=pm.adam(learning_rate=.001),
        random_seed=42,
    )

In [0]:
with model2:
    # trace2: draws taken from the fitted ADVI approximation.
    trace2 = mean_field.sample(draws=10000)
    # trace3: MCMC draws from the same model, with a high target acceptance
    # rate. The fixed seed makes the sampler output repeatable on re-run.
    trace3 = pm.sample(5000, tune=10000, target_accept=0.99, random_seed=42)

In [0]:
# Plot the sampled values in trace3 as a visual sampling diagnostic; the
# trailing semicolon suppresses the textual repr of the returned object.
pm.traceplot(trace3);

In [0]:
# Tabular summary of trace3, displayed as the cell's output.
pm.summary(trace3)

In [0]:
# Posterior predictive simulation for the Cauchy model, using the MCMC
# trace (trace3). Swap in trace2 to use the ADVI draws instead.
# `pm.sample_ppc` is deprecated; `pm.sample_posterior_predictive` is the
# current name and takes the same arguments. The seed keeps the simulated
# data repeatable across runs.
preds2 = pm.sample_posterior_predictive(
    trace3, samples=10000, model=model2, random_seed=42
)

# Average the simulated 'returns' over axis 0 and flatten to 1-D for plotting.
y2 = np.reshape(np.mean(preds2['returns'], axis=0), [-1])

In [0]:
# Side-by-side histograms: Cauchy-model predictions (left) against the
# observed returns (right).
fig, axes = plt.subplots(1, 2)

panels = [
    (y2, 'Cauchy distribution returns'),
    (returns, 'Real returns'),
]
for ax, (values, title) in zip(axes, panels):
    ax.hist(values, ec='white')
    ax.set_title(title)

plt.show()

In [0]:
# Give each model a human-readable name before comparing them.
model1.name = 'Gaussian model'
model2.name = 'Cauchy model'

# Pair every model with the trace it is scored on, then compare by LOO.
# NOTE(review): model2 is paired with trace2 (the ADVI draws), while the
# posterior predictive cell uses trace3 — confirm this pairing is intended.
traces_by_model = {model1: trace, model2: trace2}
df_LOO = pm.compare(traces_by_model, ic='LOO')

In [0]:
# Print a caption, then display the comparison table as the cell's output
# (a bare last expression gets the notebook's rich rendering).
print("LOO comparison table: ")
df_LOO
