In [None]:
%matplotlib inline
import pymc3 as pm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Ask the inline backend to render figures in the high-resolution 'retina' format.
%config InlineBackend.figure_formats = ['retina']
# Global plot styling: default font size 12, then the seaborn dark-grid style sheet.
# Order is kept as-is: the style sheet is applied after the font setting.
plt.rc('font', size=12)
plt.style.use('seaborn-darkgrid')

In [None]:
# 1. Read the stock-returns CSV into a DataFrame, then keep (at most) the
#    first 1000 rows as a plain NumPy array for modelling.
series = pd.read_csv('stock_returns.csv')
returns = series.iloc[:1000].values

In [None]:
# Print a summary of the loaded DataFrame (columns, dtypes, non-null counts).
series.info()

In [None]:
# Histogram of the observed returns; white bar edges keep adjacent bins distinguishable.
# The trailing semicolon suppresses the textual repr of the returned objects.
plt.hist(returns, edgecolor='white');

In [None]:
# 2. first, let's see if it makes sense to fit a Gaussian distribution to this.
# Gaussian model: the returns are treated as Normal(mu, stdev) with both parameters unknown.
with pm.Model() as model1:

    # Priors: half-normal (sd=0.1) on the standard deviation, Normal(0, 1) on the mean.
    stdev = pm.HalfNormal('stdev', sd=.1)
    mu = pm.Normal('mu', mu=0.0, sd=1.)

    # Likelihood: ties the parameters to the observed `returns` array.
    pm.Normal('returns', mu=mu, sd=stdev, observed=returns)

In [None]:
# Draw posterior samples for the Gaussian model: 500 kept draws after 1000
# tuning iterations (PyMC3 chooses the step method automatically).
# A fixed random_seed makes Restart & Run All reproduce the same trace.
with model1:
    trace = pm.sample(500, tune=1000, random_seed=42)

In [None]:
# Posterior predictive simulation for the Gaussian model: 500 replicated data
# sets, indexed along axis 0 of preds['returns']. Seeded for reproducibility.
preds = pm.sample_ppc(trace, samples=500, model=model1, random_seed=42)
# Use ONE replicated data set (flattened) for the visual check against the real
# returns. Averaging over the draw axis instead would yield per-observation
# predictive *means*, whose spread is roughly sqrt(500) times narrower than the
# predictive distribution, so the histogram would not be comparable to the data.
y = np.reshape(preds['returns'][0], [-1])

In [None]:
# Two panels on one row: values derived from the Gaussian model's posterior
# predictive samples (`y`, left) next to the observed returns (right).
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2)

ax1.hist(y, edgecolor='white')
ax1.set(title='Normal distribution returns')

ax2.hist(returns, edgecolor='white')
ax2.set(title='Real returns')

plt.show()

In [None]:
# 3. Relax the normality assumption: model the returns with a Cauchy
#    likelihood located at zero, learning only its scale `beta`.
with pm.Model() as model2:

    # Half-normal prior (sd=10) on the Cauchy scale parameter.
    beta = pm.HalfNormal('beta', sd=10.)

    # Likelihood: ties `beta` to the observed `returns` array.
    pm.Cauchy('returns', alpha=0.0, beta=beta, observed=returns)

    # Variational fit with ADVI: 150000 iterations of Adam at learning rate 0.001.
    # random_seed pins the stochastic optimisation so a re-run gives the same fit.
    mean_field = pm.fit(n=150000, method='advi', random_seed=42,
                        obj_optimizer=pm.adam(learning_rate=.001))

In [None]:
# Two sets of draws for the Cauchy model:
#   trace2 - 10000 draws from the fitted ADVI approximation (`mean_field`);
#   trace3 - 5000 MCMC draws after 10000 tuning iterations, with a high
#            target acceptance rate (0.99).
# The MCMC run is seeded so Restart & Run All reproduces the same trace.
with model2:
    trace2 = mean_field.sample(draws=10000)
    trace3 = pm.sample(5000, tune=10000, target_accept=0.99, random_seed=42)

In [None]:
# Trace plot of the MCMC draws for the Cauchy model (visual convergence check);
# the trailing semicolon suppresses the textual repr of the returned axes.
pm.traceplot(trace3);

In [None]:
# Tabular posterior summary of the MCMC draws for the Cauchy model.
pm.summary(trace3)

In [None]:
# Posterior predictive simulation for the Cauchy model, driven by the MCMC
# trace (`trace3`); pass `trace2` instead to use the ADVI draws. Seeded for
# reproducibility.
preds2 = pm.sample_ppc(trace3, samples=10000, model=model2, random_seed=42)
# Use ONE replicated data set (flattened) for the visual check. Averaging over
# the draw axis is not a meaningful summary here: a Cauchy distribution has no
# mean, so the per-observation average does not settle down as draws are added.
y2 = np.reshape(preds2['returns'][0], [-1])

In [None]:
# Two panels on one row: values derived from the Cauchy model's posterior
# predictive samples (`y2`, left) next to the observed returns (right).
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2)

ax1.hist(y2, edgecolor='white')
ax1.set(title='Cauchy distribution returns')

ax2.hist(returns, edgecolor='white')
ax2.set(title='Real returns')

plt.show()

In [None]:
# Label the models; pm.compare uses these names to identify the rows of its table.
model1.name = 'Gaussian model'
model2.name = 'Cauchy model'
# Leave-one-out comparison of the two models. Both entries are MCMC traces
# (`trace` and `trace3`) so the models are compared on the same footing;
# `trace2` holds draws from the ADVI approximation and is the one NOT used by
# the trace plot, summary and posterior predictive cells.
df_LOO = pm.compare({model1: trace, model2: trace3}, ic='LOO')

In [None]:
# Print a caption, then let the notebook render the comparison table as the cell output.
print("LOO comparison table: ")
df_LOO
