In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import pymc3 as pm
from theano import shared

# Load the data; the model below reads its X (predictor) and Y (response) columns.
df = pd.read_csv("data/data-salary.txt")
# Wrap the predictor in a Theano shared variable so that its value can be
# swapped via `x.set_value(...)` after the model has been built.
x = shared(df['X'].values)

# Sampler settings, gathered here so they are easy to tune.
N_DRAWS = 3000
N_JOBS = 4  # number of chains sampled in parallel
# One distinct seed per chain: makes the run reproducible while guaranteeing
# that no two chains share the same random stream.
CHAIN_SEEDS = [1234 + i for i in range(N_JOBS)]

# Model definition and sampling: obs ~ Normal(a + b * x, sigma)
with pm.Model() as model:
    # Wide Normal(0, 100) priors on the intercept and slope.
    a = pm.Normal("a", mu=0, sd=100)
    b = pm.Normal("b", mu=0, sd=100)
    # Half-normal prior keeps the noise scale positive.
    sigma = pm.HalfNormal("sigma", sd=100)
    # Register the linear predictor as a named deterministic so that it is
    # stored in the trace alongside a, b and sigma.
    y = pm.Deterministic("y", a + b * x)
    obs = pm.Normal("obs", mu=y, sd=sigma, observed=df['Y'])

    trace = pm.sample(N_DRAWS, njobs=N_JOBS, random_seed=CHAIN_SEEDS)

# Convergence diagnostics
# Discard the first 100 draws as burn-in. The slice is applied exactly once
# so that every diagnostic below is computed from the same set of draws.
trace = trace[100:]
pm.traceplot(trace, plot_transformed=True)

# Gelman-Rubin statistic (R-hat). It is printed explicitly: a bare expression
# that is not the last statement of a cell would be evaluated and discarded.
print(pm.gelman_rubin(trace))
pm.forestplot(trace)

# Posterior predictive distribution and its plot
# Grid of predictor values to predict for (23, 24, ..., 60); defined once and
# reused for both the model input and the x-axis of the plot.
x_new = np.arange(23, 61)
# Replace the data held by the shared variable so that the predictive draws
# are generated for the grid instead of the observed X values.
x.set_value(x_new)
# Fixed seed so the predictive draws (and hence the bands) are reproducible.
ppc = pm.sample_ppc(trace, model=model, samples=3000, random_seed=1234)

obs_draws = ppc['obs']
# Central 95% interval: 2.5% of the draws lie in each tail.
pred_95 = np.percentile(obs_draws, [2.5, 97.5], axis=0)
# Central 50% interval (inter-quartile range).
pred_50 = np.percentile(obs_draws, [25, 75], axis=0)
# Mean of the predictive draws at each grid point.
pred_eap = obs_draws.mean(axis=0)

fig, ax = plt.subplots(figsize=(6, 3))
# Observed data points.
ax.scatter(df.X, df.Y, color="gray", alpha=0.7, s=24)
# Predictive mean, with the 95% and 50% bands drawn on top of each other.
ax.plot(x_new, pred_eap, color="k", lw=1.5)
ax.fill_between(x_new, pred_95[0], pred_95[1],
                facecolor="steelblue", alpha=0.3, lw=0)
ax.fill_between(x_new, pred_50[0], pred_50[1],
                facecolor="lightseagreen", alpha=0.3, lw=0)
ax.set(title="年収の予測区間", xlabel="年齢", ylabel="年収")
plt.show()

AttributeError: module 'numpy.distutils.__config__' has no attribute 'blas_opt_info'