
Lab: `pyro`簡介
================
在 `colab` 上，請先使用以下指令安裝 `pyro`：

`!pip3 install pyro-ppl`

In [1]:
import torch
import pyro

## `pyro` 產生隨機資料

`pyro` 產生隨機資料的方式與 `torch.distributions` 很類似，但其主要透過 `pyro.sample` 進行抽樣，且每次抽樣都可以為該隨機變數設定一個名稱。在以下的例子中，我們先後產生滿足邏輯斯迴歸架構之 `data_x` 與 `data_y`：

In [2]:
# Fix the RNG seed so the simulated design matrix is reproducible.
torch.manual_seed(246437)
n_sample = 10000
n_feature = 3
# The mean has one zero row per observation, so a single named draw yields
# all n_sample feature vectors at once; the identity Cholesky factor gives
# independent unit-variance features.
x_dist = pyro.distributions.MultivariateNormal(
    loc=torch.zeros(n_sample, n_feature),
    scale_tril=torch.eye(n_feature))
data_x = pyro.sample("data_x", x_dist)

In [3]:
# Sample mean and (divide-by-n) sample covariance of the simulated features,
# printed as a sanity check against the zero mean / identity covariance used
# to generate them.
mu_x = data_x.mean(axis=0)
centered_x = data_x - mu_x
sigma_x = centered_x.T @ centered_x / n_sample
print(mu_x)
print(sigma_x)


tensor([0.0063, 0.0103, 0.0074])
tensor([[ 1.0085,  0.0217,  0.0029],
        [ 0.0217,  0.9884, -0.0095],
        [ 0.0029, -0.0095,  0.9971]])


In [4]:
# True coefficients of the logistic-regression data-generating process.
weight_true = torch.tensor([[10.], [5.], [-5.]])
intercept_true = torch.tensor(-5.)
# Linear predictor for every observation; weight_true is a column vector, so
# the logits (and hence data_y) keep a trailing dimension of size 1.
logits_true = intercept_true + data_x @ weight_true
data_y = pyro.sample("data_y",
                     pyro.distributions.Bernoulli(logits=logits_true))
# Bundle responses and features in the dict layout that model_lr reads.
data = {"y": data_y, "x": data_x}



## `pyro` 對邏輯斯迴歸進行最大概似法估計

使用 `pyro` 進行最大概似法估計，最簡單的方法是透過 `pyro.infer.SVI` 物件。該物件主要用於變分推論（variational inference），使用時需設定一個模型（`model`）的機率分佈，以及一個指引（`guide`）的機率分佈。由於這裡的模型沒有潛在變項，ELBO 即為觀察資料的對數概似，因此我們將指引設為一個什麼都不做的函數，此時最小化 `Trace_ELBO` 損失就等同於進行最大概似法估計。

In [5]:
def model_lr(data):
    """Logistic-regression likelihood whose coefficients are Pyro parameters.

    Args:
        data: dict with key "x" (the design matrix, one row per observation)
            and key "y" (the binary responses, passed as the observed value
            of the "y" sample site).

    Returns:
        The value of the "y" sample site.
    """
    # Size the weight column from the design matrix instead of hard-coding
    # three features, so the model works for any number of predictors.
    n_col = data["x"].size(-1)
    # The tensors given to pyro.param are initial values for the parameters
    # named "weight" and "intercept".
    weight = pyro.param("weight", torch.zeros((n_col, 1)))
    intercept = pyro.param("intercept", torch.zeros(()))
    logits = intercept + data["x"] @ weight
    y = pyro.sample("y", pyro.distributions.Bernoulli(logits = logits),
                    obs = data["y"])
    return y

def guide_lr(data):
    """Empty guide paired with model_lr.

    It declares no sample sites and ignores ``data``; the argument exists
    only so the guide can be called with the same arguments as the model.
    """
    return None

接著，我們就可以使用 `pyro.infer.SVI` 來進行優化。

In [6]:
# Optimisation settings: the step size is scaled down by the sample size.
lr = 50. / n_sample
n_steps = 201

# Start from an empty parameter store so that reruns of this cell begin from
# the initial values declared inside model_lr.
pyro.clear_param_store()
optimizer = pyro.optim.SGD({"lr": lr})
svi = pyro.infer.SVI(
    model_lr,
    guide_lr,
    optimizer,
    loss=pyro.infer.Trace_ELBO(),
)

for step in range(n_steps):
    loss = svi.step(data)
    # Only report every 50th iteration.
    if step % 50:
        continue
    print('[iter {}]  loss: {:.4f}'.format(step, loss))

[iter 0]  loss: 6931.4731
[iter 50]  loss: 984.7116
[iter 100]  loss: 969.8328
[iter 150]  loss: 967.9623
[iter 200]  loss: 967.8027


In [7]:
# List the names currently registered in the global parameter store ...
param_store = pyro.get_param_store()
for name in param_store:
    print(name)

# ... then show the fitted coefficient values, one tensor per line.
print(pyro.param("weight"), pyro.param("intercept"), sep="\n")



weight
intercept
tensor([[10.1213],
        [ 5.0705],
        [-5.0297]], requires_grad=True)
tensor(-5.0715, requires_grad=True)


## `pyro` 對因素分析模型進行最大概似法估計
以下程式碼用於產生因素分析之資料

In [8]:
def create_fa_model(n_factor, n_item, ld, psi = None, rho = None):
    """Build the population matrices of a simple-structure factor model.

    Args:
        n_factor: number of factors.
        n_item: requested number of items; rounded down to the nearest
            multiple of ``n_factor`` so every factor has the same number
            of items.
        ld: value given to every non-zero loading.
        psi: common unique variance; when None the unique variances are
            ``1 - diag(loading @ cor @ loading.T)``.
        rho: common correlation between factors; when None the factors are
            uncorrelated.

    Returns:
        Tuple ``(loading, uniqueness, cor)`` with shapes
        ``(n_item, n_factor)``, ``(n_item,)`` and ``(n_factor, n_factor)``.
    """
    # Rounding down is a no-op when n_item is already a multiple of n_factor.
    items_each = n_item // n_factor
    n_item = n_factor * items_each

    # Each factor loads on its own consecutive block of items.
    loading = torch.zeros((n_item, n_factor))
    for factor in range(n_factor):
        first = factor * items_each
        loading[first:first + items_each, factor] = ld

    if rho is None:
        cor = torch.eye(n_factor)
    else:
        # Compound-symmetric matrix: 1 on the diagonal, rho elsewhere.
        ones = torch.ones((n_factor, 1))
        cor = rho * (ones @ ones.T) + (1 - rho) * torch.eye(n_factor)

    if psi is None:
        uniqueness = 1 - torch.diagonal(loading @ cor @ loading.T)
    else:
        uniqueness = psi * torch.ones((n_item, ))
    return loading, uniqueness, cor

def generate_fa_data(n_sample, loading, uniqueness, cor):
    """Draw observations from the normal distribution implied by a factor model.

    Args:
        n_sample: number of observations to draw.
        loading: ``(n_item, n_factor)`` loading matrix.
        uniqueness: ``(n_item,)`` vector of unique variances.
        cor: ``(n_factor, n_factor)`` factor correlation matrix.

    Returns:
        Tensor of shape ``(n_sample, n_item)`` sampled from a zero-mean
        multivariate normal with covariance
        ``loading @ cor @ loading.T + diag(uniqueness)``.
    """
    n_item = loading.size()[0]
    mean = torch.zeros((n_item, ))
    cov = loading @ cor @ loading.T + torch.diag_embed(uniqueness)
    # torch.cholesky is deprecated; torch.linalg.cholesky returns the same
    # lower-triangular factor.
    mvn = torch.distributions.MultivariateNormal(
        loc = mean, scale_tril = torch.linalg.cholesky(cov))
    data = mvn.sample((n_sample,))
    return data

# Reseed so the simulated factor-analysis data are reproducible.
torch.manual_seed(246437)
n_factor = 4
n_item = 12
n_sample = 10000
# Population model: every non-zero loading is .7, and psi / rho are left at
# their None defaults.
loading_true, uniqueness_true, cor_true = create_fa_model(
    n_factor=n_factor, n_item=n_item, ld=.7)
# NOTE: `data` is rebound here; from this point on it is the tensor of
# simulated observations, not the logistic-regression dict built earlier.
data = generate_fa_data(n_sample, loading_true, uniqueness_true, cor_true)

接著，我們設定觀察變項之邊際分佈。這邊，我們在模型設定時，使用了 `pyro.plate` 來進行重複之設定。

In [9]:
def model_fa(data):
    """Marginal factor-analysis likelihood of the observed items.

    The observed vector is modelled as zero-mean multivariate normal with
    covariance ``L @ L.T + diag(uniqueness)``, where ``L`` is the loading
    parameter with its structurally-zero entries masked out.

    Args:
        data: tensor with one observation per row; used as the observed
            value of the "x" sample site.
    """
    # Initial values are half of the population values held in the
    # notebook-level loading_true / uniqueness_true.
    loading = pyro.param("loading", 0.5 * loading_true)
    uniqueness = pyro.param("uniqueness", 0.5 * uniqueness_true)
    # 0/1 mask that keeps only the entries that are non-zero in loading_true,
    # so the remaining loadings stay fixed at zero.
    loading_mask = 1 *  (loading_true != 0)
    masked_loading = loading * loading_mask
    cov = masked_loading @ masked_loading.T + torch.diag_embed(uniqueness)
    with pyro.plate("data", data.size(0)):
        # torch.cholesky is deprecated; torch.linalg.cholesky returns the
        # same lower-triangular factor.
        pyro.sample("x", pyro.distributions.MultivariateNormal(
            loc = torch.zeros((loading.size()[0], )),
            scale_tril = torch.linalg.cholesky(cov)),
            obs=data)

def guide_fa(data):
    """Empty guide paired with model_fa.

    It declares no sample sites and ignores ``data``; the argument exists
    only so the guide can be called with the same arguments as the model.
    """
    return None

In [10]:
# Optimisation settings: the step size is scaled down by the sample size.
lr = 1. / n_sample
n_steps = 201
# Start from an empty parameter store so that reruns of this cell begin from
# the initial values declared inside model_fa.
pyro.clear_param_store()
optimizer = pyro.optim.SGD({"lr": lr})
svi = pyro.infer.SVI(
    model_fa,
    guide_fa,
    optimizer,
    loss=pyro.infer.Trace_ELBO(),
)

for step in range(n_steps):
    loss = svi.step(data)
    # Only report every 50th iteration.
    if step % 50:
        continue
    print('[iter {}]  loss: {:.4f}'.format(step, loss))

[iter 0]  loss: 189637.8906
[iter 50]  loss: 156899.3438
[iter 100]  loss: 156943.4688
[iter 150]  loss: 156944.6719
[iter 200]  loss: 156944.6719


In [11]:
# Show the fitted loading matrix and unique variances, one tensor per line.
print(pyro.param("loading"), pyro.param("uniqueness"), sep="\n")


tensor([[0.6992, 0.0000, 0.0000, 0.0000],
        [0.6954, 0.0000, 0.0000, 0.0000],
        [0.6993, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.7133, 0.0000, 0.0000],
        [0.0000, 0.7040, 0.0000, 0.0000],
        [0.0000, 0.7090, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.7618, 0.0000],
        [0.0000, 0.0000, 0.6650, 0.0000],
        [0.0000, 0.0000, 0.6700, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.6963],
        [0.0000, 0.0000, 0.0000, 0.7116],
        [0.0000, 0.0000, 0.0000, 0.7105]], requires_grad=True)
tensor([0.5055, 0.5043, 0.5135, 0.5078, 0.5059, 0.5122, 0.5224, 0.5030, 0.5064,
        0.5262, 0.4993, 0.5094], requires_grad=True)


前述設定的模型中，我們在設定模型與指引時，皆直接將樣本資料視為函數的輸入。事實上，給定資料的動作也可以事後再使用 `pyro.poutine.condition` 進行。


In [12]:
def model_fa():
    """Factor-analysis model with an unobserved "x" site.

    Same distribution as the data-argument version of the model, but the
    "x" site carries no ``obs``; observations are meant to be attached
    afterwards (for example with ``pyro.poutine.condition``).

    Note: the plate size is read from the notebook-level ``data`` tensor.
    """
    # Initial values are half of the population values held in the
    # notebook-level loading_true / uniqueness_true.
    loading = pyro.param("loading", 0.5 * loading_true)
    uniqueness = pyro.param("uniqueness", 0.5 * uniqueness_true)
    # 0/1 mask that keeps only the entries that are non-zero in loading_true,
    # so the remaining loadings stay fixed at zero.
    loading_mask = 1 *  (loading_true != 0)
    masked_loading = loading * loading_mask
    cov = masked_loading @ masked_loading.T + torch.diag_embed(uniqueness)
    with pyro.plate("data", data.size(0)):
        # torch.cholesky is deprecated; torch.linalg.cholesky returns the
        # same lower-triangular factor.
        pyro.sample("x", pyro.distributions.MultivariateNormal(
            loc = torch.zeros((loading.size()[0], )),
            scale_tril = torch.linalg.cholesky(cov)))

def guide_fa():
    """Empty guide paired with the argument-free model_fa.

    It declares no sample sites and takes no arguments, matching the
    model's call signature.
    """
    return None


# Attach the observed tensor to the "x" site after the model is defined.
# guide_fa declares no sample sites, so conditioning it presumably has no
# effect; it is wrapped the same way to mirror the model.
observed = {"x": data}
model_fa_cond = pyro.poutine.condition(model_fa, data=observed)
guide_fa_cond = pyro.poutine.condition(guide_fa, data=observed)

接著，我們可以把 `model_fa_cond` 與 `guide_fa_cond` 丟到 `pyro.infer.SVI` 進行優化。

In [13]:
# Optimisation settings: the step size is scaled down by the sample size.
lr = 1. / n_sample
n_steps = 201
# Start from an empty parameter store so that reruns of this cell begin from
# the initial values declared inside model_fa.
pyro.clear_param_store()
optimizer = pyro.optim.SGD({"lr": lr})
svi = pyro.infer.SVI(
    model_fa_cond,
    guide_fa_cond,
    optimizer,
    loss=pyro.infer.Trace_ELBO(),
)

for step in range(n_steps):
    # The conditioned model and guide take no arguments.
    loss = svi.step()
    # Only report every 50th iteration.
    if step % 50:
        continue
    print('[iter {}]  loss: {:.4f}'.format(step, loss))

[iter 0]  loss: 189637.8906
[iter 50]  loss: 156899.3438
[iter 100]  loss: 156943.4688
[iter 150]  loss: 156944.6719
[iter 200]  loss: 156944.6719


In [14]:
# Show the fitted loading matrix and unique variances, one tensor per line.
print(pyro.param("loading"), pyro.param("uniqueness"), sep="\n")


tensor([[0.6992, 0.0000, 0.0000, 0.0000],
        [0.6954, 0.0000, 0.0000, 0.0000],
        [0.6993, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.7133, 0.0000, 0.0000],
        [0.0000, 0.7040, 0.0000, 0.0000],
        [0.0000, 0.7090, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.7618, 0.0000],
        [0.0000, 0.0000, 0.6650, 0.0000],
        [0.0000, 0.0000, 0.6700, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.6963],
        [0.0000, 0.0000, 0.0000, 0.7116],
        [0.0000, 0.0000, 0.0000, 0.7105]], requires_grad=True)
tensor([0.5055, 0.5043, 0.5135, 0.5078, 0.5059, 0.5122, 0.5224, 0.5030, 0.5064,
        0.5262, 0.4993, 0.5094], requires_grad=True)


### 利用隨機 EM 進行最大概似法

In [15]:
def joint_model():
    """Joint factor-analysis model of factor scores "eta" and items "x".

    For each of the ``n_sample`` rows, "eta" is drawn from a standard
    multivariate normal of dimension ``n_factor`` and "x" is drawn from a
    multivariate normal with mean ``eta @ L.T`` and diagonal covariance
    ``diag(uniqueness)``, where ``L`` is the masked loading parameter.
    ``n_sample``, ``n_factor``, ``loading_true`` and ``uniqueness_true`` are
    read from the notebook namespace.

    Returns:
        The value of the "x" sample site.
    """
    # Initial values are 0.7 times the population values.
    loading = pyro.param("loading", 0.7 * loading_true)
    uniqueness = pyro.param("uniqueness", 0.7 * uniqueness_true)
    # 0/1 mask that keeps only the entries that are non-zero in loading_true.
    loading_mask = 1 *  (loading_true != 0)
    with pyro.plate("sample_plate", n_sample):
        eta = pyro.sample("eta", pyro.distributions.MultivariateNormal(
            loc = torch.zeros(n_factor),
            scale_tril = torch.eye(n_factor)))
        # torch.cholesky is deprecated; torch.linalg.cholesky returns the
        # same lower-triangular factor.
        x = pyro.sample("x", pyro.distributions.MultivariateNormal(
                loc = eta @ (loading * loading_mask).T,
                scale_tril = torch.linalg.cholesky(torch.diag(uniqueness))))
    return x

def joint_guide():
    """Empty guide paired with joint_model.

    It declares no sample sites and takes no arguments, matching the
    model's call signature.
    """
    return None

In [16]:
# Optimisation settings for the stochastic EM loop.
lr = .1 / n_sample
n_steps = 51
pyro.clear_param_store()

# Loop-invariant pieces are built once: the model conditioned on the observed
# items only (used by the sampler), and the SGD optimizer, which is configured
# with a learning rate only.
model_cond_x = pyro.poutine.condition(joint_model, data = {"x": data})
optimizer = pyro.optim.SGD({"lr": lr})

for step in range(n_steps):
    # Stochastic E-step: draw a single sample of the factor scores given the
    # observed items with NUTS (20 warm-up iterations, 1 retained sample).
    nuts_kernel = pyro.infer.NUTS(model_cond_x)
    mcmc = pyro.infer.MCMC(nuts_kernel, num_samples=1, warmup_steps = 20)
    mcmc.run()
    # Flatten the draw to one row per observation; use n_factor instead of a
    # hard-coded 4 so the cell follows the configured number of factors.
    eta = mcmc.get_samples()["eta"].reshape((-1, n_factor))

    # M-step: with both "x" and "eta" fixed, take one gradient step on the
    # parameters through SVI with the empty guide.
    complete_data = {"x": data, "eta": eta}
    model_cond_x_eta = pyro.poutine.condition(joint_model,
                                              data = complete_data)
    guide_cond_x_eta = pyro.poutine.condition(joint_guide,
                                              data = complete_data)
    svi = pyro.infer.SVI(model_cond_x_eta,
                         guide_cond_x_eta,
                         optimizer,
                         loss=pyro.infer.Trace_ELBO())
    loss = svi.step()
    if step % 5 == 0:
        print('[iter {}]  loss: {:.4f}'.format(step, loss))



Sample: 100%|██████████| 21/21 [00:05,  4.06it/s, step size=6.11e-01, acc. prob=0.182]
Sample: 100%|██████████| 21/21 [00:12,  1.63it/s, step size=1.16e+00, acc. prob=0.000]
Sample: 100%|██████████| 21/21 [00:05,  3.93it/s, step size=1.09e+00, acc. prob=0.000]
Sample: 100%|██████████| 21/21 [00:05,  3.50it/s, step size=5.61e-01, acc. prob=0.262]
Sample: 100%|██████████| 21/21 [00:05,  3.51it/s, step size=8.48e-01, acc. prob=0.286]
Sample: 100%|██████████| 21/21 [00:05,  3.90it/s, step size=7.55e-01, acc. prob=0.286]
Sample: 100%|██████████| 21/21 [00:07,  2.97it/s, step size=9.04e-01, acc. prob=0.000]
Sample: 100%|██████████| 21/21 [00:07,  2.94it/s, step size=6.45e-01, acc. prob=0.143]
Sample: 100%|██████████| 21/21 [00:05,  3.72it/s, step size=5.67e-01, acc. prob=0.286]
Sample: 100%|██████████| 21/21 [00:05,  4.19it/s, step size=8.66e-01, acc. prob=0.000]
Sample: 100%|██████████| 21/21 [00:04,  4.80it/s, step size=5.73e-01, acc. prob=0.429]
Sample: 100%|██████████| 21/21 [00:08,  2.3

[iter 0]  loss: 198613.4375
[iter 5]  loss: 189835.7930
[iter 10]  loss: 189176.4883
[iter 15]  loss: 187921.0781
[iter 20]  loss: 187489.3555
[iter 25]  loss: 187088.7109
[iter 30]  loss: 186847.2148
[iter 35]  loss: 186926.2734
[iter 40]  loss: 186526.2617
[iter 45]  loss: 186567.5039
[iter 50]  loss: 185957.7930


In [17]:
# Show the loading matrix and unique variances estimated by stochastic EM,
# one tensor per line.
print(pyro.param("loading"), pyro.param("uniqueness"), sep="\n")


tensor([[0.6921, 0.0000, 0.0000, 0.0000],
        [0.6881, 0.0000, 0.0000, 0.0000],
        [0.6977, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.7109, 0.0000, 0.0000],
        [0.0000, 0.6946, 0.0000, 0.0000],
        [0.0000, 0.7025, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.7030, 0.0000],
        [0.0000, 0.0000, 0.6777, 0.0000],
        [0.0000, 0.0000, 0.6792, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.6893],
        [0.0000, 0.0000, 0.0000, 0.7036],
        [0.0000, 0.0000, 0.0000, 0.7068]], requires_grad=True)
tensor([0.5105, 0.5096, 0.5107, 0.5041, 0.5122, 0.5144, 0.4749, 0.5175, 0.5219,
        0.5299, 0.5044, 0.5082], requires_grad=True)
