Lab: 最大概似估計
================

In [1]:
import torch

## `torch` 分配物件

In [2]:
from torch.distributions import Normal
normal = Normal(loc=0., scale=1.)

In [3]:
print("random sample with shape ():\n",
      normal.sample())
print("random sample with shape (3,):\n",
      normal.sample(sample_shape=(3,)))
print("random sample with shape (2,3):\n",
      normal.sample(sample_shape=(2, 3)))

random sample with shape ():
 tensor(-0.4363)
random sample with shape (3,):
 tensor([0.2574, 0.0697, 0.0278])
random sample with shape (2,3):
 tensor([[ 1.3632, -0.0921, -1.1241],
        [ 2.1538,  0.6145, -1.1096]])


In [4]:
# Evaluate the CDF of `normal` at a scalar, a vector, and a matrix of values.
# Values are passed as tensors: recent PyTorch releases validate distribution
# arguments by default and reject plain Python numbers.
print("cumulative probability given value with shape ():\n",
      normal.cdf(value=torch.tensor(0.)), "\n")
print("cumulative probability given value with (3,):\n",
      normal.cdf(value=torch.tensor([-1., 0., .5])), "\n")
print("cumulative probability given value with (2,3):\n",
      normal.cdf(value=torch.tensor([[-1., 0., .5], [-2., 1., 3.]])))

cumulative probability given value with shape ():
 tensor(0.5000) 

cumulative probability given value with (3,):
 tensor([0.1587, 0.5000, 0.6915]) 

cumulative probability given value with (2,3):
 tensor([[0.1587, 0.5000, 0.6915],
        [0.0228, 0.8413, 0.9987]])


In [5]:
# Evaluate the log-density of `normal` at a scalar, a vector, and a matrix.
# The labels say "log-probability" because `log_prob` is what is called here
# (the CDF is shown in the previous cell). Values are passed as tensors since
# recent PyTorch releases reject plain Python numbers during validation.
print("log-probability given value with shape ():\n",
      normal.log_prob(value=torch.tensor(0.)), "\n")
print("log-probability given value with (3,):\n",
      normal.log_prob(value=torch.tensor([-1., 0., .5])), "\n")
print("log-probability given value with (2,3):\n",
      normal.log_prob(value=torch.tensor([[-1., 0., .5], [-2., 1., 3.]])))

cumulative probability given value with shape ():
 tensor(-0.9189) 

cumulative probability given value with (3,):
 tensor([-1.4189, -0.9189, -1.0439]) 

cumulative probability given value with (2,3):
 tensor([[-1.4189, -0.9189, -1.0439],
        [-2.9189, -1.4189, -5.4189]])


In [6]:
print(normal)

Normal(loc: 0.0, scale: 1.0)


In [7]:
print(normal.batch_shape)
print(normal.event_shape)

torch.Size([])
torch.Size([])


In [8]:
normal_batch = Normal(loc=torch.Tensor([0., 1.]), scale=torch.Tensor([1., 1.5]))
print(normal_batch)

Normal(loc: torch.Size([2]), scale: torch.Size([2]))


In [9]:
print("random sample with sample_shape ():\n",
      normal_batch.sample(), "\n")
print("random sample with sample_shape (3,):\n",
      normal_batch.sample(sample_shape=(3,)), "\n")
print("random sample with sample_shape (2,3):\n",
      normal_batch.sample(sample_shape=(2,3)))


random sample with sample_shape ():
 tensor([-0.3663, -1.5823]) 

random sample with sample_shape (3,):
 tensor([[ 0.6634,  1.5090],
        [ 2.6187,  5.1088],
        [-0.9314,  2.4282]]) 

random sample with sample_shape (2,3):
 tensor([[[-0.1343,  0.7715],
         [ 0.3100,  0.4035],
         [ 0.5353, -0.8402]],

        [[-0.0110,  2.3349],
         [ 0.0228, -0.3227],
         [-0.6439,  1.3135]]])


In [10]:
# Show how `log_prob` broadcasts the value against the batch of two normals:
# a scalar and a (2,) value both yield one log-density per batch member,
# while a (2,1) value broadcasts to a (2,2) result.
# Values are tensors because argument validation rejects plain numbers.
print("log-probability given value with shape ():\n",
      normal_batch.log_prob(torch.tensor(0.)), "\n")
print("log-probability given value with shape (2,):\n",
      normal_batch.log_prob(torch.tensor([0., 0.])), "\n")
print("log-probability given value with shape (2,1):\n",
      normal_batch.log_prob(torch.tensor([[0.], [0.]])))

log-probability given value with shape ():
 tensor([-0.9189, -1.5466]) 

log-probability given value with shape (2,):
 tensor([-0.9189, -1.5466]) 

log-probability given value with shape (2,1):
 tensor([[-0.9189, -1.5466],
        [-0.9189, -1.5466]])


In [11]:
from torch.distributions import MultivariateNormal
# Bivariate normal parameterised by the lower Cholesky factor of its
# covariance matrix. `torch.linalg.cholesky` replaces the deprecated
# `torch.cholesky` and returns the same lower-triangular factor.
mvn = MultivariateNormal(
    loc=torch.tensor([0., 1.]),
    scale_tril=torch.linalg.cholesky(torch.tensor([[1., 0.], [0., .5]])))
print(mvn)


MultivariateNormal(loc: torch.Size([2]), scale_tril: torch.Size([2, 2]))


In [12]:
print("random sample with sample_shape ():\n",
      mvn.sample(), "\n")
print("random sample with sample_shape (3,):\n",
      mvn.sample(sample_shape=(3,)), "\n")
print("random sample with sample_shape (2, 3):\n",
      mvn.sample(sample_shape=(2, 3)))

random sample with sample_shape ():
 tensor([0.1890, 1.8827]) 

random sample with sample_shape (3,):
 tensor([[ 0.6836,  0.2677],
        [-0.0800,  0.5926],
        [ 0.4915,  0.7589]]) 

random sample with sample_shape (2, 3):
 tensor([[[-0.0305,  0.9733],
         [-0.8141, -0.2311],
         [-0.9130, -0.6153]],

        [[-0.5051,  0.4551],
         [ 0.6212,  1.1109],
         [-0.4245,  1.2862]]])


In [13]:
# The last dimension of the value is the event dimension (size 2), so a (2,)
# value gives a scalar log-density and a (2,2) value gives one per row.
print("log-probability given value with shape (2,):\n",
      mvn.log_prob(torch.tensor([0., 0.])), "\n")
print("log-probability given value with shape (2,2):\n",
      mvn.log_prob(torch.tensor([[0., 0.], [0., 0.]])))

log-probability given value with shape (2,):
 tensor(-2.4913) 

log-probability given value with shape (2,1):
 tensor([-2.4913, -2.4913])


In [14]:
from torch.distributions import Independent
normal_batch = Independent(normal_batch, reinterpreted_batch_ndims=1)
print(normal_batch.batch_shape)
print(normal_batch.event_shape)


torch.Size([])
torch.Size([2])


In [15]:
# Batch of three bivariate normals: each row of `loc` is one mean vector and
# the single Cholesky factor is shared (broadcast) across the batch.
# `torch.linalg.cholesky` replaces the deprecated `torch.cholesky`.
mvn_batch = MultivariateNormal(
    loc=torch.tensor([[0., 1.], [1., 2.], [2., 3.]]),
    scale_tril=torch.linalg.cholesky(torch.tensor([[1., .2], [.2, .5]])))
mvn_batch

MultivariateNormal(loc: torch.Size([3, 2]), scale_tril: torch.Size([3, 2, 2]))

In [16]:
print("random sample with sample_shape ():\n",
      mvn_batch.sample(), "\n")
print("random sample with sample_shape (3,):\n",
      mvn_batch.sample(sample_shape=(3,)), "\n")
print("random sample with sample_shape (2, 3):\n",
      mvn_batch.sample(sample_shape=(2, 3)))

random sample with sample_shape ():
 tensor([[0.9742, 1.7229],
        [1.5654, 2.4931],
        [1.7410, 3.0971]]) 

random sample with sample_shape (3,):
 tensor([[[ 0.5257,  1.0954],
         [ 2.6943,  2.1913],
         [ 1.3331,  2.2719]],

        [[-0.4602,  1.5544],
         [ 1.4892,  2.2368],
         [ 1.9227,  3.4094]],

        [[ 1.8816,  1.2539],
         [ 0.7777,  2.4149],
         [ 1.7531,  3.5409]]]) 

random sample with sample_shape (2, 3):
 tensor([[[[ 0.3248,  1.8880],
          [ 1.3302,  1.3644],
          [ 1.2786,  2.7427]],

         [[-1.0076,  0.9874],
          [-0.1539,  1.2438],
          [ 2.5117,  3.6590]],

         [[ 2.0852,  2.3911],
          [-0.2115,  1.3507],
          [ 0.2167,  2.3528]]],


        [[[-0.0136, -0.3815],
          [ 0.2458,  0.9069],
          [ 1.6318,  2.2925]],

         [[ 1.5403, -0.0553],
          [-0.0505,  1.4078],
          [ 1.8310,  1.9361]],

         [[-0.6986,  1.5421],
          [ 1.0329,  1.6619],
          [

## 計算最大概似估計值

In [17]:
mu_true = torch.tensor([5.])
sigma_true = torch.tensor([2.])
model_normal_true = Normal(
    loc=mu_true,
    scale=sigma_true)
print("normal model:\n", model_normal_true, "\n")

normal model:
 Normal(loc: tensor([5.]), scale: tensor([2.])) 



In [18]:
sample_size = 1000
x = model_normal_true.sample(sample_shape=(sample_size,))
# Summing over dim 1 collapses the per-coordinate log-densities of each draw;
# the mean then averages over draws, so the printed value is the average
# negative log-likelihood per observation under the true parameters.
loss_value = -torch.mean(torch.sum(model_normal_true.log_prob(x), dim = 1))
print("negative log-likelihood value is", loss_value)


negative likelihood value is tensor(2.1215)


In [19]:
# Maximum-likelihood fit of a univariate normal by minimising the average
# negative log-likelihood with Adam.
epochs = 200
# Single source of truth for the step size (previously an unused `lr = 1.0`
# sat next to a hard-coded .5 in the optimizer call).
lr = .5
mu = torch.tensor([0.], requires_grad=True)
# NOTE(review): sigma is optimised without a positivity constraint; this
# presumably relies on the iterates staying positive from the start value 1.
sigma = torch.tensor([1.], requires_grad=True)
opt = torch.optim.Adam([mu, sigma], lr=lr)
for epoch in range(epochs):
    # The distribution is rebuilt every step so it sees the updated parameters.
    model_normal = Normal(loc=mu, scale=sigma)
    loss_value = -torch.mean(model_normal.log_prob(x))
    opt.zero_grad()
    loss_value.backward() # compute the gradient
    opt.step()

In [20]:
print("ML mean by gradient descent:", mu)
print("ML std by gradient descent:", sigma)

ML mean by gradient descent: tensor([4.9986], requires_grad=True)
ML std by gradient descent: tensor([2.0179], requires_grad=True)


In [21]:
print("ML mean by formula:", torch.mean(x))
print("ML std by formula:", torch.std(x, unbiased=False))

ML mean by formula: tensor(4.9985)
ML std by formula: tensor(2.0188)


In [22]:
mu_true = torch.tensor([-1., 0., 1.])
sigma_tril_true = torch.tensor([[3., 0., 0.], [2., 1., 0.], [.4, .5, .5]])
model_mvn_true = MultivariateNormal(
    loc=mu_true,
    scale_tril=sigma_tril_true)
print("true mean vector: \n", model_mvn_true.mean)
print("true covariance matrix: \n", model_mvn_true.covariance_matrix)

true mean vector: 
 tensor([-1.,  0.,  1.])
true covariance matrix: 
 tensor([[9.0000, 6.0000, 1.2000],
        [6.0000, 5.0000, 1.3000],
        [1.2000, 1.3000, 0.6600]])


In [23]:
sample_size = 1000
x = model_mvn_true.sample(sample_shape=(sample_size,))
# Average negative log-likelihood per observation under the true parameters;
# a reference value for the fitted model's loss.
loss_value = -torch.mean(model_mvn_true.log_prob(x))
print("negative log-likelihood value is", loss_value)


negative likelihood value is tensor(4.6056)


In [24]:
# Maximum-likelihood fit of a trivariate normal: the mean vector and the
# Cholesky factor of the covariance are optimised with Adam, starting from a
# zero mean and an identity factor.
epochs = 500
lr = .1
mu = torch.zeros(3, requires_grad=True)
sigma_tril = torch.eye(3, requires_grad=True)
opt = torch.optim.Adam([mu, sigma_tril], lr=lr)

for epoch in range(epochs):
    # Clear gradients left over from the previous step before the new pass.
    opt.zero_grad()
    # Rebuild the distribution so it sees the current parameter values.
    model_mvn = MultivariateNormal(loc=mu, scale_tril=sigma_tril)
    loss_value = -torch.mean(model_mvn.log_prob(x))
    loss_value.backward()  # compute the gradient
    opt.step()

In [25]:
print("ML mean by gradient descent: \n",
      mu)
print("ML covariance by gradient descent: \n",
      sigma_tril @ torch.transpose(sigma_tril, 0, 1))

ML mean by gradient descent: 
 tensor([-0.8318,  0.0649,  0.9865], requires_grad=True)
ML covariance by gradient descent: 
 tensor([[8.9441, 6.0275, 1.2266],
        [6.0275, 4.9932, 1.2725],
        [1.2266, 1.2725, 0.6201]], grad_fn=<MmBackward>)


In [26]:
# Closed-form ML estimates: sample mean, and covariance computed as the
# second raw moment minus the outer product of the mean (divisor n).
sample_mean = torch.mean(x, dim = 0)
sample_moment2 = (torch.transpose(x, 0, 1) @ x) / sample_size
# `torch.outer` replaces the deprecated alias `torch.ger`.
sample_cov = sample_moment2 - torch.outer(sample_mean, sample_mean)
print("ML mean by formula: \n",
      sample_mean)
print("ML covariance by formula: \n",
      sample_cov)


ML mean by formula: 
 tensor([-0.8318,  0.0649,  0.9865])
ML covariance by formula: 
 tensor([[8.9441, 6.0275, 1.2266],
        [6.0275, 4.9933, 1.2725],
        [1.2266, 1.2725, 0.6201]])


## 實徵範例

### 產生邏吉斯迴歸資料

In [27]:
torch.manual_seed(48)

<torch._C.Generator at 0x7ffc60793f90>

In [28]:
from torch.distributions import Bernoulli
def generate_data(n_sample,
                  weight,
                  bias = 0,
                  mean_feature = 0,
                  std_feature = 1,
                  dtype = torch.float64):
    """Simulate a data set from a logistic regression model.

    Features are drawn i.i.d. from a normal distribution and the binary
    response is drawn from a Bernoulli whose logit is ``bias + x @ weight``.

    Args:
        n_sample: Number of observations to generate.
        weight: Regression coefficients, one per feature. May be a number,
            a sequence of numbers, or a tensor.
        bias: Intercept added to the linear predictor.
        mean_feature: Mean of every feature.
        std_feature: Standard deviation of every feature.
        dtype: Floating-point dtype of the generated tensors.

    Returns:
        A tuple ``(x, y)`` where ``x`` has shape ``(n_sample, n_feature)``
        and ``y`` has shape ``(n_sample, 1)`` with entries 0 or 1.
    """
    # as_tensor takes lists and existing tensors alike (torch.tensor warns
    # when handed a tensor); the reshape gives a column vector and also lets
    # a single scalar coefficient through.
    weight = torch.as_tensor(weight, dtype=dtype).reshape(-1, 1)
    n_feature = weight.shape[0]
    x = torch.normal(mean=mean_feature,
                     std=std_feature,
                     size=(n_sample, n_feature),
                     dtype=dtype)
    logit = bias + x @ weight
    y = Bernoulli(logits=logit).sample()
    return x, y

In [29]:
# run generate_data
x, y = generate_data(n_sample = 1000,
                     weight = [-5, 3, 0],
                     bias = 2,
                     mean_feature = 10,
                     std_feature = 3,
                     dtype = torch.float64)

### 建立一進行邏吉斯迴歸分析之物件

In [30]:
# define a class to fit logistic regression
class LogisticRegression():
    """Logistic regression fitted by maximum likelihood with Adam.

    After ``fit`` the estimates are available as ``bias`` (shape ``(1,)``)
    and ``weight`` (shape ``(n_feature, 1)``); both are ``None`` before.
    """

    def __init__(self, dtype = torch.float64):
        """Store the dtype used for the parameters and the training data."""
        self.dtype = dtype
        self.weight = None
        self.bias = None

    def log_lik(self, x, y):
        """Return the average Bernoulli log-likelihood of ``y`` given ``x``.

        Args:
            x: Feature tensor of shape ``(n, n_feature)``.
            y: Binary responses with ``n`` elements, either ``(n,)`` or
                ``(n, 1)``.
        """
        logit = self.bias + x @ self.weight
        bernoulli = Bernoulli(logits = logit)
        # Align y with the (n, 1) logits: a 1-D y would otherwise broadcast
        # to an (n, n) matrix and silently give a wrong likelihood.
        return torch.mean(bernoulli.log_prob(y.reshape(logit.shape)))

    def fit(self, x, y, epochs = 200, lr = .1):
        """Estimate ``bias`` and ``weight`` by maximising the log-likelihood.

        Args:
            x: Feature tensor of shape ``(n, n_feature)``.
            y: Binary responses with ``n`` elements.
            epochs: Number of Adam steps on the full data set.
            lr: Adam learning rate.

        Returns:
            The fitted model itself, so calls can be chained.
        """
        # .to() returns the same tensor when the dtype already matches.
        x = x.to(self.dtype)
        y = y.to(self.dtype)
        n_feature = x.size()[1]
        # Parameters are re-initialised at zero on every call to fit.
        self.bias = torch.zeros(size = (1,),
                                dtype = self.dtype,
                                requires_grad = True)
        self.weight = torch.zeros(size = (n_feature, 1),
                                  dtype = self.dtype,
                                  requires_grad = True)
        opt = torch.optim.Adam([self.bias, self.weight], lr=lr)
        for epoch in range(epochs):
            # Minimising the negative log-likelihood maximises the likelihood.
            loss_value = - self.log_lik(x, y)
            opt.zero_grad()
            loss_value.backward() # compute the gradient
            opt.step()
        return self

### 計算模型參數

In [31]:
# fit logistic model
model_lr = LogisticRegression()
model_lr.fit(x, y, epochs = 2000, lr = 1)
print(model_lr.bias)
print(model_lr.weight)

tensor([3.8176], dtype=torch.float64, requires_grad=True)
tensor([[-4.6982],
        [ 2.6668],
        [-0.0054]], dtype=torch.float64, requires_grad=True)


In [32]:
# fit logistic model via sklearn
# please install sklearn first
from sklearn import linear_model
# A very large C (inverse regularisation strength) makes the default penalty
# negligible, so the estimates are comparable to the unpenalised ML fit above.
model_lr_sklearn = linear_model.LogisticRegression(C=10000)
# sklearn works on NumPy arrays and expects a 1-D target; passing the (n, 1)
# column vector triggers a DataConversionWarning.
model_lr_sklearn.fit(x.numpy(), y.numpy().ravel())
print(model_lr_sklearn.intercept_)
print(model_lr_sklearn.coef_)

[3.81736449]
[[-4.69795962  2.66667074 -0.00543114]]


  return f(**kwargs)
