# Maximum likelihood estimation (using the ML class)

In [1]:
from __future__ import print_function
import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms
from tensorboardX import SummaryWriter

from tqdm import tqdm

# Training hyperparameters
batch_size = 128
epochs = 10

# Seed PyTorch's random number generator
seed = 1
torch.manual_seed(seed)

# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

In [2]:
root = '../data'

# Convert each image to a tensor, then flatten it into a 1-D vector
flatten = transforms.Lambda(lambd=lambda x: x.view(-1))
transform = transforms.Compose([transforms.ToTensor(), flatten])

# Loader options shared by the training and test loaders
kwargs = dict(batch_size=batch_size, num_workers=1, pin_memory=True)

# Only the training split requests a download; the test split reads from the same root
train_set = datasets.MNIST(root=root, train=True, transform=transform, download=True)
test_set = datasets.MNIST(root=root, train=False, transform=transform)

# Shuffle the training data; keep the test data in a fixed order
train_loader = torch.utils.data.DataLoader(train_set, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(test_set, shuffle=False, **kwargs)

In [3]:
from pixyz.distributions import Categorical
from pixyz.models import ML
from pixyz.utils import print_latex

In [4]:
x_dim = 784
y_dim = 10


class Classifier(Categorical):
    """Classifier p(y|x): a fully connected network producing categorical probabilities over y."""

    def __init__(self):
        super(Classifier, self).__init__(cond_var=["x"], var=["y"])
        # x_dim -> 512 -> 512 -> y_dim
        self.fc1 = nn.Linear(x_dim, 512)
        self.fc2 = nn.Linear(512, 512)
        self.fc3 = nn.Linear(512, y_dim)

    def forward(self, x):
        """Map the input ``x`` to the ``probs`` parameter of the categorical distribution."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        # Normalise over dim 1 so each row sums to one
        probs = F.softmax(logits, dim=1)
        return {"probs": probs}


p = Classifier().to(device)

In [5]:
# Plain-text summary: the distribution p(y|x) and the layers of its network
print(p)
# Same distribution rendered as a LaTeX math object
print_latex(p)

Distribution:
  p(y|x)
Network architecture:
  Classifier(
    name=p, distribution_name=Categorical,
    var=['y'], cond_var=['x'], input_var=['x'], features_shape=torch.Size([])
    (fc1): Linear(in_features=784, out_features=512, bias=True)
    (fc2): Linear(in_features=512, out_features=512, bias=True)
    (fc3): Linear(in_features=512, out_features=10, bias=True)
  )


<IPython.core.display.Math object>

In [6]:
# Wrap the classifier in the ML (maximum likelihood) model, optimised with Adam
model = ML(
    p,
    optimizer=optim.Adam,
    optimizer_params=dict(lr=1e-3),
)
# Show the model summary as plain text, then as rendered LaTeX
print(model)
print_latex(model)

Distributions (for training): 
  p(y|x) 
Loss function: 
  mean \left(- \log p(y|x) \right) 
Optimizer: 
  Adam (
  Parameter Group 0
      amsgrad: False
      betas: (0.9, 0.999)
      eps: 1e-08
      lr: 0.001
      weight_decay: 0
  )


<IPython.core.display.Math object>

In [7]:
def train(epoch):
    """Run one pass over ``train_loader`` and return the average training loss.

    Args:
        epoch (int): Epoch number; only used in the printed progress message.

    Returns:
        torch.Tensor: Scalar tensor holding the loss averaged over every
        sample seen during the epoch.
    """
    # Build the one-hot lookup table once, directly on the target device,
    # instead of re-creating a CPU identity matrix for every batch.
    one_hot = torch.eye(y_dim, device=device)

    train_loss = torch.zeros((), device=device)
    n_samples = 0
    for x, y in tqdm(train_loader):
        x = x.to(device)
        y = one_hot[y.to(device)]
        loss = model.train({"x": x, "y": y})

        # The loss is a mean over the batch, so weight it by the actual batch
        # size: the final batch is usually smaller than `batch_size`, and
        # scaling by the nominal size would over-weight it.
        # detach() keeps the running total from holding on to any autograd
        # history the returned loss may carry.
        batch_len = x.size(0)
        train_loss += loss.detach() * batch_len
        n_samples += batch_len

    # max(..., 1) avoids a division by zero if the loader yields no batches
    train_loss = train_loss / max(n_samples, 1)
    print('Epoch: {} Train loss: {:.4f}'.format(epoch, train_loss))
    return train_loss

In [8]:
def test(epoch):
    """Evaluate the model on ``test_loader`` and return the average test loss.

    Args:
        epoch (int): Epoch number; accepted for symmetry with ``train`` and
            not used inside the function.

    Returns:
        torch.Tensor: Scalar tensor holding the loss averaged over every
        evaluated sample.
    """
    # Build the one-hot lookup table once, directly on the target device,
    # instead of re-creating a CPU identity matrix for every batch.
    one_hot = torch.eye(y_dim, device=device)

    test_loss = torch.zeros((), device=device)
    n_samples = 0
    for x, y in test_loader:
        x = x.to(device)
        y = one_hot[y.to(device)]
        loss = model.test({"x": x, "y": y})

        # The loss is a mean over the batch, so weight it by the actual batch
        # size: the final batch is usually smaller than `batch_size`, and
        # scaling by the nominal size would over-weight it.
        batch_len = x.size(0)
        test_loss += loss.detach() * batch_len
        n_samples += batch_len

    # max(..., 1) avoids a division by zero if the loader yields no batches
    test_loss = test_loss / max(n_samples, 1)
    print('Test loss: {:.4f}'.format(test_loss))
    return test_loss

In [9]:
# Log per-epoch losses for TensorBoard under runs/maximum_likelihood
writer = SummaryWriter('runs/maximum_likelihood')

try:
    for epoch in range(1, epochs + 1):
        train_loss = train(epoch)
        test_loss = test(epoch)

        # add_scalar is given plain Python numbers, hence .item()
        writer.add_scalar('train_loss', train_loss.item(), epoch)
        writer.add_scalar('test_loss', test_loss.item(), epoch)
finally:
    # Close the writer even if training is interrupted or raises,
    # so the event file is not left open.
    writer.close()

100%|██████████| 469/469 [00:08<00:00, 52.55it/s]

Epoch: 1 Train loss: 0.2726



  0%|          | 0/469 [00:00<?, ?it/s]

Test loss: 0.1244


100%|██████████| 469/469 [00:08<00:00, 53.12it/s]

Epoch: 2 Train loss: 0.0944



  0%|          | 0/469 [00:00<?, ?it/s]

Test loss: 0.0828


100%|██████████| 469/469 [00:10<00:00, 46.20it/s]

Epoch: 3 Train loss: 0.0609



  0%|          | 0/469 [00:00<?, ?it/s]

Test loss: 0.0818


100%|██████████| 469/469 [00:08<00:00, 53.03it/s]

Epoch: 4 Train loss: 0.0440



  0%|          | 0/469 [00:00<?, ?it/s]

Test loss: 0.0695


100%|██████████| 469/469 [00:08<00:00, 52.12it/s]

Epoch: 5 Train loss: 0.0323



  0%|          | 0/469 [00:00<?, ?it/s]

Test loss: 0.0820


100%|██████████| 469/469 [00:08<00:00, 50.69it/s]

Epoch: 6 Train loss: 0.0265



  0%|          | 0/469 [00:00<?, ?it/s]

Test loss: 0.0692


100%|██████████| 469/469 [00:08<00:00, 52.43it/s]

Epoch: 7 Train loss: 0.0202



  0%|          | 0/469 [00:00<?, ?it/s]

Test loss: 0.0739


100%|██████████| 469/469 [00:08<00:00, 52.82it/s]

Epoch: 8 Train loss: 0.0154



  0%|          | 0/469 [00:00<?, ?it/s]

Test loss: 0.0686


100%|██████████| 469/469 [00:09<00:00, 51.12it/s]

Epoch: 9 Train loss: 0.0160



  0%|          | 0/469 [00:00<?, ?it/s]

Test loss: 0.0856


100%|██████████| 469/469 [00:09<00:00, 52.08it/s]

Epoch: 10 Train loss: 0.0181





Test loss: 0.0856
