In [1]:
import pandas as pd
import numpy as np

import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.autograd import Variable

In [2]:
Xs = pd.read_csv('../data/sets/test-all-feat-from-kernel-repro.csv', nrows=1000000).drop('object_id', axis=1)
Xs = np.nan_to_num(Xs.values)
print(Xs.shape)

(1000000, 68)


In [31]:
learning_rate = 0.005
num_epochs = 1

In [41]:
class AutoEncoder(nn.Module):
    def __init__(self, input_size, latent_size):
        super(AutoEncoder, self).__init__()

        assert input_size > latent_size + 2
        mid_size = (input_size + latent_size) // 2 
        
        self.fc1 = nn.Linear(input_size, mid_size)
        self.fc2mu = nn.Linear(mid_size, latent_size)
        self.fc2var = nn.Linear(mid_size, latent_size)
        self.fc3 = nn.Linear(latent_size, mid_size)
        self.fc4 = nn.Linear(mid_size, input_size)

    def encode(self, x):
        h1 = F.relu(self.fc1(x))
        return self.fc2mu(h1), self.fc2var(h1)

    def reparametrize(self, mu, logvar):
        std = logvar.mul(0.01).exp_()
        if torch.cuda.is_available():
            eps = torch.cuda.FloatTensor(std.size()).normal_()
        else:
            eps = torch.FloatTensor(std.size()).normal_()
        eps = Variable(eps)
        return eps.mul(std).add_(mu)

    def decode(self, z):
        h3 = F.relu(self.fc3(z))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar


In [37]:
model = AutoEncoder(Xs.shape[1], 2)
if torch.cuda.is_available():
    model.cuda()

reconstruction_function = nn.MSELoss(size_average=False)


In [39]:
def loss_function(recon_x, x, mu, logvar):
    """
    recon_x: generating images
    x: origin images
    mu: latent mean
    logvar: latent log variance
    """
    BCE = reconstruction_function(recon_x, x)  # mse loss
    # loss = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
    KLD = torch.sum(KLD_element).mul_(-0.5)
    # KL divergence
    print(f"bce {BCE}, kld {KLD}")
    return BCE + KLD


optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [40]:
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for X in [Xs]:
        X = Variable(torch.tensor(X.astype(np.float32)))
        if torch.cuda.is_available():
            X = X.cuda()
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(X)
        loss = loss_function(recon_batch, X, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

    #print(f'mu {mu}, logvar {logvar}')
    print('====> Epoch: {} loss: {:.4f}'.format(
        epoch, train_loss))

std tensor([[2.6972e+01, 1.9488e-19],
        [1.0656e+00, 2.0379e-01],
        [9.1921e-01, 2.8432e-01],
        ...,
        [1.0210e+00, 6.0328e-01],
        [1.1451e+00, 4.2066e-01],
        [1.0526e+00, 4.8205e-01]], device='cuda:0', grad_fn=<ExpBackward>), lv tensor([[ 3.2948e+02, -4.3082e+03],
        [ 6.3547e+00, -1.5907e+02],
        [-8.4241e+00, -1.2576e+02],
        ...,
        [ 2.0780e+00, -5.0538e+01],
        [ 1.3547e+01, -8.6593e+01],
        [ 5.1238e+00, -7.2971e+01]], device='cuda:0', grad_fn=<AddmmBackward>)
z tensor([[ 4.2032e+02,  9.3114e+03],
        [-2.0191e+00,  2.8988e+02],
        [-6.2071e+00,  2.2190e+02],
        ...,
        [ 4.9586e+00,  7.6861e+01],
        [ 5.5405e+00,  1.3463e+02],
        [ 1.2187e+01,  1.4045e+02]], device='cuda:0', grad_fn=<AddBackward0>)
bce nan, kld inf
====> Epoch: 0 loss: nan


In [8]:
torch.save(model.state_dict(), './vae.pth')