In [10]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler, StandardScaler

import torch
from torch import nn, optim
from torch.autograd import Variable

In [11]:
# Build the training matrix in one pipeline: read the feature CSV, drop the
# `object_id` identifier column, replace NaNs with finite numbers, then
# standardise every column to zero mean / unit variance.
# NOTE(review): the fitted StandardScaler is discarded, so the transform
# cannot be inverted or re-applied to new data later.
Xinp = StandardScaler().fit_transform(
    np.nan_to_num(
        pd.read_csv('../data/features/test-all-feat-from-kernel-repro.csv')
        .drop(columns='object_id')
        .values
    )
)
print(Xinp.shape)

(3492890, 68)


In [12]:

batch_size = 10000

# Only complete batches are kept: rows past the last multiple of
# `batch_size` are left out of `Xs`.
batch_num = len(Xinp) // batch_size
Xs = [
    Xinp[start:start + batch_size]
    for start in range(0, batch_num * batch_size, batch_size)
]

# Sanity checks: the expected number of batches, each exactly `batch_size` rows.
assert len(Xs) == batch_num
assert all([len(chunk) == batch_size for chunk in Xs])

In [13]:
# Training hyper-parameters.
# NOTE(review): the Adam optimizer built later in this notebook hard-codes
# lr=1e-3 and does not read `learning_rate` -- confirm which value is intended.
learning_rate = 5e-3
# Number of full passes over the batch list.
num_epochs = 100

In [14]:
class AutoEncoder(nn.Module):
    """Small fully connected variational autoencoder.

    The encoder maps ``input_size`` features through one hidden layer of
    ``(input_size + latent_size) // 2`` units to two ``latent_size``-wide
    heads: the latent mean and the latent log-variance. The decoder mirrors
    that shape and ends in a sigmoid, so every reconstructed value lies
    in [0, 1].

    NOTE(review): a sigmoid output cannot reproduce targets outside [0, 1];
    confirm the training inputs are scaled to that range.
    """

    def __init__(self, input_size, latent_size):
        """Create the encoder and decoder layers.

        Args:
            input_size: number of features per sample.
            latent_size: dimensionality of the latent code.

        Raises:
            ValueError: if ``input_size <= latent_size + 2``.
        """
        super().__init__()

        if input_size <= latent_size + 2:
            raise ValueError("input size is not enough bigger than latent size")

        # Hidden width sits halfway between the input and latent widths.
        mid_size = (input_size + latent_size) // 2

        self.fc1 = nn.Linear(input_size, mid_size)
        self.fc2mu = nn.Linear(mid_size, latent_size)
        self.fc2var = nn.Linear(mid_size, latent_size)
        self.fc3 = nn.Linear(latent_size, mid_size)
        self.fc4 = nn.Linear(mid_size, input_size)

    def encode(self, x):
        """Return ``(mu, logvar)`` of the approximate posterior for ``x``."""
        h1 = torch.relu(self.fc1(x))
        return self.fc2mu(h1), self.fc2var(h1)

    def reparametrize(self, mu, logvar):
        """Draw ``z = mu + std * eps`` with ``eps ~ N(0, I)``.

        Args:
            mu: latent mean.
            logvar: latent log-variance, i.e. ``log(sigma^2)``.

        Returns:
            A sample with the same shape, dtype and device as ``mu``.
        """
        # logvar is log(sigma^2), so sigma = exp(0.5 * logvar).
        std = torch.exp(0.5 * logvar)
        # randn_like allocates the noise on std's own device and dtype, so
        # the sample works on CPU and on any GPU index alike.
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        """Map a latent code back to feature space (sigmoid-bounded)."""
        h3 = torch.relu(self.fc3(z))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        """Encode, sample, decode.

        Returns:
            Tuple ``(reconstruction, mu, logvar)``.
        """
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar


In [15]:
# Build the VAE: input width is the feature count of a batch, latent size is 2.
model = AutoEncoder(Xs[0].shape[1], 2)
if torch.cuda.is_available():
    # NOTE(review): GPU index 3 is hard-coded; this raises on machines with
    # fewer than four CUDA devices.
    model.cuda(3)

# Summed (not averaged) squared error over every element of the batch.
# `reduction='sum'` is the supported spelling of the deprecated
# `size_average=False`.
reconstruction_function = nn.MSELoss(reduction='sum')




In [16]:
def loss_function(recon_x, x, mu, logvar):
    """Return the VAE objective: reconstruction error plus KL divergence.

    Args:
        recon_x: reconstruction produced by the model.
        x: original input the reconstruction is compared against.
        mu: latent mean.
        logvar: latent log variance.

    Returns:
        The reconstruction term plus the KL term.

    Side effect: prints both terms on every call.
    """
    # Despite the name, this is whatever the module-level
    # `reconstruction_function` computes (an MSE loss in this notebook).
    BCE = reconstruction_function(recon_x, x)

    # KL divergence = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_terms = 1 - (mu.pow(2) + logvar.exp()) + logvar
    KLD = kl_terms.sum().mul(-0.5)

    print(f"bce {BCE}, kld {KLD}")
    return BCE + KLD


# Adam over every parameter of `model`.
# NOTE(review): the step size is hard-coded here; the notebook-level
# `learning_rate` constant (0.005) is not used -- confirm which is intended.
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [17]:
# Train for `num_epochs` passes over the pre-built batch list, reporting the
# loss summed over all batches at the end of each epoch.
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for X in Xs:
        # NumPy batch -> float32 tensor (astype already makes a fresh copy).
        X = torch.from_numpy(X.astype(np.float32))
        if torch.cuda.is_available():
            # NOTE(review): GPU index 3 is hard-coded here.
            X = X.cuda(3)

        optimizer.zero_grad()
        recon_batch, mu, logvar = model(X)
        loss = loss_function(recon_batch, X, mu, logvar)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    print('====> Epoch: {} loss: {:.4f}'.format(epoch, train_loss))

RuntimeError: CUDA error: out of memory

In [None]:
# Save only the model's state dict (not the module object) next to the notebook.
torch.save(obj=model.state_dict(), f='./vae.pth')