In [4]:
import torch 
import numpy as np 
import pandas as pd
from torch.utils.data import Dataset, DataLoader


In [33]:
def normalize(df):
    result = df.copy()
    for feature_name in df.columns:
        max_value = df[feature_name].max()
        min_value = df[feature_name].min()
        result[feature_name] = (df[feature_name] - min_value) / (max_value - min_value)
    return result

class Titanic(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, csv_file, root_dir=None, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        raw = pd.read_csv(csv_file)
        data_col = ['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked']
        relevant_data = raw[data_col]
        self.titanic_data = normalize(pd.get_dummies(relevant_data))
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.titanic_data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        samples_pd = self.titanic_data[idx]
        labels = torch.tensor(samples_pd['Survived'.values], dtype=torch.int8)
        data=torch.tensor(samples_pd.drop('Survived', axis=1).values, dtype=torch.float64)
        sample = {'data':data, 'label':label}
        return sample

In [14]:
raw = pd.read_csv('titanic_train.csv')
data_col = ['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked']
relevant_data = raw[data_col]
titanic_data = pd.get_dummies(relevant_data)

In [37]:
batch_size = 20
titanic_dataloader = DataLoader(Titanic('titanic_train.csv'), batch_size=batch_size, shuffle = True)

In [27]:
class VAE(nn.Module):
    def __init__(self, label, num_feat, z_size, device):
        # configurations
        super().__init__()
        self.label = label
        self.num_feat = num_feat
        self.channel_num = channel_num
        self.z_size = z_size
        self.device = device

        # encoder
        self.encoder = nn.Sequential(
            self._linear(num_feat, 7)#PARAMS!!!
            self._linear(7, 4)
        )

        # encoded feature's size and volume
        self.feature_size = image_size // 8
        self.feature_volume = 

        # q
        self.q_mean = self._linear(4, z_size, elu=False)
        self.q_logvar = self._linear(4, z_size, elu=False)

        # projection
        self.project = self._linear(z_size, 4, elu=False)

        # decoder
        self.decoder = nn.Sequential(
            self._linear(4, 5)#PARAMS!!!
            self._linear(5, num_feat)
        )

    def forward(self, x):
        # encode x
        encoded = self.encoder(x)

        # sample latent code z from q given x.
        mean, logvar = self.q(encoded)
        z = self.z(mean, logvar)
        # reconstruct x from z
        x_reconstructed = self.decoder(z_projected)

        # return the parameters of distribution of q given x and the
        # reconstructed image.
        return (mean, logvar), x_reconstructed

    # ==============
    # VAE components
    # ==============

    def q(self, encoded):
        unrolled = encoded.view(-1, self.feature_volume)
        return self.q_mean(unrolled), self.q_logvar(unrolled)

    def z(self, mean, logvar):
        std = logvar.mul(0.5).exp_()
        eps = (
            Variable(torch.randn(std.size()))
        ).to(self.device)
        return eps.mul(std).add_(mean)

    def reconstruction_loss(self, x_reconstructed, x):
        return nn.BCELoss(size_average=False)(x_reconstructed, x) / x.size(0)

    def kl_divergence_loss(self, mean, logvar):
        return ((mean**2 + logvar.exp() - 1 - logvar) / 2).mean()

    # =====
    # Utils
    # =====

    @property
    def name(self):
        return (
            'VAE'
            '-{kernel_num}k'
            '-{label}'
            '-{channel_num}x{image_size}x{image_size}'
        ).format(
            label=self.label,
            kernel_num=self.kernel_num,
            image_size=self.image_size,
            channel_num=self.channel_num,
        )

    def sample(self, size):
        z = Variable(
            torch.randn(size, self.z_size).cuda() if self._is_on_cuda() else
            torch.randn(size, self.z_size)
        )
        z_projected = self.project(z).view(
            -1, self.kernel_num,
            self.feature_size,
            self.feature_size,
        )
        return self.decoder(z_projected).data

    def _is_on_cuda(self):
        return next(self.parameters()).is_cuda

    # ======
    # Layers
    # ======

    def _linear(self, in_size, out_size, elu=True):
        return nn.Sequential(
            nn.Linear(in_size, out_size),
            nn.ELU(),
        ) if elu else nn.Linear(in_size, out_size)

TypeError: len() takes no keyword arguments

In [31]:
titanic_data.drop('Survived', axis=1).values

array([[ 3., 22.,  1., ...,  0.,  0.,  1.],
       [ 1., 38.,  1., ...,  1.,  0.,  0.],
       [ 3., 26.,  0., ...,  0.,  0.,  1.],
       ...,
       [ 3., nan,  1., ...,  0.,  0.,  1.],
       [ 1., 26.,  0., ...,  1.,  0.,  0.],
       [ 3., 32.,  0., ...,  0.,  1.,  0.]])

In [30]:
titanic_data

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male,Cabin_A10,Cabin_A14,...,Cabin_F G73,Cabin_F2,Cabin_F33,Cabin_F38,Cabin_F4,Cabin_G6,Cabin_T,Embarked_C,Embarked_Q,Embarked_S
0,0,3,22.0,1,0,7.2500,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
1,1,1,38.0,1,0,71.2833,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,1,3,26.0,0,0,7.9250,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,1,1,35.0,1,0,53.1000,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,0,3,35.0,0,0,8.0500,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,27.0,0,0,13.0000,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
887,1,1,19.0,0,0,30.0000,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
888,0,3,,1,2,23.4500,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
889,1,1,26.0,0,0,30.0000,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
