In [100]:
import torch
import random
import torch.nn.functional as F
import pandas as pd

from torch import nn, optim


# Prepare the data

train_file = './train.csv'
test_file = './test.csv'

def prepare_data(data, test_sample=False):
    persons = data[['PassengerId','Name','Age','Sex']]
    data = pd.get_dummies(data.drop(columns=['Cabin','Ticket','Pclass']), columns=['Sex', 'Embarked']).fillna(0)
    
    features = data[['Age', 'SibSp', 'Parch', 'Fare', 'Sex_female','Sex_male', 'Embarked_C', 'Embarked_Q', 'Embarked_S']].copy()
    features['Fare'] /= features['Fare'].max()
    features['Age'] /= features['Age'].max()
    features['Fare'] /= features['Fare'].max()
    features['SibSp'] /= features['SibSp'].max()
    
    features = torch.tensor(features.values, dtype=torch.float)
    persons = persons.values
    
    if(test_sample):
        return zip(features, persons)
    
    targets = data['Survived']
    targets = torch.tensor(targets.values, dtype=torch.float).view(-1,1)
    
    return zip(features, persons, targets)

def random_training_pair(train_list):
    choice = train_list[random.randint(0, len(train_list) - 1)]
    features, person, target = choice
    target = target.item()
    target = torch.LongTensor([1 if target == 0 else 0, target])
    
    return features, person, target


    

In [89]:
data = prepare_data(pd.read_csv(train_file))
test_data = prepare_data(pd.read_csv(test_file), True)

titanic_data = list(titanic_data)

n_data = len(titanic_data)
n_validation = 100
n_train = n_data - n_validation

validation_data = titanic_data[n_train:]
train_data = titanic_data[:-n_validation]



In [94]:
class Classifier(nn.Module):
    
    def __init__(self):
        super(Classifier, self).__init__()
        
        self.fc1 = nn.Linear(9, 6)
        self.fc2 = nn.Linear(6, 3)
        self.fc3 = nn.Linear(3, 2)
        self.dropout = nn.Dropout(0.2)
        
    def forward(self, X):
        X = self.dropout( F.relu( self.fc1(X) ) )
        X = self.dropout( F.relu( self.fc2(X) ) )
        X = self.dropout( F.softmax( self.fc3(X), dim=0 ) )
        
        return X
    

In [103]:
features, person, target = random_training_pair(train_data)
classifier = Classifier()
logits = classifier(features)
print(logits, target)

tensor([0.6324, 0.6176], grad_fn=<MulBackward0>) tensor([0, 1])


In [110]:
classifier = Classifier()

lr = 0.005
momentum = 0.5
criterion = nn.NLLLoss()
optimizer = optim.SGD(classifier.parameters(), lr=lr, momentum=momentum)

def train(features, target):
    """
        Training the data set
        Params:
            features: the expected category result
            target: the expected result
    """
    target = target.view(1,2)
    print(target)
    classifier.train()
    
    optimizer.zero_grad()
    
    output = classifier(features)

    loss = criterion(output.view(1,2), target)
    loss.backward()
    optimizer.step()
    
    return output, loss.item()

In [111]:
epochs = 1000
print_every = 20
current_loss = 0
accuracy = 0

for epoch in range(1, epochs + 1):
    features, person, target = random_training_pair(train_data)
    output, item_loss = train(features, target)
    
    top_v, top_i = output.topk(1, dim=1)
    accuracy += 1 if top_i == target[0] else 0
    current_loss += item_loss
    
    if epoch % print_every == 0:
        print(f'Epoch {epoch}/{epochs}: accuracy: {accuracy/epoch:.3f} and loss: {current_loss/epoch:.3f}')
        
    

tensor([[1, 0]])


RuntimeError: multi-target not supported at /opt/conda/conda-bld/pytorch_1556653215914/work/aten/src/THNN/generic/ClassNLLCriterion.c:20