In [26]:
import os
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, Dataset

In [27]:
class TitanicDataset(Dataset):
    def __init__(self, path, test_path):
      # Sex, Ticket, Cabin, Embarked
      x_headers = ['Pclass','Age','SibSp','Parch', 'Fare']
      y_headers = ['Survived']
      data = pd.read_csv(path)
      x_data = data.loc[:, x_headers].ffill(0)
      if(test_path != None):
        test_data = pd.read_csv(test_path)
        y_data = test_data.loc[:, y_headers].ffill(0)
      else:
        y_data = data.loc[:, y_headers]
      self.len = len(x_data)
      self.x_data = torch.from_numpy(x_data.to_numpy()).float()
      self.y_data = torch.from_numpy(y_data.to_numpy()).float()

    def __getitem__(self, index): 
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len

In [33]:
train_dataset = TitanicDataset(os.path.join('../../tmp/titanic', 'train.csv'), None)
test_dataset = TitanicDataset(os.path.join('../../tmp/titanic', 'test.csv'), os.path.join('../../tmp/titanic', 'gender_submission.csv'))

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

  x_data = data.loc[:, x_headers].ffill(0)
  y_data = test_data.loc[:, y_headers].ffill(0)


In [29]:
test_data = [
  [[3., 34.5, 0., 0., 7.8292], 0.],
  [[3., 47., 1., 0., 7.], 1.],
  [[2., 62., 0., 0., 9.6875], 0.],
  [[3., 27., 0., 0., 8.6625], 0.],
]


In [30]:
class SigmoidRegressionModel(torch.nn.Module):
  def __init__(self):
    super(SigmoidRegressionModel, self).__init__()
    self.linear1 = torch.nn.Linear(5, 3)
    self.linear2 = torch.nn.Linear(3, 1)
    self.sigmoid = torch.nn.Sigmoid()
  
  def forward(self, x):
    x = self.linear1(x)
    x = self.linear2(x)
    y_pred = self.sigmoid(x)
    return y_pred
    
model = SigmoidRegressionModel()

In [31]:
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
print('Before training: ', model(torch.tensor(test_data[0][0])))
for epoch in range(200):
  for i, (x, y) in enumerate(train_loader):
    y_pred = model(x)
    loss = criterion(y_pred, y)
    if(epoch % 10 == 0 and i == 0):
      print(epoch, loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print('After training: ', model(torch.tensor(test_data[0][0])))


Before training:  tensor([0.5396], grad_fn=<SigmoidBackward0>)
0 1.0179766416549683
10 3.783785820007324
20 0.6717469096183777
30 0.6240926384925842
40 0.6350038051605225
50 0.6556601524353027
60 0.6402060985565186
70 0.5641464591026306
80 0.6357682943344116
90 0.5458234548568726
100 0.6276463270187378
110 0.6098113059997559
120 0.5688248872756958
130 0.6414803266525269
140 0.5709202289581299
150 0.6113878488540649
160 0.5528122186660767
170 0.6780524253845215
180 0.7277050018310547
190 0.5265395641326904
After training:  tensor([0.2906], grad_fn=<SigmoidBackward0>)


In [32]:
device = 'cpu'
size = len(test_loader.dataset)
num_batches = len(test_loader)
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
    for X, y in test_loader:
        X, y = X.to(device), y.to(device)
        pred = model(X)
        test_loss += criterion(pred, y).item()
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
correct /= size
print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

Test Error: 
 Accuracy: 2022.0%, Avg loss: 0.623807 

