In [103]:
import os
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, Dataset

In [104]:
class TitanicDataset(Dataset):
    def __init__(self, path, test_path):
      # Sex, Ticket, Cabin, Embarked
      x_headers = ['Pclass','Age','SibSp','Parch', 'Fare']
      y_headers = ['Survived']
      data = pd.read_csv(path)
      x_data = data.loc[:, x_headers].ffill(0)
      if(test_path != None):
        test_data = pd.read_csv(test_path)
        y_data = test_data.loc[:, y_headers].ffill(0)
      else:
        y_data = data.loc[:, y_headers]
      self.len = len(x_data)
      self.x_data = torch.from_numpy(x_data.to_numpy()).float()
      self.y_data = torch.from_numpy(y_data.to_numpy()).float()

    def __getitem__(self, index): 
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len

In [105]:
train_dataset = TitanicDataset(os.path.join('../../tmp/titanic', 'train.csv'), None)
test_dataset = TitanicDataset(os.path.join('../../tmp/titanic', 'test.csv'), os.path.join('../../tmp/titanic', 'gender_submission.csv'))

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

  x_data = data.loc[:, x_headers].ffill(0)
  x_data = data.loc[:, x_headers].ffill(0)
  y_data = test_data.loc[:, y_headers].ffill(0)


In [106]:
test_data = [
  [[3., 34.5, 0., 0., 7.8292], 0.],
  [[3., 47., 1., 0., 7.], 1.],
  [[2., 62., 0., 0., 9.6875], 0.],
  [[3., 27., 0., 0., 8.6625], 0.],
]


In [107]:
class SigmoidRegressionModel(torch.nn.Module):
  def __init__(self):
    super(SigmoidRegressionModel, self).__init__()
    self.linear1 = torch.nn.Linear(5, 3)
    self.linear2 = torch.nn.Linear(3, 1)
    self.sigmoid = torch.nn.Sigmoid()
  
  def forward(self, x):
    x = self.linear1(x)
    x = self.linear2(x)
    y_pred = self.sigmoid(x)
    return y_pred
    
model = SigmoidRegressionModel()

In [108]:
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
print('Before training: ', model(torch.tensor(test_data[0][0])))
for epoch in range(100):
  for i, (x, y) in enumerate(train_loader):
    y_pred = model(x)
    loss = criterion(y_pred, y)
    if(epoch % 10 == 0 and i == 0):
      print(epoch, loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print('After training: ', model(torch.tensor(test_data[0][0])))


Before training:  tensor([0.0136], grad_fn=<SigmoidBackward0>)
0 3.277015209197998
10 0.5519840717315674
20 0.6100013852119446
30 0.6798875331878662
40 0.4836369454860687
50 0.4961634874343872
60 0.5393467545509338
70 0.5566099286079407
80 0.5550968647003174
90 0.5622515082359314
After training:  tensor([0.2993], grad_fn=<SigmoidBackward0>)


In [109]:
device = 'cpu'
size = len(test_loader.dataset)
num_batches = len(test_loader)
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
    for X, y in test_loader:
        X, y = X.to(device), y.to(device)
        pred = model(X)
        test_loss += criterion(pred, y).item()
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
correct /= size
print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

Test Error: 
 Accuracy: 2022.0%, Avg loss: 0.635570 

