# Titanic Neural Net

## Data

In [None]:
import torch
from torch import nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
from google.colab import drive

# Directory inside the Colab VM where Google Drive gets attached.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Path of the Titanic CSV on the mounted Google Drive.
file_string = "/content/drive/My Drive/titanic.csv"
# Parse the CSV into a DataFrame; the bare name below renders it as the cell output.
data = pd.read_csv(filepath_or_buffer=file_string)
data

## Preprocessing

In [None]:
# Number of missing values in each column (isna is the alias pandas provides for isnull).
data.isna().sum()

In [None]:
# Discard every row that contains at least one missing value.
data = data.dropna(axis="index")
data

In [None]:
# Replace each categorical column with one indicator column per category value;
# numeric columns pass through unchanged.
data = pd.get_dummies(data=data)
data

In [None]:
# Target vector: the "survived" column.
y = data["survived"]
# Feature matrix: every column except the target and "sex_male".
# NOTE(review): "sex_male" is presumably dropped because it duplicates the other
# sex indicator column produced by get_dummies - confirm against the data.
X = data.drop(columns=["survived", "sex_male"])
X

### Loading the Preprocessed Data Into the DataLoader

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# Full feature matrix and label vector as float32 tensors on the chosen device.
X_tensors = torch.from_numpy(X.astype(float).values).float().to(device)
y_tensors = torch.from_numpy(y.astype(float).values).float().to(device)

# Let scikit-learn split plain NumPy row indices instead of the (possibly CUDA)
# tensors themselves: train_test_split documents lists, NumPy arrays, SciPy
# sparse matrices and pandas DataFrames as its inputs. With the same
# random_state the selected rows are the same as when splitting the data directly.
train_idx, test_idx = train_test_split(
    np.arange(len(X_tensors)),
    test_size=0.2,
    train_size=0.8,
    random_state=42,
)
# Index tensors must be integer (long) tensors living on the same device as the data.
train_idx = torch.from_numpy(train_idx).long().to(device)
test_idx = torch.from_numpy(test_idx).long().to(device)

X_train, X_test = X_tensors[train_idx], X_tensors[test_idx]
y_train, y_test = y_tensors[train_idx], y_tensors[test_idx]

In [None]:
from torch.utils.data import Dataset, DataLoader
class TitanicDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label."""

    def __init__(self, X_train, y_train):
        """Keep references to the features and labels; nothing is copied or converted."""
        self.x_data = X_train
        self.y_data = y_train

    def __len__(self):
        """Return the number of samples, measured on the label container."""
        return len(self.y_data)

    def __getitem__(self, i):
        """Return the ``(features, label)`` pair stored at position ``i``."""
        features = self.x_data[i]
        label = self.y_data[i]
        return features, label

# Wrap the training split so it can be fed to a DataLoader.
titanic_data = TitanicDataset(X_train=X_train, y_train=y_train)

In [None]:
# Number of samples per mini-batch.
batch_size = 100
# shuffle=True makes the loader reshuffle the training samples at every epoch.
loader = DataLoader(
    titanic_data,
    batch_size=batch_size,
    shuffle=True,
)

## Model

In [None]:
class TitanicModel(nn.Module):
    """Feed-forward binary classifier.

    Architecture: Linear -> ReLU -> Linear -> ReLU -> Linear -> Sigmoid.
    The single sigmoid output is a probability in [0, 1], which is what
    ``BCELoss`` expects as its prediction.

    Args:
        input_size: Number of features per sample. When ``None`` (the default,
            so that a bare ``TitanicModel()`` keeps working in the notebook) it
            is read from the notebook-level ``X_train`` tensor.
        hidden_size_1: Width of the first hidden layer.
        hidden_size_2: Width of the second hidden layer.

    Raises:
        ValueError: If ``input_size`` is ``None`` and no ``X_train`` is defined.
    """

    def __init__(self, input_size=None, hidden_size_1=16, hidden_size_2=8):
        super().__init__()

        if input_size is None:
            # Notebook convenience: infer the feature count from the training data.
            try:
                input_size = X_train.shape[1]
            except NameError as err:
                raise ValueError(
                    "input_size must be given when no notebook-level X_train exists"
                ) from err

        # input layer -> hidden layer 1
        self.input_layer_to_hidden_layer_1 = nn.Linear(input_size, hidden_size_1)
        self.relu1 = nn.ReLU()

        # hidden layer 1 -> hidden layer 2
        self.hidden_layer_1_to_hidden_layer_2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.relu2 = nn.ReLU()

        # hidden layer 2 -> output layer (one unit: probability of the positive class)
        self.hidden_layer_2_to_output_layer = nn.Linear(hidden_size_2, 1)
        self.sigmoid3 = nn.Sigmoid()

    def forward(self, input):
        """Map a ``(batch, input_size)`` float tensor to ``(batch, 1)`` probabilities."""
        # input layer
        linear_combination1 = self.input_layer_to_hidden_layer_1(input)
        intermediate_relu1 = self.relu1(linear_combination1)

        # hidden layer 1
        linear_combination2 = self.hidden_layer_1_to_hidden_layer_2(intermediate_relu1)
        intermediate_relu2 = self.relu2(linear_combination2)

        # hidden layer 2
        linear_combination3 = self.hidden_layer_2_to_output_layer(intermediate_relu2)
        out = self.sigmoid3(linear_combination3)
        return out

## Training

In [None]:
# Seed before the model is built so the random weight initialisation is
# reproducible as well; seeding after construction left the initial weights unseeded.
torch.manual_seed(42)

titanic = TitanicModel().to(device)
epochs = 3000
optimizer = torch.optim.Adam(titanic.parameters(), lr=0.01)
loss_function = torch.nn.BCELoss()

losses = []  # mean mini-batch loss of each epoch, in epoch order
for epoch in range(epochs):
    mini_batch_losses = []
    for features, labels in loader:
        # Call the module itself rather than .forward() so registered hooks run.
        Y_prediction = titanic(features)

        # unsqueeze(1) adds a trailing dimension to the labels so the target
        # has the same shape as the prediction passed to BCELoss.
        loss = loss_function(Y_prediction, labels.unsqueeze(1))

        # Clear the gradients of the previous step, backpropagate, then update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # keep track of losses; .item() copies the scalar loss to a Python float
        mini_batch_losses.append(loss.item())

    mini_batch_mean_loss = np.mean(mini_batch_losses)
    losses.append(mini_batch_mean_loss)
    if epoch % 200 == 0:
        # Report this epoch's loss (not the running mean over all epochs so far).
        print(f"Loss in Epoch {epoch}: {mini_batch_mean_loss}")

In [None]:
# Draw the recorded per-epoch losses on the axes of a fresh figure.
loss_figure = plt.figure()
loss_figure.gca().plot(losses)

## Testing

In [None]:
# Inference only: put the model in evaluation mode and skip autograd bookkeeping.
titanic.eval()
with torch.no_grad():
    # Calling the module (not .forward) lets PyTorch run any registered hooks.
    y_test_predictions = titanic(X_test)

In [None]:
# Convert probabilities to hard 0/1 labels: strictly above 0.5 counts as class 1.
is_positive = y_test_predictions > 0.5
y_test_predictions_thresholded = is_positive.long()

In [None]:
from sklearn.metrics import accuracy_score

# Bring both label vectors to NumPy on the CPU. The predictions are flattened so
# they are 1-D like the labels instead of relying on implicit column-vector coercion.
y_true = y_test.detach().cpu().numpy()
y_pred = y_test_predictions_thresholded.detach().cpu().numpy().reshape(-1)

total_records = len(y_test)
# normalize=False makes accuracy_score return the number of correct predictions
# (normalize=True would return the fraction directly), so divide by the number
# of test records to obtain the accuracy.
num_correct = accuracy_score(y_true, y_pred, normalize=False)
print(num_correct / total_records)