In [13]:
# loading libraries
import torch
import pandas as pd
import numpy as np

In [72]:
# loading data
titanic_raw = pd.read_csv('titanic.csv')

In [94]:
#Processing data

titanic_processed = pd.DataFrame()

titanic_processed['Survived'] = titanic_raw['Survived']
titanic_processed['1_klasse'] = (titanic_raw['Pclass'] == 1).astype(int)
titanic_processed['2_klasse'] = (titanic_raw['Pclass'] == 2).astype(int)
titanic_processed['3_klasse'] = (titanic_raw['Pclass'] == 3).astype(int)
titanic_processed['Sex'] = (titanic_raw['Sex'] == "male").astype(int)
titanic_processed['SibSp'] = titanic_raw['SibSp']
titanic_processed['Parch'] = titanic_raw['Parch']
titanic_processed['Age'] = titanic_raw['Age'].fillna(titanic_raw['Age'].mean())

titanic_processed

Unnamed: 0,Survived,1_klasse,2_klasse,3_klasse,Sex,SibSp,Parch,Age
0,0,0,0,1,1,1,0,22.000000
1,1,1,0,0,0,1,0,38.000000
2,1,0,0,1,0,0,0,26.000000
3,1,1,0,0,0,1,0,35.000000
4,0,0,0,1,1,0,0,35.000000
...,...,...,...,...,...,...,...,...
886,0,0,1,0,1,0,0,27.000000
887,1,1,0,0,0,0,0,19.000000
888,0,0,0,1,0,1,2,29.699118
889,1,1,0,0,1,0,0,26.000000


In [86]:
# randomizing the data and splitting it into training and testing

torch.manual_seed(42)

rand_idx = torch.randperm(len(titanic_processed))

train_idx = rand_idx[:int(0.8*len(titanic_processed))]
val_idx = rand_idx[int(0.8*len(titanic_processed)):]

y_train = torch.from_numpy(titanic_processed.iloc[train_idx]['Survived'].values.astype(np.int16))
y_val = torch.from_numpy(titanic_processed.iloc[val_idx]['Survived'].values.astype(np.int16))

# x_train = titanic_processed.iloc[train_idx].drop('Survived', axis=1).values
# x_val = titanic_processed.iloc[val_idx].drop('Survived', axis=1).values
x_train = torch.from_numpy(titanic_processed.iloc[train_idx].drop('Survived', axis=1).values.astype(np.int16))
x_val = torch.from_numpy(titanic_processed.iloc[val_idx].drop('Survived', axis=1).values.astype(np.int16))

x_train

tensor([[ 0,  1,  0,  ...,  0,  0, 29],
        [ 0,  0,  1,  ...,  0,  0, 29],
        [ 0,  0,  1,  ...,  0,  0, 29],
        ...,
        [ 0,  0,  1,  ...,  0,  0, 30],
        [ 1,  0,  0,  ...,  0,  1, 38],
        [ 1,  0,  0,  ...,  1,  0, 29]], dtype=torch.int16)

In [87]:
# creating the model

class Layer(torch.nn.Module):
    """A simple feedforward layer with an activation function."""
    def __init__(self, n_feature, n_hidden, activation_function=None):
        super().__init__()

        self.linear = torch.nn.Linear(n_feature, n_hidden)

        if activation_function is None:
            self.activation_function = torch.nn.Sigmoid()
        else:
            self.activation_function = activation_function

    def forward(self, x):
        out = self.linear(x)
        out = self.activation_function(out)
        return out


class FeedForward(torch.nn.Module):
    """A simple feedforward neural network with two hidden layers."""
    def __init__(self, n_feature, n_hidden, n_output, activation_function):
        super().__init__()

        self.layer0 = Layer(n_feature=n_feature, n_hidden=n_hidden, activation_function=activation_function)
        self.layer1 = Layer(n_feature=n_hidden, n_hidden=n_hidden, activation_function=activation_function)
        self.layer2 = Layer(n_feature=n_hidden, n_hidden=n_output, activation_function=None)

    def forward(self, x):
        # (b, f) -> (b, h)
        out = self.layer0(x)
        # (b, h) -> (b, h)
        out = self.layer1(out)
        # (b, h) -> (b, o)
        out = self.layer2(out)
        return out

In [93]:
# Create a loss function
loss_fn = torch.nn.CrossEntropyLoss()

# Create a feedforward neural network
feedforward = FeedForward(n_feature=7, n_hidden=4, n_output=1, activation_function=torch.nn.ReLU())

# Create an optimizer
optimizer = torch.optim.AdamW(feedforward.parameters(), lr=0.01, weight_decay=0.0)

In [92]:
batch_size = 8

train_losses = []
val_losses = []


for epoch in range(10):
    random_index = torch.randperm(len(x_train))
    x_train = x_train[random_index]
    y_train = y_train[random_index]

    train_epoch_losses = []
    val_epoch_losses = []

    # Training loop.
    feedforward.train()  # Set the model to training mode.
    for start_index in range(0, len(x_train), batch_size):
        end_index = start_index + batch_size
        x_batch = x_train[start_index:end_index]
        y_batch = y_train[start_index:end_index]

        # Reset gradients.
        optimizer.zero_grad()

        # Forward pass.
        y_pred = feedforward(x_batch)

        # Compute loss.
        loss = loss_fn(y_pred, y_batch)

        # Backward pass.
        loss.backward()

        # Update weights.
        optimizer.step()

        train_epoch_losses.append(loss.item())

    # Validation loop.
    feedforward.eval()  # Set the model to evaluation mode.
    with torch.no_grad():
        for start_index in range(0, len(x_val), batch_size):
            end_index = start_index + batch_size
            x_batch = x_val[start_index:end_index]
            y_batch = y_val[start_index:end_index]
            y_val_pred = feedforward(x_val)
            val_loss = loss_fn(y_val_pred.squeeze(), y_val)

            val_epoch_losses.append(val_loss.item())

    train_losses.append(sum(train_epoch_losses) / len(train_epoch_losses))
    val_losses.append(sum(val_epoch_losses) / len(val_epoch_losses))


plt.plot(train_losses, label='train loss')
plt.plot(val_losses, label='val loss')
plt.legend()

RuntimeError: mat1 and mat2 must have the same dtype, but got Short and Float

In [None]:
feedforward.eval()  # Set the model to evaluation mode.

# Predict and detach the tensor from the computational graph and convert it to a NumPy array.
y_hat = feedforward(x_val).detach().numpy()

plt.plot(y_val, y_hat, 'o', markersize=0.5, color='black')
plt.xlabel('True values')
plt.ylabel('Predicted values')