Pu practices his PyTorch skills by writing a binary classification model in PyTorch. He also uses sklearn's LabelEncoder to transform categorical features into integer-encoded features. This is based on the tutorial on this page: https://machinelearningmastery.com/use-pytorch-deep-learning-models-with-scikit-learn/

In [4]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score

In [5]:
# Load the training CSV and prepare it for modelling: drop unused columns,
# fill missing values, and integer-encode the categorical columns.
data = pd.read_csv('../data/label-encoder/train.csv')
# drop the columns that are not useful
data = data.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

# Fill missing data.
# Assign the filled column back instead of calling
# `data[col].fillna(..., inplace=True)`: the in-place call acts on a temporary
# Series, which triggers a FutureWarning on pandas 2.x and leaves `data`
# unchanged once Copy-on-Write is enabled.
data['Age'] = data['Age'].fillna(data['Age'].mean())
data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])

# Convert categorical data into numerical.
# One fitted encoder is kept per column so the integer codes can be mapped
# back to the original labels later (via `inverse_transform`).
label_encoders = {}
for column in ['Sex', 'Embarked']:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

In [6]:
# Separate the target column from the features, then hold out 20% for testing
y = data['Survived']
X = data.drop(columns=['Survived'])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize: the scaler is fitted on the training split only and the same
# fitted scaler is then applied to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Convert features and labels to float32 pytorch tensors
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32)
    for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(labels.values, dtype=torch.float32)
    for labels in (y_train, y_test)
)



In [7]:
# Define a simple feedforward neural network
class Net(nn.Module):
    """Feedforward binary classifier: two ReLU hidden layers and a sigmoid output.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of both hidden layers. Defaults to 64, the value
            that was previously hard-coded.
    """

    def __init__(self, input_size, hidden_size=64):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one value in [0, 1] per input row, shaped ``(rows, 1)``.

        Args:
            x: Float tensor whose last dimension equals ``input_size``.
        """
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # Use the registered activation module; it was previously created in
        # __init__ but never called.
        return self.sigmoid(self.fc3(x))

# Training
# Seed PyTorch's global RNG before the model is built so the initial weights,
# and therefore the printed losses, are the same on every fresh run. The value
# matches the random_state used for the train/test split.
torch.manual_seed(42)

model = Net(X_train_tensor.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 50
# Nothing inside the loop leaves training mode, so it is enough to set it once.
model.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    # Full-batch forward pass. squeeze(1) removes only the size-1 output
    # dimension; a bare squeeze() would also collapse the batch dimension when
    # there is a single row, leaving a 0-d tensor that no longer matches the
    # target's shape.
    outputs = model(X_train_tensor).squeeze(1)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    # Report the loss every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1} / {num_epochs}], Loss: {loss.item():.4f}')

# Test
# Score the held-out split with gradient tracking disabled.
model.eval()
with torch.no_grad():
    # squeeze(1) removes only the size-1 output dimension, so a one-row test
    # set still produces a 1-D tensor rather than a 0-d scalar.
    test_probs = model(X_test_tensor).squeeze(1)

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions
test_outputs = (test_probs > 0.5).int()

# Hand scikit-learn a NumPy array explicitly instead of relying on implicit
# tensor-to-array conversion.
y_pred = test_outputs.numpy()
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print(f'Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}')

Epoch [10 / 50], Loss: 0.6120
Epoch [20 / 50], Loss: 0.5472
Epoch [30 / 50], Loss: 0.4858
Epoch [40 / 50], Loss: 0.4447
Epoch [50 / 50], Loss: 0.4261
Precision: 0.8060, Recall: 0.7297, F1: 0.7660
