In [4]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [15]:
# Load the .npz file
data = np.load("../data/dataset-2024-09-11-15:11:57.npz")

# Access the "features" and "labels"
features = data['features']
labels = data['labels']

# Display their shapes to verify
print("Features shape:", features.shape)
print("Labels shape:", labels.shape)
positive = labels.sum()
negative = (~labels).sum()
print(f"{positive} positive examples, {negative} negative examples")

Features shape: (391294, 9)
Labels shape: (391294,)
5289 positive examples, 386005 negative examples


In [6]:
# Define the model
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(9, 32)  # 9 input features, 32 hidden units
        self.fc2 = nn.Linear(32, 16)  # 32 hidden units, 16 hidden units in second layer
        self.fc3 = nn.Linear(16, 1)  # 16 hidden units, 1 output unit
        self.relu = nn.ReLU()  # Activation function
        self.sigmoid = nn.Sigmoid()  # Sigmoid for binary classification

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

# Create the model
model = SimpleNN()

# Define loss function and optimizer
criterion = nn.BCELoss()  # Binary Cross Entropy loss for binary classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Prepare your data (assuming `features` and `labels` are NumPy arrays)
features_tensor = torch.tensor(features, dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.float32)

# Create a DataLoader to batch the data
dataset = TensorDataset(features_tensor, labels_tensor)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [7]:
# Training loop
num_epochs = 100

for epoch in range(num_epochs):
    for inputs, labels in dataloader:
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels.unsqueeze(1))

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Print loss every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 0.0593
Epoch [20/100], Loss: 0.0078
Epoch [30/100], Loss: 0.0040
Epoch [40/100], Loss: 0.0595
Epoch [50/100], Loss: 0.0050
Epoch [60/100], Loss: 0.0049
Epoch [70/100], Loss: 0.0044
Epoch [80/100], Loss: 0.0469
Epoch [90/100], Loss: 0.1737
Epoch [100/100], Loss: 0.0057
