In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os

In [2]:
# Hinged Square Loss
class SquaredHingeLoss(nn.Module):
    def __init__(self, margin=1):
        super(SquaredHingeLoss, self).__init__()
        self.margin = margin

    def forward(self, predicted, y):
        low, high = y[:, 0:1], y[:, 1:2]
        loss_low = torch.relu(low - predicted + self.margin)
        loss_high = torch.relu(predicted - high + self.margin)
        loss = loss_low + loss_high
        return torch.mean(torch.square(loss))

In [3]:
class SimpleNN(nn.Module):
    def __init__(self, input_size):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 32)
        self.fc2 = nn.Linear(32, 1)
        self.leaky_relu = nn.LeakyReLU(negative_slope=0.01)

    def forward(self, x):
        x = self.leaky_relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [4]:
dataset = 'detailed'
chosen_feature = ['log_range_value', 'log_variance', 'loglog_sum_diff', 'loglog_count']

In [5]:
# Load data
folds_df = pd.read_csv(f'../../training_data/{dataset}/folds.csv')
features_df = pd.read_csv(f'../../training_data/{dataset}/features.csv')[['sequenceID'] + chosen_feature]
target_df = pd.read_csv(f'../../training_data/{dataset}/target.csv')


# Create X_train, y_train, X_test
X_train = features_df[chosen_feature].to_numpy()
y_train = target_df.iloc[:, 1:].to_numpy()


# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)


# Initialize the model, loss function, and optimizer
input_dim = X_train.shape[1]
model = SimpleNN(input_dim)
criterion = SquaredHingeLoss()
optimizer = optim.Adam(model.parameters())


# Training with early stopping
best_train_loss = float('inf')
patience = 100
patience_counter = 0

for epoch in range(100000):
    model.train()
    optimizer.zero_grad()
    
    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    
    # Backward pass and optimization
    loss.backward()
    optimizer.step()

    # Early stopping check
    train_loss = loss.item()
    if train_loss < best_train_loss:
        best_train_loss = train_loss
        best_model = model.state_dict()  # Save the best model state
        patience_counter = 0
    else:
        patience_counter += 1
    
    # Print loss every 1000 epochs
    if epoch % 1000 == 0:
        print(f"Epoch [{epoch}/{500000}], Loss: {train_loss:.8f}")
    
    if patience_counter >= patience:
        break

Epoch [0/500000], Loss: 0.70972949
Epoch [1000/500000], Loss: 0.18624765
Epoch [2000/500000], Loss: 0.17963301
Epoch [3000/500000], Loss: 0.17027456
Epoch [4000/500000], Loss: 0.16597910
Epoch [5000/500000], Loss: 0.16509597
Epoch [6000/500000], Loss: 0.16464680
Epoch [7000/500000], Loss: 0.16442712
Epoch [8000/500000], Loss: 0.16423614
Epoch [9000/500000], Loss: 0.16394523
Epoch [10000/500000], Loss: 0.16369344
Epoch [11000/500000], Loss: 0.16343717
Epoch [12000/500000], Loss: 0.16317037
Epoch [13000/500000], Loss: 0.16294731
Epoch [14000/500000], Loss: 0.16281290
Epoch [15000/500000], Loss: 0.16266014
Epoch [16000/500000], Loss: 0.16253226
Epoch [17000/500000], Loss: 0.16240522
Epoch [18000/500000], Loss: 0.16227843
Epoch [19000/500000], Loss: 0.16195327
Epoch [20000/500000], Loss: 0.15958576
Epoch [21000/500000], Loss: 0.15905392
Epoch [22000/500000], Loss: 0.15875958
Epoch [23000/500000], Loss: 0.15854289
Epoch [24000/500000], Loss: 0.15834226
Epoch [25000/500000], Loss: 0.15820177