In [None]:
# Separate the predictors from the regression target column.
y = df_train_features["target"]
X = df_train_features.drop(columns=["target"])

# Hold out 20% of the rows for validation; the seed keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features. The scaler is fitted on the training split only
# and then applied unchanged to the validation split.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_val = sc.transform(X_val)

# Standardize the target the same way. The scaler expects a 2-D input, so each
# Series is turned into a single-column array first.
target_sc = StandardScaler().fit(y_train.to_numpy().reshape(-1, 1))
y_train, y_val = (
    target_sc.transform(split.to_numpy().reshape(-1, 1))
    for split in (y_train, y_val)
)

# At this point X_train, X_val, y_train and y_val are NumPy arrays (the scalers
# return arrays, not data frames); copy them into float32 torch tensors.
X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, y_train, X_val, y_val)
)

# Pair features with targets and wrap them in batched loaders. Only the
# training loader shuffles; validation keeps the original row order.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=512)
val_loader = DataLoader(val_dataset, batch_size=512)


# Define a simple neural network model
class SimpleNN(nn.Module):
    """Fully connected regressor: input_size -> 1000 -> 2000 -> 1.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout(0.1). The
    hidden Linear layers are created without a bias term; the BatchNorm1d
    layer that follows each of them supplies a learnable shift instead.
    """

    def __init__(self, input_size):
        """Build the layer stack for inputs with ``input_size`` features."""
        super().__init__()
        first_width, second_width = 1000, 2000
        drop_prob = 0.1

        layers = [
            # Hidden stage 1
            nn.Linear(input_size, first_width, bias=False),
            nn.BatchNorm1d(first_width),
            nn.ReLU(),
            nn.Dropout(drop_prob),
            # Hidden stage 2
            nn.Linear(first_width, second_width, bias=False),
            nn.BatchNorm1d(second_width),
            nn.ReLU(),
            nn.Dropout(drop_prob),
            # Output head: a single regression value per row
            nn.Linear(second_width, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return one value per row."""
        prediction = self.net(x)
        return prediction


# Build the network (one input unit per feature column), the mean-squared-error
# loss and an Adam optimizer.
input_size = X_train.shape[1]
model = SimpleNN(input_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Train for a fixed number of epochs, validating after each one.
n_epochs = 10
for epoch in trange(n_epochs):
    # ---- training pass (dropout on, batch norm uses batch statistics) ----
    model.train()
    train_loss = 0.0
    for X_batch, y_batch in tqdm(train_loader):
        optimizer.zero_grad()
        outputs = model(X_batch)
        # Targets are forced to a column so they line up with the outputs.
        loss = criterion(outputs, y_batch.reshape(-1, 1))
        loss.backward()
        optimizer.step()
        # The criterion returns a batch mean; weight it by the batch size so
        # a smaller final batch does not skew the epoch average.
        train_loss += len(X_batch) * loss.item()

    train_loss = train_loss / len(train_loader.dataset)

    # ---- validation pass (no gradients, layers in evaluation mode) ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in tqdm(val_loader):
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch.reshape(-1, 1))
            val_loss += len(X_batch) * loss.item()

    val_loss = val_loss / len(val_loader.dataset)

    # The printed figures are square roots of the per-sample mean squared
    # error, i.e. RMSE on the targets as fed to the model.
    print(
        f"Epoch {epoch+1}/{n_epochs}, Training Loss: {np.sqrt(train_loss):.4f}, Validation Loss: {np.sqrt(val_loss):.4f}"
    )

In [None]:
# Set the model to evaluation mode so dropout is disabled and batch norm uses
# its running statistics.
model.eval()

# Collect predictions for the entire validation set, one 1-D array per batch.
all_predictions = []

with torch.no_grad():
    for X_batch, _ in val_loader:
        outputs = model(X_batch)
        # Drop only the trailing size-1 output axis. A bare squeeze() would
        # also collapse the batch axis when the final batch holds a single
        # row, producing a 0-d array that np.concatenate cannot join.
        all_predictions.append(outputs.squeeze(1).cpu().numpy())

# Flat array of predictions in validation-loader order.
y_pred = np.concatenate(all_predictions)

In [None]:
# Undo the target standardization so predictions and ground truth are both on
# the original target scale, flattened to 1-D.
# NOTE: y_pred and y_val are overwritten in place, so running this cell twice
# applies the inverse transform twice; re-run the cells above before re-running.
y_pred = target_sc.inverse_transform(y_pred.reshape(-1, 1)).ravel()
y_val = target_sc.inverse_transform(y_val.reshape(-1, 1)).ravel()

# Rank correlation between true and predicted targets, computed once as the
# cell's displayed result.
kendalltau(y_val, y_pred)