# Classification with Clinical Embeddings

In [1]:
# Importing necessary libraries
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Define the neural network model for classification
class PVEMClassifier(nn.Module):
    """Classifier that expands every scalar feature into a learned embedding.

    Each of the ``input_dim`` features owns one weight vector and one bias
    vector of length ``embedding_dim``. A feature value is embedded as
    ``value * weight + bias``; the per-feature embeddings are concatenated,
    passed through dropout, and mapped to class logits by one linear layer.

    Args:
        input_dim: Number of scalar features per sample.
        embedding_dim: Length of the embedding produced for each feature.
        num_classes: Number of output logits.
    """

    def __init__(self, input_dim, embedding_dim=128, num_classes=2):
        super().__init__()
        table_shape = (input_dim, embedding_dim)
        # Creation order (weight, bias, classifier) is kept on purpose so a
        # seeded RNG produces the same initial parameter values.
        self.weight = nn.Parameter(torch.randn(*table_shape))
        self.bias = nn.Parameter(torch.randn(*table_shape))
        self.dropout = nn.Dropout(p=0.3)
        self.classifier = nn.Linear(
            in_features=input_dim * embedding_dim,
            out_features=num_classes,
        )

    def forward(self, x):
        """Return class logits for a batch of feature vectors.

        Args:
            x: Tensor whose last dimension is ``input_dim``; in this notebook
                it is called with shape ``(batch, input_dim)``.

        Returns:
            Logits of shape ``(batch, num_classes)`` for 2-D input.
        """
        batch_size = x.shape[0]
        # (batch, input_dim, 1) broadcast against (input_dim, embedding_dim).
        per_feature = x[..., None] * self.weight + self.bias
        # Concatenate all feature embeddings of one sample into a single row.
        flat = per_feature.reshape(batch_size, -1)
        return self.classifier(self.dropout(flat))

In [3]:
# Load training data and labels.
# NOTE(review): labels are taken from the second column (index 1) of each
# labels CSV; the first column is presumably a sample identifier — confirm.
train_data = torch.tensor(pd.read_csv("../data/train_data.csv").values, dtype=torch.float32)
train_labels_df = pd.read_csv("../data/labels/train_labels.csv")
train_labels = torch.tensor(train_labels_df.iloc[:, 1].values, dtype=torch.long)

# Load test data and labels (same layout as the training files).
test_data = torch.tensor(pd.read_csv("../data/test_data.csv").values, dtype=torch.float32)
test_labels_df = pd.read_csv("../data/labels/test_labels.csv")
test_labels = torch.tensor(test_labels_df.iloc[:, 1].values, dtype=torch.long)

# Every feature row needs exactly one label; fail here rather than deep
# inside the loss or metric computation.
assert train_data.shape[0] == train_labels.shape[0], "train features/labels row count mismatch"
assert test_data.shape[0] == test_labels.shape[0], "test features/labels row count mismatch"

print(f"Train data shape: {train_data.shape}")
print(f"Test data shape: {test_data.shape}")
# The label shapes were previously printed under the "data" captions.
print(f"Train labels shape: {train_labels.shape}")
print(f"Test labels shape: {test_labels.shape}")

Train data shape: torch.Size([84, 38])
Test data shape: torch.Size([21, 38])
Train data shape: torch.Size([84])
Test data shape: torch.Size([21])


In [4]:
# Preview only the first few rows; echoing the whole tensor floods the output.
test_data[:5]

tensor([[-1.0228e+00, -3.1097e-02, -5.9342e-01,  2.4495e-01,  3.9900e-02,
         -2.2204e+00, -8.4982e-01, -1.1445e+00, -7.3764e-01,  8.8298e-01,
         -1.0690e+00, -7.0711e-01,  1.0418e+00, -9.1867e-01, -4.5763e-01,
         -3.7291e-01, -1.6604e-01, -3.3002e-01, -4.5499e-01, -7.8446e-01,
          1.7245e+00,  5.3873e-01, -6.4273e-03,  1.7578e+00,  8.6550e-01,
         -2.4494e-02,  1.0730e-01,  9.5542e-01,  7.4422e-02,  1.7029e+00,
          3.7984e-01, -2.5546e-01,  1.1329e+00,  1.1187e-01, -9.1631e-02,
          2.1176e+00,  4.7410e-01, -3.7855e-01],
        [-6.5986e-01, -1.3521e-01, -9.3057e-02, -3.3368e-01,  1.6917e-01,
          8.2058e-01,  1.2875e+00,  1.0015e+00,  1.3557e+00, -1.1325e+00,
         -1.0690e+00,  1.4142e+00,  1.0418e+00,  4.0707e+00, -4.5763e-01,
          8.4294e-17, -1.6616e-01, -1.1752e+00, -9.7427e-01,  1.9612e+00,
         -3.2027e+00, -9.0713e-02, -1.4906e-01,  3.9258e-02,  7.7607e-02,
         -2.4494e-02,  1.0730e-01, -3.3264e-02,  7.4422e-02,  1

In [5]:
# Display the test label tensor for a quick visual check.
test_labels

tensor([1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0])

In [6]:
# Train the model using the training data and labels.

# Hyperparameters, named so they can be tuned in one place.
SEED = 42
EMBEDDING_DIM = 128
NUM_CLASSES = 2
NUM_EPOCHS = 100
LEARNING_RATE = 0.001
LOG_EVERY = 10  # print the loss every LOG_EVERY epochs

# Seed before building the model: parameter initialisation (torch.randn) and
# dropout both draw from torch's global RNG, so without a seed every run
# reports different losses and test metrics.
torch.manual_seed(SEED)

input_dim = train_data.shape[1]  # Number of features
model = PVEMClassifier(input_dim=input_dim, embedding_dim=EMBEDDING_DIM, num_classes=NUM_CLASSES)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training mode enables dropout; nothing inside the loop leaves it, so it is
# set once instead of on every epoch.
model.train()

for epoch in range(NUM_EPOCHS):
    optimizer.zero_grad()

    # Forward pass over the full training set (one batch per epoch)
    logits = model(train_data)

    # Compute loss
    loss = criterion(logits, train_labels)

    # Backward pass and optimization
    loss.backward()
    optimizer.step()

    if epoch % LOG_EVERY == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.6f}")

Epoch 0, Loss: 0.911634
Epoch 10, Loss: 0.742672
Epoch 20, Loss: 0.528279
Epoch 30, Loss: 0.349997
Epoch 40, Loss: 0.402448
Epoch 50, Loss: 0.378883
Epoch 60, Loss: 0.324763
Epoch 70, Loss: 0.430325
Epoch 80, Loss: 0.400720
Epoch 90, Loss: 0.367245


In [7]:
# Evaluate the model using the test data and labels.
model.eval()  # evaluation mode: dropout is disabled

with torch.no_grad():
    logits = model(test_data)
    # Predicted class is the index of the largest logit in each row.
    predicted = torch.max(logits, dim=1).indices

# Convert once and reuse for both metrics.
y_true = test_labels.numpy()
y_pred = predicted.numpy()

accuracy = accuracy_score(y_true, y_pred)
print(f"Test Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_report(y_true, y_pred))

Test Accuracy: 0.8095
Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.75      0.82        12
           1       0.73      0.89      0.80         9

    accuracy                           0.81        21
   macro avg       0.81      0.82      0.81        21
weighted avg       0.83      0.81      0.81        21

