In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold

# Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [6]:
# Load the data
df1 = pd.read_excel(
    "/home/gddaslab/mxp140/sclerosis_project/miRNA_signal_hsa_number2.xlsx",
    engine="openpyxl",
    sheet_name="Sheet1",
)

# Drop non-feature columns
apoms_cols= np.array(list(df1.columns), dtype=object)[['aPOMS' in elem for elem in list(df1.columns)]]
spoms_cols = np.array(list(df1.columns), dtype=object)[['sPOMS' in elem for elem in list(df1.columns)]]
df = df1[list(apoms_cols) + list(spoms_cols)]
display(df.head(5))

# Label the columns based on their types
labels = {"aPOMS": 0, "sPOMS": 1}

# Create target labels for each column
y = []
for col in df.columns:
    for key in labels.keys():
        if col.startswith(key):
            y.append(labels[key])
            break

Unnamed: 0,aPOMS1,aPOMS2,aPOMS3,aPOMS4,aPOMS5,aPOMS6,aPOMS7,aPOMS9,sPOMS1,sPOMS2,sPOMS3
0,1.96,1.85,1.92,1.87,1.85,1.95,1.82,1.51,1.87,1.55,1.89
1,1.79,1.81,1.92,2.2,2.29,2.24,1.97,2.06,2.2,2.36,1.91
2,1.65,2.04,2.03,1.65,2.18,2.13,1.81,1.83,2.04,1.94,1.94
3,2.08,2.16,1.76,1.98,2.1,1.68,1.94,1.86,1.67,1.93,2.03
4,2.16,2.12,1.76,1.98,2.03,1.74,1.94,1.85,1.67,2.01,2.03


In [8]:
# Convert DataFrame to tensor
X = df.T.values
y = y

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [9]:
# Standardize features (optional but recommended)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

In [11]:
y_train_tensor

tensor([1, 0, 0, 1, 0, 0, 0, 0])

In [12]:
class SoftmaxRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SoftmaxRegression, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.linear(x)


class ElasticNetLoss(nn.Module):
    def __init__(self, model, alpha=1.0, l1_ratio=0.5):
        super(ElasticNetLoss, self).__init__()
        self.model = model
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.cross_entropy_loss = nn.CrossEntropyLoss()

    def forward(self, outputs, targets):
        ce_loss = self.cross_entropy_loss(outputs, targets)
        l1_norm = sum(param.abs().sum() for param in self.model.parameters())
        l2_norm = sum(param.pow(2).sum() for param in self.model.parameters())
        elastic_net_penalty = self.alpha * (
            self.l1_ratio * l1_norm + (1 - self.l1_ratio) * l2_norm
        )
        return ce_loss + elastic_net_penalty


# Instantiate the model
input_dim = X_train.shape[1]
output_dim = len(torch.unique(y_train_tensor))
model = SoftmaxRegression(input_dim, output_dim)

In [13]:
model

SoftmaxRegression(
  (linear): Linear(in_features=4570, out_features=2, bias=True)
)

In [14]:
# Define loss function and optimizer
criterion = ElasticNetLoss(model, alpha=0.01, l1_ratio=0.5)
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [15]:
# Training loop
num_epochs = 1000
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode

    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [10/1000], Loss: 0.3393
Epoch [20/1000], Loss: 0.3150
Epoch [30/1000], Loss: 0.2932
Epoch [40/1000], Loss: 0.2729
Epoch [50/1000], Loss: 0.2538
Epoch [60/1000], Loss: 0.2358
Epoch [70/1000], Loss: 0.2189
Epoch [80/1000], Loss: 0.2030
Epoch [90/1000], Loss: 0.1880
Epoch [100/1000], Loss: 0.1740
Epoch [110/1000], Loss: 0.1611
Epoch [120/1000], Loss: 0.1493
Epoch [130/1000], Loss: 0.1387
Epoch [140/1000], Loss: 0.1293
Epoch [150/1000], Loss: 0.1210
Epoch [160/1000], Loss: 0.1137
Epoch [170/1000], Loss: 0.1075
Epoch [180/1000], Loss: 0.1022
Epoch [190/1000], Loss: 0.0977
Epoch [200/1000], Loss: 0.0939
Epoch [210/1000], Loss: 0.0907
Epoch [220/1000], Loss: 0.0879
Epoch [230/1000], Loss: 0.0856
Epoch [240/1000], Loss: 0.0835
Epoch [250/1000], Loss: 0.0818
Epoch [260/1000], Loss: 0.0802
Epoch [270/1000], Loss: 0.0787
Epoch [280/1000], Loss: 0.0775
Epoch [290/1000], Loss: 0.0764
Epoch [300/1000], Loss: 0.0754
Epoch [310/1000], Loss: 0.0745
Epoch [320/1000], Loss: 0.0737
Epoch [330/1000],

In [16]:
# Save the model
torch.save(model.state_dict(), "softmax_classifier_stable_vs_active_peds.pth")

In [17]:
# Evaluation
with torch.no_grad():
    model.eval()
    test_outputs = model(X_test_tensor)
    _, y_pred_tensor = torch.max(test_outputs, 1)
    y_pred = y_pred_tensor.numpy()

    # Calculate accuracyI have
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.2f}")

    print(
        torch.max(
            model(
                torch.tensor(
                    scaler.fit_transform(X[0].reshape(1, -1)), dtype=torch.float32
                )
            ),
            1,
        )
    )

Accuracy: 1.00
torch.return_types.max(
values=tensor([0.0852]),
indices=tensor([0]))
