In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Pick the compute device: the first CUDA GPU when one is available, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [2]:
# Load the raw miRNA signal table.
# NOTE(review): this is an absolute, machine-specific path; consider moving it to a
# configurable data directory so the notebook runs elsewhere.
df1 = pd.read_excel(
    "/home/gddaslab/mxp140/sclerosis_project/miRNA_signal_hsa_number2.xlsx",
    engine="openpyxl",
    sheet_name="Sheet1",
)

# Drop non-feature columns; every remaining column is treated as one sample.
df = df1.drop(columns=["ID", "Transcript_ID"])

# Map each sample-type prefix (the start of a column name) to its integer class index.
labels = {"pHC": 0, "aHC": 1, "sMS": 2, "aMS": 3, "aPOMS": 4, "sPOMS": 5, "pBar": 6}

# Build one target label per column, in column order. The first prefix (in `labels`
# order) that the column name starts with decides the class.
y = []
unmatched_columns = []
for col in df.columns:
    matched_prefix = next(
        (prefix for prefix in labels if str(col).startswith(prefix)), None
    )
    if matched_prefix is None:
        unmatched_columns.append(col)
    else:
        y.append(labels[matched_prefix])

# A column without a known prefix would otherwise be skipped silently, leaving `y`
# shorter than the number of samples and shifting every later label out of alignment.
if unmatched_columns:
    raise ValueError(
        f"Columns with no known label prefix {list(labels)}: {unmatched_columns}"
    )

In [3]:
# Build the feature matrix as a NumPy array (not a tensor): transposing `df` puts one
# sample (one column of `df`) on each row and one feature on each column.
X = df.T.to_numpy()
# `y`, the list of class indices built when the data was loaded, is used unchanged.

In [4]:
# Standardize features (optional but recommended)
# NOTE(review): the scaler is only instantiated here; in the cells visible in this
# notebook it is never fitted or applied to X, so X is used unscaled. If the saved
# model was trained on standardized inputs, confirm whether that matters here.
scaler = StandardScaler()

In [5]:
class SoftmaxRegression(nn.Module):
    """Multiclass linear classifier made of a single fully connected layer.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output scores (one per class).
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Return the linear layer's output for `x`; no softmax is applied here."""
        logits = self.linear(x)
        return logits


class ElasticNetLoss(nn.Module):
    """Cross-entropy loss plus an elastic-net penalty on a model's parameters.

    The penalty is ``alpha * (l1_ratio * L1 + (1 - l1_ratio) * L2)``, where L1 is the
    sum of absolute values and L2 the sum of squares over every parameter returned by
    ``model.parameters()`` (biases included).

    Args:
        model: Module whose parameters are penalized.
        alpha: Overall strength of the penalty.
        l1_ratio: Weight of the L1 term; the L2 term gets ``1 - l1_ratio``.
    """

    def __init__(self, model, alpha=1.0, l1_ratio=0.5):
        super().__init__()
        self.model = model
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.cross_entropy_loss = nn.CrossEntropyLoss()

    def forward(self, outputs, targets):
        """Return cross-entropy of (`outputs`, `targets`) plus the elastic-net penalty."""
        data_term = self.cross_entropy_loss(outputs, targets)

        # Accumulate both norms over all parameters of the wrapped model.
        abs_total = 0
        for weight in self.model.parameters():
            abs_total = abs_total + weight.abs().sum()
        square_total = 0
        for weight in self.model.parameters():
            square_total = square_total + weight.pow(2).sum()

        l1_part = self.l1_ratio * abs_total
        l2_part = (1 - self.l1_ratio) * square_total
        penalty = self.alpha * (l1_part + l2_part)
        return data_term + penalty

In [6]:
# Rebuild the classifier with the same dimensions as the data and restore its weights.
input_dim = X.shape[1]  # number of features (columns of X)
output_dim = len(torch.unique(torch.tensor(y)))  # number of distinct class labels in y
model = SoftmaxRegression(input_dim, output_dim)

# map_location="cpu": the model is built on the CPU and never moved, so load the
# checkpoint onto the CPU as well. Without this, a checkpoint saved from a GPU cannot
# be loaded on a machine that has no CUDA device.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted
# source (recent PyTorch versions also accept weights_only=True).
model.load_state_dict(
    torch.load("softmax_classifier_with_pHC.pth", map_location="cpu")
)
model.eval()  # Set the model to evaluation mode

SoftmaxRegression(
  (linear): Linear(in_features=4570, out_features=7, bias=True)
)

In [9]:
# Collect the model's parameters into a list, then print each tensor and its shape.
parameters = list(model.parameters())
for param in parameters:
    print(param)
    print(param.shape)

Parameter containing:
tensor([[ 4.3902e-05, -1.1309e-05,  3.0655e-06,  ..., -1.1059e-05,
         -5.5029e-05,  3.1992e-05],
        [ 3.8382e-05, -3.1577e-05, -2.9805e-05,  ..., -3.3302e-05,
          5.2395e-06, -1.0548e-05],
        [ 3.6301e-05, -8.6224e-06, -2.9602e-05,  ...,  3.3170e-05,
          6.7094e-06,  1.2435e-06],
        ...,
        [-6.1704e-05,  3.3200e-04,  4.7289e-05,  ...,  2.8878e-05,
         -2.1543e-05,  1.1240e-02],
        [ 3.1717e-05,  1.7200e-03,  2.6634e-06,  ...,  1.7624e-02,
         -1.3855e-05,  2.5219e-05],
        [ 4.8298e-05, -3.9957e-06,  1.3569e-05,  ..., -5.9655e-05,
         -5.5986e-05,  2.5388e-05]], requires_grad=True)
torch.Size([7, 4570])
Parameter containing:
tensor([-2.5547e-05,  1.5846e-05,  2.9928e-05, -3.0617e-05, -1.4975e-06,
         2.2246e-05,  1.2235e-05], requires_grad=True)
torch.Size([7])


In [10]:
# Evaluation: rank miRNA features by the learned weights of the linear layer.
import numpy as np

TOP_N_OVERALL = 10  # features kept in the overall ranking
TOP_N_PER_CLASS = 5  # features kept for each class

# Read the weight matrix (one row per class, one column per feature) straight from the
# model rather than from `parameters[0]`, so this cell does not depend on the
# parameter-printing cell. `.detach()` is used instead of the in-place `.detach_()`,
# which would modify the model's own parameter; `.cpu()` makes `.numpy()` work even if
# the model has been moved to a GPU. No forward pass happens here, so no `no_grad`
# context is needed.
weights = model.linear.weight.detach().cpu().numpy()

# Feature i of the model corresponds to row i of the original table.
transcript_ids = df1["Transcript_ID"].values

######## top features overall ##########
# Importance of a feature = sum over classes of the absolute weight.
feature_importance = np.abs(weights)
aggregated_importance = np.sum(feature_importance, axis=0)
ranking_indices_for_miRNA = np.argsort(aggregated_importance)[::-1]  # descending
top_indices = ranking_indices_for_miRNA[:TOP_N_OVERALL]
top_miRNA_signals = X[:, top_indices]
top_miRNA_patient = transcript_ids[top_indices]  # Transcript_IDs of the top features

########### top features class wise ##########
# For every class keep the features with the largest (signed) weights. argsort is
# ascending, so within each class the strongest feature comes last.
top_indices_per_class = np.argsort(weights, axis=1)[:, -TOP_N_PER_CLASS:]

# Look class names up by their label index instead of relying on dict ordering.
class_name_by_index = {index: name for name, index in labels.items()}
top_rnas_per_class = {}
for class_index in range(top_indices_per_class.shape[0]):
    top_rnas_per_class[class_name_by_index[class_index]] = list(
        transcript_ids[top_indices_per_class[class_index]]
    )

In [13]:
# Save the top-ranked Transcript_IDs as a CSV file, keeping the row index column.
pd.DataFrame(data=top_miRNA_patient).to_csv(
    path_or_buf="top10_rnas_with_pHC.csv",
    sep=",",
    index=True,
)