In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold

# Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [2]:
# Load the data
df1 = pd.read_excel(
    "/home/gddaslab/mxp140/sclerosis_project/miRNA_signal_hsa_number2.xlsx",
    engine="openpyxl",
    sheet_name="Sheet1",
)

# Drop non-feature columns
ams_cols = np.array(list(df1.columns), dtype=object)[
    ["aMS" in elem for elem in list(df1.columns)]
]
sms_cols = np.array(list(df1.columns), dtype=object)[
    ["sMS" in elem for elem in list(df1.columns)]
]
df = df1[list(ams_cols) + list(sms_cols)]
display(df.head(5))

# Label the columns based on their types
labels = {"aMS": 0, "sMS": 1}

# Create target labels for each column
y = []
for col in df.columns:
    for key in labels.keys():
        if col.startswith(key):
            y.append(labels[key])
            break

Unnamed: 0,aMS1,aMS2,aMS3,aMS4,aMS5,sMS1,sMS2,sMS4,sMS5
0,1.64,1.94,1.61,2.33,1.49,2.26,1.48,1.84,1.88
1,1.92,2.14,2.0,1.84,1.88,1.72,2.08,1.78,1.92
2,1.81,1.97,2.17,1.76,1.73,1.77,1.93,1.67,1.81
3,2.0,1.92,2.17,2.12,2.15,1.88,2.09,2.47,1.96
4,2.04,1.87,2.17,2.12,2.33,1.88,1.96,2.47,2.01


In [3]:
# Convert DataFrame to tensor
X = df.T.values
y = y

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [4]:
# Standardize features (optional but recommended)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

In [7]:
y_train_tensor

tensor([1, 0, 1, 0, 0, 0, 1])

In [41]:
class SoftmaxRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SoftmaxRegression, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.linear(x)


class ElasticNetLoss(nn.Module):
    def __init__(self, model, alpha=1.0, l1_ratio=0.5):
        super(ElasticNetLoss, self).__init__()
        self.model = model
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.cross_entropy_loss = nn.CrossEntropyLoss()

    def forward(self, outputs, targets):
        ce_loss = self.cross_entropy_loss(outputs, targets)
        l1_norm = sum(param.abs().sum() for param in self.model.parameters())
        l2_norm = sum(param.pow(2).sum() for param in self.model.parameters())
        elastic_net_penalty = self.alpha * (
            self.l1_ratio * l1_norm + (1 - self.l1_ratio) * l2_norm
        )
        return ce_loss + elastic_net_penalty


# Instantiate the model
input_dim = X_train.shape[1]
output_dim = len(torch.unique(y_train_tensor))
model = SoftmaxRegression(input_dim, output_dim)

In [42]:
model

SoftmaxRegression(
  (linear): Linear(in_features=4570, out_features=2, bias=True)
)

In [43]:
# Define loss function and optimizer
criterion = ElasticNetLoss(model, alpha=0.01, l1_ratio=0.5)
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [45]:
# Training loop
num_epochs = 10000
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode

    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [10/10000], Loss: 0.0389
Epoch [20/10000], Loss: 0.0389
Epoch [30/10000], Loss: 0.0389
Epoch [40/10000], Loss: 0.0389
Epoch [50/10000], Loss: 0.0389
Epoch [60/10000], Loss: 0.0389
Epoch [70/10000], Loss: 0.0389
Epoch [80/10000], Loss: 0.0389
Epoch [90/10000], Loss: 0.0388
Epoch [100/10000], Loss: 0.0389
Epoch [110/10000], Loss: 0.0389
Epoch [120/10000], Loss: 0.0389
Epoch [130/10000], Loss: 0.0389
Epoch [140/10000], Loss: 0.0388
Epoch [150/10000], Loss: 0.0389
Epoch [160/10000], Loss: 0.0389
Epoch [170/10000], Loss: 0.0388
Epoch [180/10000], Loss: 0.0389
Epoch [190/10000], Loss: 0.0388
Epoch [200/10000], Loss: 0.0388
Epoch [210/10000], Loss: 0.0388
Epoch [220/10000], Loss: 0.0388
Epoch [230/10000], Loss: 0.0388
Epoch [240/10000], Loss: 0.0388
Epoch [250/10000], Loss: 0.0388
Epoch [260/10000], Loss: 0.0388
Epoch [270/10000], Loss: 0.0388
Epoch [280/10000], Loss: 0.0388
Epoch [290/10000], Loss: 0.0388
Epoch [300/10000], Loss: 0.0388
Epoch [310/10000], Loss: 0.0388
Epoch [320/10000]

In [46]:
# Save the model
torch.save(model.state_dict(), "softmax_classifier_stable_active_adult.pth")

In [47]:
# Evaluation
with torch.no_grad():
    model.eval()
    test_outputs = model(X_test_tensor)
    _, y_pred_tensor = torch.max(test_outputs, 1)
    y_pred = y_pred_tensor.numpy()

    # Calculate accuracyI have
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.2f}")

    print(
        torch.max(
            model(
                torch.tensor(
                    scaler.fit_transform(X[0].reshape(1, -1)), dtype=torch.float32
                )
            ),
            1,
        )
    )

Accuracy: 0.50
torch.return_types.max(
values=tensor([3.0225e-05]),
indices=tensor([0]))


In [1]:
from sklearn.model_selection import KFold
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [2]:
class SoftmaxRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SoftmaxRegression, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.linear(x)


class ElasticNetLoss(nn.Module):
    def __init__(self, model, alpha=1.0, l1_ratio=0.5):
        super(ElasticNetLoss, self).__init__()
        self.model = model
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.cross_entropy_loss = nn.CrossEntropyLoss()

    def forward(self, outputs, targets):
        ce_loss = self.cross_entropy_loss(outputs, targets)
        l1_norm = sum(param.abs().sum() for param in self.model.parameters())
        l2_norm = sum(param.pow(2).sum() for param in self.model.parameters())
        elastic_net_penalty = self.alpha * (
            self.l1_ratio * l1_norm + (1 - self.l1_ratio) * l2_norm
        )
        return ce_loss + elastic_net_penalty

In [5]:
# Load the data
df1 = pd.read_excel(
    "/home/gddaslab/mxp140/sclerosis_project/miRNA_signal_hsa_number2.xlsx",
    engine="openpyxl",
    sheet_name="Sheet1",
)

# Drop non-feature columns
df = df1.drop(columns=["ID", "Transcript_ID"])
df = df.iloc[:, 10:]

# Label the columns based on their types
labels = {"aHC": 0, "sMS": 1, "aMS": 2, "aPOMS": 3, "sPOMS": 4, "pBar": 5}

# Create target labels for each column
y = []
for col in df.columns:
    for key in labels.keys():
        if col.startswith(key):
            y.append(labels[key])
            break

In [74]:
# Define the number of folds for the cross-validation
n_folds = 4
kfold = KFold(n_splits=n_folds, shuffle=True, random_state=42)

In [75]:
# Initialize variables to store the best model and its accuracy
best_model = None
best_accuracy = 0.0

In [76]:
# Convert DataFrame to tensor
X = df.T.values
y = y

# Convert the entire dataset to PyTorch tensors
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.long)

In [77]:
# Cross-validation loop
for fold, (train_ids, val_ids) in enumerate(kfold.split(X_tensor)):
    print(f"Fold {fold+1}/{n_folds}")

    # Split the data into training and validation sets
    X_train, X_val = X_tensor[train_ids], X_tensor[val_ids]
    y_train, y_val = y_tensor[train_ids], y_tensor[val_ids]

    # Instantiate the model
    model = SoftmaxRegression(input_dim, output_dim)

    # Define loss function and optimizer
    criterion = ElasticNetLoss(model, alpha=0.01, l1_ratio=0.5)
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # Training loop
    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode

        # Forward pass
        outputs = model(X_train)
        loss = criterion(outputs, y_train)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

    # Evaluation
    with torch.no_grad():
        model.eval()
        val_outputs = model(X_val)
        _, y_pred_tensor = torch.max(val_outputs, 1)
        y_pred = y_pred_tensor.numpy()
        y_true = y_val.numpy()

        # Calculate accuracy
        accuracy = accuracy_score(y_true, y_pred)
        print(f"Validation Accuracy: {accuracy:.2f}")

        # If this model is better than the previous best, update the best model and accuracy
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = model

# Save the best model
# torch.save(best_model.state_dict(), "softmax_classifier_best.pth")
print(f"Best Validation Accuracy: {best_accuracy:.2f}")

Fold 1/4
Epoch [10/10000], Loss: 55.6817
Epoch [20/10000], Loss: 63.2771
Epoch [30/10000], Loss: 21.8486
Epoch [40/10000], Loss: 15.1776
Epoch [50/10000], Loss: 69.6757
Epoch [60/10000], Loss: 25.2206
Epoch [70/10000], Loss: 1.0683
Epoch [80/10000], Loss: 1.0437
Epoch [90/10000], Loss: 1.0196
Epoch [100/10000], Loss: 0.9959
Epoch [110/10000], Loss: 0.9726
Epoch [120/10000], Loss: 0.9497
Epoch [130/10000], Loss: 0.9273
Epoch [140/10000], Loss: 0.9052
Epoch [150/10000], Loss: 0.8836
Epoch [160/10000], Loss: 0.8623
Epoch [170/10000], Loss: 0.8415
Epoch [180/10000], Loss: 0.8210
Epoch [190/10000], Loss: 0.8010
Epoch [200/10000], Loss: 0.7814
Epoch [210/10000], Loss: 0.7622
Epoch [220/10000], Loss: 0.7433
Epoch [230/10000], Loss: 0.7248
Epoch [240/10000], Loss: 0.7067
Epoch [250/10000], Loss: 0.6889
Epoch [260/10000], Loss: 0.6715
Epoch [270/10000], Loss: 0.6545
Epoch [280/10000], Loss: 0.6379
Epoch [290/10000], Loss: 0.6216
Epoch [300/10000], Loss: 0.6058
Epoch [310/10000], Loss: 0.5903
Ep

In [78]:
# Use split to generate indices for each fold
for fold, (train_index, test_index) in enumerate(kfold.split(y_tensor)):
    print(f"Fold {fold + 1}")
    print(
        f"Train: {list(df.columns[train_index])}, Test: {list(df.columns[test_index])}"
    )
    print("-" * 20)

Fold 1
Train: ['aMS1', 'aMS3', 'aMS4', 'aMS5', 'sMS2', 'sMS5'], Test: ['aMS2', 'sMS1', 'sMS4']
--------------------
Fold 2
Train: ['aMS2', 'aMS3', 'aMS4', 'aMS5', 'sMS1', 'sMS2', 'sMS4'], Test: ['aMS1', 'sMS5']
--------------------
Fold 3
Train: ['aMS1', 'aMS2', 'aMS4', 'sMS1', 'sMS2', 'sMS4', 'sMS5'], Test: ['aMS3', 'aMS5']
--------------------
Fold 4
Train: ['aMS1', 'aMS2', 'aMS3', 'aMS5', 'sMS1', 'sMS4', 'sMS5'], Test: ['aMS4', 'sMS2']
--------------------
