In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [2]:
# Load the data
df1 = pd.read_excel(
    "/home/gddaslab/mxp140/sclerosis_project/miRNA_signal_hsa_number2.xlsx",
    engine="openpyxl",
    sheet_name="Sheet1",
)

# Drop non-feature columns
df = df1.drop(columns=["ID", "Transcript_ID"])
df = df.iloc[:, 10:]

# Label the columns based on their types
labels = {"aHC": 0, "sMS": 1, "aMS": 2, "aPOMS": 3, "sPOMS": 4, "pBar": 5}

# Create target labels for each column
y = []
for col in df.columns:
    for key in labels.keys():
        if col.startswith(key):
            y.append(labels[key])
            break

In [3]:
# Convert DataFrame to tensor
X = df.T.values
y = y

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [4]:
# Standardize features (optional but recommended)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

In [6]:
class SoftmaxRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SoftmaxRegression, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        # print(list(x[0]))
        return self.linear(x)


class ElasticNetLoss(nn.Module):
    def __init__(self, model, alpha=1.0, l1_ratio=0.5):
        super(ElasticNetLoss, self).__init__()
        self.model = model
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.cross_entropy_loss = nn.CrossEntropyLoss()

    def forward(self, outputs, targets):
        ce_loss = self.cross_entropy_loss(outputs, targets)
        l1_norm = sum(param.abs().sum() for param in self.model.parameters())
        l2_norm = sum(param.pow(2).sum() for param in self.model.parameters())
        elastic_net_penalty = self.alpha * (
            self.l1_ratio * l1_norm + (1 - self.l1_ratio) * l2_norm
        )
        return ce_loss + elastic_net_penalty


# Instantiate the model
input_dim = X_train.shape[1]
output_dim = len(torch.unique(y_train_tensor))
model = SoftmaxRegression(input_dim, output_dim)

In [7]:
model

SoftmaxRegression(
  (linear): Linear(in_features=4570, out_features=6, bias=True)
)

In [8]:
# Define loss function and optimizer
criterion = ElasticNetLoss(model, alpha=0.01, l1_ratio=0.5)
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [9]:
# Training loop
num_epochs = 1000
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode

    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [10/1000], Loss: 1.0313
Epoch [20/1000], Loss: 0.9456
Epoch [30/1000], Loss: 0.8783
Epoch [40/1000], Loss: 0.8167
Epoch [50/1000], Loss: 0.7589
Epoch [60/1000], Loss: 0.7044
Epoch [70/1000], Loss: 0.6531
Epoch [80/1000], Loss: 0.6049
Epoch [90/1000], Loss: 0.5596
Epoch [100/1000], Loss: 0.5173
Epoch [110/1000], Loss: 0.4782
Epoch [120/1000], Loss: 0.4422
Epoch [130/1000], Loss: 0.4094
Epoch [140/1000], Loss: 0.3800
Epoch [150/1000], Loss: 0.3538
Epoch [160/1000], Loss: 0.3307
Epoch [170/1000], Loss: 0.3105
Epoch [180/1000], Loss: 0.2930
Epoch [190/1000], Loss: 0.2780
Epoch [200/1000], Loss: 0.2651
Epoch [210/1000], Loss: 0.2541
Epoch [220/1000], Loss: 0.2447
Epoch [230/1000], Loss: 0.2366
Epoch [240/1000], Loss: 0.2297
Epoch [250/1000], Loss: 0.2236
Epoch [260/1000], Loss: 0.2183
Epoch [270/1000], Loss: 0.2136
Epoch [280/1000], Loss: 0.2095
Epoch [290/1000], Loss: 0.2058
Epoch [300/1000], Loss: 0.2025
Epoch [310/1000], Loss: 0.1995
Epoch [320/1000], Loss: 0.1968
Epoch [330/1000],

In [10]:
# Save the model
torch.save(model.state_dict(), "softmax_classifier_wo_pHC.pth")

In [12]:
# Evaluation
with torch.no_grad():
    model.eval()
    test_outputs = model(X_test_tensor)
    _, y_pred_tensor = torch.max(test_outputs, 1)
    y_pred = y_pred_tensor.numpy()

    # Calculate accuracyI have
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.2f}")

    print(
        torch.max(
            model(
                torch.tensor(
                    scaler.fit_transform(X[0].reshape(1, -1)), dtype=torch.float32
                )
            ),
            1,
        )
    )

Accuracy: 0.57
torch.return_types.max(
values=tensor([5.3495e-05]),
indices=tensor([3]))


In [1]:
from sklearn.model_selection import KFold
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [2]:
class SoftmaxRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SoftmaxRegression, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.linear(x)


class ElasticNetLoss(nn.Module):
    def __init__(self, model, alpha=1.0, l1_ratio=0.5):
        super(ElasticNetLoss, self).__init__()
        self.model = model
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.cross_entropy_loss = nn.CrossEntropyLoss()

    def forward(self, outputs, targets):
        ce_loss = self.cross_entropy_loss(outputs, targets)
        l1_norm = sum(param.abs().sum() for param in self.model.parameters())
        l2_norm = sum(param.pow(2).sum() for param in self.model.parameters())
        elastic_net_penalty = self.alpha * (
            self.l1_ratio * l1_norm + (1 - self.l1_ratio) * l2_norm
        )
        return ce_loss + elastic_net_penalty

In [5]:
# Load the data
df1 = pd.read_excel(
    "/home/gddaslab/mxp140/sclerosis_project/miRNA_signal_hsa_number2.xlsx",
    engine="openpyxl",
    sheet_name="Sheet1",
)

# Drop non-feature columns
df = df1.drop(columns=["ID", "Transcript_ID"])
df = df.iloc[:, 10:]

# Label the columns based on their types
labels = {"aHC": 0, "sMS": 1, "aMS": 2, "aPOMS": 3, "sPOMS": 4, "pBar": 5}

# Create target labels for each column
y = []
for col in df.columns:
    for key in labels.keys():
        if col.startswith(key):
            y.append(labels[key])
            break

In [3]:
# Define the number of folds for the cross-validation
n_folds = 5
kfold = KFold(n_splits=n_folds, shuffle=True, random_state=42)

In [4]:
# Initialize variables to store the best model and its accuracy
best_model = None
best_accuracy = 0.0

In [7]:
# Convert DataFrame to tensor
X = df.T.values
y = y

# Convert the entire dataset to PyTorch tensors
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.long)

In [None]:
# Cross-validation loop
for fold, (train_ids, val_ids) in enumerate(kfold.split(X_tensor)):
    print(f"Fold {fold+1}/{n_folds}")

    # Split the data into training and validation sets
    X_train, X_val = X_tensor[train_ids], X_tensor[val_ids]
    y_train, y_val = y_tensor[train_ids], y_tensor[val_ids]

    # Instantiate the model
    model = SoftmaxRegression(input_dim, output_dim)

    # Define loss function and optimizer
    criterion = ElasticNetLoss(model, alpha=0.01, l1_ratio=0.5)
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # Training loop
    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode

        # Forward pass
        outputs = model(X_train)
        loss = criterion(outputs, y_train)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

    # Evaluation
    with torch.no_grad():
        model.eval()
        val_outputs = model(X_val)
        _, y_pred_tensor = torch.max(val_outputs, 1)
        y_pred = y_pred_tensor.numpy()
        y_true = y_val.numpy()

        # Calculate accuracy
        accuracy = accuracy_score(y_true, y_pred)
        print(f"Validation Accuracy: {accuracy:.2f}")

        # If this model is better than the previous best, update the best model and accuracy
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = model

# Save the best model
torch.save(best_model.state_dict(), "softmax_classifier_best.pth")
print(f"Best Validation Accuracy: {best_accuracy:.2f}")