In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [20]:
# Load the data
df1 = pd.read_excel(
    "/home/gddaslab/mxp140/sclerosis_project/miRNA_signal_hsa_number2.xlsx",
    engine="openpyxl",
    sheet_name="Sheet1",
)

# Drop non-feature columns
df = df1.drop(columns=["ID", "Transcript_ID"])

# Label the columns based on their types
labels = {"pHC": 0, "aHC": 1, "sMS": 2, "aMS": 3, "aPOMS": 4, "sPOMS": 5, "pBar": 6}
labels2 = {"aHC": 0, "sMS": 1, "aMS": 2, "aPOMS": 3, "sPOMS": 4, "pBar": 5}

# Create target labels for each column
y = []
for col in df.columns:
    for key in labels.keys():
        if col.startswith(key):
            y.append(labels[key])
            break
y2 = []
for col in df2.columns:
    for key in labels2.keys():
        if col.startswith(key):
            y2.append(labels[key])
            break

In [21]:
# Convert DataFrame to tensor
X = df.T.values
y = y

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert DataFrame to tensor
X2 = df2.T.values
y2 = y2

# Split the data into training and testing sets
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2, y2, test_size=0.2, random_state=42
)

In [22]:
# Standardize features (optional but recommended)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

X2_train = scaler.fit_transform(X2_train)
X2_test = scaler.transform(X2_test)

In [23]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

X2_train_tensor = torch.tensor(X2_train, dtype=torch.float32)
y2_train_tensor = torch.tensor(y2_train, dtype=torch.long)
X2_test_tensor = torch.tensor(X2_test, dtype=torch.float32)

In [24]:
class SoftmaxRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SoftmaxRegression, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        # print(list(x[0]))
        return self.linear(x)


class ElasticNetLoss(nn.Module):
    def __init__(self, model, alpha=1.0, l1_ratio=0.5):
        super(ElasticNetLoss, self).__init__()
        self.model = model
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.cross_entropy_loss = nn.CrossEntropyLoss()

    def forward(self, outputs, targets):
        ce_loss = self.cross_entropy_loss(outputs, targets)
        l1_norm = sum(param.abs().sum() for param in self.model.parameters())
        l2_norm = sum(param.pow(2).sum() for param in self.model.parameters())
        elastic_net_penalty = self.alpha * (
            self.l1_ratio * l1_norm + (1 - self.l1_ratio) * l2_norm
        )
        return ce_loss + elastic_net_penalty


# Instantiate the model
input_dim = X_train.shape[1]
output_dim = len(torch.unique(y_train_tensor))
model = SoftmaxRegression(input_dim, output_dim)

model2 = SoftmaxRegression(X2_train.shape[1], len(torch.unique(y2_train_tensor)))

In [25]:
model

SoftmaxRegression(
  (linear): Linear(in_features=4570, out_features=7, bias=True)
)

In [26]:
model2

SoftmaxRegression(
  (linear): Linear(in_features=4570, out_features=6, bias=True)
)

In [27]:
# Define loss function and optimizer
criterion = ElasticNetLoss(model, alpha=0.01, l1_ratio=0.5)
optimizer = optim.SGD(model.parameters(), lr=0.01)

criterion2 = ElasticNetLoss(model2, alpha=0.01, l1_ratio=0.5)
optimizer2 = optim.SGD(model2.parameters(), lr=0.01)

In [28]:
# Training loop
num_epochs = 1000
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode

    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [10/1000], Loss: 1.2120
Epoch [20/1000], Loss: 1.1049
Epoch [30/1000], Loss: 1.0250
Epoch [40/1000], Loss: 0.9529
Epoch [50/1000], Loss: 0.8856
Epoch [60/1000], Loss: 0.8221
Epoch [70/1000], Loss: 0.7621
Epoch [80/1000], Loss: 0.7057
Epoch [90/1000], Loss: 0.6527
Epoch [100/1000], Loss: 0.6035
Epoch [110/1000], Loss: 0.5578
Epoch [120/1000], Loss: 0.5159
Epoch [130/1000], Loss: 0.4777
Epoch [140/1000], Loss: 0.4432
Epoch [150/1000], Loss: 0.4123
Epoch [160/1000], Loss: 0.3853
Epoch [170/1000], Loss: 0.3619
Epoch [180/1000], Loss: 0.3417
Epoch [190/1000], Loss: 0.3244
Epoch [200/1000], Loss: 0.3097
Epoch [210/1000], Loss: 0.2971
Epoch [220/1000], Loss: 0.2863
Epoch [230/1000], Loss: 0.2770
Epoch [240/1000], Loss: 0.2689
Epoch [250/1000], Loss: 0.2619
Epoch [260/1000], Loss: 0.2556
Epoch [270/1000], Loss: 0.2501
Epoch [280/1000], Loss: 0.2451
Epoch [290/1000], Loss: 0.2408
Epoch [300/1000], Loss: 0.2369
Epoch [310/1000], Loss: 0.2334
Epoch [320/1000], Loss: 0.2302
Epoch [330/1000],

In [29]:
# Training loop
num_epochs = 1000
for epoch in range(num_epochs):
    model2.train()  # Set the model to training mode

    # Forward pass
    outputs = model2(X2_train_tensor)
    loss = criterion2(outputs, y2_train_tensor)

    # Backward pass and optimization
    optimizer2.zero_grad()
    loss.backward()
    optimizer2.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

IndexError: Target 6 is out of bounds.

In [10]:
# Save the model
torch.save(model.state_dict(), "softmax_classifier.pth")

In [None]:
# Load the model
model = SoftmaxRegression(input_dim, output_dim)
model.load_state_dict(torch.load("softmax_classifier.pth"))
model.eval()  # Set the model to evaluation mode

In [47]:
parameters = []
for param in model.parameters():
    print(param)
    print(param.shape)
    parameters.append(param)

Parameter containing:
tensor([[-5.9147e-07, -8.2708e-06,  1.6829e-05,  ...,  1.0195e-05,
          2.3074e-05,  1.2896e-06],
        [ 1.5721e-05,  1.2976e-06,  2.9104e-05,  ..., -1.3274e-05,
         -1.6655e-05,  6.5924e-06],
        [-4.1872e-05, -3.1389e-05,  2.0746e-05,  ...,  4.4030e-05,
          3.9371e-05, -5.4530e-06],
        ...,
        [ 2.8874e-05,  1.0378e-05, -7.1626e-07,  ..., -4.7887e-05,
          3.1346e-07,  1.8096e-02],
        [-5.7171e-05,  8.1294e-05,  3.5546e-06,  ...,  2.4974e-02,
          3.0396e-05,  8.0215e-06],
        [ 7.4309e-05, -2.3077e-06, -2.1007e-05,  ..., -1.5852e-05,
         -4.7192e-05,  1.6979e-05]], requires_grad=True)
torch.Size([7, 4570])
Parameter containing:
tensor([ 6.4980e-05, -4.8892e-05,  1.9404e-05, -1.0488e-05,  1.6083e-05,
        -6.2903e-05,  4.5516e-07], requires_grad=True)
torch.Size([7])


In [48]:
# Evaluation
with torch.no_grad():
    model.eval()
    test_outputs = model(X_test_tensor)
    _, y_pred_tensor = torch.max(test_outputs, 1)
    y_pred = y_pred_tensor.numpy()

    # Calculate accuracyI have
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.2f}")

    print(
        torch.max(
            model(
                torch.tensor(
                    scaler.fit_transform(X[0].reshape(1, -1)), dtype=torch.float32
                )
            ),
            1,
        )
    )
    weights = parameters[0]

Accuracy: 0.78
torch.return_types.max(
values=tensor([6.4980e-05]),
indices=tensor([0]))
tensor([-5.9147e-07, -8.2708e-06,  1.6829e-05, -1.1646e-05,  3.0629e-06],
       requires_grad=True)
tensor([ 1.5721e-05,  1.2976e-06,  2.9104e-05, -3.3720e-05, -8.7782e-06],
       requires_grad=True)
tensor([-4.1872e-05, -3.1389e-05,  2.0746e-05,  5.5800e-06,  1.0061e-05],
       requires_grad=True)
tensor([4.9272e-06, 2.3955e-05, 5.7531e-05, 2.2516e-05, 1.4948e-02],
       requires_grad=True)
tensor([ 2.8874e-05,  1.0378e-05, -7.1626e-07, -1.0170e-06, -2.3070e-05],
       requires_grad=True)
tensor([-5.7171e-05,  8.1294e-05,  3.5546e-06,  2.6611e-06, -3.0562e-05],
       requires_grad=True)
tensor([ 7.4309e-05, -2.3077e-06, -2.1007e-05, -3.4664e-06,  5.3982e-06],
       requires_grad=True)


In [51]:
list(df.columns)

['pHC1',
 'pHC2',
 'pHC3',
 'pHC4',
 'pHC5',
 'pHC6',
 'pHC7',
 'pHC8',
 'pHC9',
 'pHC10',
 'aHC1',
 'aHC2',
 'aHC3',
 'aHC4',
 'aHC5',
 'sMS1',
 'sMS2',
 'sMS4',
 'sMS5',
 'aMS1',
 'aMS2',
 'aMS3',
 'aMS4',
 'aMS5',
 'aPOMS1',
 'aPOMS2',
 'aPOMS3',
 'aPOMS4',
 'aPOMS5',
 'aPOMS6',
 'aPOMS7',
 'aPOMS9',
 'sPOMS1',
 'sPOMS2',
 'sPOMS3',
 'pBar2',
 'pBar3',
 'pBar5',
 'pBar7',
 'pBar8',
 'pBar9']