In [2]:
import torch
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
print(torch.__version__)
print("CUDA available?" if torch.cuda.is_available() else "Using CPU")

2.6.0
Using CPU


In [22]:
# Transform: normalize MNIST to [0,1]
#this next line converts the MNIST images to tensors
#and normalizes them to the range [0, 1]
transform = transforms.ToTensor()

# Download MNIST
#train_data is the training set, test_data is the test set
#both are downloaded from the 'data' directory

train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_data  = datasets.MNIST(root='data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader  = DataLoader(test_data, batch_size=64, shuffle=False)

# Define a simple 2-layer neural net
# this is a feedforward neural network 
model = nn.Sequential(
    nn.Flatten(),# flatten the image to a 1D tensor 28x28 → 784
    nn.Linear(784, 128), # first layer: 784 inputs, 128 outputs
    nn.ReLU(), #activation function that introduces non-linearity
    nn.Linear(128, 10) # second layer: 128 inputs, 10 outputs (one for each digit 0-9)
)

# Loss and optimizer
loss_fn = nn.CrossEntropyLoss() #specifies the loss function 
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) #defines the optimizer to update the model's parameters

# Training loop
#makes 5 full passes over the training data
#for each epoch, it iterates over the training data, where X are images and y are labels
# loss computes the loss between the predicted outputs and the true labels
# optimizer.zero_grad() resets the gradients to zero before backpropagation
# loss.backward() computes the gradients of the loss with respect to the model parameters
# optimizer.step() updates the model parameters based on the computed gradients
for epoch in range(5):
    for X, y in train_loader:
        pred = model(X)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f"Epoch {epoch+1}: loss = {loss.item():.4f}")

Epoch 1: loss = 0.0884
Epoch 2: loss = 0.0553
Epoch 3: loss = 0.0955
Epoch 4: loss = 0.0547
Epoch 5: loss = 0.1572


In [23]:
correct = 0
total = 0

with torch.no_grad():
    for X, y in test_loader:
        pred = model(X)
        predicted = pred.argmax(1)
        correct += (predicted == y).sum().item()
        total += y.size(0)

print(f"Test Accuracy: {correct / total:.2%}")

Test Accuracy: 97.33%


In [24]:
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader



data_df = pd.read_csv('data/embeddings/mutation_embeddings_esm2_t30_150M_UR50D.csv', index_col=0)
X = data_df.drop(columns=['ID', 'Effect'])
y = data_df['Effect']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(X_train.shape, X_test.shape)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)  # or torch.long for classification
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)    # or torch.long for classification


train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=64, shuffle=False)

(856, 640) (215, 640)


  data_df = pd.read_csv('data/embeddings/mutation_embeddings_esm2_t30_150M_UR50D.csv', index_col=0)


In [33]:
np.shape(X)

torch.Size([23, 640])

In [None]:
model = nn.Sequential(
    nn.Linear(640, 128), # first layer: 640 inputs, 128 outputs
    nn.ReLU(), #activation function that introduces non-linearity
    nn.Linear(128, 1) # second layer: 128 inputs, 1 output
)

# Loss and optimizer
loss_fn = nn.MSELoss() #specifies the loss function 
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) #defines the optimizer to update the model's parameters

# Training loop
#makes 5 full passes over the training data
#for each epoch, it iterates over the training data, where X are images and y are labels
# loss computes the loss between the predicted outputs and the true labels
# optimizer.zero_grad() resets the gradients to zero before backpropagation
# loss.backward() computes the gradients of the loss with respect to the model parameters
# optimizer.step() updates the model parameters based on the computed gradients
mse = 0
total = 0

y_true = []
y_pred = []
for epoch in range(5):
    for X, y in train_loader:
        pred = model(X)
        loss = loss_fn(pred, y)
    
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        pred = pred.detach()  # shape: [batch_size]
        mse += ((pred - y) ** 2).sum().item()
        total += y.size(0)
        y_true.extend(y.cpu().numpy())
        y_pred.extend(pred.cpu().numpy())

    
    print(f"Epoch {epoch+1}: loss = {loss.item():.4f}")

# Calculate R²
y_true = np.array(y_true)
y_pred = np.array(y_pred)
ss_res = ((y_true - y_pred) ** 2).sum()
ss_tot = ((y_true - y_true.mean()) ** 2).sum()
r2 = 1 - ss_res / ss_tot
print(f"Test R²: {r2:.4f}")

Epoch 1: loss = 0.4280
Epoch 2: loss = 0.2769
Epoch 3: loss = 0.2081
Epoch 4: loss = 0.1946
Epoch 5: loss = 0.3454
Test R²: -6330.9409


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


In [30]:
import numpy as np
mse = 0
total = 0
y_true = []
y_pred = []

with torch.no_grad():
    for X, y in test_loader:
        pred = model(X).squeeze()
        mse += ((pred - y) ** 2).sum().item()
        total += y.size(0)
        y_true.extend(y.cpu().numpy())
        y_pred.extend(pred.cpu().numpy())

print(f"Test MSE: {mse / total:.4f}")

# Calculate R²
y_true = np.array(y_true)
y_pred = np.array(y_pred)
ss_res = ((y_true - y_pred) ** 2).sum()
ss_tot = ((y_true - y_true.mean()) ** 2).sum()
r2 = 1 - ss_res / ss_tot
print(f"Test R²: {r2:.4f}")

Test MSE: 0.2233
Test R²: -0.0603


In [34]:
import torch.nn as nn

class Simple1DCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(1, 32, kernel_size=5, padding=2),  # 1 input channel, 32 output channels
            nn.ReLU(),
            nn.MaxPool1d(2),  # Downsample by 2 (output: [batch, 32, 320])
            nn.Conv1d(32, 64, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool1d(2)   # Downsample by 2 (output: [batch, 64, 160])
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 160, 128),
            nn.ReLU(),
            nn.Linear(128, 1)  # For regression
        )

    def forward(self, x):
        x = x.unsqueeze(1)  # Add channel dimension: [batch, 1, 640]
        x = self.conv(x)
        x = self.fc(x)
        return x

model = Simple1DCNN()


# Loss and optimizer
loss_fn = nn.MSELoss() #specifies the loss function 
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) #defines the optimizer to update the model's parameters

# Training loop
#makes 5 full passes over the training data
#for each epoch, it iterates over the training data, where X are images and y are labels
# loss computes the loss between the predicted outputs and the true labels
# optimizer.zero_grad() resets the gradients to zero before backpropagation
# loss.backward() computes the gradients of the loss with respect to the model parameters
# optimizer.step() updates the model parameters based on the computed gradients
mse = 0
total = 0

y_true = []
y_pred = []
for epoch in range(5):
    for X, y in train_loader:
        pred = model(X)
        loss = loss_fn(pred, y)
    
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f"Epoch {epoch+1}: loss = {loss.item():.4f}")

import numpy as np
mse = 0
total = 0
y_true = []
y_pred = []

with torch.no_grad():
    for X, y in test_loader:
        pred = model(X).squeeze()
        mse += ((pred - y) ** 2).sum().item()
        total += y.size(0)
        y_true.extend(y.cpu().numpy())
        y_pred.extend(pred.cpu().numpy())

print(f"Test MSE: {mse / total:.4f}")

# Calculate R²
y_true = np.array(y_true)
y_pred = np.array(y_pred)
ss_res = ((y_true - y_pred) ** 2).sum()
ss_tot = ((y_true - y_true.mean()) ** 2).sum()
r2 = 1 - ss_res / ss_tot
print(f"Test R²: {r2:.4f}")


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1: loss = 0.3057
Epoch 2: loss = 0.1672
Epoch 3: loss = 0.2254
Epoch 4: loss = 0.2913
Epoch 5: loss = 0.1556
Test MSE: 0.2212
Test R²: -0.0502


In [37]:

import torch.nn as nn

class DenseRegressor(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(640, 256),   # First hidden layer
            nn.ReLU(),
            nn.Linear(256, 128),   # Second hidden layer
            nn.ReLU(),
            nn.Linear(128, 64),    # Third hidden layer
            nn.ReLU(),
            nn.Linear(64, 1)       # Output layer for regression
        )

    def forward(self, x):
        return self.net(x)

model = DenseRegressor()


# Loss and optimizer
loss_fn = nn.MSELoss() #specifies the loss function 
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4) #defines the optimizer to update the model's parameters

# Training loop
#makes 5 full passes over the training data
#for each epoch, it iterates over the training data, where X are images and y are labels
# loss computes the loss between the predicted outputs and the true labels
# optimizer.zero_grad() resets the gradients to zero before backpropagation
# loss.backward() computes the gradients of the loss with respect to the model parameters
# optimizer.step() updates the model parameters based on the computed gradients
mse = 0
total = 0

y_true = []
y_pred = []
for epoch in range(20):
    for X, y in train_loader:
        pred = model(X)
        loss = loss_fn(pred, y)
    
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f"Epoch {epoch+1}: loss = {loss.item():.4f}")

import numpy as np
mse = 0
total = 0
y_true = []
y_pred = []

with torch.no_grad():
    for X, y in test_loader:
        pred = model(X).squeeze()
        mse += ((pred - y) ** 2).sum().item()
        total += y.size(0)
        y_true.extend(y.cpu().numpy())
        y_pred.extend(pred.cpu().numpy())

print(f"Test MSE: {mse / total:.4f}")

# Calculate R²
y_true = np.array(y_true)
y_pred = np.array(y_pred)
ss_res = ((y_true - y_pred) ** 2).sum()
ss_tot = ((y_true - y_true.mean()) ** 2).sum()
r2 = 1 - ss_res / ss_tot
print(f"Test R²: {r2:.4f}")




  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1: loss = 0.3006
Epoch 2: loss = 0.3010
Epoch 3: loss = 0.4072
Epoch 4: loss = 0.2466
Epoch 5: loss = 0.2265
Epoch 6: loss = 0.1589
Epoch 7: loss = 0.2211
Epoch 8: loss = 0.1623
Epoch 9: loss = 0.1887
Epoch 10: loss = 0.1333
Epoch 11: loss = 0.3216
Epoch 12: loss = 0.1600
Epoch 13: loss = 0.1798
Epoch 14: loss = 0.3246
Epoch 15: loss = 0.3052
Epoch 16: loss = 0.1264
Epoch 17: loss = 0.1255
Epoch 18: loss = 0.1166
Epoch 19: loss = 0.2537
Epoch 20: loss = 0.5330
Test MSE: 0.2190
Test R²: -0.0400


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

# =========================
# Load and preprocess data
# =========================
data_df = pd.read_csv('data/embeddings/mutation_embeddings_esm2_t30_150M_UR50D.csv', index_col=0)
X = data_df.drop(columns=['ID', 'Effect'])
y = data_df['Effect']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# =========================
# Model definition
# =========================
class DenseRegressor(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(128, 1)
        )
        # Weight initialization
        self.net.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
            if m.bias is not None:
                nn.init.zeros_(m.bias)

    def forward(self, x):
        return self.net(x)

# Instantiate model
input_dim = X_train_tensor.shape[1]
model = DenseRegressor(input_dim).to(device)

# =========================
# Loss and optimizer
# =========================
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# =========================
# Training loop
# =========================
epochs = 100
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        preds = model(X_batch)
        loss = loss_fn(preds, y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * X_batch.size(0)
    
    epoch_loss = running_loss / len(train_loader.dataset)

    # ===== Validation Evaluation =====
    model.eval()
    val_loss = 0.0
    y_true_val, y_pred_val = [], []
    with torch.no_grad():
        for X_val, y_val in test_loader:
            X_val, y_val = X_val.to(device), y_val.to(device)
            outputs = model(X_val)
            val_loss += loss_fn(outputs, y_val).item() * X_val.size(0)
            y_true_val.extend(y_val.cpu().numpy())
            y_pred_val.extend(outputs.cpu().numpy())
    val_loss /= len(test_loader.dataset)

    y_true_val = np.array(y_true_val)
    y_pred_val = np.array(y_pred_val)
    ss_res_val = ((y_true_val - y_pred_val) ** 2).sum()
    ss_tot_val = ((y_true_val - y_true_val.mean()) ** 2).sum()
    r2_val = 1 - ss_res_val / ss_tot_val

    # ===== Train Evaluation (R²) =====
    y_true_train, y_pred_train = [], []
    with torch.no_grad():
        for X_tr, y_tr in train_loader:
            X_tr, y_tr = X_tr.to(device), y_tr.to(device)
            outputs = model(X_tr)
            y_true_train.extend(y_tr.cpu().numpy())
            y_pred_train.extend(outputs.cpu().numpy())
    y_true_train = np.array(y_true_train)
    y_pred_train = np.array(y_pred_train)
    ss_res_train = ((y_true_train - y_pred_train) ** 2).sum()
    ss_tot_train = ((y_true_train - y_true_train.mean()) ** 2).sum()
    r2_train = 1 - ss_res_train / ss_tot_train

    print(f"Epoch {epoch+1}/{epochs} - "
          f"Train Loss: {epoch_loss:.4f} | "
          f"Train R²: {r2_train:.4f} | "
          f"Val Loss: {val_loss:.4f} | "
          f"Val R²: {r2_val:.4f}")
# =========================
# Final test evaluation
# =========================
print("Training complete.")


  data_df = pd.read_csv('data/embeddings/mutation_embeddings_esm2_t30_150M_UR50D.csv', index_col=0)


Using device: cpu
Epoch 1/100 - Train Loss: 2.2595 | Train R²: -3.4957 | Val Loss: 0.9512 | Val R²: -3.5170
Epoch 2/100 - Train Loss: 1.0221 | Train R²: -2.0333 | Val Loss: 0.7373 | Val R²: -2.5015
Epoch 3/100 - Train Loss: 0.7529 | Train R²: -0.4448 | Val Loss: 0.3105 | Val R²: -0.4745
Epoch 4/100 - Train Loss: 0.6970 | Train R²: -0.3159 | Val Loss: 0.3215 | Val R²: -0.5265
Epoch 5/100 - Train Loss: 0.6292 | Train R²: 0.0469 | Val Loss: 0.2071 | Val R²: 0.0164
Epoch 6/100 - Train Loss: 0.6186 | Train R²: 0.0861 | Val Loss: 0.2245 | Val R²: -0.0661
Epoch 7/100 - Train Loss: 0.5155 | Train R²: 0.1368 | Val Loss: 0.2397 | Val R²: -0.1381
Epoch 8/100 - Train Loss: 0.4561 | Train R²: 0.3445 | Val Loss: 0.1982 | Val R²: 0.0589
Epoch 9/100 - Train Loss: 0.4493 | Train R²: 0.2619 | Val Loss: 0.2077 | Val R²: 0.0137
Epoch 10/100 - Train Loss: 0.4498 | Train R²: 0.3812 | Val Loss: 0.1886 | Val R²: 0.1045
Epoch 11/100 - Train Loss: 0.4121 | Train R²: 0.3180 | Val Loss: 0.1872 | Val R²: 0.1109
Ep