In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import depthcharge as dc
import numpy as np
from depthcharge.encoders import PeakEncoder
from depthcharge.data import SpectrumDataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Raw mzML run to load; the path is relative to the notebook's working directory.
mzml = "..//08CPTAC_C_GBM_W_PNNL_20210830_B2S3_f20.mzML"

# batch_size matches the spectrum count in the progress bar printed by this
# cell (54852), so iterating `dataset` presumably yields the whole run as a
# single batch -- TODO confirm against the depthcharge version in use.
dataset = SpectrumDataset(mzml, batch_size=54_852)

# Peak encoder constructed with 100 as its only argument
# (presumably the embedding width -- verify against the PeakEncoder signature).
encoder = PeakEncoder(100)

08CPTAC_C_GBM_W_PNNL_20210830_B2S3_f20.mzML: 100%|██████████| 54852/54852 [01:02<00:00, 880.26 spectra/s] 


In [3]:
# Embed every batch yielded by `dataset` and keep ALL of them.
#
# The previous version re-assigned `embeddings` on each iteration, so only the
# last batch survived; that was only correct while the dataset yielded exactly
# one batch. Batches are now collected and joined along the first axis.
embedded_batches = []

# Inference only: no_grad avoids recording an autograd graph for the whole
# dataset, so the results carry no gradient history (as `.detach()` did before).
with torch.no_grad():
    for spectrum in dataset:
        mz_values = spectrum["mz_array"]
        intensities = spectrum["intensity_array"]
        # Pair each m/z with its intensity on a new axis inserted at index 2
        # (assumes both arrays are 2-D, batch x peaks -- TODO confirm).
        stack = torch.stack((mz_values, intensities), dim=2)
        # Call the module itself rather than .forward() so registered hooks run.
        embedded_batch = encoder(stack)
        embedded_batches.append(embedded_batch)

if not embedded_batches:
    # Nothing was yielded; keep the original "no data" sentinel.
    embeddings = None
elif len(embedded_batches) == 1:
    # Single batch: reuse the tensor directly to avoid copying a large array.
    embeddings = embedded_batches[0]
else:
    # NOTE(review): torch.cat requires every batch to agree on all axes except
    # the first. If batches are padded to different peak counts this raises
    # instead of silently dropping data -- pad to a common length if that occurs.
    embeddings = torch.cat(embedded_batches, dim=0)

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

# 1. Data Preparation
# Features: the tensor produced by the embedding cell. Its first axis indexes
# samples, which is what TensorDataset pairs with the labels below.
data = embeddings
if data is None:
    raise ValueError("`embeddings` is None; run the embedding cell before this one.")

# Placeholder labels: one random 0/1 class per sample (replace with real labels).
# The count is taken from the data instead of being hard-coded, so the features
# and labels always line up and TensorDataset's size check cannot fail.
np.random.seed(42)
num_samples = data.shape[0]
labels = np.random.randint(0, 2, num_samples)

# Convert labels to a PyTorch tensor (int64, as CrossEntropyLoss expects class indices)
labels = torch.from_numpy(labels).long()

# Create TensorDataset and DataLoader.
# A distinct name is used so the spectrum `dataset` built earlier in the
# notebook is not overwritten; the embedding cell can still be re-run.
labeled_dataset = TensorDataset(data, labels)
batch_size = 64  # Adjust as needed
dataloader = DataLoader(labeled_dataset, batch_size=batch_size, shuffle=True)

# 2. Model Definition (CNN approach - recommended)
class Classifier(nn.Module):
    """Small two-stage CNN that maps a 2-D feature map to class logits.

    Each stage is a 3x3 convolution (padding 1, so spatial size is kept),
    a ReLU, and a 2x2 max-pool (which halves each spatial axis, rounding
    down). The pooled maps are flattened and fed to one linear layer.

    Args:
        input_shape: ``(height, width)`` of a single sample, excluding batch
            and channel axes. The default ``(200, 100)`` yields the original
            ``64 * 50 * 25`` flattened feature size.
        num_classes: Number of output logits. Defaults to 2.

    Raises:
        ValueError: If ``input_shape`` is too small to survive both pooling
            stages, or if ``num_classes`` is less than 1.
    """

    def __init__(self, input_shape=(200, 100), num_classes: int = 2):
        super().__init__()
        height, width = input_shape
        # Two 2x2 max-pools: each floors the axis length to half.
        pooled_height = height // 2 // 2
        pooled_width = width // 2 // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"input_shape {tuple(input_shape)} is too small: each axis must be "
                "at least 4 to survive two 2x2 poolings"
            )
        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")

        # Single input channel: forward() adds it when the input is (N, H, W).
        self.conv1 = nn.Conv2d(1, 32, kernel_size=(3, 3), padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=(2, 2))
        self.conv2 = nn.Conv2d(32, 64, kernel_size=(3, 3), padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=(2, 2))
        self.flatten = nn.Flatten(1)
        # 64 channels from conv2 times the pooled spatial extent.
        self.fc = nn.Linear(64 * pooled_height * pooled_width, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return logits of shape ``(N, num_classes)``.

        Accepts ``(N, H, W)`` or ``(N, 1, H, W)``; a 3-D input gets a channel
        axis inserted at position 1.
        """
        # Add channel dimension if it's missing (N, H, W) -> (N, C, H, W)
        if x.dim() == 3:
            x = x.unsqueeze(1)
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.flatten(x)
        x = self.fc(x)
        return x

model = Classifier()

# 3. Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 4. Training Loop
num_epochs = 10
model.train()  # Explicit, so re-running after an evaluation pass trains in train mode
for epoch in range(num_epochs):
    running_loss = 0.0  # Sum of per-sample losses over the epoch
    samples_seen = 0
    # Batch-local names keep the full `labels` tensor from being overwritten.
    for batch_inputs, batch_labels in dataloader:
        optimizer.zero_grad()
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()
        # CrossEntropyLoss averages over the batch; re-weight by batch size so
        # a smaller final batch does not skew the epoch mean.
        batch_count = batch_labels.size(0)
        running_loss += loss.item() * batch_count
        samples_seen += batch_count
    # Report the epoch mean rather than the last mini-batch's loss, and avoid
    # referencing an undefined `loss` when the loader yields nothing.
    epoch_loss = running_loss / samples_seen if samples_seen else float("nan")
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

# 5. Evaluation (example)
# NOTE: this scores the model on the same loader it was trained on, so the
# number is training accuracy. Use a held-out loader for a real estimate.
model.eval()  # Set the model to evaluation mode
correct = 0
total = 0
with torch.no_grad():
    # Batch-local names so the full `labels` tensor is not overwritten.
    for batch_inputs, batch_labels in dataloader:
        # Predicted class = index of the largest logit in each row.
        predicted = model(batch_inputs).argmax(dim=1)
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()

if total:
    print(f'Accuracy: {100 * correct / total:.2f}%')
else:
    # An empty loader would otherwise raise ZeroDivisionError.
    print('Accuracy: n/a (no samples evaluated)')

Epoch [1/10], Loss: 0.6932


KeyboardInterrupt: 