# Networks on bacteria dataset
Notebook with first NN (LLMs helped) runs.  

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import ramanspy as rp

from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.preprocessing import StandardScaler, RobustScaler

## Chapt GPT base nn.


In [21]:
# Load training and testing datasets
X_train, y_train = rp.datasets.bacteria("train", folder="../data/bacteria/")
X_test, y_test = rp.datasets.bacteria("test", folder="../data/bacteria/")
X_val, y_val = rp.datasets.bacteria("val", folder="../data/bacteria/")

# Convert to numpy arrays for randomization and PyTorch compatibility
X_train = np.array(X_train.spectral_data)
y_train = np.array(y_train)
X_test = np.array(X_test.spectral_data)
y_test = np.array(y_test)
X_val = np.array(X_val.spectral_data)
y_val = np.array(y_val)

# Randomize the training data
np.random.seed(42)  # For reproducibility
train_indices = np.random.permutation(len(X_train))
X_train = X_train[train_indices]
y_train = y_train[train_indices]

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

# Create DataLoader for batching
batch_size = 20
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Define a simple neural network
class SpectraNN(nn.Module):
    def __init__(self):
        super(SpectraNN, self).__init__()
        self.fc1 = nn.Linear(1000, 512)  # Adjust input size to 1000
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(512, 256)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(256, 30)  # Output layer
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x  # Return raw logits


# Initialize the model, loss function, and optimizer
model = SpectraNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 20
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Compute validation loss and accuracy
    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for X_val_batch, y_val_batch in val_loader:
            val_outputs = model(X_val_batch)
            val_loss += criterion(val_outputs, y_val_batch).item()
            y_pred_classes = torch.argmax(val_outputs, axis=1)
            correct += (y_pred_classes == y_val_batch).sum().item()
            total += y_val_batch.size(0)

    val_accuracy = correct / total
    print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {total_loss/len(train_loader):.4f}, "
          f"Val Loss: {val_loss/len(val_loader):.4f}, Val Acc: {val_accuracy:.4f}")


# Evaluate the model
model.eval()
with torch.no_grad():
    X_test_tensor = X_test_tensor
    y_pred = model(X_test_tensor)
    y_pred_classes = torch.argmax(y_pred, axis=1)
    accuracy = (y_pred_classes == y_test_tensor).float().mean()
    print(f"Test Accuracy: {accuracy:.4f}")


Epoch [1/20], Train Loss: 1.1572, Val Loss: 3.7897, Val Acc: 0.3183
Epoch [2/20], Train Loss: 0.5547, Val Loss: 5.1153, Val Acc: 0.2957
Epoch [3/20], Train Loss: 0.4326, Val Loss: 4.0506, Val Acc: 0.4427
Epoch [4/20], Train Loss: 0.3673, Val Loss: 5.3674, Val Acc: 0.3720
Epoch [5/20], Train Loss: 0.3315, Val Loss: 5.9996, Val Acc: 0.3570
Epoch [6/20], Train Loss: 0.3023, Val Loss: 5.7974, Val Acc: 0.4103
Epoch [7/20], Train Loss: 0.2793, Val Loss: 6.4810, Val Acc: 0.3973
Epoch [8/20], Train Loss: 0.2652, Val Loss: 6.3177, Val Acc: 0.3803
Epoch [9/20], Train Loss: 0.2486, Val Loss: 6.5065, Val Acc: 0.3697
Epoch [10/20], Train Loss: 0.2368, Val Loss: 8.3532, Val Acc: 0.3387
Epoch [11/20], Train Loss: 0.2317, Val Loss: 8.2538, Val Acc: 0.3747
Epoch [12/20], Train Loss: 0.2150, Val Loss: 7.6237, Val Acc: 0.3737
Epoch [13/20], Train Loss: 0.2132, Val Loss: 7.1958, Val Acc: 0.4043
Epoch [14/20], Train Loss: 0.1993, Val Loss: 8.2469, Val Acc: 0.3683
Epoch [15/20], Train Loss: 0.1969, Val Loss

## from claude (CNN part)
FCNN vs 1DCNN + FCNN


In [None]:
def augment_spectra(spectra, noise_level=0.02):
    noise = torch.randn_like(spectra) * noise_level
    augmented = spectra + noise
    return augmented

class SpectraFCNN(nn.Module):
    def __init__(self, dropout_rate=0.3):
        super(SpectraFCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Linear(1000, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout_rate)
        )
        self.classifier = nn.Linear(128, 30)
    
    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

class SpectraCNN(nn.Module):
    def __init__(self, dropout_rate=0.3):
        super(SpectraCNN, self).__init__()
        
        self.conv_layers = nn.Sequential(
            nn.Conv1d(1, 32, kernel_size=9, padding=4),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Dropout(dropout_rate),
            
            nn.Conv1d(32, 64, kernel_size=7, padding=3),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Dropout(dropout_rate),
            
            nn.Conv1d(64, 128, kernel_size=5, padding=2),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Dropout(dropout_rate)
        )
        
        self.flatten_size = 128 * 125
        
        self.fc_layers = nn.Sequential(
            nn.Linear(self.flatten_size, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout_rate)
        )
        
        self.classifier = nn.Linear(128, 30)
    
    def forward(self, x):
        x = x.unsqueeze(1)
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)
        x = self.fc_layers(x)
        x = self.classifier(x)
        return x

def train_model(model_type="cnn", batch_size=50, epochs=50, learning_rate=0.001):
    X_train, y_train = rp.datasets.bacteria("train", folder="../data/bacteria/")
    X_test, y_test = rp.datasets.bacteria("test", folder="../data/bacteria/")
    X_val, y_val = rp.datasets.bacteria("val", folder="../data/bacteria/")

    X_train = np.array(X_train.spectral_data)
    y_train = np.array(y_train)
    X_test = np.array(X_test.spectral_data)
    y_test = np.array(y_test)
    X_val = np.array(X_val.spectral_data)
    y_val = np.array(y_val)

    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.long)
    X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
    y_val_tensor = torch.tensor(y_val, dtype=torch.long)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long)

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    model = SpectraCNN() if model_type == "cnn" else SpectraFCNN()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = CosineAnnealingLR(optimizer, T_max=epochs)

    best_val_loss = float('inf')
    patience = 5
    patience_counter = 0
    best_accuracy = 0.0

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch = augment_spectra(X_batch)
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        
        model.eval()
        val_loss = 0
        val_correct = 0
        total_val = 0
        
        with torch.no_grad():
            for X_val, y_val in val_loader:
                outputs = model(X_val)
                val_loss += criterion(outputs, y_val).item()
                _, predicted = torch.max(outputs.data, 1)
                total_val += y_val.size(0)
                val_correct += (predicted == y_val).sum().item()
        
        val_accuracy = val_correct / total_val
        avg_val_loss = val_loss / len(val_loader)
        
        print(f"Epoch [{epoch+1}/{epochs}]")
        print(f"Training Loss: {total_loss/len(train_loader):.4f}")
        print(f"Validation Loss: {avg_val_loss:.4f}")
        print(f"Validation Accuracy: {val_accuracy:.4f}")
        print("-" * 50)
        
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            torch.save(model.state_dict(), f'best_model_{model_type}.pth')
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered!")
                break
        
        scheduler.step()

    model.load_state_dict(torch.load(f'best_model_{model_type}.pth'))
    model.eval()

    with torch.no_grad():
        test_outputs = model(X_test_tensor)
        _, predicted = torch.max(test_outputs.data, 1)
        test_accuracy = (predicted == y_test_tensor).float().mean()
        print(f"Final Test Accuracy: {test_accuracy:.4f}")

    n_classes = 30
    class_correct = list(0. for i in range(n_classes))
    class_total = list(0. for i in range(n_classes))
    
    with torch.no_grad():
        outputs = model(X_test_tensor)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == y_test_tensor)
        
        for i in range(len(y_test_tensor)):
            label = y_test_tensor[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

    print("\nPer-class accuracy:")
    for i in range(n_classes):
        if class_total[i] > 0:
            print(f'Accuracy of class {i}: {100 * class_correct[i] / class_total[i]:.2f}%')

    return model, test_accuracy

print("Training CNN model...")
cnn_model, cnn_accuracy = train_model(model_type="cnn")

In [26]:
# Train FCNN model
print("\nTraining FCNN model...")
fcnn_model, fcnn_accuracy = train_model(model_type="fcnn")

print(f"\nFinal Results:")
print(f"CNN Model Accuracy: {cnn_accuracy:.4f}")
print(f"FCNN Model Accuracy: {fcnn_accuracy:.4f}")

Epoch [1/50]
Training Loss: 0.6917
Validation Loss: 2.9440
Validation Accuracy: 0.4527
--------------------------------------------------
Epoch [2/50]
Training Loss: 0.3194
Validation Loss: 3.4289
Validation Accuracy: 0.4207
--------------------------------------------------
Epoch [3/50]
Training Loss: 0.2645
Validation Loss: 3.7003
Validation Accuracy: 0.4170
--------------------------------------------------
Epoch [4/50]
Training Loss: 0.2313
Validation Loss: 5.0119
Validation Accuracy: 0.3410
--------------------------------------------------
Epoch [5/50]
Training Loss: 0.2072
Validation Loss: 3.9318
Validation Accuracy: 0.4417
--------------------------------------------------
Epoch [6/50]
Training Loss: 0.1905
Validation Loss: 4.0163
Validation Accuracy: 0.4430
--------------------------------------------------
Early stopping triggered!
Final Test Accuracy: 0.4360

Per-class accuracy:
Accuracy of class 0: 94.00%
Accuracy of class 1: 0.00%
Accuracy of class 2: 3.00%
Accuracy of cla

## Claude again
Enhanced data augmentation + stronger regularization

In [28]:
# Enhanced data augmentation function
def augment_spectra(spectra, noise_level=0.05):
    noise = torch.randn_like(spectra) * noise_level
    scale = 1.0 + (torch.rand_like(spectra) * 0.1 - 0.05)
    augmented = spectra * scale + noise
    return augmented

# Improved neural network with stronger regularization
class SpectraNN(nn.Module):
    def __init__(self, dropout_rate=0.5):
        super(SpectraNN, self).__init__()
        self.features = nn.Sequential(
            nn.Linear(1000, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(dropout_rate)
        )
        self.classifier = nn.Linear(128, 30)
    
    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

# Load and prepare data
print("Loading data...")
X_train, y_train = rp.datasets.bacteria("train", folder="../data/bacteria/")
X_test, y_test = rp.datasets.bacteria("test", folder="../data/bacteria/")
X_val, y_val = rp.datasets.bacteria("val", folder="../data/bacteria/")

# Convert to numpy arrays
X_train = np.array(X_train.spectral_data)
y_train = np.array(y_train)
X_test = np.array(X_test.spectral_data)
y_test = np.array(y_test)
X_val = np.array(X_val.spectral_data)
y_val = np.array(y_val)

# Print initial class distribution
print("\nInitial class distribution:")
for split_name, y_data in [('Train', y_train), ('Validation', y_val), ('Test', y_test)]:
    unique, counts = np.unique(y_data, return_counts=True)
    print(f"\n{split_name} set class distribution:")
    for class_idx, count in zip(unique, counts):
        print(f"Class {class_idx}: {count}")

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create DataLoaders
batch_size = 32  # Reduced batch size
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

# Calculate class weights for balanced loss
unique, counts = np.unique(y_train, return_counts=True)
weights = 1.0 / torch.tensor(counts, dtype=torch.float)
weights = weights / weights.sum()

# Initialize model, loss function, optimizer, and scheduler
print("\nInitializing model...")
model = SpectraNN()
criterion = nn.CrossEntropyLoss(weight=weights)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # Added L2 regularization
epochs = 50
scheduler = CosineAnnealingLR(optimizer, T_max=epochs)

# Training loop with early stopping
best_val_loss = float('inf')
patience = 7  # Increased patience
patience_counter = 0
best_accuracy = 0.0

print("\nStarting training...")
for epoch in range(epochs):
    # Training phase
    model.train()
    total_loss = 0
    train_correct = 0
    train_total = 0
    
    for X_batch, y_batch in train_loader:
        # Apply data augmentation
        X_batch = augment_spectra(X_batch)
        
        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total += y_batch.size(0)
        train_correct += (predicted == y_batch).sum().item()
    
    # Validation phase
    model.eval()
    val_loss = 0
    val_correct = 0
    val_total = 0
    
    with torch.no_grad():
        for X_val, y_val in val_loader:
            outputs = model(X_val)
            val_loss += criterion(outputs, y_val).item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += y_val.size(0)
            val_correct += (predicted == y_val).sum().item()
    
    train_accuracy = train_correct / train_total
    val_accuracy = val_correct / val_total
    avg_val_loss = val_loss / len(val_loader)
    
    # Print metrics
    print(f"Epoch [{epoch+1}/{epochs}]")
    print(f"Training Loss: {total_loss/len(train_loader):.4f}")
    print(f"Training Accuracy: {train_accuracy:.4f}")
    print(f"Validation Loss: {avg_val_loss:.4f}")
    print(f"Validation Accuracy: {val_accuracy:.4f}")
    print("-" * 50)
    
    # Early stopping check
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        torch.save(model.state_dict(), 'best_model.pth')
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping triggered!")
            break
    
    scheduler.step()

# Load best model and evaluate on test set
print("\nEvaluating final model...")
model.load_state_dict(torch.load('best_model.pth'))
model.eval()

with torch.no_grad():
    test_outputs = model(X_test_tensor)
    _, predicted = torch.max(test_outputs.data, 1)
    test_accuracy = (predicted == y_test_tensor).float().mean()
    print(f"Final Test Accuracy: {test_accuracy:.4f}")


Loading data...

Initial class distribution:

Train set class distribution:
Class 0: 2000
Class 1: 2000
Class 2: 2000
Class 3: 2000
Class 4: 2000
Class 5: 2000
Class 6: 2000
Class 7: 2000
Class 8: 2000
Class 9: 2000
Class 10: 2000
Class 11: 2000
Class 12: 2000
Class 13: 2000
Class 14: 2000
Class 15: 2000
Class 16: 2000
Class 17: 2000
Class 18: 2000
Class 19: 2000
Class 20: 2000
Class 21: 2000
Class 22: 2000
Class 23: 2000
Class 24: 2000
Class 25: 2000
Class 26: 2000
Class 27: 2000
Class 28: 2000
Class 29: 2000

Validation set class distribution:
Class 0: 100
Class 1: 100
Class 2: 100
Class 3: 100
Class 4: 100
Class 5: 100
Class 6: 100
Class 7: 100
Class 8: 100
Class 9: 100
Class 10: 100
Class 11: 100
Class 12: 100
Class 13: 100
Class 14: 100
Class 15: 100
Class 16: 100
Class 17: 100
Class 18: 100
Class 19: 100
Class 20: 100
Class 21: 100
Class 22: 100
Class 23: 100
Class 24: 100
Class 25: 100
Class 26: 100
Class 27: 100
Class 28: 100
Class 29: 100

Test set class distribution:
Class 0:

## Claude againnn
Residual connections model

In [None]:
# Enhanced data augmentation with more variations
def augment_spectra(spectra, noise_level=0.03):
    # Add random noise
    noise = torch.randn_like(spectra) * noise_level
    # Random scaling
    scale = 1.0 + (torch.rand_like(spectra) * 0.1 - 0.05)
    # Random shift
    shift = torch.rand_like(spectra) * 0.02 - 0.01
    augmented = spectra * scale + noise + shift
    return augmented

# Modified network architecture with residual connections
class SpectraNN(nn.Module):
    def __init__(self, dropout_rate=0.4):
        super(SpectraNN, self).__init__()
        self.input_bn = nn.BatchNorm1d(1000)
        
        # First block
        self.block1 = nn.Sequential(
            nn.Linear(1000, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate)
        )
        
        # Second block with residual connection
        self.block2_1 = nn.Sequential(
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 512),
            nn.BatchNorm1d(512)
        )
        
        # Output layers
        self.final = nn.Sequential(
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(256, 30)
        )
    
    def forward(self, x):
        x = self.input_bn(x)
        x1 = self.block1(x)
        x2 = self.block2_1(x1)
        x = x1 + x2  # Residual connection
        x = self.final(x)
        return x

# Load and prepare data
print("Loading data...")
X_train, y_train = rp.datasets.bacteria("train", folder="../data/bacteria/")
X_test, y_test = rp.datasets.bacteria("test", folder="../data/bacteria/")
X_val, y_val = rp.datasets.bacteria("val", folder="../data/bacteria/")

# Convert to numpy arrays
X_train = np.array(X_train.spectral_data)
y_train = np.array(y_train)
X_test = np.array(X_test.spectral_data)
y_test = np.array(y_test)
X_val = np.array(X_val.spectral_data)
y_val = np.array(y_val)

# Apply StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create DataLoaders with smaller batch size
batch_size = 10
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

# Initialize model, loss function, optimizer, and scheduler
print("\nInitializing model...")
model = SpectraNN()

# Calculate class weights for balanced loss
weights = torch.ones(30) # Equal weights since data is balanced
criterion = nn.CrossEntropyLoss(weight=weights)
optimizer = optim.AdamW(model.parameters(), lr=0.0005, weight_decay=1e-4)
epochs = 100  # Increased epochs
scheduler = CosineAnnealingLR(optimizer, T_max=epochs)

# Training loop with early stopping
best_val_loss = float('inf')
patience = 10  # Increased patience
patience_counter = 0
best_accuracy = 0.0

print("\nStarting training...")
for epoch in range(epochs):
    # Training phase
    model.train()
    total_loss = 0
    train_correct = 0
    train_total = 0
    
    for X_batch, y_batch in train_loader:
        # Apply data augmentation
        X_batch = augment_spectra(X_batch)
        
        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        
        total_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_total += y_batch.size(0)
        train_correct += (predicted == y_batch).sum().item()
    
    # Validation phase
    model.eval()
    val_loss = 0
    val_correct = 0
    val_total = 0
    
    with torch.no_grad():
        for X_val, y_val in val_loader:
            outputs = model(X_val)
            val_loss += criterion(outputs, y_val).item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += y_val.size(0)
            val_correct += (predicted == y_val).sum().item()
    
    train_accuracy = train_correct / train_total
    val_accuracy = val_correct / val_total
    avg_val_loss = val_loss / len(val_loader)
    
    # Print metrics
    print(f"Epoch [{epoch+1}/{epochs}]")
    print(f"Training Loss: {total_loss/len(train_loader):.4f}")
    print(f"Training Accuracy: {train_accuracy:.4f}")
    print(f"Validation Loss: {avg_val_loss:.4f}")
    print(f"Validation Accuracy: {val_accuracy:.4f}")
    print("-" * 50)
    
    # Early stopping check
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
        }, 'best_model.pth')
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping triggered!")
            break
    
    scheduler.step()

# Load best model and evaluate on test set
print("\nEvaluating final model...")
checkpoint = torch.load('best_model.pth')
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Evaluate with test-time augmentation
def test_time_augment(model, X, num_augments=5):
    predictions = []
    with torch.no_grad():
        # Original prediction
        outputs = model(X)
        predictions.append(outputs)
        
        # Augmented predictions
        for _ in range(num_augments - 1):
            aug_X = augment_spectra(X)
            outputs = model(aug_X)
            predictions.append(outputs)
    
    # Average predictions
    return torch.stack(predictions).mean(0)

# Final evaluation with test-time augmentation
outputs = test_time_augment(model, X_test_tensor)
_, predicted = torch.max(outputs.data, 1)
test_accuracy = (predicted == y_test_tensor).float().mean()
print(f"Final Test Accuracy: {test_accuracy:.4f}")

# Print per-class accuracy
print("\nPer-class test accuracy:")
n_classes = 30
class_correct = list(0. for i in range(n_classes))
class_total = list(0. for i in range(n_classes))

with torch.no_grad():
    c = (predicted == y_test_tensor)
    for i in range(len(y_test_tensor)):
        label = y_test_tensor[i]
        class_correct[label] += c[i].item()
        class_total[label] += 1

for i in range(n_classes):
    if class_total[i] > 0:
        print(f'Accuracy of class {i}: {100 * class_correct[i] / class_total[i]:.2f}%')

Loading data...

Initializing model...

Starting training...
Epoch [1/100]
Training Loss: 1.0214
Training Accuracy: 0.6675
Validation Loss: 4.3725
Validation Accuracy: 0.0810
--------------------------------------------------
Epoch [2/100]
Training Loss: 0.6525
Training Accuracy: 0.7845
Validation Loss: 4.3053
Validation Accuracy: 0.1347
--------------------------------------------------
Epoch [3/100]
Training Loss: 0.5731
Training Accuracy: 0.8096
Validation Loss: 4.3579
Validation Accuracy: 0.0770
--------------------------------------------------
Epoch [4/100]
Training Loss: 0.5293
Training Accuracy: 0.8279
Validation Loss: 4.2040
Validation Accuracy: 0.1593
--------------------------------------------------
Epoch [5/100]
Training Loss: 0.4963
Training Accuracy: 0.8388
Validation Loss: 4.6408
Validation Accuracy: 0.1007
--------------------------------------------------
Epoch [6/100]
Training Loss: 0.4805
Training Accuracy: 0.8427
Validation Loss: 4.4679
Validation Accuracy: 0.0943


## Claude again againnnnnn
Self-attention + focal loss (?)

In [109]:
# Focal Loss implementation
class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.ce = nn.CrossEntropyLoss(reduction='none')

    def forward(self, inputs, targets):
        ce_loss = self.ce(inputs, targets)
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1-pt)**self.gamma * ce_loss
        return focal_loss.mean()

# Self-attention module
class SelfAttention(nn.Module):
    def __init__(self, in_dim):
        super(SelfAttention, self).__init__()
        self.query = nn.Linear(in_dim, in_dim)
        self.key = nn.Linear(in_dim, in_dim)
        self.value = nn.Linear(in_dim, in_dim)
        
    def forward(self, x):
        q = self.query(x)
        k = self.key(x)
        v = self.value(x)
        
        scores = torch.matmul(q, k.transpose(-2, -1)) / np.sqrt(x.size(-1))
        attention = torch.softmax(scores, dim=-1)
        out = torch.matmul(attention, v)
        return out

# Enhanced network architecture
class SpectraNN(nn.Module):
    def __init__(self, dropout_rate=0.3):
        super(SpectraNN, self).__init__()
        
        # Input processing
        self.input_bn = nn.BatchNorm1d(1000)
        self.input_attention = SelfAttention(1000)
        
        # Deeper architecture with residual connections
        self.encoder = nn.Sequential(
            nn.Linear(1000, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(),
            nn.Dropout(dropout_rate),
            
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(),
            nn.Dropout(dropout_rate)
        )
        
        self.attention = SelfAttention(512)
        
        self.decoder = nn.Sequential(
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(),
            nn.Dropout(dropout_rate),
            
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.Dropout(dropout_rate),
            
            nn.Linear(128, 30)
        )
        
    def forward(self, x):
        x = self.input_bn(x)
        x = x.unsqueeze(1)
        x = self.input_attention(x)
        x = x.squeeze(1)
        
        x1 = self.encoder(x)
        x2 = self.attention(x1.unsqueeze(1)).squeeze(1)
        x = x1 + x2  # Residual connection
        
        x = self.decoder(x)
        return x

# Advanced data augmentation
def augment_spectra(spectra, noise_level=0.02):
    augmented = spectra.clone()
    
    # Random noise
    noise = torch.randn_like(spectra) * noise_level
    augmented += noise
    
    # Random scaling
    scale = 1.0 + (torch.rand_like(spectra) * 0.1 - 0.05)
    augmented *= scale
    
    # Random shifts
    shift = (torch.rand_like(spectra) * 0.02 - 0.01)
    augmented += shift
    
    # Random smoothing (randomly applied)
    if torch.rand(1) > 0.5:
        kernel_size = 3
        pad_size = kernel_size // 2
        padded = torch.nn.functional.pad(augmented.unsqueeze(1), (pad_size, pad_size), mode='reflect')
        augmented = torch.nn.functional.avg_pool1d(padded, kernel_size=kernel_size, stride=1).squeeze(1)
    
    return augmented

# Load and prepare data
print("Loading data...")
X_train, y_train = rp.datasets.bacteria("train", folder="../data/bacteria/")
X_test, y_test = rp.datasets.bacteria("test", folder="../data/bacteria/")
X_val, y_val = rp.datasets.bacteria("val", folder="../data/bacteria/")

# Convert to numpy arrays
X_train = np.array(X_train.spectral_data)
y_train = np.array(y_train)
X_test = np.array(X_test.spectral_data)
y_test = np.array(y_test)
X_val = np.array(X_val.spectral_data)
y_val = np.array(y_val)

# Apply StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create DataLoaders
batch_size = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

# Initialize model and training components
print("\nInitializing model...")
model = SpectraNN()
criterion = FocalLoss(gamma=2)
optimizer = optim.AdamW(model.parameters(), lr=0.0003, weight_decay=1e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=100)

# Training loop
best_val_accuracy = 0
patience = 15
patience_counter = 0
epochs = 10

print("\nStarting training...")
for epoch in range(epochs):
    # Training phase
    model.train()
    train_loss = 0
    train_correct = 0
    train_total = 0
    
    for batch_idx, (X_batch, y_batch) in enumerate(train_loader):
        # Multiple augmentations per batch
        X_aug = torch.cat([augment_spectra(X_batch) for _ in range(2)])
        y_aug = torch.cat([y_batch for _ in range(2)])
        
        outputs = model(X_aug)
        loss = criterion(outputs, y_aug)
        
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        train_total += y_aug.size(0)
        train_correct += predicted.eq(y_aug).sum().item()
        
    # Validation phase
    model.eval()
    val_loss = 0
    val_correct = 0
    val_total = 0
    
    with torch.no_grad():
        for X_val, y_val in val_loader:
            outputs = model(X_val)
            loss = criterion(outputs, y_val)
            
            val_loss += loss.item()
            _, predicted = outputs.max(1)
            val_total += y_val.size(0)
            val_correct += predicted.eq(y_val).sum().item()
    
    # Print metrics
    train_accuracy = train_correct / train_total
    val_accuracy = val_correct / val_total
    
    print(f'Epoch: {epoch+1}')
    print(f'Train Loss: {train_loss/len(train_loader):.4f} | Train Acc: {train_accuracy:.4f}')
    print(f'Val Loss: {val_loss/len(val_loader):.4f} | Val Acc: {val_accuracy:.4f}')
    print('-' * 50)
    
    # Save best model
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        torch.save(model.state_dict(), 'best_model.pth')
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print('Early stopping!')
            break
    
    scheduler.step()

# Test evaluation with ensemble predictions
print("\nEvaluating final model...")
model.load_state_dict(torch.load('best_model.pth'))
model.eval()

def ensemble_predict(model, X, n_augmentations=5):
    predictions = []
    with torch.no_grad():
        # Original prediction
        pred = model(X)
        predictions.append(pred)
        
        # Augmented predictions
        for _ in range(n_augmentations-1):
            aug_X = augment_spectra(X)
            pred = model(aug_X)
            predictions.append(pred)
    
    return torch.stack(predictions).mean(0)

# Final evaluation
outputs = ensemble_predict(model, X_test_tensor)
_, predicted = outputs.max(1)
accuracy = predicted.eq(y_test_tensor).float().mean()
print(f'\nFinal Test Accuracy: {accuracy:.4f}')

# Per-class accuracy
class_correct = list(0. for i in range(30))
class_total = list(0. for i in range(30))

with torch.no_grad():
    for i in range(len(y_test_tensor)):
        label = y_test_tensor[i]
        class_correct[label] += (predicted[i] == label).item()
        class_total[label] += 1

print('\nPer-class accuracy:')
for i in range(30):
    if class_total[i] > 0:
        print(f'Accuracy of class {i}: {100 * class_correct[i] / class_total[i]:.2f}%')

Loading data...

Splitting data...

Initializing model...

Starting training...
Epoch: 1
Train Loss: 0.5521 | Train Acc: 0.7592
Val Loss: 0.1264 | Val Acc: 0.9042
--------------------------------------------------
Epoch: 2
Train Loss: 0.2202 | Train Acc: 0.8583
Val Loss: 0.1099 | Val Acc: 0.9171
--------------------------------------------------
Epoch: 3
Train Loss: 0.1802 | Train Acc: 0.8804
Val Loss: 0.1029 | Val Acc: 0.9194
--------------------------------------------------
Epoch: 4
Train Loss: 0.1622 | Train Acc: 0.8898
Val Loss: 0.0959 | Val Acc: 0.9230
--------------------------------------------------
Epoch: 5
Train Loss: 0.1468 | Train Acc: 0.8989
Val Loss: 0.0869 | Val Acc: 0.9287
--------------------------------------------------
Epoch: 6
Train Loss: 0.1385 | Train Acc: 0.9020
Val Loss: 0.0912 | Val Acc: 0.9270
--------------------------------------------------
Epoch: 7
Train Loss: 0.1260 | Train Acc: 0.9089
Val Loss: 0.0934 | Val Acc: 0.9274
---------------------------------

  model.load_state_dict(torch.load('best_model.pth'))



Final Test Accuracy: 0.4000

Per-class accuracy:
Accuracy of class 0: 92.00%
Accuracy of class 1: 0.00%
Accuracy of class 2: 2.00%
Accuracy of class 3: 79.00%
Accuracy of class 4: 17.00%
Accuracy of class 5: 100.00%
Accuracy of class 6: 55.00%
Accuracy of class 7: 35.00%
Accuracy of class 8: 0.00%
Accuracy of class 9: 0.00%
Accuracy of class 10: 14.00%
Accuracy of class 11: 0.00%
Accuracy of class 12: 5.00%
Accuracy of class 13: 1.00%
Accuracy of class 14: 92.00%
Accuracy of class 15: 98.00%
Accuracy of class 16: 0.00%
Accuracy of class 17: 98.00%
Accuracy of class 18: 95.00%
Accuracy of class 19: 79.00%
Accuracy of class 20: 98.00%
Accuracy of class 21: 2.00%
Accuracy of class 22: 0.00%
Accuracy of class 23: 83.00%
Accuracy of class 24: 0.00%
Accuracy of class 25: 17.00%
Accuracy of class 26: 19.00%
Accuracy of class 27: 93.00%
Accuracy of class 28: 20.00%
Accuracy of class 29: 6.00%
