# Predicting SARS-CoV-2 Variants - SVM Model

In [1]:
# Importing modules
import pandas as pd
import numpy as np
from src.scripts.preprocessing import preprocess
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import joblib

In [2]:
# Data
# Point the project preprocessor at the test-set directory, then turn the
# spectra it finds there into a single DataFrame.
spectra_source = preprocess("data/test_set")
predict_data = spectra_source.spectra2df()

In [3]:
# Feature matrix: every column except "Class" and "Sample"
X_predict = predict_data.drop(columns=["Class", "Sample"])

# Labels carried by the prediction set
y = predict_data["Class"]

# Scaling Feature matrix with the previously fitted scaler
# NOTE(review): joblib.load unpickles arbitrary objects; only load trusted files.
sds = joblib.load("models/scaler.pkl")
scaled_values = sds.transform(X_predict)
scaled_X_predict = pd.DataFrame(scaled_values, columns=X_predict.columns)

# Loading SVM model
model = joblib.load("models/SVM_model.pkl")

# Model prediction (the estimator is fed a plain array, not the DataFrame)
svm_inputs = scaled_X_predict.to_numpy()
y_pred = model.predict(svm_inputs)

In [4]:
# Report the SVM predictions
svm_report = f'Predicted SARS-CoV-2 Variants: {y_pred}'
print(svm_report)

Predicted SARS-CoV-2 Variants: ['Omicron' 'Kappa']


# Predicting SARS-CoV-2 Variants - BiLSTM Model

In [5]:
# Importing modules
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder

In [6]:
# Label Encoder
# LabelEncoder stores its classes in sorted order, so the integer codes do not
# depend on the order of this list.
# NOTE(review): assumes the encoder used at training time was fitted on these
# same four labels — confirm against the training notebook.
label_encoder = LabelEncoder()
label_encoder = label_encoder.fit(['Wildtype', 'Kappa', 'Omicron', 'Delta'])

# Reshaping
# Build the (samples, 1, features) array under its own name instead of
# rebinding `scaled_X_predict`. The DataFrame stays intact, so this cell (and
# the SVM cell that also reads `scaled_X_predict`) can be re-run safely.
X_predict_seq = scaled_X_predict.to_numpy()
X_predict_seq = X_predict_seq.reshape(X_predict_seq.shape[0], 1, X_predict_seq.shape[1])

# Tensor
X_predict_tensor = torch.tensor(X_predict_seq, dtype=torch.float32)
# transform() raises ValueError if "Class" contains a label outside the four above.
y_predict_tensor = torch.tensor(label_encoder.transform(predict_data["Class"]), dtype=torch.long)

# Dataloader
# shuffle=False keeps predictions in the same order as the rows of predict_data.
batch_size = 16
predict_loader = DataLoader(
    TensorDataset(X_predict_tensor, y_predict_tensor),
    batch_size=batch_size,
    shuffle=False,
)

In [7]:
# Attention
class Attention(nn.Module):
    """Pool a sequence of LSTM states into one vector using learned weights.

    Args:
        hidden_size: Size of the last dimension of the tensor passed to
            ``forward``.
    """

    def __init__(self, hidden_size):
        super().__init__()
        # One scalar score per time step.
        self.attn_weights = nn.Linear(hidden_size, 1)

    def forward(self, lstm_output):
        """Return the softmax-weighted sum of ``lstm_output`` along dim 1."""
        scores = self.attn_weights(lstm_output)
        # Normalise the scores over dim 1, then restore the trailing axis so
        # the weights broadcast against the hidden dimension.
        weights = torch.softmax(scores.squeeze(-1), dim=1).unsqueeze(-1)
        return (lstm_output * weights).sum(dim=1)

# BiLSTM Model
class BiLSTMClassifier(nn.Module):
    """Bidirectional LSTM followed by attention pooling and a linear classifier.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per LSTM direction.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output logits.
        dropout_prob: Dropout probability passed to the LSTM and applied
            before the final linear layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout_prob=0.3):
        super().__init__()
        # A bidirectional LSTM concatenates both directions, doubling the width.
        bidirectional_width = 2 * hidden_size
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout_prob,
            bidirectional=True,
        )
        self.attention = Attention(bidirectional_width)
        self.layer_norm = nn.LayerNorm(bidirectional_width)
        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Linear(bidirectional_width, num_classes)

    def forward(self, x):
        """Return class logits for a batch-first input ``x``."""
        sequence_states = self.lstm(x)[0]  # discard the (h_n, c_n) tuple
        pooled = self.attention(sequence_states)
        normalised = self.layer_norm(pooled)
        return self.fc(self.dropout(normalised))

# Model (BiLSTM Model with attention mechanism)
# The output size follows the number of classes known to the label encoder.
model = BiLSTMClassifier(
    input_size=1400,
    hidden_size=256,
    num_layers=3,
    num_classes=len(label_encoder.classes_),
)
print(model)

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer
optimizer = optim.AdamW(params=model.parameters(), lr=1e-4, weight_decay=1e-4)

# Learning rate scheduler
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer=optimizer,
    mode='min',
    factor=0.5,
    patience=5,
)

BiLSTMClassifier(
  (lstm): LSTM(1400, 256, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  (attention): Attention(
    (attn_weights): Linear(in_features=512, out_features=1, bias=True)
  )
  (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=512, out_features=4, bias=True)
)


In [8]:
# Loading BiLSTM model
# The file holds a fully pickled module (the loaded object is used directly as
# the model below), so:
#   * weights_only=False is required on PyTorch >= 2.6, where the default
#     weights_only=True refuses to unpickle custom classes;
#   * map_location pins every tensor to the device the batches are sent to,
#     so a checkpoint saved on GPU still loads and runs here.
# NOTE(review): unpickling can execute arbitrary code — only load trusted checkpoints.
device = torch.device("cpu")
model = torch.load("models/BiLSTM_model.pth", map_location=device, weights_only=False)

# Model prediction
model.eval()  # disable dropout for inference
y_test, y_pred = [], []
with torch.no_grad():
    for X_batch, y_batch in predict_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        outputs = model(X_batch)
        # Index of the largest logit per sample is the predicted class code.
        predicted = outputs.argmax(dim=1)
        y_test.extend(y_batch.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

In [9]:
# Report the BiLSTM predictions, decoded back to variant names
bilstm_report = f'Predicted SARS-CoV-2 Variants: {list(label_encoder.inverse_transform(y_pred))}'
print(bilstm_report)

Predicted SARS-CoV-2 Variants: ['Kappa', 'Kappa']
