In [26]:
import numpy as np
import pandas as pd

from IPython.core.display import display, HTML
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio
from sklearn.feature_selection import SelectKBest, chi2, f_classif
from sklearn.preprocessing import StandardScaler
from scipy.stats import pearsonr
from sklearn.preprocessing import MinMaxScaler

import seaborn as sns
from importlib import reload
import matplotlib.pyplot as plt
import matplotlib
import warnings

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 500)
pd.set_option('display.expand_frame_repr', False)
# pd.set_option('max_colwidth', -1)
display(HTML("<style>div.output_scroll { height: 35em; }</style>"))

reload(plt)
%matplotlib inline
%config InlineBackend.figure_format ='retina'

warnings.filterwarnings('ignore')

# configure plotly graph objects
pio.renderers.default = 'iframe'
# pio.renderers.default = 'vscode'

pio.templates["ck_template"] = go.layout.Template(
    layout_colorway = px.colors.sequential.Viridis,
#     layout_hovermode = 'closest',
#     layout_hoverdistance = -1,
    layout_autosize=False,
    layout_width=800,
    layout_height=600,
    layout_font = dict(family="Calibri Light"),
    layout_title_font = dict(family="Calibri"),
    layout_hoverlabel_font = dict(family="Calibri Light"),
#     plot_bgcolor="white",
)

# pio.templates.default = 'seaborn+ck_template+gridon'
pio.templates.default = 'ck_template+gridon'
# pio.templates.default = 'seaborn+gridon'
# pio.templates

In [2]:
# Step 1: Install the Kaggle package
!pip install kaggle

# Step 2: Upload your Kaggle API token
from google.colab import files
files.upload()




Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"mrsiddy","key":"00b49b189856b15793e505e3bd64fb56"}'}

In [3]:

# Step 3: Create the Kaggle directory and move the API token there
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Step 4: Download the dataset using the Kaggle API
!kaggle datasets download -d cnrieiit/mqttset

# Step 5: Unzip the downloaded dataset
!unzip mqttset.zip


Dataset URL: https://www.kaggle.com/datasets/cnrieiit/mqttset
License(s): CC-BY-NC-SA-4.0
Downloading mqttset.zip to /content
 99% 873M/879M [00:13<00:00, 64.3MB/s]
100% 879M/879M [00:13<00:00, 68.7MB/s]
Archive:  mqttset.zip
  inflating: Data/CSV/bruteforce.csv  
  inflating: Data/CSV/flood.csv      
  inflating: Data/CSV/legitimate_1w.csv  
  inflating: Data/CSV/malaria.csv    
  inflating: Data/CSV/malformed.csv  
  inflating: Data/CSV/slowite.csv    
  inflating: Data/FINAL_CSV/mqttdataset_reduced.csv  
  inflating: Data/FINAL_CSV/test30.csv  
  inflating: Data/FINAL_CSV/test30_augmented.csv  
  inflating: Data/FINAL_CSV/test30_reduced.csv  
  inflating: Data/FINAL_CSV/train70.csv  
  inflating: Data/FINAL_CSV/train70_augmented.csv  
  inflating: Data/FINAL_CSV/train70_reduced.csv  
  inflating: Data/PCAP/bruteforce.pcapng  
  inflating: Data/PCAP/capture_1w.pcap  
  inflating: Data/PCAP/capture_flood.pcap  
  inflating: Data/PCAP/capture_malariaDoS.pcap  
  inflating: Data/PCAP/ma

In [27]:
# Load datasets
dftrain = pd.read_csv("./Data/FINAL_CSV/train70_reduced.csv")
dftest = pd.read_csv("./Data/FINAL_CSV/test30_reduced.csv")


In [28]:
dftrain.head()

Unnamed: 0,tcp.flags,tcp.time_delta,tcp.len,mqtt.conack.flags,mqtt.conack.flags.reserved,mqtt.conack.flags.sp,mqtt.conack.val,mqtt.conflag.cleansess,mqtt.conflag.passwd,mqtt.conflag.qos,mqtt.conflag.reserved,mqtt.conflag.retain,mqtt.conflag.uname,mqtt.conflag.willflag,mqtt.conflags,mqtt.dupflag,mqtt.hdrflags,mqtt.kalive,mqtt.len,mqtt.msg,mqtt.msgid,mqtt.msgtype,mqtt.proto_len,mqtt.protoname,mqtt.qos,mqtt.retain,mqtt.sub.qos,mqtt.suback.qos,mqtt.ver,mqtt.willmsg,mqtt.willmsg_len,mqtt.willtopic,mqtt.willtopic_len,target
0,0x00000018,0.998867,10,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0x00000030,0.0,8.0,32,0.0,3.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,legitimate
1,0x00000010,6.7e-05,1460,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0x00000032,0.0,169.0,6361653943666144654266454162444634326230633041...,2714.0,3.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,dos
2,0x00000010,5.8e-05,1460,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0x00000032,0.0,163.0,4232646141394333463334613232626446326646383446...,1548.0,3.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,dos
3,0x00000018,0.000227,10,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0x00000030,0.0,8.0,32,0.0,3.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,legitimate
4,0x00000018,0.000236,16,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0x00000040,0.0,2.0,0,2800.0,4.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,dos


In [29]:
dftrain['target'].unique()

array(['legitimate', 'dos', 'malformed', 'bruteforce', 'slowite', 'flood'],
      dtype=object)

In [30]:
# Function to preprocess data
def preprocess_data(df):
    df = df.astype('category')
    cat_columns = df.select_dtypes(['category']).columns
    df[cat_columns] = df[cat_columns].apply(lambda x: x.cat.codes)
    return df

In [31]:
# Preprocess training data
dftrain = preprocess_data(dftrain)
dftest = preprocess_data(dftest)

In [79]:
# Preprocess test data
x_train = dftrain.drop('target', axis=1)
y_train = dftrain['target']
x_test = dftest.drop('target', axis=1)
y_test = dftest['target']

print("Ready to generate train and test datasets")

Ready to generate train and test datasets


In [35]:
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier
def select_top_k_pearson(X, y, k=20):
    correlations = []
    for col in X.columns:
        corr, _ = pearsonr(X[col], y)
        correlations.append(abs(corr))

    top_k_indices = np.argsort(correlations)[-k:]
    return X.columns[top_k_indices]

def select_top_k_chi2(X, y, k=20):
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    chi2_selector = SelectKBest(chi2, k=k)
    chi2_selector.fit(X_scaled, y)
    return X.columns[chi2_selector.get_support()]

def select_top_k_anova(X, y, k=20):
    anova_selector = SelectKBest(f_classif, k=k)
    anova_selector.fit(X, y)
    return X.columns[anova_selector.get_support()]

def select_top_k_rfe(X, y, k=20):
    model = RandomForestClassifier(random_state=42)
    rfe_selector = RFE(estimator=model, n_features_to_select=k, step=1)
    rfe_selector.fit(X, y)
    return X.columns[rfe_selector.get_support()]


top_20_pearson = select_top_k_pearson(x_train, y_train, k=20)
top_20_chi2 = select_top_k_chi2(x_train, y_train, k=20)
top_20_anova = select_top_k_anova(x_train, y_train, k=20)
top_20_rfe = select_top_k_rfe(x_train, y_train, k=20)

#test
top_20_pearson = select_top_k_pearson(x_test, y_test, k=20)
top_20_chi2 = select_top_k_chi2(x_test, y_test, k=20)
top_20_anova = select_top_k_anova(x_test, y_test, k=20)
top_20_rfe = select_top_k_rfe(x_test, y_test, k=20)

In [80]:
selected_features = set(top_20_rfe)
final_selected_features = list(selected_features)[:20]

X_selected = x_train[final_selected_features]

# Combine the selected features
# selected_features = set(top_20_pearson).union(set(top_20_chi2)).union(set(top_20_anova))
# Only pearson features
selected_features = set(top_20_rfe)
final_selected_features = list(selected_features)[:20]  # If you want exactly 20 features

# Filter the dataset to keep only the selected features
X_selected_test = x_test[final_selected_features]

In [81]:
x_train, x_test, y_train, y_test = X_selected.values, X_selected_test.values, y_train.values, y_test.values

In [82]:
import torch
X_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(x_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

In [83]:
X_train_tensor.shape

torch.Size([231646, 20])

In [69]:
!pip install lora-adapters peft



# TABNET with LoRA

In [70]:
pip install --upgrade peft



In [None]:
from sparsemax import Sparsemax
import torch
from peft import LoraConfig

In [84]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
from torch.optim import Adam

# GLU Block
def glu(act, n_units):
    return act[:, :n_units] * torch.sigmoid(act[:, n_units:])

class LoRALinear(nn.Module):
    def __init__(self, in_features, out_features, rank, alpha=1):
        super(LoRALinear, self).__init__()
        self.rank = rank
        self.alpha = alpha

        # Original weight is frozen during training
        self.orig_weight = nn.Parameter(torch.Tensor(out_features, in_features), requires_grad=False)
        nn.init.kaiming_uniform_(self.orig_weight, a=math.sqrt(5))

        # Low-rank matrices U and V
        self.U = nn.Parameter(torch.Tensor(out_features, rank))
        self.V = nn.Parameter(torch.Tensor(rank, in_features))
        nn.init.kaiming_normal_(self.U)
        nn.init.kaiming_normal_(self.V)

    def forward(self, x):
        weight = self.orig_weight + self.alpha * self.U @ self.V
        return F.linear(x, weight)

    def extra_repr(self):
        return 'in_features={}, out_features={}, rank={}, alpha={}'.format(
            self.orig_weight.size(1), self.orig_weight.size(0), self.rank, self.alpha
        )

class TabNetModel(nn.Module):
    def __init__(
        self,
        num_features=56,
        feature_dims=56,
        output_dim=56,
        num_decision_steps=6,
        relaxation_factor=0.5,
        batch_momentum=0.001,
        virtual_batch_size=2,
        num_classes=2,
        epsilon=0.00001
    ):
        super().__init__()

        self.num_features = num_features
        self.feature_dims = feature_dims
        self.output_dim = output_dim
        self.num_decision_steps = num_decision_steps
        self.relaxation_factor = relaxation_factor
        self.batch_momentum = batch_momentum
        self.virtual_batch_size = virtual_batch_size
        self.num_classes = num_classes
        self.epsilon = epsilon

        self.feature_transform_linear1 = LoRALinear(num_features, self.feature_dims * 2, rank=16)
        self.BN = torch.nn.BatchNorm1d(num_features, momentum=batch_momentum)
        self.BN1 = torch.nn.BatchNorm1d(self.feature_dims * 2, momentum=batch_momentum)

        self.feature_transform_linear2 = torch.nn.Linear(self.feature_dims * 2, self.feature_dims * 2, bias=False)
        self.feature_transform_linear3 = torch.nn.Linear(self.feature_dims, self.feature_dims * 2, bias=False)
        self.feature_transform_linear4 = torch.nn.Linear(self.feature_dims * 2, self.feature_dims * 2, bias=False)

        self.mask_linear_layer = torch.nn.Linear(self.feature_dims * 2 - output_dim, self.num_features, bias=False)
        self.BN2 = torch.nn.BatchNorm1d(self.num_features, momentum=batch_momentum)

        self.final_classifier_layer = torch.nn.Linear(self.output_dim, self.num_classes, bias=False)
        self.sparsemax = nn.Softmax(dim=1)  # Changed to nn.Softmax for compatibility

    def encoder(self, data):
        batch_size = data.shape[0]
        features = self.BN(data)
        output_aggregated = torch.zeros([batch_size, self.output_dim], dtype=torch.float).to(device)

        masked_features = features
        mask_values = torch.zeros([batch_size, self.num_features]).to(device)

        aggregated_mask_values = torch.zeros([batch_size, self.num_features]).to(device)
        complemantary_aggregated_mask_values = torch.ones([batch_size, self.num_features]).to(device)

        total_entropy = 0

        for ni in range(self.num_decision_steps):
            if ni == 0:
                transform_f1 = self.feature_transform_linear1(masked_features)
                norm_transform_f1 = self.BN1(transform_f1)
                transform_f2 = self.feature_transform_linear2(norm_transform_f1)
                norm_transform_f2 = self.BN1(transform_f2)
            else:
                transform_f1 = self.feature_transform_linear1(masked_features)
                norm_transform_f1 = self.BN1(transform_f1)
                transform_f2 = self.feature_transform_linear2(norm_transform_f1)
                norm_transform_f2 = self.BN1(transform_f2)
                transform_f2 = (glu(norm_transform_f2, self.feature_dims) + transform_f1[:, :self.feature_dims]) * np.sqrt(0.5)
                transform_f3 = self.feature_transform_linear3(transform_f2)
                norm_transform_f3 = self.BN1(transform_f3)
                transform_f4 = self.feature_transform_linear4(norm_transform_f3)
                norm_transform_f4 = self.BN1(transform_f4)
                transform_f4 = (glu(norm_transform_f4, self.feature_dims) + transform_f3[:, :self.feature_dims]) * np.sqrt(0.5)
                decision_out = torch.nn.ReLU(inplace=True)(transform_f4[:, :self.output_dim])
                output_aggregated = torch.add(decision_out, output_aggregated)
                scale_agg = torch.sum(decision_out, axis=1, keepdim=True) / (self.num_decision_steps - 1)
                aggregated_mask_values = torch.add(aggregated_mask_values, mask_values * scale_agg)
                features_for_coef = transform_f4[:, :]
                if ni < (self.num_decision_steps - 1):
                    mask_linear_layer = self.mask_linear_layer(features_for_coef)
                    mask_linear_norm = self.BN2(mask_linear_layer)
                    mask_linear_norm = torch.mul(mask_linear_norm, complemantary_aggregated_mask_values)
                    mask_values = self.sparsemax(mask_linear_norm)
                    complemantary_aggregated_mask_values = torch.mul(complemantary_aggregated_mask_values, self.relaxation_factor - mask_values)
                    total_entropy = torch.add(total_entropy, torch.mean(torch.sum(-mask_values * torch.log(mask_values + self.epsilon), axis=1)) / (self.num_decision_steps - 1))
                    masked_features = torch.mul(mask_values, features)

        return output_aggregated, total_entropy

    def classify(self, output_logits):
        logits = self.final_classifier_layer(output_logits)
        predictions = F.softmax(logits, dim=1)
        return logits, predictions

In [72]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [85]:
import torch

# Load saved model
num_features = X_test_tensor.shape[1]
model = TabNetModel(num_features=num_features, num_classes=2)

# Load model weights onto CPU
model.load_state_dict(torch.load("/content/drive/MyDrive/TabLoRA_pretrained_models/tabnet_unsw_v2_rfe.pt", map_location=torch.device('cpu')))
model = model.to(device)
model.eval()

# Zero-shot testing
with torch.no_grad():
    X_test_tensor = X_test_tensor.to(device)
    y_test_tensor = y_test_tensor.to(device)

    # Forward pass
    output, _ = model.encoder(X_test_tensor)
    logits, predictions = model.classify(output)
    predicted_labels = torch.argmax(predictions, dim=1)

In [86]:
y_train_tensor = torch.where(y_train_tensor > 0, torch.tensor(1), y_train_tensor)
y_test_tensor = torch.where(y_test_tensor > 0, torch.tensor(1), y_test_tensor)


In [44]:
# person
from sklearn.metrics import f1_score, precision_score, recall_score

# Zero-shot testing
with torch.no_grad():
    X_test_tensor = X_test_tensor.to(device)
    y_test_tensor = y_test_tensor.to(device)

    # Forward pass
    output, _ = model.encoder(X_test_tensor)
    logits, predictions = model.classify(output)
    predicted_labels = torch.argmax(predictions, dim=1)

y_true = y_test_tensor.cpu().numpy()
y_pred = predicted_labels.cpu().numpy()

# Calculate accuracy
accuracy = (predicted_labels == y_test_tensor).sum().item() / y_test_tensor.size(0)
print(f'Zero-shot testing accuracy: {accuracy:.4f}')

# Calculate precision, recall, and F1 score
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')

print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

Zero-shot testing accuracy: 0.7192
Precision: 0.9035
Recall: 0.7192
F1 Score: 0.8002


In [23]:
from sklearn.metrics import f1_score, precision_score, recall_score
#anova

# Zero-shot testing
with torch.no_grad():
    X_test_tensor = X_test_tensor.to(device)
    y_test_tensor = y_test_tensor.to(device)

    # Forward pass
    output, _ = model.encoder(X_test_tensor)
    logits, predictions = model.classify(output)
    predicted_labels = torch.argmax(predictions, dim=1)

y_true = y_test_tensor.cpu().numpy()
y_pred = predicted_labels.cpu().numpy()

# Calculate accuracy
accuracy = (predicted_labels == y_test_tensor).sum().item() / y_test_tensor.size(0)
print(f'Zero-shot testing accuracy: {accuracy:.4f}')

# Calculate precision, recall, and F1 score
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')

print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

Zero-shot testing accuracy: 0.4621
Precision: 0.8787
Recall: 0.4621
F1 Score: 0.6034


In [87]:
# rfe k selection

from sklearn.metrics import f1_score, precision_score, recall_score

# Zero-shot testing
with torch.no_grad():
    X_test_tensor = X_test_tensor.to(device)
    y_test_tensor = y_test_tensor.to(device)

    # Forward pass
    output, _ = model.encoder(X_test_tensor)
    logits, predictions = model.classify(output)
    predicted_labels = torch.argmax(predictions, dim=1)

y_true = y_test_tensor.cpu().numpy()
y_pred = predicted_labels.cpu().numpy()

# Calculate accuracy
accuracy = (predicted_labels == y_test_tensor).sum().item() / y_test_tensor.size(0)
print(f'Zero-shot testing accuracy: {accuracy:.4f}')

# Calculate precision, recall, and F1 score
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')

print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

Zero-shot testing accuracy: 0.5368
Precision: 0.9505
Recall: 0.5368
F1 Score: 0.6589


## Few-shots transfer learning

In [88]:
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

num_features = X_train_tensor.shape[1]
model = TabNetModel(num_features=num_features, num_classes=2)
model.load_state_dict(torch.load("/content/drive/MyDrive/TabLoRA_pretrained_models/tabnet_bot_iot_v2_rfe.pt"))
model = model.to(device)

for name, param in model.named_parameters():
    if 'U' in name or 'V' in name:
        param.requires_grad = True
    else:
        param.requires_grad = False

optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)
loss_function = torch.nn.CrossEntropyLoss()

def train_model(model, train_loader, epochs=10):
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            output, _ = model.encoder(X_batch)
            loss = loss_function(model.classify(output)[0], y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}")

train_model(model, train_loader)

Epoch 1, Loss: 0.11543825326194132
Epoch 2, Loss: 0.08810775496749959
Epoch 3, Loss: 0.07763617301683935
Epoch 4, Loss: 0.07035979143541965
Epoch 5, Loss: 0.06755153415210093
Epoch 6, Loss: 0.06572565134152904
Epoch 7, Loss: 0.06459729390975232
Epoch 8, Loss: 0.06375047953873976
Epoch 9, Loss: 0.06303270533256418
Epoch 10, Loss: 0.06229372694753934


In [42]:
#pearson
from sklearn.metrics import accuracy_score, f1_score, precision_recall_curve, average_precision_score
import matplotlib.pyplot as plt

def evaluate_model(model, X_test_tensor, y_test_tensor):
  model.eval()
  with torch.no_grad():
    X_test_batch, y_test_batch = X_test_tensor.numpy(), y_test_tensor.numpy()
    X_test_batch = torch.tensor(X_test_batch)
    y_test_batch = torch.tensor(y_test_batch)
    X_test_batch, y_test_batch = X_test_batch.to(device), y_test_batch.to(device)
    output, _ = model.encoder(X_test_batch)
    logits, predictions = model.classify(output)
    print(logits.shape, predictions.shape)
    accuracy = accuracy_score(y_test_batch.cpu(), logits.argmax(dim=1).cpu())
    f1 = f1_score(y_test_batch.cpu(), predictions.argmax(dim=1).cpu())
    precision, recall, _ = precision_recall_curve(y_test_batch.cpu(), predictions[:, 1].cpu())
    average_precision = average_precision_score(y_test_batch.cpu(), predictions[:, 1].cpu())

    print("Accuracy:", accuracy)
    print("F1 Score:", f1)
    print("Average Precision:", average_precision)

  return accuracy, f1, average_precision

accuracy, f1, average_precision = evaluate_model(model, X_test_tensor, y_test_tensor)

torch.Size([99290, 2]) torch.Size([99290, 2])
Accuracy: 0.9565817302850237
F1 Score: 0.9777981490732491
Average Precision: 0.9901014167554516


In [25]:
#anova
from sklearn.metrics import accuracy_score, f1_score, precision_recall_curve, average_precision_score, recall_score

def evaluate_model(model, X_test_tensor, y_test_tensor):
    model.eval()
    with torch.no_grad():
        X_test_batch, y_test_batch = X_test_tensor.numpy(), y_test_tensor.numpy()

        X_test_batch = torch.tensor(X_test_batch).to(device)
        y_test_batch = torch.tensor(y_test_batch).to(device)

        output, _ = model.encoder(X_test_batch)
        logits, predictions = model.classify(output)

        accuracy = accuracy_score(y_test_batch.cpu(), logits.argmax(dim=1).cpu())
        f1 = f1_score(y_test_batch.cpu(), predictions.argmax(dim=1).cpu())
        recall = recall_score(y_test_batch.cpu(), predictions.argmax(dim=1).cpu())
        precision, recall_curve, _ = precision_recall_curve(y_test_batch.cpu(), predictions[:, 1].cpu())
        average_precision = average_precision_score(y_test_batch.cpu(), predictions[:, 1].cpu())

        print("Accuracy:", accuracy)
        print("F1 Score:", f1)
        print("Recall:", recall)
        print("Average Precision:", average_precision)

    return accuracy, f1, recall, average_precision

accuracy, f1, recall, average_precision = evaluate_model(model, X_test_tensor, y_test_tensor)


Accuracy: 0.443498841776614
F1 Score: 0.6141653108394026
Recall: 0.4632132211209303
Average Precision: 0.919195917158424


In [92]:
#rfe
from sklearn.metrics import accuracy_score, f1_score, precision_recall_curve, average_precision_score, recall_score

def evaluate_model(model, X_test_tensor, y_test_tensor):
    model.eval()
    with torch.no_grad():
        X_test_batch, y_test_batch = X_test_tensor.numpy(), y_test_tensor.numpy()

        X_test_batch = torch.tensor(X_test_batch).to(device)
        y_test_batch = torch.tensor(y_test_batch).to(device)

        output, _ = model.encoder(X_test_batch)
        logits, predictions = model.classify(output)

        accuracy = accuracy_score(y_test_batch.cpu(), logits.argmax(dim=1).cpu())
        f1 = f1_score(y_test_batch.cpu(), predictions.argmax(dim=1).cpu())
        recall = recall_score(y_test_batch.cpu(), predictions.argmax(dim=1).cpu())
        precision, recall_curve, _ = precision_recall_curve(y_test_batch.cpu(), predictions[:, 1].cpu())
        average_precision = average_precision_score(y_test_batch.cpu(), predictions[:, 1].cpu())

        print("Accuracy:", accuracy)
        print("F1 Score:", f1)
        print("Recall:", recall)
        print("Average Precision:", average_precision)

    return accuracy, f1, recall, average_precision

accuracy, f1, recall, average_precision = evaluate_model(model, X_test_tensor, y_test_tensor)


Accuracy: 0.6645986504179675
F1 Score: 0.6818245748294922
Recall: 0.7
Average Precision: 0.6946559224564999


## Going forward

In [None]:
# what if we want to train more lora layers :)
class TabNetModel(nn.Module):
    def __init__(
        self,
        num_features=56,
        feature_dims=56,
        output_dim=56,
        num_decision_steps=6,
        relaxation_factor=0.5,
        batch_momentum=0.001,
        virtual_batch_size=2,
        num_classes=2,
        epsilon=0.00001
    ):
        super().__init__()

        self.num_features = num_features
        self.feature_dims = feature_dims
        self.output_dim = output_dim
        self.num_decision_steps = num_decision_steps
        self.relaxation_factor = relaxation_factor
        self.batch_momentum = batch_momentum
        self.virtual_batch_size = virtual_batch_size
        self.num_classes = num_classes
        self.epsilon = epsilon

        self.feature_transform_linear1 = LoRALinear(num_features, self.feature_dims * 2, rank=16)
        self.feature_transform_linear2 = LoRALinear(self.feature_dims * 2, self.feature_dims * 2, rank=16)
        self.feature_transform_linear3 = LoRALinear(self.feature_dims, self.feature_dims * 2, rank=16)
        self.feature_transform_linear4 = LoRALinear(self.feature_dims * 2, self.feature_dims * 2, rank=16)

        self.BN = torch.nn.BatchNorm1d(num_features, momentum=batch_momentum)
        self.BN1 = torch.nn.BatchNorm1d(self.feature_dims * 2, momentum=batch_momentum)
        self.BN2 = torch.nn.BatchNorm1d(self.num_features, momentum=batch_momentum)

        self.mask_linear_layer = torch.nn.Linear(self.feature_dims * 2 - output_dim, self.num_features, bias=False)
        self.final_classifier_layer = torch.nn.Linear(self.output_dim, self.num_classes, bias=False)
        self.sparsemax = nn.Softmax(dim=1)

    def encoder(self, data):
        batch_size = data.shape[0]
        features = self.BN(data)
        output_aggregated = torch.zeros([batch_size, self.output_dim], dtype=torch.float).to(device)

        masked_features = features
        mask_values = torch.zeros([batch_size, self.num_features]).to(device)

        aggregated_mask_values = torch.zeros([batch_size, self.num_features]).to(device)
        complemantary_aggregated_mask_values = torch.ones([batch_size, self.num_features]).to(device)

        total_entropy = 0

        for ni in range(self.num_decision_steps):
            if ni == 0:
                transform_f1 = self.feature_transform_linear1(masked_features)
                norm_transform_f1 = self.BN1(transform_f1)
                transform_f2 = self.feature_transform_linear2(norm_transform_f1)
                norm_transform_f2 = self.BN1(transform_f2)
            else:
                transform_f1 = self.feature_transform_linear1(masked_features)
                norm_transform_f1 = self.BN1(transform_f1)
                transform_f2 = self.feature_transform_linear2(norm_transform_f1)
                norm_transform_f2 = self.BN1(transform_f2)
                transform_f2 = (glu(norm_transform_f2, self.feature_dims) + transform_f1[:, :self.feature_dims]) * np.sqrt(0.5)
                transform_f3 = self.feature_transform_linear3(transform_f2)
                norm_transform_f3 = self.BN1(transform_f3)
                transform_f4 = self.feature_transform_linear4(norm_transform_f3)
                norm_transform_f4 = self.BN1(transform_f4)
                transform_f4 = (glu(norm_transform_f4, self.feature_dims) + transform_f3[:, :self.feature_dims]) * np.sqrt(0.5)
                decision_out = torch.nn.ReLU(inplace=True)(transform_f4[:, :self.output_dim])
                output_aggregated = torch.add(decision_out, output_aggregated)
                scale_agg = torch.sum(decision_out, axis=1, keepdim=True) / (self.num_decision_steps - 1)
                aggregated_mask_values = torch.add(aggregated_mask_values, mask_values * scale_agg)
                features_for_coef = transform_f4[:, :]
                if ni < (self.num_decision_steps - 1):
                    mask_linear_layer = self.mask_linear_layer(features_for_coef)
                    mask_linear_norm = self.BN2(mask_linear_layer)
                    mask_linear_norm = torch.mul(mask_linear_norm, complemantary_aggregated_mask_values)
                    mask_values = self.sparsemax(mask_linear_norm)
                    complemantary_aggregated_mask_values = torch.mul(complemantary_aggregated_mask_values, self.relaxation_factor - mask_values)
                    total_entropy = torch.add(total_entropy, torch.mean(torch.sum(-mask_values * torch.log(mask_values + self.epsilon), axis=1)) / (self.num_decision_steps - 1))
                    masked_features = torch.mul(mask_values, features)

        return output_aggregated, total_entropy

    def classify(self, output_logits):
        logits = self.final_classifier_layer(output_logits)
        predictions = F.softmax(logits, dim=1)
        return logits, predictions


In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader
from torch.optim import Adam
import math
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the saved model
num_features = X_train_tensor.shape[1]
model = TabNetModel(num_features=num_features, num_classes=2)
model.load_state_dict(torch.load('path_to_save_model/tabnet_with_lora.pt'))
model = model.to(device)

# Freeze the first LoRA layer
for name, param in model.named_parameters():
    if 'feature_transform_linear1.U' in name or 'feature_transform_linear1.V' in name:
        param.requires_grad = False
    else:
        param.requires_grad = True

# Create DataLoader for the new dataset
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Define optimizer and loss function
optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.02)
loss_function = torch.nn.CrossEntropyLoss()

# Training loop
def train_model(model, train_loader, epochs=10):
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            output, _ = model.encoder(X_batch)
            loss = loss_function(model.classify(output)[0], y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}")

train_model(model, train_loader)

# Save the model with the newly trained LoRA layers
torch.save(model.state_dict(), 'path_to_save_model/tabnet_with_new_lora.pt')

# Zero-shot testing on the new dataset
model.eval()
with torch.no_grad():
    X_test_tensor = X_test_tensor.to(device)
    y_test_tensor = y_test_tensor.to(device)
    output, _ = model.encoder(X_test_tensor)
    logits, predictions = model.classify(output)
    predicted_labels = torch.argmax(predictions, dim=1)

# Convert predictions and labels to CPU for sklearn metrics
predicted_labels_cpu = predicted_labels.cpu()
y_test_tensor_cpu = y_test_tensor.cpu()

# Evaluate performance
accuracy = accuracy_score(y_test_tensor_cpu, predicted_labels_cpu)
precision = precision_score(y_test_tensor_cpu, predicted_labels_cpu, average='weighted')
recall = recall_score(y_test_tensor_cpu, predicted_labels_cpu, average='weighted')
f1 = f1_score(y_test_tensor_cpu, predicted_labels_cpu, average='weighted')

print(f'Zero-shot testing accuracy: {accuracy:.4f}')
print(f'Zero-shot testing precision: {precision:.4f}')
print(f'Zero-shot testing recall: {recall:.4f}')
print(f'Zero-shot testing F1 score: {f1:.4f}')
