In [1]:
import time
import random
import torch
import pandas as pd
from sklearn.model_selection import ParameterSampler
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch.utils.data import DataLoader, Dataset
from transformers import BertModel, AdamW
from sklearn.metrics import f1_score
import warnings
warnings.filterwarnings("ignore")
# Columns requested from each CSV via `usecols`: the selected input columns,
# grouped below by name prefix, followed by the target column 'Label'.
feature = [
    # --- HH_* ---
    'HH_0.01_covariance_0_1',
    'HH_0.01_mean_0',
    'HH_0.01_radius_0_1',
    'HH_0.01_std_0',
    'HH_0.1_covariance_0_1',
    'HH_0.1_magnitude_0_1',
    'HH_0.1_std_0',
    'HH_0.1_weight_0',
    'HH_1_magnitude_0_1',
    'HH_1_pcc_0_1',
    'HH_1_radius_0_1',
    'HH_1_std_0',
    'HH_1_weight_0',
    'HH_3_covariance_0_1',
    'HH_3_magnitude_0_1',
    'HH_3_radius_0_1',
    'HH_3_std_0',
    'HH_3_weight_0',
    'HH_5_mean_0',
    'HH_5_pcc_0_1',
    'HH_5_std_0',
    'HH_5_weight_0',
    # --- HH_jit_* ---
    'HH_jit_0.01_std',
    'HH_jit_0.1_mean',
    'HH_jit_0.1_std',
    'HH_jit_1_mean',
    'HH_jit_1_std',
    'HH_jit_1_weight',
    'HH_jit_3_std',
    'HH_jit_5_std',
    # --- HpHp_* ---
    'HpHp_0.01_magnitude_0_1',
    'HpHp_0.01_radius_0_1',
    'HpHp_0.01_weight_0',
    'HpHp_0.1_covariance_0_1',
    'HpHp_0.1_magnitude_0_1',
    'HpHp_0.1_mean_0',
    'HpHp_0.1_pcc_0_1',
    'HpHp_0.1_std_0',
    'HpHp_0.1_weight_0',
    'HpHp_1_covariance_0_1',
    'HpHp_1_magnitude_0_1',
    'HpHp_1_mean_0',
    'HpHp_1_pcc_0_1',
    'HpHp_1_radius_0_1',
    'HpHp_1_std_0',
    'HpHp_3_pcc_0_1',
    'HpHp_3_radius_0_1',
    'HpHp_5_pcc_0_1',
    'HpHp_5_radius_0_1',
    'HpHp_5_std_0',
    'HpHp_5_weight_0',
    # --- MI_dir_* ---
    'MI_dir_0.01_mean',
    'MI_dir_0.01_std',
    'MI_dir_0.01_weight',
    'MI_dir_0.1_mean',
    'MI_dir_0.1_std',
    'MI_dir_0.1_weight',
    'MI_dir_1_std',
    'MI_dir_1_weight',
    'MI_dir_3_mean',
    'MI_dir_5_mean',
    'MI_dir_5_std',
    'MI_dir_5_weight',
    # --- target ---
    'Label',
]
# Define the Transformer-based model
class IoTClassifier(torch.nn.Module):
    """Classifier that routes a projected feature vector through a pretrained BERT encoder.

    Each input row is projected to the encoder's hidden width, handed to the
    encoder as a one-token sequence of input embeddings, mean-pooled over the
    sequence dimension and finally mapped to class logits.

    Args:
        input_dim: Number of features per sample.
        num_classes: Number of classes (length of the returned logit vector).
        pretrained_name: Checkpoint name or path passed to
            ``BertModel.from_pretrained``. Defaults to ``'bert-base-uncased'``,
            which is what this class always loaded before the parameter existed.
    """

    def __init__(self, input_dim, num_classes, pretrained_name='bert-base-uncased'):
        super().__init__()
        encoder = BertModel.from_pretrained(pretrained_name)
        # Take the projection width from the loaded checkpoint instead of
        # hard-coding 768, so the linear layers always match the encoder.
        hidden_size = encoder.config.hidden_size
        # Sub-modules are assigned in the order fc1 -> transformer -> fc2 so
        # that parameter / state_dict ordering stays the same as before.
        self.fc1 = torch.nn.Linear(input_dim, hidden_size)
        self.transformer = encoder
        self.fc2 = torch.nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits for a batch of feature rows.

        Args:
            x: Float tensor whose last dimension is ``input_dim``
               (the training code in this file passes 2-D batches).

        Returns:
            Tensor of logits with ``num_classes`` entries per sample.
        """
        x = torch.relu(self.fc1(x))
        # unsqueeze(1) inserts a sequence axis of length one; the encoder is
        # fed embeddings directly rather than token ids.
        x = self.transformer(inputs_embeds=x.unsqueeze(1)).last_hidden_state
        x = torch.mean(x, dim=1)  # pool over the sequence axis
        return self.fc2(x)

# Dataset class
class IoTDataset(Dataset):
    """Map-style dataset that pairs feature rows with integer class labels.

    Both inputs are converted to tensors once at construction time:
    ``features`` to ``torch.float32`` and ``labels`` to ``torch.long``.
    """

    def __init__(self, features, labels):
        self.features = torch.tensor(
            features,
            dtype=torch.float32,
        )
        self.labels = torch.tensor(
            labels,
            dtype=torch.long,
        )

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        sample = self.features[idx]
        target = self.labels[idx]
        return sample, target

    def __len__(self):
        """Return the number of samples, i.e. the number of labels."""
        return len(self.labels)

# Read both splits, keeping only the columns listed in `feature`, then
# replace every -9999 entry and every missing value with 0.
train = pd.read_csv('./small/AD-S1.csv', usecols=feature).replace(-9999, 0).fillna(0)
test = pd.read_csv('./small/AD-S2.csv', usecols=feature).replace(-9999, 0).fillna(0)

# Separate the 'Label' column from the input columns.
X_train, y_train = train.drop(columns=['Label']), train['Label']
X_test, y_test = test.drop(columns=['Label']), test['Label']

# Integer-encode the labels; the encoder is fitted on the training labels
# and then applied unchanged to the test labels.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

# Standardise the inputs with statistics estimated on the training split,
# reusing the same fitted scaler for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Candidate values for each hyperparameter. Five combinations are drawn from
# this grid with a fixed random_state, so the draw is the same on every run.
param_grid = dict(
    batch_size=[16, 32, 64],
    learning_rate=[1e-5, 3e-5, 5e-5],
    epochs=[3, 5, 7],
    weight_decay=[0, 0.01, 0.1],
)
param_list = [*ParameterSampler(param_grid, n_iter=5, random_state=42)]

# Train function with hyperparameters
def _run_training_epochs(model, train_loader, criterion, optimizer, device, epochs):
    """Optimise ``model`` for ``epochs`` passes over ``train_loader``, printing the mean batch loss after each pass."""
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)

            loss = criterion(model(features), labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {total_loss / len(train_loader):.4f}')


def _collect_predictions(model, loader, device):
    """Return ``(true_labels, predicted_labels)`` lists for every sample in ``loader``, computed without gradients."""
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for features, labels in loader:
            outputs = model(features.to(device))
            # Index of the largest logit per row is the predicted class.
            predicted = outputs.argmax(dim=1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    return all_labels, all_preds


def train_and_evaluate(params, seed=None):
    """Train a fresh ``IoTClassifier`` with the given hyperparameters and score it on the test split.

    Reads the module-level ``X_train``, ``y_train``, ``X_test``, ``y_test`` and
    ``label_encoder`` prepared earlier in this file.

    Args:
        params: Mapping with the keys ``'batch_size'``, ``'learning_rate'``,
            ``'epochs'`` and ``'weight_decay'``.
        seed: Optional integer passed to ``torch.manual_seed`` before the model
            and data loaders are created, so weight initialisation and batch
            shuffling repeat across calls. ``None`` (the default) leaves the
            global RNG state untouched.

    Returns:
        Macro-averaged F1 score of the trained model on the test split.
    """
    print(f"Training with params: {params}")
    if seed is not None:
        torch.manual_seed(seed)

    batch_size = params['batch_size']
    learning_rate = params['learning_rate']
    epochs = params['epochs']
    weight_decay = params['weight_decay']

    # Prepare DataLoader
    train_dataset = IoTDataset(X_train, y_train)
    test_dataset = IoTDataset(X_test, y_test)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    # Initialize model, loss, optimizer
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = IoTClassifier(X_train.shape[1], len(label_encoder.classes_)).to(device)
    criterion = torch.nn.CrossEntropyLoss()
    # torch.optim.AdamW replaces the deprecated transformers.AdamW. eps=1e-6
    # matches the default of the deprecated class (PyTorch's default is 1e-8).
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=learning_rate,
        weight_decay=weight_decay,
        eps=1e-6,
    )

    _run_training_epochs(model, train_loader, criterion, optimizer, device, epochs)

    all_labels, all_preds = _collect_predictions(model, test_loader, device)

    f1 = f1_score(all_labels, all_preds, average='macro')
    print(f'F1-Score: {f1:.4f}')
    return f1

# Perform Random Search
# Start below any attainable score so the first completed trial is always
# recorded. Starting from 0 with a strict '>' comparison would leave
# best_params as None if every trial scored exactly 0.0.
# NOTE(review): train_and_evaluate scores each candidate on the test split, so
# the best score reported here is selected on the same data it is measured on
# and is likely optimistic - consider ranking on a separate validation split.
best_f1 = float('-inf')
best_params = None

for params in param_list:
    f1 = train_and_evaluate(params)
    if f1 > best_f1:
        best_f1, best_params = f1, params

print(f'Best F1-Score: {best_f1:.4f} with params: {best_params}')


Training with params: {'weight_decay': 0, 'learning_rate': 3e-05, 'epochs': 3, 'batch_size': 32}
Epoch [1/3], Loss: 1.6505
Epoch [2/3], Loss: 1.0269
Epoch [3/3], Loss: 0.8035
F1-Score: 0.7229
Training with params: {'weight_decay': 0, 'learning_rate': 1e-05, 'epochs': 3, 'batch_size': 16}
Epoch [1/3], Loss: 1.9111
Epoch [2/3], Loss: 1.2580
Epoch [3/3], Loss: 1.0174
F1-Score: 0.6513
Training with params: {'weight_decay': 0.01, 'learning_rate': 3e-05, 'epochs': 7, 'batch_size': 16}
Epoch [1/7], Loss: 1.6092
Epoch [2/7], Loss: 1.0050
Epoch [3/7], Loss: 0.7701
Epoch [4/7], Loss: 0.6550
Epoch [5/7], Loss: 0.5595
Epoch [6/7], Loss: 0.5052
Epoch [7/7], Loss: 0.4499
F1-Score: 0.8305
Training with params: {'weight_decay': 0.01, 'learning_rate': 3e-05, 'epochs': 3, 'batch_size': 32}
Epoch [1/3], Loss: 1.6331
Epoch [2/3], Loss: 1.0103
Epoch [3/3], Loss: 0.7837
F1-Score: 0.6788
Training with params: {'weight_decay': 0, 'learning_rate': 1e-05, 'epochs': 7, 'batch_size': 16}
Epoch [1/7], Loss: 1.9000

In [2]:
best_params

{'weight_decay': 0.01, 'learning_rate': 3e-05, 'epochs': 7, 'batch_size': 16}