# MFCC PCA 실험

In [4]:
import pandas as pd 

train_merge_pca = pd.read_csv('nsl_kdd_train_mfccpca.csv')
test_merge_pca = pd.read_csv('nsl_kdd_test_mfccpca.csv')

In [5]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd

class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe.iloc[:, :-1].values  # 마지막 열을 제외한 데이터
        self.labels = dataframe.iloc[:, -1].values  # 마지막 열을 라벨 데이터로 사용

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = {'data': torch.tensor(self.data[idx], dtype=torch.float32),
                  'label': torch.tensor(self.labels[idx], dtype=torch.long)}
        return sample

In [6]:
train_dataset = CustomDataset(train_merge_pca)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, drop_last=True)

test_dataset = CustomDataset(test_merge_pca)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True, drop_last=True)

# Swin Transformer

In [None]:
import torch.nn as nn
import torch.optim as optim
from swin_transformer import SwinTimeSeriesTransformer 
import torch

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = SwinTimeSeriesTransformer(
    input_dim=1,
    seq_len=15,  # Now divisible by patch_size
    patch_size=3,  # Adjusted patch size
    num_classes=2,
    dim=64,
    depth=6,
    num_heads=8,
    window_size=3, 
    mlp_ratio=4.0,
    dropout=0.1
    
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

from sklearn.metrics import f1_score, classification_report
import numpy as np
from tqdm.notebook import trange

valid_loss_min = np.inf
val_loss = []
val_acc = []
train_loss = []
train_acc = []
total_step = len(train_loader)
valid_loss = np.inf  # train acc
f1_ = 0  # val f1
epoch_in = trange(100, desc='training')
best_acc=0

for epoch in epoch_in:
    model.to(device)
    model.train()
    running_loss = 0.0
    correct = 0
    total=0
    
    preds_ = []
    targets_ = []

    for batch_idx, train_dict in enumerate(train_loader):
        print(f"Batch {batch_idx}, data shape: {train_dict['data'].shape}")

        seq_len = 12

        inputs = train_dict['data'].to(device).float()
        inputs = inputs.reshape(inputs.size(0), seq_len, 1)  # Reshape to (batch_size, seq_len, input_dim)

        labels = train_dict['label'].to(device).long()

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _,pred = torch.max(outputs, dim=1)
        correct += torch.sum(pred==labels).item()
        total += labels.size(0)
        # if (batch_idx) % 1000 == 0:
        #     print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
        #            .format(epoch, n_epochs, batch_idx, total_step, loss.item()))

    train_acc.append(100 * correct / total)
    train_loss.append(running_loss/total_step)
    print(f'\ntrain loss: {np.mean(train_loss):.4f}, train acc: {(100 * correct / total):.4f}')

    
    batch_loss = 0
    total_t = 0
    correct_t = 0

preds_ = []
targets_ = []

with torch.no_grad():
    model.to(device)
    model.eval()
    for test_dict in test_loader:  # You might want to use a validation loader here
        data_t = test_dict['data'].to(device).float()
        inputs = data_t.reshape(data_t.size(0), seq_len, 1)  # Reshaping to match input_dim (1)

        target_t = test_dict['label'].to(device).long()

        outputs_t = model(inputs)  # Pass the reshaped inputs

        pred = outputs_t.argmax(dim=1).to(device)
        preds_.append(pred)  # Append tensors to list
        targets_.append(target_t)  # Append tensors to list

        loss_t = criterion(outputs_t, target_t)
        batch_loss += loss_t.item()
        _, pred_t = torch.max(outputs_t, dim=1)
        correct_t += torch.sum(pred_t == target_t).item()
        total_t += target_t.size(0)

    # Concatenate all predictions and targets after the loop
    preds_ = torch.cat(preds_).detach().cpu().numpy()
    targets_ = torch.cat(targets_).detach().cpu().numpy()

    f1score = f1_score(targets_, preds_, average='macro')
    if best_acc < f1score:
        best_acc = f1score
        with open("0117_nslkdd_swin_transformer_mfcc.txt", "a") as text_file:
            print('epoch=====', epoch, file=text_file)
            print(classification_report(targets_, preds_, digits=4), file=text_file)
        torch.save(model, f'0117_nslkdd_swin_transformer_mfcc.pt')
    epoch_in.set_postfix_str(f"epoch = {epoch},  f1_score = {f1score}, best_f1 = {best_acc}")

training:   0%|          | 0/100 [00:00<?, ?it/s]

Batch 0, data shape: torch.Size([64, 12])
Batch 1, data shape: torch.Size([64, 12])
Batch 2, data shape: torch.Size([64, 12])
Batch 3, data shape: torch.Size([64, 12])
Batch 4, data shape: torch.Size([64, 12])
Batch 5, data shape: torch.Size([64, 12])
Batch 6, data shape: torch.Size([64, 12])
Batch 7, data shape: torch.Size([64, 12])
Batch 8, data shape: torch.Size([64, 12])
Batch 9, data shape: torch.Size([64, 12])
Batch 10, data shape: torch.Size([64, 12])
Batch 11, data shape: torch.Size([64, 12])
Batch 12, data shape: torch.Size([64, 12])
Batch 13, data shape: torch.Size([64, 12])
Batch 14, data shape: torch.Size([64, 12])
Batch 15, data shape: torch.Size([64, 12])
Batch 16, data shape: torch.Size([64, 12])
Batch 17, data shape: torch.Size([64, 12])
Batch 18, data shape: torch.Size([64, 12])
Batch 19, data shape: torch.Size([64, 12])
Batch 20, data shape: torch.Size([64, 12])
Batch 21, data shape: torch.Size([64, 12])
Batch 22, data shape: torch.Size([64, 12])
Batch 23, data shape: