# MFCC PCA 실험 

In [10]:
import pandas as pd

In [11]:
train_merge_pca = pd.read_csv('cic_2023_train_mfcc_pca.csv')
test_merge_pca = pd.read_csv('cic_2023_test_mfcc_pca.csv')

In [15]:
train_merge_pca

Unnamed: 0,pca_0,pca_1,pca_2,pca_3,pca_4,pca_5,pca_6,pca_7,pca_8,pca_9,pca_10,pca_11,label
0,1.297027,-1.445063,0.653462,0.749018,0.420317,0.509381,0.914462,-0.386266,0.019797,-0.434370,0.172632,0.030953,0.0
1,-1.899322,-1.878189,2.746801,-0.866124,1.297453,-0.301020,-0.272851,-0.746504,-0.395779,1.183710,-0.036660,-0.967965,0.0
2,1.320798,0.788518,0.988543,-0.839429,0.001624,-1.489243,1.517843,1.069401,-0.120957,0.111761,-0.206164,0.048264,0.0
3,1.295655,-1.446690,0.654972,0.749146,0.422111,0.509073,0.913354,-0.386600,0.021497,-0.433797,0.171625,0.031289,0.0
4,-1.911771,-1.066615,2.015438,-1.372473,-1.642822,-1.151493,-0.716750,-0.311838,0.320702,-0.578859,1.063975,0.103759,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
88507,-0.237353,-1.060654,-3.448346,-0.334442,-0.689181,0.549381,1.012990,-0.358220,0.363269,0.317598,0.125820,-0.316169,7.0
88508,-2.381057,-1.354230,-0.335943,0.541857,-0.349280,-1.015842,-0.605987,0.116241,-0.398470,-0.307919,0.090308,-0.028359,7.0
88509,-2.478433,-1.474713,0.259903,0.531056,-0.536327,-1.270346,-0.039930,0.198460,-0.263484,-0.540966,0.306293,0.266891,7.0
88510,-2.664738,1.819847,-0.155207,2.463774,-1.241872,-0.088288,0.347725,0.075879,0.554924,-0.533411,-0.321004,-0.079987,7.0


In [12]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd

class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe.iloc[:, :-1].values  # 마지막 열을 제외한 데이터
        self.labels = dataframe.iloc[:, -1].values  # 마지막 열을 라벨 데이터로 사용

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = {'data': torch.tensor(self.data[idx], dtype=torch.float32),
                  'label': torch.tensor(self.labels[idx], dtype=torch.long)}
        return sample

In [13]:
train_dataset_pca = CustomDataset(train_merge_pca)
train_loader_pca = DataLoader(train_dataset_pca, batch_size=64, shuffle=True, drop_last=True)

test_dataset_pca = CustomDataset(test_merge_pca)
test_loader_pca = DataLoader(test_dataset_pca, batch_size=64, shuffle=False, drop_last=True)

In [14]:
import torch.nn as nn
import torch.optim as optim
from swin_transformer import SwinTimeSeriesTransformer 
import torch

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = SwinTimeSeriesTransformer(
    input_dim=1,
    seq_len=15,  # Now divisible by patch_size
    patch_size=3,  # Adjusted patch size
    num_classes=8,
    dim=64,
    depth=6,
    num_heads=8,
    window_size=3, 
    mlp_ratio=4.0,
    dropout=0.1
    
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

from sklearn.metrics import f1_score, classification_report
import numpy as np
from tqdm.notebook import trange

valid_loss_min = np.inf
val_loss = []
val_acc = []
train_loss = []
train_acc = []
total_step = len(train_loader_pca)
valid_loss = np.inf  # train acc
f1_ = 0  # val f1
epoch_in = trange(100, desc='training')
best_acc=0

for epoch in epoch_in:
    model.to(device)
    model.train()
    running_loss = 0.0
    correct = 0
    total=0
    
    preds_ = []
    targets_ = []

    for batch_idx, train_dict in enumerate(train_loader_pca):
        print(f"Batch {batch_idx}, data shape: {train_dict['data'].shape}")

        seq_len = 12

        inputs = train_dict['data'].to(device).float()
        inputs = inputs.reshape(inputs.size(0), seq_len, 1)  # Reshape to (batch_size, seq_len, input_dim)

        labels = train_dict['label'].to(device).long()

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _,pred = torch.max(outputs, dim=1)
        correct += torch.sum(pred==labels).item()
        total += labels.size(0)
        # if (batch_idx) % 1000 == 0:
        #     print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
        #            .format(epoch, n_epochs, batch_idx, total_step, loss.item()))

    train_acc.append(100 * correct / total)
    train_loss.append(running_loss/total_step)
    print(f'\ntrain loss: {np.mean(train_loss):.4f}, train acc: {(100 * correct / total):.4f}')

    
    batch_loss = 0
    total_t = 0
    correct_t = 0

preds_ = []
targets_ = []

with torch.no_grad():
    model.to(device)
    model.eval()
    for test_dict in train_loader_pca:  # You might want to use a validation loader here
        data_t = test_dict['data'].to(device).float()
        inputs = data_t.reshape(data_t.size(0), seq_len, 1)  # Reshaping to match input_dim (1)

        target_t = test_dict['label'].to(device).long()

        outputs_t = model(inputs)  # Pass the reshaped inputs

        pred = outputs_t.argmax(dim=1).to(device)
        preds_.append(pred)  # Append tensors to list
        targets_.append(target_t)  # Append tensors to list

        loss_t = criterion(outputs_t, target_t)
        batch_loss += loss_t.item()
        _, pred_t = torch.max(outputs_t, dim=1)
        correct_t += torch.sum(pred_t == target_t).item()
        total_t += target_t.size(0)

    # Concatenate all predictions and targets after the loop
    preds_ = torch.cat(preds_).detach().cpu().numpy()
    targets_ = torch.cat(targets_).detach().cpu().numpy()

    f1score = f1_score(targets_, preds_, average='macro')
    if best_acc < f1score:
        best_acc = f1score
        with open("0117_cic_swin_transformer_mfcc.txt", "a") as text_file:
            print('epoch=====', epoch, file=text_file)
            print(classification_report(targets_, preds_, digits=4), file=text_file)
        torch.save(model, f'0117_cic_swin_transformer_mfcc.pt')
    epoch_in.set_postfix_str(f"epoch = {epoch},  f1_score = {f1score}, best_f1 = {best_acc}")

training:   0%|          | 0/100 [00:00<?, ?it/s]

Batch 0, data shape: torch.Size([64, 12])


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
