# MFCC PCA 실험 

In [4]:
import pandas as pd
train_merge_pca = pd.read_csv('iot20d_reshaped_train_sample_merge.csv', index_col=None)
test_merge_pca = pd.read_csv('iot20d_reshaped_test_sample_merge.csv', index_col=None)

In [6]:
train_merge_pca

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,label
0,0,-33.710038,-6.334452,-27.143144,-0.606679,-11.370514,2.695571,-4.664715,-2.641940,6.632191,-6.338020,1.612597,5.552410,-4.544200,0
1,1,-40.375017,14.212651,1.763182,12.016703,-24.316610,37.053350,-6.720660,-14.689805,8.472120,5.485937,-7.179709,-2.824660,9.077924,0
2,2,-16.618506,13.368126,-4.060041,6.187063,-10.548125,33.493583,8.215179,6.972005,28.188855,10.602588,-2.771691,-1.992006,10.744815,0
3,3,-45.711610,10.236180,4.542680,11.547588,-24.644597,37.200975,-6.018291,-15.080594,9.141643,3.231752,-4.850000,-4.608707,8.939402,0
4,4,-92.399702,-25.770358,-21.386170,29.173253,-8.811529,13.414745,-23.272788,-7.771816,34.476214,-15.358442,12.999773,14.267405,-8.601142,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89995,89995,3.489452,21.259421,-22.061928,6.216940,27.196671,26.796052,30.684325,12.904987,12.772483,6.841569,2.535513,-2.041178,-13.075402,8
89996,89996,-15.538074,26.465661,-10.533717,21.023354,-0.615558,10.622823,5.836665,7.514305,-1.012314,-6.034214,6.938872,-0.135282,-2.873483,8
89997,89997,-8.765533,11.772582,-15.241978,-0.311987,48.621343,44.858219,33.289573,15.510735,-4.239626,-6.390871,-15.229858,-13.748360,-9.639236,8
89998,89998,-20.447929,25.419256,-4.054054,25.962007,9.082178,-4.157606,7.856613,2.439329,6.916420,-14.584213,3.181416,-2.542657,-3.054895,8


In [7]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"



import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd

class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe.iloc[:, :-1].values  # 마지막 열을 제외한 데이터
        self.labels = dataframe.iloc[:, -1].values  # 마지막 열을 라벨 데이터로 사용

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = {'data': torch.tensor(self.data[idx], dtype=torch.float32),
                  'label': torch.tensor(self.labels[idx], dtype=torch.long)}
        return sample

In [8]:
train_dataset_pca = CustomDataset(train_merge_pca)
train_loader_pca = DataLoader(train_dataset_pca, batch_size=64, shuffle=True, drop_last=True)

test_dataset_pca = CustomDataset(test_merge_pca)
test_loader_pca = DataLoader(test_dataset_pca, batch_size=64, shuffle=False, drop_last=True)

In [12]:
import torch.nn as nn
import torch.optim as optim
from swin_transformer import SwinTimeSeriesTransformer 
import torch

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

model = SwinTimeSeriesTransformer(
    input_dim=1,
    seq_len=14,  # Now divisible by patch_size
    patch_size=2,  # Adjusted patch size
    num_classes=9,
    dim=64,
    depth=6,
    num_heads=8,
    window_size=3, 
    mlp_ratio=4.0,
    dropout=0.1
    
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

from sklearn.metrics import f1_score, classification_report
import numpy as np
from tqdm.notebook import trange

valid_loss_min = np.inf
val_loss = []
val_acc = []
train_loss = []
train_acc = []
total_step = len(train_loader_pca)
valid_loss = np.inf  # train acc
f1_ = 0  # val f1
epoch_in = trange(10, desc='training')
best_acc=0

for epoch in epoch_in:
    model.to(device)
    model.train()
    running_loss = 0.0
    correct = 0
    total=0
    
    preds_ = []
    targets_ = []

    for batch_idx, train_dict in enumerate(train_loader_pca):
        print(f"Batch {batch_idx}, data shape: {train_dict['data'].shape}")

        seq_len = 14

        inputs = train_dict['data'].to(device).float()
        inputs = inputs.reshape(inputs.size(0), seq_len, 1)
        labels = train_dict['label'].to(device).long()

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _,pred = torch.max(outputs, dim=1)
        correct += torch.sum(pred==labels).item()
        total += labels.size(0)
        # if (batch_idx) % 1000 == 0:
        #     print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
        #            .format(epoch, n_epochs, batch_idx, total_step, loss.item()))

    train_acc.append(100 * correct / total)
    train_loss.append(running_loss/total_step)
    print(f'\ntrain loss: {np.mean(train_loss):.4f}, train acc: {(100 * correct / total):.4f}')

    
    batch_loss = 0
    total_t = 0
    correct_t = 0

preds_ = []
targets_ = []

with torch.no_grad():
    model.to(device)
    model.eval()
    for test_dict in test_loader_pca:  # You might want to use a validation loader here
        data_t = test_dict['data'].to(device).float()
        inputs = data_t.reshape(data_t.size(0), seq_len, 1)  # Reshaping to match input_dim (1)

        target_t = test_dict['label'].to(device).long()

        outputs_t = model(inputs)  # Pass the reshaped inputs

        pred = outputs_t.argmax(dim=1).to(device)
        preds_.append(pred)  # Append tensors to list
        targets_.append(target_t)  # Append tensors to list

        loss_t = criterion(outputs_t, target_t)
        batch_loss += loss_t.item()
        _, pred_t = torch.max(outputs_t, dim=1)
        correct_t += torch.sum(pred_t == target_t).item()
        total_t += target_t.size(0)

    # Concatenate all predictions and targets after the loop
    preds_ = torch.cat(preds_).detach().cpu().numpy()
    targets_ = torch.cat(targets_).detach().cpu().numpy()

    f1score = f1_score(targets_, preds_, average='macro')
    if best_acc < f1score:
        best_acc = f1score
        with open("0120_iotid_swin_transformer.txt", "a") as text_file:
            print('epoch=====', epoch, file=text_file)
            print(classification_report(targets_, preds_, digits=4), file=text_file)
        torch.save(model, f'0120_iotid_swin_transformer.pt')
    epoch_in.set_postfix_str(f"epoch = {epoch},  f1_score = {f1score}, best_f1 = {best_acc}")

Using device: cuda


training:   0%|          | 0/1 [00:00<?, ?it/s]

Batch 0, data shape: torch.Size([64, 14])
Batch 1, data shape: torch.Size([64, 14])
Batch 2, data shape: torch.Size([64, 14])
Batch 3, data shape: torch.Size([64, 14])
Batch 4, data shape: torch.Size([64, 14])
Batch 5, data shape: torch.Size([64, 14])
Batch 6, data shape: torch.Size([64, 14])
Batch 7, data shape: torch.Size([64, 14])
Batch 8, data shape: torch.Size([64, 14])
Batch 9, data shape: torch.Size([64, 14])
Batch 10, data shape: torch.Size([64, 14])
Batch 11, data shape: torch.Size([64, 14])
Batch 12, data shape: torch.Size([64, 14])
Batch 13, data shape: torch.Size([64, 14])
Batch 14, data shape: torch.Size([64, 14])
Batch 15, data shape: torch.Size([64, 14])
Batch 16, data shape: torch.Size([64, 14])
Batch 17, data shape: torch.Size([64, 14])
Batch 18, data shape: torch.Size([64, 14])
Batch 19, data shape: torch.Size([64, 14])
Batch 20, data shape: torch.Size([64, 14])
Batch 21, data shape: torch.Size([64, 14])
Batch 22, data shape: torch.Size([64, 14])
Batch 23, data shape:

KeyboardInterrupt: 

In [None]:
import torch.nn as nn
import torch.optim as optim
from swin_transformer import SwinTimeSeriesTransformer 
import torch

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

model = SwinTimeSeriesTransformer(
    input_dim=1,
    seq_len=12,  # Now divisible by patch_size
    patch_size=3,  # Adjusted patch size
    num_classes=2,
    dim=64,
    depth=6,
    num_heads=8,
    window_size=3, 
    mlp_ratio=4.0,
    dropout=0.1
    
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

from sklearn.metrics import f1_score, classification_report
import numpy as np
from tqdm.notebook import trange

valid_loss_min = np.inf
val_loss = []
val_acc = []
train_loss = []
train_acc = []
total_step = len(train_loader_pca)
valid_loss = np.inf  # train acc
f1_ = 0  # val f1
epoch_in = trange(100, desc='training')
best_acc=0

for epoch in epoch_in:
    model.to(device)
    model.train()
    running_loss = 0.0
    correct = 0
    total=0
    
    preds_ = []
    targets_ = []

    for batch_idx, train_dict in enumerate(train_loader_pca):
        print(f"Batch {batch_idx}, data shape: {train_dict['data'].shape}")

        seq_len = 12

        inputs = train_dict['data'].to(device).float()
        inputs = inputs.reshape(64,12,1)
        labels = train_dict['label'].to(device).long()
        print(f"Batch {batch_idx}, Inputs shape: {inputs.shape}, Labels: {labels}")

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _,pred = torch.max(outputs, dim=1)
        correct += torch.sum(pred==labels).item()
        total += labels.size(0)
        # if (batch_idx) % 1000 == 0:
        #     print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
        #            .format(epoch, n_epochs, batch_idx, total_step, loss.item()))

    train_acc.append(100 * correct / total)
    train_loss.append(running_loss/total_step)
    print(f'\ntrain loss: {np.mean(train_loss):.4f}, train acc: {(100 * correct / total):.4f}')

    
    batch_loss = 0
    total_t = 0
    correct_t = 0

preds_ = []
targets_ = []

with torch.no_grad():
    model.to(device)
    model.eval()
    for test_dict in test_loader_pca:  # You might want to use a validation loader here
        data_t = test_dict['data'].to(device).float()
        data_t = data_t.reshape(64,12,1)  # Reshaping to match input_dim (1)

        target_t = test_dict['label'].to(device).long()

        outputs_t = model(data_t)  # Pass the reshaped inputs

        pred = outputs_t.argmax(dim=1).to(device)
        preds_.append(pred)  # Append tensors to list
        targets_.append(target_t)  # Append tensors to list

        loss_t = criterion(outputs_t, target_t)
        batch_loss += loss_t.item()
        _, pred_t = torch.max(outputs_t, dim=1)
        correct_t += torch.sum(pred_t == target_t).item()
        total_t += target_t.size(0)

    # Concatenate all predictions and targets after the loop
    preds_ = torch.cat(preds_).detach().cpu().numpy()
    targets_ = torch.cat(targets_).detach().cpu().numpy()

    f1score = f1_score(targets_, preds_, average='macro')
    if best_acc < f1score:
        best_acc = f1score
        with open("0117_iotid_swin_transformer_mfcc.txt", "a") as text_file:
            print('epoch=====', epoch, file=text_file)
            print(classification_report(targets_, preds_, digits=4), file=text_file)
        torch.save(model, f'0117_iotid_swin_transformer_mfcc.pt')
    epoch_in.set_postfix_str(f"epoch = {epoch},  f1_score = {f1score}, best_f1 = {best_acc}")

# Mel spectrogram 이미지 생성 

# Test dataset

In [11]:
import os

# 이미지 저장 디렉토리 생성
output_directory_n = './melspec/test/normal'
os.makedirs(output_directory_n, exist_ok=True)

output_directory_1= './melspec/test/label1'
os.makedirs(output_directory_1, exist_ok=True)

output_directory_2= './melspec/test/label2'
os.makedirs(output_directory_2, exist_ok=True)

output_directory_3= './melspec/test/label3'
os.makedirs(output_directory_3, exist_ok=True)

output_directory_4= './melspec/test/label4'
os.makedirs(output_directory_4, exist_ok=True)

output_directory_5= './melspec/test/label5'
os.makedirs(output_directory_5, exist_ok=True)

output_directory_6= './melspec/test/label6'
os.makedirs(output_directory_6, exist_ok=True)

output_directory_7= './melspec/test/label7'
os.makedirs(output_directory_7, exist_ok=True)

output_directory_8= './melspec/test/label8'
os.makedirs(output_directory_8, exist_ok=True)

In [13]:
import os

# 이미지 저장 디렉토리 생성
output_directory_n = './melspec/train/normal'
#os.makedirs(output_directory_n, exist_ok=True)

output_directory_1= './melspec/train/label1'
#os.makedirs(output_directory_1, exist_ok=True)

output_directory_2= './melspec/train/label2'
#os.makedirs(output_directory_2, exist_ok=True)

output_directory_3= './melspec/train/label3'
#os.makedirs(output_directory_3, exist_ok=True)

output_directory_4= './melspec/train/label4'
#os.makedirs(output_directory_4, exist_ok=True)

output_directory_5= './melspec/train/label5'
#os.makedirs(output_directory_5, exist_ok=True)

output_directory_6= './melspec/train/label6'
#os.makedirs(output_directory_6, exist_ok=True)

output_directory_7= './melspec/train/label7'
#os.makedirs(output_directory_7, exist_ok=True)

output_directory_8= './melspec/train/label8'
#os.makedirs(output_directory_8, exist_ok=True)

In [14]:
import pandas as pd 

train_df = pd.read_csv('./iotid20_train_pca+mfcc.csv')

In [15]:
zero_label_rows = train_df[train_df['label'] == 0] # 정상데이터 
one_label_rows = train_df[train_df['label'] == 1] # 비정상 데이터 
two_label_rows = train_df[train_df['label'] == 2] # 비정상 데이터 
three_label_rows = train_df[train_df['label'] == 3] # 비정상 데이터 
four_label_rows = train_df[train_df['label'] == 4] # 비정상 데이터 
five_label_rows = train_df[train_df['label'] == 5] # 비정상 데이터 
six_label_rows = train_df[train_df['label'] == 6] # 비정상 데이터 
sev_label_rows = train_df[train_df['label'] == 7] # 비정상 데이터 
eig_label_rows = train_df[train_df['label'] == 8] # 비정상 데이터 

In [17]:
train__sample_val_normal = zero_label_rows.iloc[:,:-1].values
test_1 = one_label_rows.iloc[:,:-1].values
test_2 = two_label_rows.iloc[:,:-1].values
test_3 = three_label_rows.iloc[:,:-1].values
test_4 = four_label_rows.iloc[:,:-1].values
test_5 = five_label_rows.iloc[:,:-1].values
test_6 = six_label_rows.iloc[:,:-1].values
test_7 = sev_label_rows.iloc[:,:-1].values
test_8 = eig_label_rows.iloc[:,:-1].values

In [12]:
import librosa 
import librosa.display 
import matplotlib.pyplot as plt 
import tqdm 
import os
import numpy as np

SAMPLE_RATE = 1365 
DURATION = 0.75 
win_length = int(1365/40)
n_fft = 1024
hop_length = 4 
n_mels = 40

def run (df, path):
    for i in tqdm.tqdm(range(df.shape[0])):
        data = df[i:i+1, :].reshape(-1)
        S = librosa.feature.melspectrogram(y=data, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
        mel_spec_db = librosa.power_to_db(S, ref=np.max)

        # 플롯 생성
        plt.figure(figsize=(10, 4))  # 이미지 크기 조절
        librosa.display.specshow(mel_spec_db, x_axis='time', y_axis='mel',\
            sr=SAMPLE_RATE, hop_length=hop_length, cmap='viridis')

        # 눈금 및 레이블 제거
        plt.axis('off')

        # 이미지 저장
        plt.savefig(f'{path}/image_{i}.png', bbox_inches='tight', pad_inches=0)

        # 플롯 닫기 (메모리 누수 방지)
        plt.close()

run(train__sample_val_normal, output_directory_n)
run(test_1, output_directory_1)
run(test_2, output_directory_2)
run(test_3, output_directory_3)
run(test_4, output_directory_4)
run(test_5, output_directory_5)
run(test_6, output_directory_6)
run(test_7, output_directory_7)
run(test_8, output_directory_8)

100%|██████████| 1000/1000 [00:57<00:00, 17.34it/s]
100%|██████████| 1000/1000 [00:56<00:00, 17.70it/s]
100%|██████████| 1000/1000 [01:04<00:00, 15.62it/s]
100%|██████████| 1000/1000 [01:06<00:00, 14.95it/s]
100%|██████████| 1000/1000 [01:10<00:00, 14.23it/s]
100%|██████████| 1000/1000 [01:14<00:00, 13.41it/s]
100%|██████████| 1000/1000 [01:20<00:00, 12.49it/s]
100%|██████████| 1000/1000 [01:22<00:00, 12.14it/s]
100%|██████████| 1000/1000 [01:34<00:00, 10.59it/s]
