In [99]:
import sys
sys.path.append('./models/')

import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchaudio
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from models import Cnn14
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from tqdm import tqdm

In [100]:
print(torch.__version__)
print(torch.cuda.is_available())

2.6.0+cu126
True


In [101]:
DATASET = "./Dataset/" # 데이터셋 경로
CHECKPOINT = "./checkpoint/Cnn14_16k_mAP=0.438.pth" # 모델의 사전학습된 가중치
SAVE_PATH = "./result/best.pt" # 학습완료된 모델 저장 위치
CLASS_NAME = ["danger", "fire", "gas", "non", "tsunami"] # 분류할 클래스
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") # torch에 전달할 디바이스 종류

# 하이퍼 파라미터
CLASS_NUM = len(CLASS_NAME) # 분류할 클래스의 개수
SAMPLE_RATE = 16000 # 샘플링 레이트
DURATION = 1 # 샘플의 길이
BATCH_SIZE = 32 # 배치사이즈
EPOCHS = 20  # 학습 에폭 수
LEARNING_RATE = 1e-4 # 학습률

In [102]:
print(DEVICE)

cuda


In [103]:
class ClassNameError(Exception):
    def __init__(self):
        super().__init__("폴더이름과 클래스이름이 일치 하지 않습니다.")

In [104]:
# 커스텀 데이터셋 정의
class AudioDataset(Dataset):
    def __init__(self, filepaths, labels, sample_rate=SAMPLE_RATE, duration=DURATION):
        self.filepaths = filepaths # 데이터 경로
        self.labels = labels # 라벨
        self.sample_rate = sample_rate # 샘플링 레이트
        self.num_samples = int(sample_rate * duration) # 오디오 샘플의 길이

    # 데이터셋의 길이(파일 개수) 반환
    def __len__(self):
        return len(self.filepaths)

    def __getitem__(self, idx):
        filepath = self.filepaths[idx]
        label = self.labels[idx]

        waveform, sr = torchaudio.load(filepath) # waveform의 shape(channel, length)

        # 모노(1채널)로 변환
        if waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # 원하는 샘플링 레이트가 아니면 리샘플링
        if sr != self.sample_rate:
            resampler = torchaudio.transforms.Resample(sr, self.sample_rate)
            waveform = resampler(waveform)

        # 오디오 샘플의 길이조정
        if waveform.shape[1] < self.num_samples: # 길이가 부족하면 0(무음)을 채워 길이를 연장
            waveform = F.pad(waveform, (0, self.num_samples - waveform.shape[1])) # num_samples와 현재의 길이의 차 만큼 0을 패딩
        else:
            waveform = waveform[:, :self.num_samples] # 길이가 길면 슬라이싱

        # 절댓값 정규화(-1 ~ 1)
        waveform = waveform / (waveform.abs().max() + 1e-9)
        '''
        샘플의 shape를 모델에 맞추기
        샘플은 전처리과정을 거쳐 모노화(1채널)와 
        길이가 고정 되어 (1, 160000) 형태
        모델은 (batch_size, data_length)형태의 입력을 받는다
        여기서 필요한 부분은 data_length뿐 채널의 정보는 필요없다
        그래서 채널정보를 제거하기위해 squeeze()
        '''
        waveform = waveform.squeeze(0)
        return waveform, label


In [105]:
# 학습함수
def train_epoch(model, loader, optimizer, criterion, device):
    model.train() # 학습모드
    running_loss = 0.0 # 손실
    total_samples = 0
    total_correct = 0

    # 배치 단위로 데이터로드
    for waveforms, labels in tqdm(loader, desc="학습중", leave=True):
        waveforms = waveforms.to(device) # gpu로 전달
        labels = labels.to(device).float() # BCE 손실함수에는 반드시 실수형필요

        optimizer.zero_grad() # 경사값 초기화

        outputs = model(waveforms)["clipwise_output"] # 모델에 데이터입력

        loss = criterion(outputs, labels) # 손실계산
        loss.backward() # 역전파
        optimizer.step() # 가중치 갱신

        # 배치 손실을 누적
        running_loss += loss.item() * waveforms.size(0)

        # 예측 임계값 0.5 이상을 양성 클래스라 판단하여 이진 예측 생성
        predicts = (outputs > 0.5).int()
        targets = labels.int()

        # 모든 클래스가 일치하는 샘플 개수 카운트, 모든 클래스 다 맞아야 정답 처리
        total_correct += (predicts == targets).all(dim=1).sum().item()
        total_samples += labels.size(0)

    avg_loss = running_loss / total_samples # 평균손실
    accuracy = total_correct  / total_samples # 정답 개수 / 전체 예측값 개수

    return avg_loss, accuracy

# 검증함수
def validate(model, loader, criterion, device):
    model.eval() #평가모드
    running_loss = 0.0
    total_samples = 0
    total_correct = 0

    # 검증에는 기울기 계산필요없음, 즉 역전파 없음
    with torch.no_grad():
        for waveforms, labels in tqdm(loader, desc="검증중", leave=True):
            waveforms = waveforms.to(device)
            labels = labels.to(device).float()

            outputs = model(waveforms)["clipwise_output"]
            loss = criterion(outputs, labels)

            running_loss += loss.item() * waveforms.size(0)

            predicts = (outputs > 0.5).int()
            targets = labels.int()

            total_correct += (predicts == targets).all(dim=1).sum().item()
            total_samples += labels.size(0)

    avg_loss = running_loss / total_samples
    accuracy = total_correct  / total_samples
    
    return avg_loss, accuracy

In [106]:
# 데이터와 라벨
filepaths = []
labels = []

# 데이터셋과 하위폴더에서 확장자가 "wav"인 파일의 경로와 라벨(폴더이름) 저장
for root, _, files in os.walk(DATASET):
    folder_name = os.path.basename(root)

    # 폴더명이 클래스 리스트에 없는 경우 에러
    if folder_name not in CLASS_NAME and folder_name != "":
        raise ClassNameError
    
    for file in files:
        if not file.lower().endswith(".wav"):
            continue
        
        filepath = os.path.join(root, file)
        filepaths.append(filepath)
        labels.append(folder_name)

# 문자열 라벨 → 정수인코딩 → 원핫인코딩
label_encoder = LabelEncoder()
integer_labels = label_encoder.fit_transform(labels).reshape(-1, 1)

onehot_encoder = OneHotEncoder(sparse_output=False)
encoded_labels = onehot_encoder.fit_transform(integer_labels)

In [107]:
print(encoded_labels)

[[1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 ...
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]]


In [108]:
# 데이터셋을 8:1:1의 비율을 가진 학습, 검증, 테스트로 나누기
X_train, X_temp, y_train, y_temp = train_test_split(
    filepaths, # X
    encoded_labels, # y
    test_size=0.2, # train과 임시데이터셋 비율 8:2
    stratify=labels, # 기준값을 기준으로 동일한 클래스의 비율로 나누기
    random_state=42 # seed
)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5, # 임시데이터셋을 1:1 비율로 val과 test로 나누기
    stratify=y_temp,
    random_state=42
)

In [109]:
print(len(X_train), len(y_train))
print(len(X_val), len(y_val))
print(len(X_test), len(y_test))

2400 2400
300 300
300 300


In [110]:
# Dataset 객체
train_dataset = AudioDataset(X_train, y_train) # X 독립변수, y 종속변수
val_dataset   = AudioDataset(X_val,   y_val)
test_dataset  = AudioDataset(X_test,  y_test)

# 각각의 데이터셋 로드
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_dataset,   batch_size=BATCH_SIZE, shuffle=False)
test_loader  = DataLoader(test_dataset,  batch_size=BATCH_SIZE, shuffle=False)

In [111]:
train_iter = iter(train_loader)
inputs, labels = next(train_iter)

print("Inputs:", inputs.shape)
print("Labels:", labels)

Inputs: torch.Size([32, 16000])
Labels: tensor([[0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1.],
        [1., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        [1., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 1., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [1., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        [1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.]],

In [112]:
# 모델 로드
model = Cnn14(
    sample_rate=16000, # 샘플링 레이트 16k
    window_size=512, # 윈도우 사이즈
    hop_size=160, # 홉 사이즈
    mel_bins=64, # mel 주파수 채널 수
    fmin=50, # mel 주파수 최소치
    fmax=8000, # mel 주파수 최대치
    classes_num=527 # 분류할 클래스 숫자(원본 모델의 클래스 숫자)
)

 # 사전학습된 가중치 로드
checkpoint = torch.load(CHECKPOINT, map_location=DEVICE, weights_only=False)
model.load_state_dict(checkpoint['model']) # 체크포인트에서 model 가중치만 가져옴

# 마지막 완전연결층을 학습할 클래스 개수로 수정 (527 -> CLASS_NUM)
model.fc_audioset = torch.nn.Linear(model.fc_audioset.in_features, CLASS_NUM) 

model = model.to(DEVICE) # gpu로 전달

In [113]:
'''
최종적으로 아래와 같은 모델이 로드됨

Cnn14(
  ---------------------------------------------------------------------------------------------
  (비학습 계층)
  # 인코더 계층
  # 슬라이딩 윈도우 -> 푸리에변환 -> mel -> log-scale 순으로 처리
  (spectrogram_extractor): Spectrogram(
    (stft): STFT(                                                                         
      (conv_real): Conv1d(1, 257, kernel_size=(512,), stride=(160,), bias=False)
      (conv_imag): Conv1d(1, 257, kernel_size=(512,), stride=(160,), bias=False)
    )
  )
  (logmel_extractor): LogmelFilterBank()
  ---------------------------------------------------------------------------------------------
  (비학습 계층)
  # 드롭아웃 계층
  # 일부 뉴런의 연결을 해제하여 과적합의 가능성을 낮춤
  (spec_augmenter): SpecAugmentation(
    (time_dropper): DropStripes()
    (freq_dropper): DropStripes()
  )
  ---------------------------------------------------------------------------------------------
  (학습 계층)
  # 배치정규화 계층
  # 각각의 미니배치마다 평균과 분산을 이용하여 통계적인 배치값를 사용하여 지역최솟값을 방지
  (bn0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  ---------------------------------------------------------------------------------------------
  (학습 계층)
  # 컨볼루션 계층
  (conv_block1): ConvBlock(
    (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_block2): ConvBlock(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_block3): ConvBlock(
    (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_block4): ConvBlock(
    (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_block5): ConvBlock(
    (conv1): Conv2d(512, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_block6): ConvBlock(
    (conv1): Conv2d(1024, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv2): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  ---------------------------------------------------------------------------------------------
  (학습 계층)
  # 완전연결 계층
  (fc1): Linear(in_features=2048, out_features=2048, bias=True)
  (fc_audioset): Linear(in_features=2048, out_features=5, bias=True)
  ---------------------------------------------------------------------------------------------

  배치정규화 1층 + 컨볼루션 6*4 + 완전연결층 2 총 27개의 레이어
)
'''
print(model)

Cnn14(
  (spectrogram_extractor): Spectrogram(
    (stft): STFT(
      (conv_real): Conv1d(1, 257, kernel_size=(512,), stride=(160,), bias=False)
      (conv_imag): Conv1d(1, 257, kernel_size=(512,), stride=(160,), bias=False)
    )
  )
  (logmel_extractor): LogmelFilterBank()
  (spec_augmenter): SpecAugmentation(
    (time_dropper): DropStripes()
    (freq_dropper): DropStripes()
  )
  (bn0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_block1): ConvBlock(
    (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_block2): ConvBlock(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (con

In [114]:
criterion = nn.BCELoss() # 다중 라벨 이진교차엔트로피 손실함수
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE) # Adam 최적화함수

In [115]:
# 학습 루프
best_val_accuracy = 0.0

for epoch in range(EPOCHS):
    print(f"\nEpoch[{epoch+1}/{EPOCHS}]")

    # 학습
    train_loss, train_acc = train_epoch(model=model, loader=train_loader, optimizer=optimizer, criterion=criterion, device=DEVICE)
    print(f"Train Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}", end="")

    # 검증
    val_loss, val_acc = validate(model=model, loader=val_loader, criterion=criterion, device=DEVICE)
    print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_acc:.4f}")

    # 검증 정확도가 가장 좋으면 모델 저장
    # if val_acc > best_val_accuracy:
    #     best_val_accuracy = val_acc
    #     torch.save(model, SAVE_PATH)
    
torch.save(model, SAVE_PATH)


Epoch[1/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 13.88it/s]


Train Loss: 0.5267, Accuracy: 0.0992

검증중: 100%|██████████| 10/10 [00:00<00:00, 47.99it/s]


Validation Loss: 0.3647, Accuracy: 0.2833

Epoch[2/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.09it/s]


Train Loss: 0.1971, Accuracy: 0.6729

검증중: 100%|██████████| 10/10 [00:00<00:00, 48.88it/s]


Validation Loss: 0.0585, Accuracy: 0.9367

Epoch[3/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.26it/s]


Train Loss: 0.0947, Accuracy: 0.8662

검증중: 100%|██████████| 10/10 [00:00<00:00, 48.55it/s]


Validation Loss: 0.0272, Accuracy: 0.9733

Epoch[4/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.25it/s]


Train Loss: 0.0639, Accuracy: 0.9179

검증중: 100%|██████████| 10/10 [00:00<00:00, 49.81it/s]


Validation Loss: 0.0126, Accuracy: 0.9933

Epoch[5/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.08it/s]


Train Loss: 0.0547, Accuracy: 0.9254

검증중: 100%|██████████| 10/10 [00:00<00:00, 47.94it/s]


Validation Loss: 0.0074, Accuracy: 1.0000

Epoch[6/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.04it/s]


Train Loss: 0.0415, Accuracy: 0.9429

검증중: 100%|██████████| 10/10 [00:00<00:00, 48.62it/s]


Validation Loss: 0.0054, Accuracy: 1.0000

Epoch[7/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.05it/s]


Train Loss: 0.0417, Accuracy: 0.9417

검증중: 100%|██████████| 10/10 [00:00<00:00, 48.43it/s]


Validation Loss: 0.0027, Accuracy: 1.0000

Epoch[8/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.08it/s]


Train Loss: 0.0319, Accuracy: 0.9617

검증중: 100%|██████████| 10/10 [00:00<00:00, 46.75it/s]


Validation Loss: 0.0021, Accuracy: 1.0000

Epoch[9/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.07it/s]


Train Loss: 0.0232, Accuracy: 0.9708

검증중: 100%|██████████| 10/10 [00:00<00:00, 49.45it/s]


Validation Loss: 0.0027, Accuracy: 1.0000

Epoch[10/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.06it/s]


Train Loss: 0.0336, Accuracy: 0.9554

검증중: 100%|██████████| 10/10 [00:00<00:00, 49.09it/s]


Validation Loss: 0.0011, Accuracy: 1.0000

Epoch[11/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 13.93it/s]


Train Loss: 0.0252, Accuracy: 0.9675

검증중: 100%|██████████| 10/10 [00:00<00:00, 48.51it/s]


Validation Loss: 0.0021, Accuracy: 1.0000

Epoch[12/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 13.94it/s]


Train Loss: 0.0199, Accuracy: 0.9742

검증중: 100%|██████████| 10/10 [00:00<00:00, 48.67it/s]


Validation Loss: 0.0012, Accuracy: 1.0000

Epoch[13/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.06it/s]


Train Loss: 0.0261, Accuracy: 0.9663

검증중: 100%|██████████| 10/10 [00:00<00:00, 48.01it/s]


Validation Loss: 0.0008, Accuracy: 1.0000

Epoch[14/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.05it/s]


Train Loss: 0.0163, Accuracy: 0.9783

검증중: 100%|██████████| 10/10 [00:00<00:00, 49.18it/s]


Validation Loss: 0.0005, Accuracy: 1.0000

Epoch[15/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.03it/s]


Train Loss: 0.0252, Accuracy: 0.9721

검증중: 100%|██████████| 10/10 [00:00<00:00, 49.93it/s]


Validation Loss: 0.0017, Accuracy: 1.0000

Epoch[16/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.07it/s]


Train Loss: 0.0148, Accuracy: 0.9767

검증중: 100%|██████████| 10/10 [00:00<00:00, 47.25it/s]


Validation Loss: 0.0009, Accuracy: 1.0000

Epoch[17/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.01it/s]


Train Loss: 0.0183, Accuracy: 0.9788

검증중: 100%|██████████| 10/10 [00:00<00:00, 48.62it/s]


Validation Loss: 0.0006, Accuracy: 1.0000

Epoch[18/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 13.99it/s]


Train Loss: 0.0196, Accuracy: 0.9746

검증중: 100%|██████████| 10/10 [00:00<00:00, 47.48it/s]


Validation Loss: 0.0004, Accuracy: 1.0000

Epoch[19/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.06it/s]


Train Loss: 0.0152, Accuracy: 0.9788

검증중: 100%|██████████| 10/10 [00:00<00:00, 47.52it/s]


Validation Loss: 0.0007, Accuracy: 1.0000

Epoch[20/20]


학습중: 100%|██████████| 75/75 [00:05<00:00, 14.07it/s]


Train Loss: 0.0131, Accuracy: 0.9833

검증중: 100%|██████████| 10/10 [00:00<00:00, 49.51it/s]


Validation Loss: 0.0007, Accuracy: 1.0000


In [116]:
# 저장한 모델 로드
model = torch.load("./result/best.pt", weights_only=False)
model.to(DEVICE)

Cnn14(
  (spectrogram_extractor): Spectrogram(
    (stft): STFT(
      (conv_real): Conv1d(1, 257, kernel_size=(512,), stride=(160,), bias=False)
      (conv_imag): Conv1d(1, 257, kernel_size=(512,), stride=(160,), bias=False)
    )
  )
  (logmel_extractor): LogmelFilterBank()
  (spec_augmenter): SpecAugmentation(
    (time_dropper): DropStripes()
    (freq_dropper): DropStripes()
  )
  (bn0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_block1): ConvBlock(
    (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_block2): ConvBlock(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (con

In [118]:
# 모델 테스트
test_loss, test_acc = validate(model, test_loader, criterion, DEVICE)
print(f"Test Loss: {test_loss:.4f}, Accuracy: {test_acc:.4f}")

검증중: 100%|██████████| 10/10 [00:00<00:00, 45.12it/s]

Test Loss: 0.0013, Accuracy: 1.0000



