In [None]:
import os
import preprocessor as pp
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import models
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

In [3]:
# Report the installed PyTorch build and whether a CUDA device is usable (one value per line).
print(torch.__version__, torch.cuda.is_available(), sep="\n")

2.7.0+cu126
True


In [8]:
# --- Paths -------------------------------------------------------------------
DATASET = "./Dataset/"                          # root folder walked for .wav files
SAVE_BEST_PATH = "./result/resnet50_best.pt"    # checkpoint with the best validation accuracy
SAVE_LAST_PATH = "./result/resnet50_last.pt"    # checkpoint written after the final epoch

# --- Classes -----------------------------------------------------------------
# Folder names under DATASET are validated against this list.
CLASS_NAME = ["danger", "fire", "gas", "non", "tsunami"]
CLASS_NUM = len(CLASS_NAME)

# --- Device ------------------------------------------------------------------
# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# --- Audio clip --------------------------------------------------------------
SAMPLE_RATE = 16000
DURATION = 1        # multiplied by SAMPLE_RATE to get the samples per clip

# --- Training ----------------------------------------------------------------
BATCH_SIZE = 32
EPOCHS = 10
LEARNING_RATE = 1e-4

# --- Spectrogram settings ----------------------------------------------------
# NOTE(review): these five values are not referenced anywhere else in this
# notebook; presumably they mirror the settings used inside `preprocessor`
# - TODO confirm.
WINDOW_SIZE = 512
HOP_SIZE = 160
MEL_BINS = 64
FMIN = 50
FMAX = 8000

In [6]:
class ClassNameError(Exception):
    """Raised when a dataset folder name is not one of the known class names.

    Takes no arguments; the (Korean) message is fixed.
    """

    def __init__(self):
        message = "폴더이름과 클래스이름이 일치 하지 않습니다."
        Exception.__init__(self, message)

In [7]:
# Custom dataset definition
class AudioDataset(Dataset):
    """Map-style dataset yielding ``(log-mel features, label)`` pairs from audio files.

    Args:
        filepaths: Indexable collection of audio file paths.
        labels: Indexable collection of labels aligned with ``filepaths``.
        training: When true, every item is passed through ``pp.spec_augmentation``.
        sample_rate: Sampling rate; stored and used to derive ``num_samples``.
        duration: Clip length; ``int(sample_rate * duration)`` samples are
            requested from the feature extractor.
    """

    def __init__(self, filepaths, labels, training=False, sample_rate=SAMPLE_RATE, duration=DURATION):
        self.filepaths, self.labels = filepaths, labels
        self.training = training
        self.sample_rate = sample_rate
        # Number of samples handed to pp.logmel for each file.
        self.num_samples = int(sample_rate * duration)

    def __len__(self):
        """Return the number of files in the dataset."""
        return len(self.filepaths)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for the file at position ``idx``."""
        path, target = self.filepaths[idx], self.labels[idx]

        # The original note documents pp.logmel's output as
        # [batch, channel, mel_bins, time]; squeeze(0) drops that leading
        # dimension when it has size 1.
        # NOTE(review): assumes pp.logmel returns a NumPy array - confirm in preprocessor.
        features = torch.from_numpy(pp.logmel(path, self.num_samples)).squeeze(0)

        # Augmentation is applied to training items only.
        if self.training:
            features = pp.spec_augmentation(features)

        return features, target


In [9]:
# Data (file paths) and labels (class-folder names)
filepaths, labels = [], []

# Normalise the dataset root once so it is recognised whether or not DATASET
# ends with a path separator (basename-based detection only works with one).
dataset_root = os.path.normpath(DATASET)

# Walk the dataset and its sub-folders, recording every ".wav" file together
# with the name of the folder that contains it.
for root, dirs, files in os.walk(DATASET):
    # os.walk yields entries in arbitrary order; sorting the sub-folders in
    # place (and the files below) makes the traversal - and therefore the
    # seeded train/val/test split built from these lists - reproducible.
    dirs.sort()

    # The root itself is not a class folder: skip it so that files placed
    # directly in it are not recorded with an empty-string label.
    if os.path.normpath(root) == dataset_root:
        continue

    folder_name = os.path.basename(root)

    # A folder whose name is not a known class is an error.
    if folder_name not in CLASS_NAME:
        raise ClassNameError

    for file in sorted(files):
        if not file.lower().endswith(".wav"):
            continue

        filepaths.append(os.path.join(root, file))
        labels.append(folder_name)

In [11]:
# 정수 인코딩
label_encoder = LabelEncoder()
integer_labels = label_encoder.fit_transform(labels)

In [12]:
# Sanity check: shape of the encoded label array (one entry per collected file).
print(integer_labels.shape)

(3000,)


In [13]:
# Split the data into train / validation / test sets at an 8:1:1 ratio.
X_train, X_temp, y_train, y_temp = train_test_split(
    filepaths,  # X
    integer_labels,  # y
    test_size=0.2,  # train vs. temporary hold-out = 8:2
    # Stratify on the encoded labels instead of the global `labels` list.
    # They describe the same class membership, but `labels` is a name that is
    # easily rebound in later cells (e.g. as a batch variable), which would
    # break re-running this cell.
    stratify=integer_labels,
    random_state=42,  # seed
)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,  # split the hold-out 1:1 into validation and test
    stratify=y_temp,  # keep class proportions equal in both halves
    random_state=42,
)

In [14]:
# Show the sample and label counts of the train, validation and test splits, one line each.
for split_paths, split_targets in ((X_train, y_train), (X_val, y_val), (X_test, y_test)):
    print(len(split_paths), len(split_targets))

2400 2400
300 300
300 300


In [15]:
# Dataset objects: X holds the file paths (inputs), y the integer class ids (targets).
# Only the training set is built with training=True.
train_dataset = AudioDataset(filepaths=X_train, labels=y_train, training=True)
val_dataset = AudioDataset(filepaths=X_val, labels=y_val, training=False)
test_dataset = AudioDataset(filepaths=X_test, labels=y_test, training=False)

# One loader per split; only the training loader shuffles.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [16]:
# Sanity check: pull the first batch from each loader and print its tensor shapes.
# The batch is bound to cell-local names (not `inputs` / `labels`) so that the
# global `labels` list built while scanning the dataset is not overwritten;
# clobbering it makes earlier cells fail when they are re-run.
for sanity_loader in (train_loader, val_loader, test_loader):
    batch_inputs, batch_labels = next(iter(sanity_loader))
    print(f"Inputs: {batch_inputs.shape}")
    print(f"Labels: {batch_labels.shape}\n")

Inputs: torch.Size([32, 1, 64, 101])
Labels: torch.Size([32])

Inputs: torch.Size([32, 1, 64, 101])
Labels: torch.Size([32])

Inputs: torch.Size([32, 1, 64, 101])
Labels: torch.Size([32])



In [17]:
# Training function
def train_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and report the epoch metrics.

    Args:
        model: Network to train; switched to training mode.
        loader: Iterable of ``(inputs, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, accuracy)``: the batch losses weighted by batch size and
        divided by the number of samples, and the fraction of samples whose
        arg-max prediction equals the label.
    """
    model.train()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    # Iterate batch by batch with a progress bar.
    for batch_x, batch_y in tqdm(loader, desc="학습중", leave=True):
        batch_x = batch_x.to(device)            # move inputs to the target device
        batch_y = batch_y.to(device).long()     # labels as int64 class indices
        n_batch = batch_y.size(0)

        optimizer.zero_grad()                   # clear gradients of the previous step
        logits = model(batch_x)                 # forward pass
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()                   # back-propagation
        optimizer.step()                        # weight update

        # Accumulate the loss weighted by batch size so the epoch value is
        # not skewed by a smaller final batch.
        loss_sum += batch_loss.item() * n_batch

        # Multi-class prediction: index of the largest output per sample.
        n_correct += logits.argmax(dim=1).eq(batch_y).sum().item()
        n_seen += n_batch

    return loss_sum / n_seen, n_correct / n_seen

# Validation function
def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        loader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, accuracy)``: the batch losses weighted by batch size and
        divided by the number of samples, and the fraction of samples whose
        arg-max prediction equals the label.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch_x, batch_y in tqdm(loader, desc="검증중", leave=True):
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device).long()
            n_batch = batch_y.size(0)

            logits = model(batch_x)
            batch_loss = criterion(logits, batch_y)

            # Weight each batch loss by its size before averaging.
            loss_sum += batch_loss.item() * n_batch

            # Multi-class prediction: index of the largest output per sample.
            n_correct += logits.argmax(dim=1).eq(batch_y).sum().item()
            n_seen += n_batch

    return loss_sum / n_seen, n_correct / n_seen

In [18]:
# Start from an ImageNet-pretrained ResNet-50.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)

# Replace the first convolution with a 1-input-channel copy of itself so the
# network accepts single-channel inputs.
conv1 = model.conv1
replace_conv1 = nn.Conv2d(in_channels=1,
                          out_channels=conv1.out_channels,
                          kernel_size=conv1.kernel_size,
                          stride=conv1.stride,
                          padding=conv1.padding,
                          # `bias` is a boolean flag: pass whether the source
                          # layer has a bias, not the bias Parameter itself
                          # (truth-testing a multi-element tensor raises).
                          bias=conv1.bias is not None)

with torch.no_grad():
    # Initialise the new kernel with the mean of the pretrained kernels over
    # the input-channel dimension, keeping a channel dimension of size 1.
    replace_conv1.weight.copy_(conv1.weight.mean(dim=1, keepdim=True))
    if conv1.bias is not None:
        replace_conv1.bias.copy_(conv1.bias)

model.conv1 = replace_conv1
# New classification head with one output per class.
model.fc = nn.Linear(model.fc.in_features, CLASS_NUM)
model.to(DEVICE)
print(model)

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [19]:
# Loss function: cross-entropy between the model outputs and the integer class ids.
criterion = nn.CrossEntropyLoss()
# Optimizer: Adam over every model parameter at the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [20]:
# Training loop
best_val_accuracy = 0.0

# torch.save does not create missing directories. Make sure the checkpoint
# folders exist up front so a missing folder cannot abort the run after an
# epoch has already been trained.
for save_path in (SAVE_BEST_PATH, SAVE_LAST_PATH):
    os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)

for epoch in range(EPOCHS):
    print(f"\nEpoch[{epoch+1}/{EPOCHS}]")

    # Train for one epoch.
    train_loss, train_acc = train_epoch(model=model, loader=train_loader, optimizer=optimizer, criterion=criterion, device=DEVICE)
    # NOTE(review): end="" leaves no newline between the train and validation
    # summaries on stdout - confirm this is intended.
    print(f"Train Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}", end="")

    # Validate.
    val_loss, val_acc = validate(model=model, loader=val_loader, criterion=criterion, device=DEVICE)
    print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_acc:.4f}")

    # Save the model whenever the validation accuracy improves on the best so far.
    if val_acc > best_val_accuracy:
        best_val_accuracy = val_acc
        torch.save(model, SAVE_BEST_PATH)

# Always keep the state after the final epoch as well.
torch.save(model, SAVE_LAST_PATH)


Epoch[1/10]


학습중: 100%|██████████| 75/75 [00:17<00:00,  4.33it/s]


Train Loss: 0.7784, Accuracy: 0.7892

검증중: 100%|██████████| 10/10 [00:01<00:00,  6.97it/s]


Validation Loss: 0.3090, Accuracy: 0.9633

Epoch[2/10]


학습중: 100%|██████████| 75/75 [00:06<00:00, 12.01it/s]


Train Loss: 0.0958, Accuracy: 0.9758

검증중: 100%|██████████| 10/10 [00:00<00:00, 25.19it/s]


Validation Loss: 0.0621, Accuracy: 0.9900

Epoch[3/10]


학습중: 100%|██████████| 75/75 [00:06<00:00, 11.61it/s]


Train Loss: 0.0397, Accuracy: 0.9917

검증중: 100%|██████████| 10/10 [00:00<00:00, 24.50it/s]


Validation Loss: 0.1004, Accuracy: 0.9900

Epoch[4/10]


학습중: 100%|██████████| 75/75 [00:06<00:00, 11.26it/s]


Train Loss: 0.0350, Accuracy: 0.9908

검증중: 100%|██████████| 10/10 [00:00<00:00, 24.47it/s]


Validation Loss: 0.0744, Accuracy: 0.9900

Epoch[5/10]


학습중: 100%|██████████| 75/75 [00:06<00:00, 11.38it/s]


Train Loss: 0.0211, Accuracy: 0.9958

검증중: 100%|██████████| 10/10 [00:00<00:00, 23.53it/s]


Validation Loss: 0.0614, Accuracy: 0.9900

Epoch[6/10]


학습중: 100%|██████████| 75/75 [00:06<00:00, 11.34it/s]


Train Loss: 0.0094, Accuracy: 0.9983

검증중: 100%|██████████| 10/10 [00:00<00:00, 24.98it/s]


Validation Loss: 0.0350, Accuracy: 0.9967

Epoch[7/10]


학습중: 100%|██████████| 75/75 [00:06<00:00, 11.79it/s]


Train Loss: 0.0117, Accuracy: 0.9979

검증중: 100%|██████████| 10/10 [00:00<00:00, 24.87it/s]


Validation Loss: 0.4554, Accuracy: 0.9700

Epoch[8/10]


학습중: 100%|██████████| 75/75 [00:06<00:00, 11.69it/s]


Train Loss: 0.0187, Accuracy: 0.9954

검증중: 100%|██████████| 10/10 [00:00<00:00, 24.67it/s]


Validation Loss: 0.1156, Accuracy: 0.9833

Epoch[9/10]


학습중: 100%|██████████| 75/75 [00:06<00:00, 11.36it/s]


Train Loss: 0.0165, Accuracy: 0.9967

검증중: 100%|██████████| 10/10 [00:00<00:00, 25.94it/s]


Validation Loss: 0.0258, Accuracy: 0.9933

Epoch[10/10]


학습중: 100%|██████████| 75/75 [00:06<00:00, 11.38it/s]


Train Loss: 0.0111, Accuracy: 0.9971

검증중: 100%|██████████| 10/10 [00:00<00:00, 24.46it/s]


Validation Loss: 0.1668, Accuracy: 0.9767


In [21]:
# Load the saved best-validation model and switch it to evaluation mode.
# NOTE(review): weights_only=False unpickles arbitrary Python objects - only
# load checkpoint files that you created yourself.
model = torch.load(SAVE_BEST_PATH, map_location=DEVICE, weights_only=False).to(DEVICE)
model.eval()

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [22]:
# Evaluate the loaded model on the held-out test split.
test_loss, test_acc = validate(
    model=model,
    loader=test_loader,
    criterion=criterion,
    device=DEVICE,
)
print(f"Test Loss: {test_loss:.4f}, Accuracy: {test_acc:.4f}")

검증중: 100%|██████████| 10/10 [00:01<00:00,  6.54it/s]

Test Loss: 0.0052, Accuracy: 0.9967



