In [None]:
import os

import librosa
import numpy as np
import torch

# Dataset locations. Each one can be overridden through an environment variable
# so the notebook also runs on machines where the data lives elsewhere; the
# defaults are the original absolute paths.
train_path = os.environ.get("AUDIO_TRAIN_DIR", "/data/audio1/unzipped_TS1")
val_path = os.environ.get("AUDIO_VAL_DIR", "/data/audio1/unzipped_VS1")

# Mel-Spectrogram 변환 함수
def preprocess_audio(file_path, n_mels=128, sr=16000):
    """Load an audio file and convert it to a log-scaled Mel spectrogram.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        n_mels: Number of Mel bands requested from ``melspectrogram``.
        sr: Sampling rate passed to ``librosa.load`` (the audio is resampled
            to this rate while loading).

    Returns:
        The output of ``librosa.power_to_db`` applied to the Mel power
        spectrogram (presumably a 2-D array laid out as (n_mels, frames) —
        confirm against the installed librosa version).
    """
    y, sr = librosa.load(file_path, sr=sr)
    # Pass the waveform by keyword: librosa >= 0.10 made the arguments of
    # ``melspectrogram`` keyword-only, so the positional form raises a
    # TypeError there, while the keyword form also works on older releases.
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram)
    return log_mel_spectrogram


In [None]:
# 레이블 매핑 함수
def generate_labels(data_path):
    """Build parallel lists of audio file paths and integer labels.

    Every ``.wav`` file directly inside ``data_path`` is labelled according to
    the first keyword of ``label_mapping`` found in its lower-cased file name.
    Files whose name contains none of the keywords are skipped.

    Args:
        data_path: Directory to scan (not recursive).

    Returns:
        A ``(file_paths, labels)`` tuple of two equally long lists, ordered by
        file name.
    """
    label_mapping = {"happy": 0, "sad": 1, "formal": 2, "informal": 3}  # example label mapping
    # Try longer keywords first: "formal" is a substring of "informal", so
    # testing it first would label every "informal" file as "formal".
    # sorted() is stable, so keywords of equal length keep the mapping's order.
    keywords = sorted(label_mapping, key=len, reverse=True)

    file_paths = []
    labels = []

    # os.listdir() returns entries in arbitrary order; sort so the resulting
    # dataset order is reproducible across runs and machines.
    for f in sorted(os.listdir(data_path)):
        name = f.lower()
        # Compare the extension case-insensitively, like the keyword match.
        if not name.endswith('.wav'):
            continue
        matched = next((key for key in keywords if key in name), None)
        if matched is not None:
            file_paths.append(os.path.join(data_path, f))
            labels.append(label_mapping[matched])
    return file_paths, labels

# Collect the (paths, labels) lists for the training and validation splits.
(train_files, train_labels), (val_files, val_labels) = (
    generate_labels(train_path),
    generate_labels(val_path),
)

# Report how many labelled clips each split contains.
print(
    f"훈련 데이터 개수: {len(train_files)}",
    f"검증 데이터 개수: {len(val_files)}",
    sep="\n",
)


In [None]:
from torch.utils.data import Dataset

class AudioDataset(Dataset):
    """Map-style dataset yielding ``(spectrogram, label)`` tensor pairs.

    Items are produced lazily: the audio file is run through
    ``preprocess_audio`` each time it is indexed; nothing is cached.
    """

    def __init__(self, file_paths, labels):
        """Store the parallel sequences of audio paths and class labels.

        Args:
            file_paths: Sequence of audio file paths.
            labels: Sequence of integer class labels, one per path.

        Raises:
            AssertionError: If the two sequences differ in length.
        """
        self.file_paths = file_paths
        self.labels = labels
        assert len(self.file_paths) == len(self.labels), "파일 수와 레이블 수가 맞지 않습니다."

    def __len__(self):
        """Return the number of audio files in the dataset."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Return the float32 spectrogram tensor and int64 label for ``idx``.

        The spectrogram gets a leading axis of size 1, used as the channel
        axis by the convolutional model.
        """
        spectrogram = preprocess_audio(self.file_paths[idx])
        spectrogram = np.expand_dims(spectrogram, axis=0)  # add the channel axis
        label = self.labels[idx]
        return (
            torch.tensor(spectrogram, dtype=torch.float32),
            # Force int64: nn.CrossEntropyLoss needs long class indices, and
            # dtype inference would yield e.g. int32 for numpy int32 labels.
            torch.tensor(label, dtype=torch.long),
        )


In [None]:
import torch
from torch import nn
from torchvision import models

class AudioClassifier(nn.Module):
    """MobileNetV3-Small backbone adapted to single-channel input.

    The pretrained network's first convolution is replaced by a 1-channel
    version and its last classifier layer by a ``num_classes``-way linear
    layer.
    """

    def __init__(self, num_classes):
        """Build the adapted backbone.

        Args:
            num_classes: Size of the output layer.
        """
        super().__init__()
        self.base_model = models.mobilenet_v3_small(pretrained=True)

        # Derive the single-channel stem from the pretrained RGB one instead of
        # hard-coding its hyper-parameters and starting from random weights.
        rgb_conv = self.base_model.features[0][0]
        mono_conv = nn.Conv2d(
            1,
            rgb_conv.out_channels,
            kernel_size=rgb_conv.kernel_size,
            stride=rgb_conv.stride,
            padding=rgb_conv.padding,
            bias=rgb_conv.bias is not None,
        )
        # Summing the filters over the input-channel axis gives the same
        # response as feeding the mono plane on all three RGB channels, so the
        # pretrained stem (and the statistics of the layers after it) stay
        # meaningful.
        mono_conv.weight = nn.Parameter(
            rgb_conv.weight.detach().sum(dim=1, keepdim=True)
        )
        if rgb_conv.bias is not None:
            mono_conv.bias = nn.Parameter(rgb_conv.bias.detach().clone())
        self.base_model.features[0][0] = mono_conv  # channel adaptation

        # Swap the final classifier layer for one with `num_classes` outputs.
        head = self.base_model.classifier[-1]
        self.base_model.classifier[-1] = nn.Linear(head.in_features, num_classes)

    def forward(self, x):
        """Return the backbone's output for ``x``.

        ``x`` needs a channel axis of size 1 (presumably laid out as
        (batch, 1, n_mels, frames) — confirm against the dataset).
        """
        return self.base_model(x)


In [None]:
from torch.utils.data import DataLoader

def pad_collate(batch):
    """Collate ``(spectrogram, label)`` pairs whose last axis differs in length.

    The default collate function stacks the samples and therefore fails as soon
    as two clips in a batch have different durations. Here each spectrogram is
    right-padded along its last axis to the longest one in the batch, using its
    own minimum value as filler so the padding resembles its quietest bins.
    When all samples already share the same length nothing is padded and the
    result equals plain stacking.

    Args:
        batch: List of ``(spectrogram_tensor, label_tensor)`` pairs.

    Returns:
        ``(spectrograms, labels)`` stacked along a new leading batch axis.
    """
    spectrograms, labels = zip(*batch)
    longest = max(spec.shape[-1] for spec in spectrograms)
    padded = [
        torch.nn.functional.pad(
            spec,
            (0, longest - spec.shape[-1]),
            value=float(spec.min()) if spec.numel() else 0.0,
        )
        for spec in spectrograms
    ]
    return torch.stack(padded), torch.stack(labels)


# Build the datasets
train_dataset = AudioDataset(train_files, train_labels)
val_dataset = AudioDataset(val_files, val_labels)

# Build the data loaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, collate_fn=pad_collate)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, collate_fn=pad_collate)


In [None]:
def train_model(model, train_loader, val_loader, num_epochs=10, learning_rate=0.001):
    """Train ``model`` with Adam and cross-entropy, validating after each epoch.

    Args:
        model: Module to optimise in place. Batches are moved to the device of
            its first parameter, so the caller may place it on a GPU first.
        train_loader: Iterable of ``(spectrograms, labels)`` training batches.
        val_loader: Iterable of ``(spectrograms, labels)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.
        learning_rate: Learning rate handed to ``torch.optim.Adam``.

    Returns:
        None. One summary line per epoch is printed (mean train loss, mean
        validation loss, validation accuracy). A metric is reported as ``nan``
        when its loader produced no batches.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # Run every batch on the device the caller placed the model on.
    device = next(model.parameters()).device

    def _mean(total, count):
        # An empty loader must not abort the run with ZeroDivisionError.
        return total / count if count else float("nan")

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0
        train_batches = 0
        for spectrograms, labels in train_loader:
            spectrograms = spectrograms.to(device)
            labels = labels.to(device)
            outputs = model(spectrograms)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_batches += 1

        # Validation phase
        model.eval()
        val_loss = 0
        val_batches = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for spectrograms, labels in val_loader:
                spectrograms = spectrograms.to(device)
                labels = labels.to(device)
                outputs = model(spectrograms)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                val_batches += 1
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f"Epoch {epoch+1}/{num_epochs}, "
              f"Train Loss: {_mean(train_loss, train_batches):.4f}, "
              f"Val Loss: {_mean(val_loss, val_batches):.4f}, "
              f"Val Accuracy: {_mean(100 * correct, total):.2f}%")


In [None]:
# Initialise the model.
# Size the output layer from the largest label id seen in either split, not
# from the number of distinct training labels: label ids are fixed by the
# keyword mapping, so if a class is missing from the training files (e.g. only
# ids 0 and 3 occur) counting distinct values would give too few outputs and
# the loss would receive out-of-range targets.
num_classes = max([*train_labels, *val_labels]) + 1
model = AudioClassifier(num_classes=num_classes)

# Run training
train_model(model, train_loader, val_loader, num_epochs=10, learning_rate=0.001)


In [None]:
# Persist only the learned parameters (the state dict), not the module object.
torch.save(
    obj=model.state_dict(),
    f="mobilenet_audio_model.pth",
)
print("Model saved as mobilenet_audio_model.pth")


In [None]:
def predict(model, file_path):
    """Predict the class index for a single audio file.

    The file is converted with ``preprocess_audio``, given leading channel and
    batch axes of size 1, and passed through ``model`` in evaluation mode
    without gradient tracking.

    Args:
        model: Trained module; it is switched to eval mode and left there.
        file_path: Path of the audio file to classify.

    Returns:
        The index of the largest output as a Python ``int``.
    """
    spectrogram = preprocess_audio(file_path)
    spectrogram = torch.tensor(np.expand_dims(spectrogram, axis=0), dtype=torch.float32)
    spectrogram = spectrogram.unsqueeze(0)  # add the batch axis

    # Run the input on the model's device so a model that was moved to a GPU
    # does not fail with a device mismatch; parameter-free models are left on
    # the CPU.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        spectrogram = spectrogram.to(first_param.device)

    model.eval()
    with torch.no_grad():
        outputs = model(spectrogram)
        _, predicted = torch.max(outputs, 1)
        return predicted.item()

# Run a prediction on a single file. The path below is a placeholder and must
# be pointed at a real .wav file before this cell is executed.
test_file = "/path/to/test/audio.wav"
predicted_class = predict(
    model=model,
    file_path=test_file,
)
print(f"Predicted class: {predicted_class}")
