In [1]:
import torch
import numpy as np
import librosa
import matplotlib.pyplot as plt
from torchvision import transforms
from PIL import Image
import os
import torch.nn as nn
import torchvision.models as models

In [2]:
# GunshotClassifier definition (the model used previously)
class GunshotClassifier(nn.Module):
    """ResNet-50 whose final fully connected layer is replaced by a new linear head.

    Args:
        num_classes: Number of outputs of the replacement ``fc`` layer.
        pretrained: Forwarded to ``torchvision.models.resnet50``. Defaults to
            ``True`` (the original behaviour). Pass ``False`` when a checkpoint
            is loaded right after construction, so the backbone is not
            initialised from separately fetched pretrained weights first.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()
        # The attribute name ``model`` is part of every state_dict key
        # ("model.conv1.weight", ...), so it must not be renamed.
        self.model = models.resnet50(pretrained=pretrained)
        # Swap the stock classification head for one with ``num_classes`` outputs.
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)

    def forward(self, x):
        """Return the wrapped ResNet's output for the input batch ``x``."""
        return self.model(x)

In [3]:
# Mel-spectrogram conversion function
def preprocess_audio(file_path, sr=22050, duration=1.0, n_mels=128):
    """Load an audio file and return a fixed-length mel spectrogram in dB.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        sr: Sample rate requested from ``librosa.load``; the rate it returns is
            used for every following step.
        duration: Clip length in seconds. Shorter audio is zero-padded at the
            end, longer audio is cut to this length.
        n_mels: Number of mel bands passed to ``librosa.feature.melspectrogram``.

    Returns:
        The array produced by ``librosa.power_to_db`` with ``ref=np.max``.
    """
    y, sr = librosa.load(file_path, sr=sr)
    # ``sr * duration`` is a float; np.pad rejects non-integer pad widths and
    # slicing needs an int, so compute the target sample count once.
    target_len = int(round(sr * duration))
    # Pad audio to ensure it's at least as long as the desired duration
    if len(y) < target_len:
        y = np.pad(y, (0, target_len - len(y)), mode='constant')
    # Keep only the first ``duration`` seconds (previously hard-coded to 1 second)
    y = y[:target_len]
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    return mel_spec_db

In [4]:
# Load the model and switch it to evaluation mode
num_classes = 6  # number of gun classes
model = GunshotClassifier(num_classes=num_classes)
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint cannot execute arbitrary code while loading; it also
# avoids the FutureWarning that torch.load emits when the flag is left unset.
model.load_state_dict(
    torch.load('new_model.pth', map_location=torch.device('cpu'), weights_only=True)
)
model.eval()

  model.load_state_dict(torch.load('new_model.pth', map_location=torch.device('cpu')))


GunshotClassifier(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
  

In [5]:
# Preprocessing pipeline: resize the image to 224x224, then convert it to a tensor
transform = transforms.Compose(
    [transforms.Resize(size=(224, 224)), transforms.ToTensor()]
)

In [6]:
# Predict the class of a sample audio file
def predict_gunshot(file_path, model, transform, classes):
    """Classify the audio at ``file_path`` and return the matching entry of ``classes``.

    The spectrogram from ``preprocess_audio`` is min-max scaled to 8-bit values,
    converted to an RGB image, passed through ``transform`` and fed to ``model``
    as a batch of one. The index of the largest model output selects the label.
    """
    spectrogram = preprocess_audio(file_path)
    lowest = spectrogram.min()
    value_range = spectrogram.max() - lowest

    if value_range == 0:
        # A constant spectrogram has nothing to scale by; fall back to all zeros
        scaled = np.zeros_like(spectrogram)
    else:
        scaled = (spectrogram - lowest) / value_range  # Normalize to [0, 1]

    pixels = (scaled * 255).astype(np.uint8)  # Scale to [0, 255]
    rgb_image = Image.fromarray(pixels).convert('RGB')
    batch = transform(rgb_image).unsqueeze(0)  # Add batch dimension

    with torch.no_grad():
        logits = model(batch)
        _, top_index = torch.max(logits, 1)

    return classes[top_index.item()]

In [9]:
# List of gun classes (the order must be the same as before)
classes = [
    'ACE',
    'AKM',
    'AUG',
    'M416',
    'M762',
    'SCAR',
]

# Path to the sample audio file
sample_audio_path = 'M416_test.wav'

In [10]:
# Run the prediction on the sample clip and report the result
predicted_class = predict_gunshot(
    file_path=sample_audio_path,
    model=model,
    transform=transform,
    classes=classes,
)
print(f'The predicted gunshot class is: {predicted_class}')

The predicted gunshot class is: M416
