In [4]:
import os
import librosa
import numpy as np
import torch
import pandas as pd
from tqdm import tqdm



In [5]:
csv_path = 'scream_dataset/metadata.csv'
audio_dir = 'scream_dataset/audio'
output_dir = 'scream_dataset/logmel_tensors'
os.makedirs(output_dir, exist_ok=True)

In [6]:
sr = 16000
n_mels = 64
duration = 10
samples = sr * duration

In [7]:
df = pd.read_csv(csv_path)

In [9]:
#Processing
for _, row in tqdm(df.iterrows(), total=len(df)):
    file_path = os.path.join(audio_dir, row['filename'])
    label = 1 if row['label'] == 'scream' else 0

    # Load + trim/pad
    y, _ = librosa.load(file_path, sr=sr)
    if len(y) > samples:
        y = y[:samples]
    else:
        y = np.pad(y, (0, max(0, samples - len(y))))

    # Mel spectrogram
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    logmel = librosa.power_to_db(mel_spec)

    # Saving as tensor
    tensor = torch.tensor(logmel).unsqueeze(0)  # shape: [1, n_mels, time]
    out_file = os.path.join(output_dir, row['filename'].replace('.wav', '.pt'))
    torch.save({'features': tensor, 'label': label}, out_file)

100%|██████████| 143/143 [00:02<00:00, 55.67it/s]


In [10]:
from torch.utils.data import Dataset, DataLoader
import os
class ScreamDataset(Dataset):
    def __init__(self, folder):
        self.folder = folder
        self.files = [f for f in os.listdir(folder) if f.endswith('.pt')]

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        item = torch.load(os.path.join(self.folder, self.files[idx]))
        return item['features'], torch.tensor(item['label'], dtype=torch.long)

# Load dataset
dataset = ScreamDataset('scream_dataset/logmel_tensors')

# Split into train/test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_set, test_set = torch.utils.data.random_split(dataset, [train_size, test_size])

# Dataloaders
train_loader = DataLoader(train_set, batch_size=16, shuffle=True)
test_loader = DataLoader(test_set, batch_size=16)

In [13]:
# Model (VGG-style mini network)
import torch.nn as nn
import torch.nn.functional as F

class ScreamCNN(nn.Module):
    def __init__(self):
        super(ScreamCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(39936, 128)  # got from error
        self.fc2 = nn.Linear(128, 2)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))   # [B, 16, H/2, W/2]
        x = self.pool(F.relu(self.conv2(x)))   # [B, 32, H/4, W/4]
        x = x.view(x.size(0), -1)              # flatten
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [14]:
#Training loop
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ScreamCNN().to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Training
for epoch in range(10):
    model.train()
    total_loss = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1} | Loss: {total_loss/len(train_loader):.4f}")


Epoch 1 | Loss: 42.6879
Epoch 2 | Loss: 2.3397
Epoch 3 | Loss: 0.5971
Epoch 4 | Loss: 0.3160
Epoch 5 | Loss: 0.0008
Epoch 6 | Loss: 0.0008
Epoch 7 | Loss: 0.0000
Epoch 8 | Loss: 0.0000
Epoch 9 | Loss: 0.0000
Epoch 10 | Loss: 0.0000


In [15]:
# Evaluation
from sklearn.metrics import accuracy_score
model.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        preds = torch.argmax(outputs, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

accuracy = accuracy_score(all_labels, all_preds)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Test Accuracy: 96.55%


In [16]:
# Saving the mode
torch.save(model.state_dict(), "scream_cnn_model.pth")
print("Model saved as scream_cnn_model.pth")


Model saved as scream_cnn_model.pth
