In [314]:
import librosa
import os
from pydub import AudioSegment
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import joblib
import tqdm
import shutil
import math

In [4]:
sampling_rate = 22050 # Hz
frame_length = 256 # samples
hop_length = 128 # samples
window_length = 172 # frames

In [5]:
def framing(signal, frame_length=frame_length, hop_length=hop_length):
    n_frames = int((len(signal) - frame_length) / hop_length + 1)
    frames =[]
    for i in range(n_frames):
        frame = signal[i * hop_length : i * hop_length + frame_length]
        frames.append(frame)
    return frames

In [6]:
def windowing(frames, window_length):
    n_windows = int(len(frames) / window_length)
    windows = {}
    for i in range(n_windows):
        window = frames[i * window_length : i * window_length + window_length]
        windows[i] = {'window_frames': window}
    return windows

In [7]:
def normalize_STE(ste_s):
    cl_win_STE = sum(ste_s)
    norm_ste_s = []
    for ste in ste_s:
        norm_ste_s.append(ste / cl_win_STE)
    return norm_ste_s

In [8]:
def STE_frame(windows):
    normalized_frames_ste_s = []
    for i, window in windows.items():
        ste = np.zeros(len(window['window_frames']))
        for j, frame in enumerate(window['window_frames']):
            ste[j] = np.sum(frame**2)
        norm_ste_s = normalize_STE(ste)
        windows[i]['normalized_ste_s'] = norm_ste_s
        normalized_frames_ste_s.extend(norm_ste_s)
    return normalized_frames_ste_s

In [9]:
def preprocess(paths_to_pieces):
    pieces = {}
    for p in paths_to_pieces:
        signal, sr = librosa.load(p, sr=sampling_rate)
        pieces[p] = {'signal': signal[0:110250]}
    for i, (piece_name, data) in enumerate(pieces.items()):
        data['frames'] = framing(data['signal'])
        data['windows'] = windowing(data['frames'], window_length)
        data['normalized_frames_ste_s'] = STE_frame(data['windows'])
        data['MED_s'] = []
        for i, win in data['windows'].items():
            win_MED = min(win['normalized_ste_s'])
            win['MED'] = win_MED
            data['MED_s'].append(win_MED)
        if i % 1000 == 0:
            print(i)
    return [data['MED_s'] for data in pieces.values()]

In [52]:
speech_train_ds = {}
for name in os.listdir('/kaggle/input/train-n-test-datasets/speech_train'):
    speech_train_ds[('/kaggle/input/train-n-test-datasets/speech_train/' + name)] = 1

music_train_ds = {}
for name in os.listdir('/kaggle/input/train-n-test-datasets/music_train'):
    music_train_ds[('/kaggle/input/train-n-test-datasets/music_train/' + name)] = 0

speech_test_ds = {}
for name in os.listdir('/kaggle/input/train-n-test-datasets/speech_test'):
    speech_test_ds[('/kaggle/input/train-n-test-datasets/speech_test/' + name)] = 1

music_test_ds = {}
for name in os.listdir('/kaggle/input/train-n-test-datasets/music_test'):
    music_test_ds[('/kaggle/input/train-n-test-datasets/music_test/' + name)] = 0

In [49]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [218]:
len(speech_train_ds), len(music_train_ds), len(speech_test_ds), len(music_test_ds)

(13000, 10001, 2500, 2002)

In [219]:
train_ds = {**music_train_ds, **speech_train_ds}
test_ds = {**music_test_ds, **speech_test_ds}
len(train_ds), len(test_ds)

(23001, 4502)

# new

In [291]:
train_fs = [smpl for smpl in preprocess(list(train_ds.keys()))]

  norm_ste_s.append(ste / cl_win_STE)


In [323]:
bad_smpls = []
for i, ftr in enumerate(train_fs):
    smpl = []
    for v in ftr:
        if math.isnan(v) and i not in bad_smpls:
            bad_smpls.append(i)
bad_smpls

[51,
 469,
 1327,
 3788,
 4983,
 5083,
 5352,
 5842,
 6826,
 7444,
 8114,
 8848,
 8990,
 9648,
 9935,
 12722,
 14590]

In [324]:
train_features = []
for i, ftr in enumerate(train_fs):
    if not i in bad_smpls:
        train_features.append(ftr)

In [328]:
train_labels = []
for i, lbl in enumerate(list(train_ds.values())):
    if not i in bad_smpls:
        train_labels.append([lbl])

In [329]:
len(train_features), len(train_labels)

(22984, 22984)

In [331]:
train_features_tnsr = torch.tensor(train_features, dtype=torch.float32)

In [332]:
train_labels_tnsr = torch.tensor(train_labels, dtype=torch.float32)

In [352]:
torch.save(train_features_tnsr, 'train_features_tensor.pt')
torch.save(train_labels_tnsr, 'train_labels_tensor.pt')

In [364]:
class BinaryClassificationModel(nn.Module):
    def __init__(self):
        super(BinaryClassificationModel, self).__init__()
        self.fc1 = nn.Linear(5, 100)
        self.fc2 = nn.Linear(100, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.sigmoid(x)
        return x

In [365]:
model = BinaryClassificationModel()
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

In [366]:
for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    outputs = model(train_features_tnsr)
    loss = criterion(outputs, train_labels_tnsr)
    loss.backward()
    optimizer.step()

    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

Epoch 1, Loss: 0.6931473016738892
Epoch 2, Loss: 0.6931473016738892
Epoch 3, Loss: 0.6931473016738892
Epoch 4, Loss: 0.6931473016738892
Epoch 5, Loss: 0.6931473016738892
Epoch 6, Loss: 0.6931473016738892
Epoch 7, Loss: 0.6931473016738892
Epoch 8, Loss: 0.6931473016738892
Epoch 9, Loss: 0.6931473016738892
Epoch 10, Loss: 0.6931473016738892
Epoch 11, Loss: 0.6931473016738892
Epoch 12, Loss: 0.6931473016738892
Epoch 13, Loss: 0.6931473016738892
Epoch 14, Loss: 0.6931473016738892
Epoch 15, Loss: 0.6931473016738892
Epoch 16, Loss: 0.6931473016738892
Epoch 17, Loss: 0.6931473016738892
Epoch 18, Loss: 0.6931473016738892
Epoch 19, Loss: 0.6931473016738892
Epoch 20, Loss: 0.6931473016738892
Epoch 21, Loss: 0.6931473016738892
Epoch 22, Loss: 0.6931473016738892
Epoch 23, Loss: 0.6931473016738892
Epoch 24, Loss: 0.6931473016738892
Epoch 25, Loss: 0.6931473016738892
Epoch 26, Loss: 0.6931473016738892
Epoch 27, Loss: 0.6931473016738892
Epoch 28, Loss: 0.6931473016738892
Epoch 29, Loss: 0.69314730167

In [349]:
test_fs = [smpl for smpl in preprocess(list(test_ds.keys()))]

In [350]:
test_features_tnsr = torch.tensor(test_fs, dtype=torch.float32)

In [351]:
test_labels_tnsr = torch.tensor(list(test_ds.values()), dtype=torch.float32)

In [None]:
torch.save(test_features_tnsr, 'test_features_tensor.pt')
torch.save(test_labels_tnsr, 'test_labels_tensor.pt')

In [356]:
from sklearn.metrics import accuracy_score, roc_auc_score

In [367]:
model.eval()
with torch.no_grad():
    test_outputs = model(test_features_tnsr)
    
predictions = (test_outputs > 0.5).float()

accuracy = accuracy_score(test_labels_tnsr.numpy(), predictions.numpy())
print(f'Accuracy: {accuracy}')

roc_auc = roc_auc_score(test_labels_tnsr.numpy(), test_outputs.numpy())
print(f'ROC-AUC Score: {roc_auc}')

Accuracy: 0.44469124833407375
ROC-AUC Score: 0.5


In [368]:
torch.save(model.state_dict(), 'trained SMD')