In [2]:
!pip install noisereduce
!pip install librosa

Collecting noisereduce
  Downloading noisereduce-3.0.3-py3-none-any.whl.metadata (14 kB)
Downloading noisereduce-3.0.3-py3-none-any.whl (22 kB)
Installing collected packages: noisereduce
Successfully installed noisereduce-3.0.3


In [None]:
import numpy as np
import librosa
import noisereduce as nr
from scipy.signal import butter, lfilter
import os
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
def butter_bandpass(lowcut, highcut, fs, order=5):
    nyq = 0.5 * fs 
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    return b, a

def bandpass_filter(data, lowcut=300.0, highcut=3400.0, fs=16000, order=5):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = lfilter(b, a, data)
    return y

def reduce_noise(y, sr):
    noise_sample = y[:int(0.5*sr)]
    reduced = nr.reduce_noise(y=y, sr=sr, y_noise=noise_sample)
    return reduced

def segment_audio(y, sr=16000, segment_length=3):
    total_length = segment_length * sr
    if len(y) < total_length:
        y = librosa.util.fix_length(y, total_length)
    elif len(y) > total_length:
        y = y[:total_length]
    return y

In [None]:
def preprocess_audio(file_path, sr=16000):
    y, _ = librosa.load(file_path, sr=sr)
    y = bandpass_filter(y, fs=sr)
    y = reduce_noise(y, sr=sr)
    y = segment_audio(y, sr=sr, segment_length=3)

    return y

In [None]:
def extract_features(y, sr=16000):
    # 1. MFCC
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfcc_mean = np.mean(mfcc, axis=1)
    mfcc_std = np.std(mfcc, axis=1)
    mfcc_median = np.median(mfcc, axis=1)

    # 2. Chroma
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_mean = np.mean(chroma, axis=1)
    chroma_std = np.std(chroma, axis=1)

    # 3. Spectral Contrast
    spec_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    spec_contrast_mean = np.mean(spec_contrast, axis=1)
    spec_contrast_std = np.std(spec_contrast, axis=1)

    # 4. Spectral Bandwidth
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    spectral_bandwidth_std = np.std(spectral_bandwidth)

    # 5. Zero-Crossing Rate
    zcr = librosa.feature.zero_crossing_rate(y)
    zcr_median = np.median(zcr)

    # 6. Tonnetz
    y_harmonic = librosa.effects.harmonic(y)
    tonnetz = librosa.feature.tonnetz(y=y_harmonic, sr=sr)
    tonnetz_mean = np.mean(tonnetz, axis=1)
    tonnetz_std = np.std(tonnetz, axis=1)

    feature_vector = np.concatenate([
        mfcc_mean, mfcc_std, mfcc_median,
        chroma_mean, chroma_std,
        spec_contrast_mean, spec_contrast_std,
        [spectral_bandwidth_std],
        [zcr_median],
        tonnetz_mean, tonnetz_std
    ])

    return feature_vector


In [19]:
dataset_path = '/kaggle/input/librisevoc-1k/LibriSeVoc-1k'
os.listdir(dataset_path)

['wavenet',
 'diffwave',
 'gt',
 'melgan',
 'wavegrad',
 'wavernn',
 'parallel_wave_gan']

In [None]:
features = []
labels = []

for label_folder in os.listdir(dataset_path):
    label_folder_path = os.path.join(dataset_path, label_folder)

    if os.path.isdir(label_folder_path):
        count = 0
        for audio_file in os.listdir(label_folder_path):
            if label_folder != 'gt' and count == 200: break
            count += 1
            audio_file_path = os.path.join(label_folder_path, audio_file)

            y = preprocess_audio(audio_file_path)

            feature_vector = extract_features(y)
            features.append(feature_vector)

            labels.append(0 if label_folder == 'gt' else 1)
    print("Done folder", label_folder)

features = np.array(features)
labels = np.array(labels)

print("Features shape:", features.shape)
print("Labels shape:", labels.shape)

Done folder wavenet
Done folder diffwave
Done folder gt
Done folder melgan
Done folder wavegrad
Done folder wavernn
Done folder parallel_wave_gan
Features shape: (2200, 91)
Labels shape: (2200,)
[1 1 1 ... 1 1 1]


In [None]:
import random

full_set = list(range(91))

subsets = set()

while len(subsets) < 50:
    subset = tuple(sorted(random.sample(full_set, 60)))
    subsets.add(subset)

subsets = list(subsets)
for i, s in enumerate(subsets[:5], 1):
    print(f"Tập con {i}: {s}")


Tập con 1: (0, 2, 3, 4, 6, 9, 10, 15, 16, 17, 18, 20, 21, 23, 24, 26, 31, 32, 33, 35, 36, 37, 38, 39, 43, 45, 46, 47, 49, 50, 51, 52, 53, 55, 56, 57, 59, 60, 61, 62, 63, 64, 66, 67, 68, 70, 72, 73, 74, 75, 76, 78, 79, 82, 83, 86, 87, 88, 89, 90)
Tập con 2: (2, 4, 5, 6, 9, 12, 14, 16, 17, 19, 21, 22, 23, 24, 25, 27, 28, 29, 30, 32, 33, 34, 36, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 51, 52, 53, 54, 55, 56, 60, 61, 63, 65, 66, 67, 68, 69, 71, 72, 73, 75, 77, 78, 79, 80, 82, 85, 86, 88, 90)
Tập con 3: (0, 1, 2, 4, 6, 7, 8, 10, 12, 13, 14, 18, 19, 20, 22, 23, 24, 26, 27, 28, 29, 31, 32, 34, 35, 36, 37, 39, 40, 45, 49, 50, 52, 53, 54, 56, 57, 58, 62, 63, 64, 65, 67, 68, 69, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 82, 83, 86, 87, 90)
Tập con 4: (0, 1, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 16, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 37, 39, 40, 44, 46, 47, 49, 50, 52, 54, 55, 56, 57, 58, 60, 62, 63, 66, 67, 68, 69, 70, 71, 75, 76, 77, 78, 80, 81, 82, 84, 85, 86, 88, 89)
Tập con 5: (0, 1,

In [None]:
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

svm_models = []
best_C_values = []

for i, subset in enumerate(subsets):
    X_sub = X_train[:, list(subset)]
    y_sub = y_train
    
    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('svm', SVC(probability=True))
    ])

    grid = GridSearchCV(pipe, {'svm__C': [0.01, 0.1, 1, 10]}, cv=3)
    grid.fit(X_sub, y_sub)

    svm_models.append((subset, grid.best_estimator_))
    best_C_values.append(grid.best_params_['svm__C'])


Subset 0 done.
Subset 1 done.
Subset 2 done.
Subset 3 done.
Subset 4 done.
Subset 5 done.
Subset 6 done.
Subset 7 done.
Subset 8 done.
Subset 9 done.
Subset 10 done.
Subset 11 done.
Subset 12 done.
Subset 13 done.
Subset 14 done.
Subset 15 done.
Subset 16 done.
Subset 17 done.
Subset 18 done.
Subset 19 done.
Subset 20 done.
Subset 21 done.
Subset 22 done.
Subset 23 done.
Subset 24 done.
Subset 25 done.
Subset 26 done.
Subset 27 done.
Subset 28 done.
Subset 29 done.
Subset 30 done.
Subset 31 done.
Subset 32 done.
Subset 33 done.
Subset 34 done.
Subset 35 done.
Subset 36 done.
Subset 37 done.
Subset 38 done.
Subset 39 done.
Subset 40 done.
Subset 41 done.
Subset 42 done.
Subset 43 done.
Subset 44 done.
Subset 45 done.
Subset 46 done.
Subset 47 done.
Subset 48 done.
Subset 49 done.
Subset 50 done.
Subset 51 done.
Subset 52 done.
Subset 53 done.
Subset 54 done.
Subset 55 done.
Subset 56 done.
Subset 57 done.
Subset 58 done.
Subset 59 done.
Subset 60 done.
Subset 61 done.
Subset 62 done.
Su

In [None]:
import numpy as np

def ensemble_predict(X):
    probs = []
    for subset, model in svm_models:
        X_sub = X[:, list(subset)]
        probs.append(model.predict_proba(X_sub))
    avg_probs = np.mean(probs, axis=0)
    return np.argmax(avg_probs, axis=1)

y_pred = ensemble_predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.65      0.66      0.65       193
           1       0.73      0.72      0.73       247

    accuracy                           0.70       440
   macro avg       0.69      0.69      0.69       440
weighted avg       0.70      0.70      0.70       440

