## ML Classifier

In [52]:
import os
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from tqdm import tqdm
import torchaudio.functional as F
import torchaudio.transforms as T
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, balanced_accuracy_score, precision_recall_fscore_support, confusion_matrix, ConfusionMatrixDisplay

## Featrue Extraction

In [4]:
sampling_rate = 16000
n_fft = int(0.025 * sampling_rate)
win_length = int(0.025 * sampling_rate)
hop_length = int(0.01 * sampling_rate)
n_mels = 96
n_mfcc = 13
melkwargs={
      'n_fft': n_fft,
      'n_mels': n_mels,
      'hop_length': hop_length,
    }
dirs = "../dataset/feature/npy"
fnames = os.listdir(dirs)

In [36]:
import torchaudio.functional as F
import torchaudio.transforms as T
features = {}
rms_dict = {}
pitch_dict = {}
mfccs_dict = {}
for fname in tqdm(fnames):
    _id = fname.replace(".npy","")
    y = np.load(os.path.join(dirs, fname))
    mfcc_emb = librosa.feature.mfcc(
        y = y.squeeze(0), 
        n_mfcc =n_mfcc,
        sr=sampling_rate, 
        n_fft=n_fft, 
        hop_length=hop_length, 
        n_mels=n_mels,
    )
    mean_mfcc = list(mfcc_emb.mean(axis=1)) # temporal pooling
    std_mfcc = list(mfcc_emb.std(axis=1)) # temporal pooling
    pitchs = F.detect_pitch_frequency(torch.from_numpy(y.squeeze(0)), sampling_rate).numpy()
    mean_pitch = pitch.mean()
    std_pitch = pitch.std()
    rms = librosa.feature.rms(y=y.squeeze(0))
    mean_rms = rms.mean()
    std_rms = rms.std()
    feature = mean_mfcc + std_mfcc + [mean_pitch, std_pitch, mean_rms, std_rms]
    features[_id] = feature
    rms_dict[_id] = rms
    pitch_dict[_id] = pitch
    mfccs_dict[_id] = mfcc_emb

100%|██████████| 488/488 [00:53<00:00,  9.07it/s]


In [54]:
torch.save(features, "../dataset/feature/handcraft/all_features.pt")
torch.save(rms_dict, "../dataset/feature/handcraft/energy.pt")
torch.save(pitch_dict, "../dataset/feature/handcraft/pitch.pt")
torch.save(mfccs_dict, "../dataset/feature/handcraft/mfccs.pt")

In [46]:
fold_case = ['M1','M2','M3','M4','F5','F6','F7','F8']
all_samples = []
all_labels = []
all_preds = []
label_dist = {}
for fold in fold_case:
    df_tr = pd.read_csv(f"../dataset/split/{fold}_train.csv",index_col=0)
    df_va = pd.read_csv(f"../dataset/split/{fold}_valid.csv",index_col=0)
    df_train = pd.concat([df_tr, df_va])
    df_eval = pd.read_csv(f"../dataset/split/{fold}_eval.csv",index_col=0)
    label_dist[fold] = {"tr":df_train.sum(), "te":df_eval.sum()}
    X_train = np.stack([features[idx] for idx in df_train.index])
    y_train = np.stack(list(df_train.idxmax(axis=1)))
    X_test = np.stack([features[idx] for idx in df_eval.index])
    y_test = np.stack(list(df_eval.idxmax(axis=1)))
    
    classifier = make_pipeline(StandardScaler(),LogisticRegression(random_state=42))
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    WA = accuracy_score(y_test, predictions)
    UA = balanced_accuracy_score(y_test, predictions)
    print(WA, UA)
    # WA, UA evaluation
    all_labels.extend(list(y_test))
    all_preds.extend(list(predictions))
    all_samples.extend(list(df_eval.index))

0.5714285714285714 0.46977124183006536
0.5084745762711864 0.5068627450980392
0.55 0.484375
0.42424242424242425 0.4
0.5416666666666666 0.4494949494949495
0.6865671641791045 0.6739028944911298
0.8064516129032258 0.7879464285714286
0.4 0.4035947712418301


In [47]:
results = pd.DataFrame(index=all_samples)
results['all_preds'] = all_preds
results['all_labels'] = all_labels

In [49]:
# weighted_acc & un-weighted acc
WA = accuracy_score(results['all_labels'], results['all_preds'])
UA = balanced_accuracy_score(results['all_labels'], results['all_preds'])

In [50]:
WA, UA

(0.5594262295081968, 0.5499829405904139)