In [16]:
import pandas as pd
import numpy as np

import os
import sys

# librosa is a Python library for analyzing audio and music. It can be used to extract the data from the audio files we will see it later.
import librosa
import librosa.display
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

# to play the audio files
from IPython.display import Audio



import warnings
if not sys.warnoptions:
    warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [17]:
metadata=pd.read_csv('BVC_Voice_Bio_Public.csv')
metadata.drop(metadata.columns[metadata.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)
metadata

Unnamed: 0,New_ID,Sex,Age,Ethnicity
0,4001,'Male',19,'Igbo'
1,4002,'Male',23,'Igbo'
2,4003,'Female',18,'Ikwerre'
3,4004,'Male',23,'Annang'
4,4005,'Female',17,'Igbo'
...,...,...,...,...
555,4993,'Male',18,'Igede'
556,4994,'Male',17,'Okirika'
557,4995,'Male',22,'Igbo'
558,4998,'Female',23,'Igbo'


# prepare the data 

In [18]:
import os
import pandas as pd

new_df = pd.DataFrame(columns=['ID', 'AudioPath', 'Sex'])

cdir = 'one_sentence/one_sentence/'

for new_id, Sex in zip(metadata['New_ID'], metadata['Sex']):
    audio_filenames = os.listdir(cdir)
    
    audio_filename = next((filename for filename in audio_filenames if str(new_id) in filename), None)

    if audio_filename is not None:
        audio_path = os.path.join(cdir, audio_filename)
        new_df = new_df.append({'ID': new_id, 'AudioPath': audio_path, 'Sex': Sex}, ignore_index=True)

print(new_df)


       ID                                   AudioPath       Sex
0    4001  one_sentence/one_sentence/S_01_4001_VE.wav    'Male'
1    4003  one_sentence/one_sentence/S_01_4003_VE.wav  'Female'
2    4004  one_sentence/one_sentence/S_01_4004_VE.wav    'Male'
3    4006  one_sentence/one_sentence/S_01_4006_VE.wav  'Female'
4    4007  one_sentence/one_sentence/S_01_4007_VE.wav  'Female'
..    ...                                         ...       ...
330  4990  one_sentence/one_sentence/S_01_4990_VE.wav    'Male'
331  4991  one_sentence/one_sentence/S_01_4991_VE.wav    'Male'
332  4994  one_sentence/one_sentence/S_01_4994_VE.wav    'Male'
333  4998  one_sentence/one_sentence/S_01_4998_VE.wav  'Female'
334  4999  one_sentence/one_sentence/S_01_4999_VE.wav    'Male'

[335 rows x 3 columns]


In [19]:
new_df

Unnamed: 0,ID,AudioPath,Sex
0,4001,one_sentence/one_sentence/S_01_4001_VE.wav,'Male'
1,4003,one_sentence/one_sentence/S_01_4003_VE.wav,'Female'
2,4004,one_sentence/one_sentence/S_01_4004_VE.wav,'Male'
3,4006,one_sentence/one_sentence/S_01_4006_VE.wav,'Female'
4,4007,one_sentence/one_sentence/S_01_4007_VE.wav,'Female'
...,...,...,...
330,4990,one_sentence/one_sentence/S_01_4990_VE.wav,'Male'
331,4991,one_sentence/one_sentence/S_01_4991_VE.wav,'Male'
332,4994,one_sentence/one_sentence/S_01_4994_VE.wav,'Male'
333,4998,one_sentence/one_sentence/S_01_4998_VE.wav,'Female'


# MFCC Extraction feature 

In [20]:
df = pd.DataFrame(columns=['feature'])

counter=0
for index,path in enumerate(new_df.AudioPath):
    X, sample_rate = librosa.load(path)
    
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate,n_mfcc=13) axis=0)
    df.loc[counter] = [mfccs]
    counter=counter+1   

print(len(df))
df.head()

335


Unnamed: 0,feature
0,"[-18.175413, -18.762264, -20.23277, -19.647663..."
1,"[-20.833927, -21.177696, -24.323574, -25.75362..."
2,"[-23.72864, -23.236788, -24.520565, -27.393017..."
3,"[-26.813719, -26.398699, -27.694794, -28.77627..."
4,"[-17.551044, -14.641887, -13.771718, -14.99221..."


In [21]:
pip install numpy==1.21

Note: you may need to restart the kernel to use updated packages.


In [22]:
df = pd.concat([new_df,pd.DataFrame(df['feature'].values.tolist())],axis=1)
df[:5]

Unnamed: 0,ID,AudioPath,Sex,0,1,2,3,4,5,6,...,334,335,336,337,338,339,340,341,342,343
0,4001,one_sentence/one_sentence/S_01_4001_VE.wav,'Male',-18.175413,-18.762264,-20.232771,-19.647663,-18.840992,-19.883558,-19.45681,...,,,,,,,,,,
1,4003,one_sentence/one_sentence/S_01_4003_VE.wav,'Female',-20.833927,-21.177696,-24.323574,-25.753622,-26.207659,-25.526674,-23.309019,...,,,,,,,,,,
2,4004,one_sentence/one_sentence/S_01_4004_VE.wav,'Male',-23.72864,-23.236788,-24.520565,-27.393017,-27.991961,-27.391615,-24.116264,...,,,,,,,,,,
3,4006,one_sentence/one_sentence/S_01_4006_VE.wav,'Female',-26.813719,-26.398699,-27.694794,-28.776274,-28.260632,-26.799578,-26.709593,...,,,,,,,,,,
4,4007,one_sentence/one_sentence/S_01_4007_VE.wav,'Female',-17.551044,-14.641887,-13.771718,-14.992212,-14.953177,-15.66591,-15.971464,...,,,,,,,,,,


In [23]:
df=df.fillna(0)
print(df.shape)
df[:5]

(335, 347)


Unnamed: 0,ID,AudioPath,Sex,0,1,2,3,4,5,6,...,334,335,336,337,338,339,340,341,342,343
0,4001,one_sentence/one_sentence/S_01_4001_VE.wav,'Male',-18.175413,-18.762264,-20.232771,-19.647663,-18.840992,-19.883558,-19.45681,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4003,one_sentence/one_sentence/S_01_4003_VE.wav,'Female',-20.833927,-21.177696,-24.323574,-25.753622,-26.207659,-25.526674,-23.309019,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4004,one_sentence/one_sentence/S_01_4004_VE.wav,'Male',-23.72864,-23.236788,-24.520565,-27.393017,-27.991961,-27.391615,-24.116264,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4006,one_sentence/one_sentence/S_01_4006_VE.wav,'Female',-26.813719,-26.398699,-27.694794,-28.776274,-28.260632,-26.799578,-26.709593,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4007,one_sentence/one_sentence/S_01_4007_VE.wav,'Female',-17.551044,-14.641887,-13.771718,-14.992212,-14.953177,-15.66591,-15.971464,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
df.drop(['AudioPath', 'ID'], axis='columns', inplace=True)
df

Unnamed: 0,Sex,0,1,2,3,4,5,6,7,8,...,334,335,336,337,338,339,340,341,342,343
0,'Male',-18.175413,-18.762264,-20.232771,-19.647663,-18.840992,-19.883558,-19.456810,-16.310200,-16.304497,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,'Female',-20.833927,-21.177696,-24.323574,-25.753622,-26.207659,-25.526674,-23.309019,-21.799061,-22.417704,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,'Male',-23.728640,-23.236788,-24.520565,-27.393017,-27.991961,-27.391615,-24.116264,-22.924234,-21.550385,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,'Female',-26.813719,-26.398699,-27.694794,-28.776274,-28.260632,-26.799578,-26.709593,-27.302380,-27.737309,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,'Female',-17.551044,-14.641887,-13.771718,-14.992212,-14.953177,-15.665910,-15.971464,-15.392863,-15.014594,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
330,'Male',-17.885078,-19.087559,-20.567562,-20.407108,-20.240883,-20.666222,-21.352287,-21.591372,-21.577139,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
331,'Male',-27.856340,-25.090708,-24.633238,-23.830137,-22.626913,-23.280348,-24.800947,-25.598581,-27.327532,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
332,'Male',-33.140629,-30.330515,-29.802738,-29.563713,-29.638454,-30.491890,-29.830095,-29.024952,-28.524021,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
333,'Female',-20.658417,-18.282227,-11.193456,-8.359219,-9.174542,-10.339983,-10.806499,-10.634888,-10.348563,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

# feature normalization 

In [26]:
y = df['Sex']
X = df.loc[:, 1:]

col = X.columns
X = preprocessing.MinMaxScaler().fit_transform(X)
X = pd.DataFrame(X, columns = col)

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [28]:
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)

X_train = (X_train - mean)/std
X_test = (X_test - mean)/std

X_train

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,334,335,336,337,338,339,340,341,342,343
30,0.813548,0.805867,0.707948,0.568615,0.547551,0.867387,0.942817,0.954966,1.005410,0.853687,...,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512
22,0.496558,0.454788,0.736870,1.196907,1.119694,0.859300,0.967375,1.145580,0.786946,0.535667,...,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512
285,0.445032,0.397324,0.387861,0.220478,0.094563,0.037737,-0.043396,0.099433,0.360018,0.423460,...,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512
295,1.031729,1.083843,1.089206,1.009291,0.847201,0.654966,0.595717,0.470395,0.441273,0.239500,...,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512
56,-0.591630,-0.529723,-0.878532,-1.238641,-1.145130,-0.852272,-0.406244,0.225750,0.508321,0.527045,...,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,0.094900,0.137356,0.077728,-0.091378,-0.280836,-0.295213,-0.321191,-0.492755,0.422270,0.989342,...,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512
71,0.941323,1.052064,0.915713,0.603070,0.486171,0.415712,0.292921,0.186306,0.080756,0.003582,...,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512
106,0.159333,0.009651,0.051094,0.062676,0.003884,-0.137015,-0.226112,-0.307564,-0.557597,-0.610116,...,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512
270,-0.566446,0.363149,0.875457,0.465368,0.592180,1.252980,1.681510,1.813552,1.704920,1.275332,...,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512,0.065512


In [29]:
def model_assess(model, title = "Default"):
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    print('Accuracy', title, ':', round(accuracy_score(y_test, preds), 5), '\n')

# Random Forest model

In [30]:

rforest = RandomForestClassifier(n_estimators=1000, max_depth=10, random_state=0)
model_assess(rforest, "Random Forest")


Accuracy Random Forest : 0.70297 



# Number of Trees (n_estimators): Default value is 100.
# Maximum Depth (max_depth): Default value is None, which means the tree will expand until all leaves are pure or contain minimum samples.
# Minimum Samples Split (min_samples_split): Default value is 2, indicating that a node must have at least 2 samples to be eligible for splitting.
# Minimum Samples Leaf (min_samples_leaf): Default value is 1. It specifies that a leaf node must have at least 1 sample.
# Maximum Features (max_features): Default value is "auto", which considers the square root of the total number of features for classification tasks and all features for regression tasks.
# Feature Importance (feature_importances_): The importance of features is calculated after training the Random Forest model and is available in the feature_importances_ attribute.