# Unzip datafiles


In [1]:
import zipfile


with zipfile.ZipFile('/content/drive/MyDrive/archive(4).zip', 'r') as zip_ref:
    zip_ref.extractall('/content/drive/MyDrive/Colab Notebooks')

# Importing Libraries

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import sklearn

import librosa
import librosa.display
import IPython.display as ipd
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm

# PREPROCESSING AND EXTRACTING AUDIO FEATURES

In [3]:
def code_extractor(filename):
    cnt=0
    str = ''
    for ch in filename:
        if(ch=='_'):
            cnt+=1;
        elif(cnt==2):
            str += ch
    return str

In [6]:
def feature_extractor(y, sr):

    S = np.abs(librosa.stft(y))

    tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
    tonnetz_mean = np.mean(tonnetz.T, axis=0)
    tonnetz_var = np.var(tonnetz.T, axis=0)
    features = np.append(tonnetz_mean, tonnetz_var)

    #zero_crossing_rate = librosa.feature.zero_crossing_rate(y)
    #zero_crossing_rate_mean = np.mean(zero_crossing_rate)
    #zero_crossing_rate_var = np.var(zero_crossing_rate)
    #features = np.append(features, [zero_crossing_rate_mean, zero_crossing_rate_var])

    spec_centroid = librosa.feature.spectral_centroid(sr=sr, S=S)
    spec_centroid_mean = np.mean(spec_centroid, axis = 1)
    spec_centroid_var = np.var(spec_centroid, axis = 1)
    features = np.append(features, [spec_centroid_mean, spec_centroid_var])

    mfcc = librosa.feature.mfcc(sr=sr, S=S)
    mfcc_mean = np.mean(mfcc.T, axis = 0)
    mfcc_var = np.var(mfcc.T, axis = 0)
    features = np.append(features, mfcc_mean)
    features = np.append(features, mfcc_var)

    spec_width = librosa.feature.spectral_bandwidth(sr=sr, S=S)
    spec_width_mean = np.mean(spec_width)
    spec_width_var = np.var(spec_width)
    features = np.append(features, [spec_width_mean, spec_width_var])

    spec_contrast = librosa.feature.spectral_contrast(sr=sr, S=S)
    spec_contrast_mean = np.mean(spec_contrast.T, axis = 0)
    spec_contrast_var= np.var(spec_contrast.T, axis = 0)
    features = np.append(features, spec_contrast_mean)
    features = np.append(features, spec_contrast_var)

    return features

In [7]:
import os

xl = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/BVC_Voice_Bio_Public.xlsx')
data = pd.DataFrame()
label = pd.DataFrame()

for dirname, _, filenames in os.walk('/content/drive/MyDrive/Colab Notebooks/multiple_sentences'):
    for filename in filenames:
        y, sr = librosa.load(os.path.join(dirname, filename))
        y = librosa.effects.harmonic(y)
        data = data.append([feature_extractor(y, sr)])
        label = label.append([xl[xl['New_ID']==int(code_extractor(filename))].Sex.item()])
data.insert(70, 'Sex', label[0])

In [8]:
columns = ['tonnetz1_mean', 'tonnetz2_mean', 'tonnetz3_mean', 'tonnetz4_mean',
           'tonnetz5_mean', 'tonnetz6_mean', 'tonnetz1_var', 'tonnetz2_var',
           'tonnetz3_var', 'tonnetz4_var', 'tonnetz5_var', 'tonnetz6_var',
           'spec_centroid_mean', 'spec_centroid_var',
           'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean', 'mfcc4_mean', 'mfcc5_mean',
           'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean', 'mfcc9_mean', 'mfcc10_mean',
           'mfcc11_mean', 'mfcc12_mean', 'mfcc13_mean', 'mfcc14_mean', 'mfcc15_mean',
           'mfcc16_mean', 'mfcc17_mean', 'mfcc18_mean', 'mfcc19_mean', 'mfcc20_mean',
           'mfcc1_var', 'mfcc2_var', 'mfcc3_var', 'mfcc4_var', 'mfcc5_var',
           'mfcc6_var', 'mfcc7_var', 'mfcc8_var', 'mfcc9_var', 'mfcc10_var',
           'mfcc11_var', 'mfcc12_var', 'mfcc13_var', 'mfcc14_var', 'mfcc15_var',
           'mfcc16_var', 'mfcc17_var', 'mfcc18_var', 'mfcc19_var', 'mfcc20_var',
           'spec_width_mean', 'spec_width_var', 'spec_contrast1_mean', 'spec_contrast2_mean',
           'spec_contrast3_mean', 'spec_contrast4_mean', 'spec_contrast5_mean', 'spec_contrast6_mean',
           'spec_contrast7_mean', 'spec_contrast_var1', 'spec_contrast2_var', 'spec_contrast3_var',
           'spec_contrast4_var', 'spec_contrast5_var', 'spec_contrast6_var', 'spec_contrast7_var', 'Sex'
          ]
columns = np.asarray(columns)
data.columns = columns;
data.head()

Unnamed: 0,tonnetz1_mean,tonnetz2_mean,tonnetz3_mean,tonnetz4_mean,tonnetz5_mean,tonnetz6_mean,tonnetz1_var,tonnetz2_var,tonnetz3_var,tonnetz4_var,...,spec_contrast6_mean,spec_contrast7_mean,spec_contrast_var1,spec_contrast2_var,spec_contrast3_var,spec_contrast4_var,spec_contrast5_var,spec_contrast6_var,spec_contrast7_var,Sex
0,0.012258,-0.043308,0.014187,0.089551,-0.030602,0.00021,0.020496,0.005455,0.029694,0.026657,...,18.104843,43.302368,38.813899,30.940677,26.360451,11.449553,4.560365,4.104759,16.684918,'Male'
0,-0.002559,-0.062393,-0.148013,0.171587,-0.04165,0.037845,0.009314,0.002079,0.007142,0.010437,...,17.853945,42.105786,22.34116,26.796803,23.531965,9.413186,6.434799,3.024586,47.959942,'Male'
0,-0.064299,-0.049338,-0.017508,0.152659,-0.042449,-0.016234,0.006504,0.003774,0.012554,0.025934,...,20.461254,41.097609,26.730248,33.733398,19.511525,14.468591,6.597677,8.67284,47.993536,'Male'
0,-0.065641,0.021741,0.025934,-0.023877,0.006787,-0.025691,0.006111,0.001668,0.006259,0.033157,...,19.912814,42.634541,33.990237,21.526417,33.542858,5.951909,5.314104,9.298241,48.746468,'Male'
0,-0.042531,-0.000216,-0.040035,0.081545,-0.018598,-0.005203,0.003853,0.002567,0.006093,0.023012,...,17.796764,42.755606,37.44718,32.877207,13.108681,6.88527,3.876811,3.702312,58.690731,'Male'


In [9]:
data.to_csv('sample', encoding='utf-8', index=False)
data.to_excel('sampleXL.xlsx', sheet_name='sheet1', index=False)

# Model Creation

In [23]:
from sklearn.linear_model import  LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [24]:
y = data['Sex']
X = data.loc[:, data.columns != 'Sex']

col = X.columns
X = preprocessing.MinMaxScaler().fit_transform(X)
X = pd.DataFrame(X, columns = col)

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [26]:
log = LogisticRegression()

In [27]:
log.fit(X_train , y_train)

In [29]:
import joblib
import os

save_dir = '/content/drive/MyDrive/Colab Notebooks'
os.makedirs(save_dir, exist_ok=True)
joblib.dump(log , os.path.join(save_dir, 'Voice_gender_detection.pkl'))

['/content/drive/MyDrive/Colab Notebooks/Voice_gender_detection.pkl']

In [18]:
pred = log.predict(X_test)

# MODEL EVALUATION

In [19]:
from sklearn.metrics import accuracy_score

In [20]:
accuracy_score(y_test,pred)

0.9269662921348315

In [21]:
from sklearn.metrics import classification_report , accuracy_score , confusion_matrix

report = classification_report(y_test , pred)
print(report)

              precision    recall  f1-score   support

    'Female'       0.93      0.95      0.94       313
      'Male'       0.93      0.90      0.91       221

    accuracy                           0.93       534
   macro avg       0.93      0.92      0.92       534
weighted avg       0.93      0.93      0.93       534



In [22]:
con = confusion_matrix(y_test , pred)
con

array([[297,  16],
       [ 23, 198]])