In [1]:
!pip install python_speech_features

Collecting python_speech_features
  Downloading https://files.pythonhosted.org/packages/ff/d1/94c59e20a2631985fbd2124c45177abaa9e0a4eee8ba8a305aa26fc02a8e/python_speech_features-0.6.tar.gz
Building wheels for collected packages: python-speech-features
  Building wheel for python-speech-features (setup.py) ... [?25l[?25hdone
  Created wheel for python-speech-features: filename=python_speech_features-0.6-cp36-none-any.whl size=5887 sha256=58b1ebccb88859165a10a099ba2abb75a7068eae333c40cdb441c6db5be28a96
  Stored in directory: /root/.cache/pip/wheels/3c/42/7c/f60e9d1b40015cd69b213ad90f7c18a9264cd745b9888134be
Successfully built python-speech-features
Installing collected packages: python-speech-features
Successfully installed python-speech-features-0.6


In [2]:
!unzip '/content/drive/MyDrive/Machine Learning/LUMS_FALL2020_PROJECT_DATA1.zip' -d '/content/'

Archive:  /content/drive/MyDrive/Machine Learning/LUMS_FALL2020_PROJECT_DATA1.zip
   creating: /content/Gender_Recognition/Test/
   creating: /content/Gender_Recognition/Test/SPK083_M/
  inflating: /content/Gender_Recognition/Test/SPK083_M/1.wav  
  inflating: /content/Gender_Recognition/Test/SPK083_M/10.wav  
  inflating: /content/Gender_Recognition/Test/SPK083_M/2.wav  
  inflating: /content/Gender_Recognition/Test/SPK083_M/3.wav  
  inflating: /content/Gender_Recognition/Test/SPK083_M/4.wav  
  inflating: /content/Gender_Recognition/Test/SPK083_M/5.wav  
  inflating: /content/Gender_Recognition/Test/SPK083_M/6.wav  
  inflating: /content/Gender_Recognition/Test/SPK083_M/7.wav  
  inflating: /content/Gender_Recognition/Test/SPK083_M/8.wav  
  inflating: /content/Gender_Recognition/Test/SPK083_M/9.wav  
   creating: /content/Gender_Recognition/Test/SPK084_M/
  inflating: /content/Gender_Recognition/Test/SPK084_M/1.wav  
  inflating: /content/Gender_Recognition/Test/SPK084_M/10.wav  
 

In [4]:
import python_speech_features as mfcc
from scipy.io.wavfile import read
import numpy as np
import pandas as pd
import re
import os
from sklearn.metrics import confusion_matrix,accuracy_score,f1_score
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix,accuracy_score,f1_score,classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB,GaussianNB
from sklearn.preprocessing import normalize,StandardScaler,MinMaxScaler
from sklearn.pipeline import Pipeline

In [5]:
def get_MFCC(audio, sr):
    features = mfcc.mfcc(audio, sr, 0.025, 0.01, 13, appendEnergy = True)
    return np.mean(features, axis=0)

In [6]:
# Function which sorts files in a directory
def sorted_alphanumeric(data):
    convert = lambda text: int(text) if text.isdigit() else text.lower()
    alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] 
    return sorted(data, key=alphanum_key)

In [7]:
# feature extraction for speaker recognition
def feature_extraction_S(dir):
  df=pd.DataFrame()
  Y=[]
  for i in sorted_alphanumeric(os.listdir(dir)):
    for files in os.listdir(dir+i):
      sr, audio = read(dir+i+'/'+files)
      features = get_MFCC(audio, sr)
      row=pd.Series(features)
      df=df.append(row,ignore_index=True)
      Y.append(int(i[3:6]))
  return df,Y

In [8]:
# feature extraction for gender recognition
def gen_features_G(dir):
  df=pd.DataFrame()
  Y=[]
  for i in sorted_alphanumeric(os.listdir(dir)):
    for files in os.listdir(dir+i):
      sr, audio = read(dir+i+'/'+files)
      features = get_MFCC(audio, sr)
      row=pd.Series(features)
      df=df.append(row,ignore_index=True)
      if i[-1]=='F':
        Y.append(0)
      elif i[-1]=='M':
        Y.append(1)
  return df,Y

**Speaker Recognition**

In [9]:
# Directories of all datasets
dir_train='/content/Speaker_Recognition/Train/'
dir_test='/content/Speaker_Recognition/Test/'
dir_train_G='/content/Gender_Recognition/Train/'
dir_test_G='/content/Gender_Recognition/Test/'


In [10]:
# Feature extraction for training for speaker recognition
train_X_speaker,train_Y_speaker=feature_extraction_S(dir_train)

In [11]:
# Feature extraction for testing for speaker recognition
test_X_speaker,test_Y_speaker=feature_extraction_S(dir_test)

In [12]:
# Feature extraction for training for gender recognition
train_X_gender,train_Y_gender=gen_features_G(dir_train_G)
# Feature extraction for testing for gender recognition
test_X_gender,test_Y_gender=gen_features_G(dir_test_G)

**Multi Layer Perceptron for Speaker Recognition**

In [62]:
# Creating object for Multi layer Perceptron
mlp=MLPClassifier()

In [63]:
parameters= {'hidden_layer_sizes':[(128,64),(64),(64,32),(32)],
             'activation': ['logistic'],
             'solver':['sgd'],
             'learning_rate_init':[0.4,0.1,0.01],
             'random_state':[1],
             'max_iter':[5000]}

In [None]:
# Performing grid search to train model with best parameters 
# This may take 10-15 mins
gridsearchcv=GridSearchCV(mlp,parameters,n_jobs=-1,scoring='f1_macro')
model=gridsearchcv.fit(train_X_speaker,train_Y_speaker)

In [None]:
# Predicting labels of test files for speaker recognition
pred=model.predict(test_X_speaker)

In [21]:
print('Best Parameters: \n\n',model.best_params_)

Best Parameters: 

 {'activation': 'logistic', 'hidden_layer_sizes': 64, 'learning_rate_init': 0.01, 'max_iter': 5000, 'random_state': 1, 'solver': 'sgd'}


In [22]:
print('Best f1_macro Score: ', model.best_score_)

Best f1_macro Score:  0.9200469483568074


In [23]:
print('Best Estimator: ',model.best_estimator_)

Best Estimator:  MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
              beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=64, learning_rate='constant',
              learning_rate_init=0.01, max_fun=15000, max_iter=5000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1, shuffle=True, solver='sgd',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)


In [24]:
accuracyscore=accuracy_score(test_Y_speaker,pred)
print('Accuracy Score: ',accuracyscore)

Accuracy Score:  0.9401408450704225


In [None]:
report=classification_report(test_Y_speaker,pred)
print('Classification Report: \n\n',report)

In [None]:
cm=confusion_matrix(test_Y_speaker,pred)
print('Confusion Matrix: \n',cm)

**Multi Layer Perceptron for Gender Recognition**

In [43]:
# Creating object for Multi layer Perceptron
mlp=MLPClassifier()
# Parameters provided on document.
parameters= {'hidden_layer_sizes':[(128,64),(64),(64,32),(32)],
             'activation': ['logistic'],
             'solver':['sgd'],
             'learning_rate_init':[0.4,0.1,0.01],
             'random_state':[1],
             'max_iter':[5000]}

# Performing grid search to train model with best parameters 
# This may take 3-4 mins
gridsearchcv=GridSearchCV(mlp,parameters,n_jobs=-1,scoring='f1_macro')
model=gridsearchcv.fit(train_X_gender,train_Y_gender)

In [44]:
print('Best f1_macro Score: ', model.best_score_)

Best f1_macro Score:  0.7681556796023556


In [45]:
print('Best Estimator: ',model.best_estimator_)

Best Estimator:  MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
              beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=64, learning_rate='constant',
              learning_rate_init=0.1, max_fun=15000, max_iter=5000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1, shuffle=True, solver='sgd',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)


In [46]:
print('Best Parameters: \n\n',model.best_params_)

Best Parameters: 

 {'activation': 'logistic', 'hidden_layer_sizes': 64, 'learning_rate_init': 0.1, 'max_iter': 5000, 'random_state': 1, 'solver': 'sgd'}


In [47]:
# Predicting labels of test files for Gender recognition
pred=model.predict(test_X_gender)
accuracyscore=accuracy_score(test_Y_gender,pred)
print('Accuracy Score: ',accuracyscore)

Accuracy Score:  0.8647058823529412


In [48]:
report=classification_report(test_Y_gender,pred)
print('Classification Report: \n\n',report)

Classification Report: 

               precision    recall  f1-score   support

           0       0.73      0.68      0.70        40
           1       0.90      0.92      0.91       130

    accuracy                           0.86       170
   macro avg       0.82      0.80      0.81       170
weighted avg       0.86      0.86      0.86       170



In [49]:
cm=confusion_matrix(test_Y_gender,pred)
print('Confusion Matrix: \n',cm)

Confusion Matrix: 
 [[ 27  13]
 [ 10 120]]


**Linear Support Vector for Speaker Recognition**

In [50]:
# Creating object of Linear Support Vector
clf=LinearSVC(random_state=0,max_iter=10000,dual=False)
# Creating model
model=clf.fit(train_X_speaker,train_Y_speaker)
# Predicting labels of test files
pred=model.predict(test_X_speaker)
# Calculating accuracy
accuracyscore=accuracy_score(test_Y_speaker,pred)
print('Accuracy Score: ',accuracyscore)

Accuracy Score:  0.8415492957746479


In [51]:
report=classification_report(test_Y_speaker,pred)
print('Classification Report: \n\n',report)

Classification Report: 

               precision    recall  f1-score   support

           1       1.00      0.50      0.67         2
           2       1.00      1.00      1.00         2
           3       1.00      1.00      1.00         2
           4       0.00      0.00      0.00         2
           5       1.00      1.00      1.00         2
           6       0.67      1.00      0.80         2
           7       0.33      1.00      0.50         2
           8       0.50      0.50      0.50         2
           9       0.50      1.00      0.67         2
          10       1.00      1.00      1.00         2
          11       1.00      1.00      1.00         2
          12       1.00      0.50      0.67         2
          13       0.67      1.00      0.80         2
          14       0.67      1.00      0.80         2
          15       1.00      1.00      1.00         2
          16       1.00      1.00      1.00         2
          17       0.67      1.00      0.80         2
 

  _warn_prf(average, modifier, msg_start, len(result))


In [52]:
cm=confusion_matrix(test_Y_speaker,pred)
print('Confusion Matrix: \n',cm)

Confusion Matrix: 
 [[1 0 0 ... 0 0 0]
 [0 2 0 ... 0 0 0]
 [0 0 2 ... 0 0 0]
 ...
 [0 0 0 ... 2 0 0]
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 0 2]]


**Linear Support Vector for Gender Recognition**

In [53]:
# Creating object of Linear Support Vector
clf=LinearSVC(random_state=0,max_iter=10000,dual=False)
# Creating model
model=clf.fit(train_X_gender,train_Y_gender)
# Predicting labels for test files
pred=model.predict(test_X_gender)
accuracyscore=accuracy_score(test_Y_gender,pred)
print('Accuracy Score: ',accuracyscore)

Accuracy Score:  0.8352941176470589


In [None]:
report=classification_report(test_Y_gender,pred)
print('Classification Report: \n\n',report)

In [55]:
cm=confusion_matrix(test_Y_gender,pred)
print('Confusion Matrix: \n',cm)

Confusion Matrix: 
 [[ 22  18]
 [ 10 120]]


**Gaussian Naive Bayes for Speaker Recognition**

In [56]:
# Creating object of Gaussian Naive Bayes Classifier
gnb=GaussianNB()
# Using pipeline to scale data
pipe = Pipeline([('scaler', StandardScaler()), ('gnb', gnb)])
# Training model
model=pipe.fit(train_X_speaker,train_Y_speaker)
# Predicting labels for test files
pred=model.predict(test_X_speaker)
# Calculating accuracy score
accuracyscore=accuracy_score(test_Y_speaker,pred)
print('Accuracy Score: ',accuracyscore)

Accuracy Score:  0.9225352112676056


In [None]:
report=classification_report(test_Y_speaker,pred)
print('Classification Report: \n\n',report)

In [58]:
cm=confusion_matrix(test_Y_speaker,pred)
print('Confusion Matrix: \n',cm)

Confusion Matrix: 
 [[1 0 0 ... 0 0 0]
 [0 2 0 ... 0 0 0]
 [0 0 2 ... 0 0 0]
 ...
 [0 0 0 ... 2 0 0]
 [0 0 0 ... 0 2 0]
 [0 0 0 ... 0 0 2]]


**Gaussian Naive Bayes for Gender Recognition**

In [59]:
# Creating object of Gaussian Naive Bayes Classifier
gnb=GaussianNB()
# Using pipeline to scale data
pipe = Pipeline([('scaler', StandardScaler()), ('gnb', gnb)])
# Creating model
model=pipe.fit(train_X_gender,train_Y_gender)
# Predicting labels for test files
pred=model.predict(test_X_gender)
accuracyscore=accuracy_score(test_Y_gender,pred)
print('Accuracy Score: ',accuracyscore)

Accuracy Score:  0.8529411764705882


In [60]:
report=classification_report(test_Y_gender,pred)
print('Classification Report: \n\n',report)

Classification Report: 

               precision    recall  f1-score   support

           0       0.71      0.62      0.67        40
           1       0.89      0.92      0.91       130

    accuracy                           0.85       170
   macro avg       0.80      0.77      0.79       170
weighted avg       0.85      0.85      0.85       170



In [61]:
cm=confusion_matrix(test_Y_gender,pred)
print('Confusion Matrix: \n',cm)

Confusion Matrix: 
 [[ 25  15]
 [ 10 120]]
