<a href="https://colab.research.google.com/github/suruchi2010/SSN-projects/blob/main/speaker_id_verification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip3 install python_speech_features

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting python_speech_features
  Downloading python_speech_features-0.6.tar.gz (5.6 kB)
Building wheels for collected packages: python-speech-features
  Building wheel for python-speech-features (setup.py) ... [?25l[?25hdone
  Created wheel for python-speech-features: filename=python_speech_features-0.6-py3-none-any.whl size=5888 sha256=3221c397be1efa63fd946a7ed506606ac04e049076d02584cc1b51502555adea
  Stored in directory: /root/.cache/pip/wheels/b0/0e/94/28cd6afa3cd5998a63eef99fe31777acd7d758f59cf24839eb
Successfully built python-speech-features
Installing collected packages: python-speech-features
Successfully installed python-speech-features-0.6


In [None]:
import pickle
import numpy as np

from sklearn.mixture import GaussianMixture
from sklearn import preprocessing
from scipy.io.wavfile import read
from python_speech_features import mfcc
from python_speech_features import delta

import warnings
warnings.filterwarnings("ignore")

In [None]:
from glob import glob 
#glob()

from google.colab import drive
# Mount Google Drive so the recordings under "My Drive" are readable.
drive.mount('/content/gdrive',force_remount=True)

path = "/content/gdrive/My Drive/speaker id"
audio_files= glob(path + '/*')
# glob() makes no promise about the order of its results, so the split folders
# are selected by name rather than by their position in `audio_files`.
audio_test=glob(path + '/testing/*')
audio_train=glob(path + '/training/*')

#audio_train
audio_test

Mounted at /content/gdrive


['/content/gdrive/My Drive/speaker id/testing/kavitha',
 '/content/gdrive/My Drive/speaker id/testing/madhu',
 '/content/gdrive/My Drive/speaker id/testing/suruchi']

In [None]:
class feature_ext:
    """Turns a WAV file into a matrix of MFCC + delta + delta-delta features."""

    # Length of the analysis window and the hop between windows, in seconds.
    WINLEN = 0.05
    WINSTEP = 0.01
    # Lower bound for the FFT size; this is the value that used to be hard-coded,
    # so recordings for which it was already large enough are processed as before.
    MIN_NFFT = 1024

    def __init__(self):
        pass

    @staticmethod
    def _fft_size(sample_rate, winlen=0.05, minimum=1024):
        """Return a power-of-two FFT size that covers one analysis window.

        Args:
            sample_rate: sampling rate of the recording in Hz.
            winlen: analysis window length in seconds.
            minimum: smallest FFT size to return.

        Returns:
            The larger of ``minimum`` and the smallest power of two that is at
            least ``round(sample_rate * winlen)`` samples.
        """
        frame_len = max(1, int(round(sample_rate * winlen)))
        covering_pow2 = 1 << (frame_len - 1).bit_length()
        return max(minimum, covering_pow2)

    def extract_features(self, audio_path):
        """Read ``audio_path`` and return its stacked feature matrix.

        Args:
            audio_path: path to a WAV file readable by ``scipy.io.wavfile.read``.

        Returns:
            A 2-D array with one row per frame, made of the scaled MFCCs followed
            by their deltas and double deltas (stacked column-wise).
        """
        # scipy returns (sampling rate, samples) -- in that order.
        sample_rate, signal = read(audio_path)
        print(sample_rate)

        # A fixed FFT size of 1024 is shorter than a 50 ms window once the
        # sampling rate exceeds ~20 kHz (e.g. 2205 samples at 44.1 kHz), so the
        # FFT size is derived from the actual rate of each file.
        nfft = self._fft_size(sample_rate, self.WINLEN, self.MIN_NFFT)

        mfcc_feature = mfcc(
                            signal,
                            sample_rate,
                            winlen       = self.WINLEN,  # the length of the analysis window in seconds
                            winstep      = self.WINSTEP, # the step between successive windows in seconds
                            numcep       = 13,           # the number of cepstrum to return
                            nfilt        = 30,           # the number of filters in the filterbank
                            nfft         = nfft,         # the FFT size
                            appendEnergy = True)         # replace the zeroth cepstral coefficient with the log of the total frame energy

        # Standardise the MFCC columns before deriving the dynamic features.
        mfcc_feature  = preprocessing.scale(mfcc_feature)
        deltas        = delta(mfcc_feature, 2)
        double_deltas = delta(deltas, 2)
        combined      = np.hstack((mfcc_feature, deltas, double_deltas))
        return combined

In [None]:
import os
warnings.filterwarnings("ignore")


class ModelsTrainer:
    """Trains one Gaussian mixture model per speaker and pickles each model.

    Args:
        madhu_file: folder holding madhu's training recordings.
        suruchi_file: folder holding suruchi's training recordings.
        kavitha_file: folder holding kavitha's training recordings.
    """

    def __init__(self,madhu_file, suruchi_file, kavitha_file):
        self.madhu_train   = madhu_file
        self.suruchi_train = suruchi_file
        self.kavitha_train  = kavitha_file
        self.features_extractor    = feature_ext()

    def process(self):
        """Extract features for every speaker, fit the models, then save them."""
        # Keyword arguments are used on purpose: get_file_paths declares its
        # parameters in a different order than it returns its results, and a
        # positional call silently trained each model on another speaker's data.
        madhu, suruchi, kavitha = self.get_file_paths(suruchi_train = self.suruchi_train,
                                                      kavitha_train = self.kavitha_train,
                                                      madhu_train   = self.madhu_train)
        speaker_files = (("madhu", madhu), ("suruchi", suruchi), ("kavitha", kavitha))

        # collect voice features
        speaker_features = [(name, self.collect_features(files)) for name, files in speaker_files]

        # generate gaussian mixture models and fit features to them
        fitted_models = []
        for name, features in speaker_features:
            gmm = self._new_gmm()
            gmm.fit(features)
            fitted_models.append((name, gmm))

        # save models (only once every model has been fitted successfully)
        for name, gmm in fitted_models:
            self.save_gmm(gmm, name)

    @staticmethod
    def _new_gmm():
        """Return an unfitted mixture model with the settings shared by all speakers."""
        return GaussianMixture(n_components = 64, max_iter = 1000, covariance_type='full', n_init = 3)

    def get_file_paths(self, suruchi_train, kavitha_train, madhu_train):
        """List the files inside each speaker's training folder.

        Note that the parameters are ordered (suruchi, kavitha, madhu) while the
        result is ordered (madhu, suruchi, kavitha).

        Returns:
            Three lists of file paths: madhu's, suruchi's and kavitha's.
        """
        madhu   = [ os.path.join(madhu_train, f) for f in os.listdir(madhu_train) ]
        suruchi = [ os.path.join(suruchi_train, f) for f in os.listdir(suruchi_train) ]
        kavitha  = [ os.path.join(kavitha_train, f) for f in os.listdir(kavitha_train) ]

        return madhu,suruchi, kavitha

    def collect_features(self, files):
        """Extract the features of every file and stack them row-wise.

        Args:
            files: iterable of audio file paths belonging to one speaker.

        Returns:
            A single 2-D array holding the frames of all files, or an empty
            array when ``files`` is empty.
        """
        vectors = []
        # extract features for each recording of the speaker
        for file in files:
            print("%5s %10s" % ("PROCESSNG ", file))
            # extract MFCC & delta MFCC features from audio
            vectors.append(self.features_extractor.extract_features(file))

        if not vectors:
            return np.asarray(())
        # Stack once at the end instead of re-copying the growing array per file.
        return np.vstack(vectors)

    def save_gmm(self, gmm, name):
        """Pickle ``gmm`` to ``<name>.gmm`` in the current working directory."""
        filename = name + ".gmm"
        with open(filename, 'wb') as gmm_file:
            pickle.dump(gmm, gmm_file)
        print ("%5s %10s" % ("SAVING", filename,))

if __name__== "__main__":
    # glob() returns the speaker folders in no guaranteed order, so each
    # speaker's training folder is looked up by its name instead of by index.
    train_dirs = {os.path.basename(folder): folder for folder in audio_train}
    models_trainer = ModelsTrainer(train_dirs["madhu"], train_dirs["suruchi"], train_dirs["kavitha"])
    models_trainer.process()

PROCESSNG  /content/gdrive/My Drive/speaker id/training/suruchi/suruchi_1.wav
44100




PROCESSNG  /content/gdrive/My Drive/speaker id/training/suruchi/suruchi_2.wav
16000
PROCESSNG  /content/gdrive/My Drive/speaker id/training/suruchi/suruchi_3.wav
16000
PROCESSNG  /content/gdrive/My Drive/speaker id/training/suruchi/suruchi_4 .wav
16000
PROCESSNG  /content/gdrive/My Drive/speaker id/training/madhu/madhu_4.wav
16000
PROCESSNG  /content/gdrive/My Drive/speaker id/training/madhu/madhu_3.wav
16000
PROCESSNG  /content/gdrive/My Drive/speaker id/training/madhu/madhu_2.wav
16000
PROCESSNG  /content/gdrive/My Drive/speaker id/training/madhu/madhu_1.wav
16000
PROCESSNG  /content/gdrive/My Drive/speaker id/training/kavitha/kavitha_1.wav
16000
PROCESSNG  /content/gdrive/My Drive/speaker id/training/kavitha/kavitha_2.wav
16000
PROCESSNG  /content/gdrive/My Drive/speaker id/training/kavitha/kavitha_3.wav
16000
PROCESSNG  /content/gdrive/My Drive/speaker id/training/kavitha/kavitha_4.wav
16000
SAVING  madhu.gmm
SAVING suruchi.gmm
SAVING kavitha.gmm


In [None]:
class SpeakerIdentifier:
    """Scores test recordings against the three speaker models and counts errors.

    Args:
        speaker1_files_path, speaker2_files_path, speaker3_files_path: folders of
            test recordings; each folder must be named after its speaker.
        madhu_model_path, suruchi_model_path, kavitha_model_path: pickled models.

    Attributes set by ``process``: ``total_sample`` (recordings tested) and
    ``error`` (recordings whose identified speaker differs from the expected one).
    """

    # Speaker names in the order identify_speaker scores them; its return value
    # is an index into this tuple.
    SPEAKER_NAMES = ("madhu", "suruchi", "kavitha")

    def __init__(self, speaker1_files_path, speaker2_files_path, speaker3_files_path,madhu_model_path, suruchi_model_path,kavitha_model_path):
        self.speaker1_testing_path = speaker1_files_path
        self.speaker2_testing_path = speaker2_files_path
        self.speaker3_testing_path = speaker3_files_path
        self.error                 = 0
        self.total_sample          = 0
        self.features_extractor   = feature_ext()
        # load models
        self.madhu_gmm   = self._load_model(madhu_model_path)
        self.suruchi_gmm = self._load_model(suruchi_model_path)
        self.kavitha_gmm   = self._load_model(kavitha_model_path)

    @staticmethod
    def _load_model(model_path):
        """Unpickle one model, closing the file handle afterwards."""
        # NOTE: unpickling can execute arbitrary code -- only load model files
        # that were produced by this notebook.
        with open(model_path, 'rb') as model_file:
            return pickle.load(model_file)

    def process(self):
        """Identify the speaker of every test recording and tally the mistakes."""
        # read the test directories and get the list of test audio files
        files = self.get_file_paths(self.speaker1_testing_path, self.speaker2_testing_path,self.speaker3_testing_path)
        for file in files:
            self.total_sample += 1
            print("%10s %8s %1s" % ("--> TESTING", ":", os.path.basename(file)))

            vector = self.features_extractor.extract_features(file)
            # identify_speaker yields an index; turn it into the speaker's name
            # so it can be compared with (and printed next to) the expectation.
            winner = self.SPEAKER_NAMES[self.identify_speaker(vector)]
            # The recording's parent folder is named after the true speaker.
            expected_speaker = os.path.basename(os.path.dirname(file))

            print("%10s %6s %1s" % ("+ EXPECTATION",":", expected_speaker))
            print("%10s %3s %1s" %  ("+ IDENTIFICATION", ":", winner))

            if winner != expected_speaker: self.error += 1
            print("----------------------------------------------------")

    def get_file_paths(self, speaker1_testing_path, speaker2_testing_path, speaker3_testing_path):
        """Return the paths of all files in the three test folders, concatenated."""
        speaker1 = [ os.path.join(speaker1_testing_path, f) for f in os.listdir(speaker1_testing_path) ]
        speaker2   = [ os.path.join(speaker2_testing_path, f) for f in os.listdir(speaker2_testing_path) ]
        speaker3   = [ os.path.join(speaker3_testing_path, f) for f in os.listdir(speaker3_testing_path) ]

        files   = speaker1 + speaker2 + speaker3
        return files

    def identify_speaker(self, vector):
        """Score ``vector`` under every model and return the best model's index.

        Args:
            vector: feature matrix of one recording.

        Returns:
            Index into ``SPEAKER_NAMES`` (0 = madhu, 1 = suruchi, 2 = kavitha) of
            the model with the highest score.
        """
        models = (self.madhu_gmm, self.suruchi_gmm, self.kavitha_gmm)

        # One score per hypothesis, in SPEAKER_NAMES order.
        log_likelihood = [np.sum(model.score(vector)) for model in models]

        for name, score in zip(self.SPEAKER_NAMES, log_likelihood):
            print("%10s %5s %1s" % ("+ %s SCORE" % name.upper(),":", str(round(score, 3))))

        winner = np.argmax(log_likelihood)
        return winner

        


if __name__== "__main__":
    # glob() returns the speaker folders in no guaranteed order, so each
    # speaker's test folder is looked up by its name instead of by index.
    test_dirs = {os.path.basename(folder): folder for folder in audio_test}
    speaker_identifier = SpeakerIdentifier(test_dirs["madhu"], test_dirs["suruchi"], test_dirs["kavitha"],"madhu.gmm","suruchi.gmm", "kavitha.gmm")
    speaker_identifier.process()

--> TESTING        : madhu_cut5sec.wav
16000
+ MADHU SCORE     : -7.135
+ SURUCHI SCORE     : -0.522
+ KAVITHA SCORE     : -4.342
+ EXPECTATION      : conten
+ IDENTIFICATION   : 1
----------------------------------------------------
--> TESTING        : madhu_2.wav
16000
+ MADHU SCORE     : -5.399
+ SURUCHI SCORE     : -1.91
+ KAVITHA SCORE     : -6.07
+ EXPECTATION      : conten
+ IDENTIFICATION   : 1
----------------------------------------------------
--> TESTING        : suruchi_test2.wav
16000
+ MADHU SCORE     : -5.412
+ SURUCHI SCORE     : -7.978
+ KAVITHA SCORE     : -11.024
+ EXPECTATION      : conten
+ IDENTIFICATION   : 0
----------------------------------------------------
--> TESTING        : suruchi_test1.wav
16000
+ MADHU SCORE     : 10.188
+ SURUCHI SCORE     : -3.346
+ KAVITHA SCORE     : -3.567
+ EXPECTATION      : conten
+ IDENTIFICATION   : 0
----------------------------------------------------
--> TESTING        : kavitha _2.wav
16000
+ MADHU SCORE     : -8.886
+ 