### Required Libraries

### First, make sure ffmpeg is installed for audio processing. Here is a guide on how to install it:
- https://phoenixnap.com/kb/ffmpeg-windows

In [4]:
import os
import librosa
import warnings
import shutil
import pickle
import subprocess
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from hmmlearn import hmm
from sklearn import preprocessing
from sklearn.svm import SVC
from python_speech_features import mfcc
from python_speech_features import delta
from sklearn.model_selection import train_test_split


### Terminal Configuration
Run this cell to keep warnings from being printed to the console.

In [6]:
# Install a global filter that ignores every warning raised through the `warnings` module.
warnings.filterwarnings('ignore')

# Helper Functions

Functions used later in the notebook.

In [None]:
# Helper Functions

def get_files_from_paths(females_path, males_path):
    """Return the joined paths of every entry found in the two folders.

    Entries of `females_path` come first, followed by the entries of
    `males_path`; within each folder the order is whatever `os.listdir`
    reports.
    """
    collected = []
    for folder in (females_path, males_path):
        for entry in os.listdir(folder):
            collected.append(os.path.join(folder, entry))
    return collected


def save_gmm(gmm, name):
    """Pickle `gmm` into the file `<name>.gmm` in the current working directory.

    The destination path is printed once the file has been written.
    """
    target = os.path.join(os.getcwd(), f"{name}.gmm")
    with open(target, 'wb') as handle:
        pickle.dump(gmm, handle)
    print ("SAVING", target)

# Dataset Splitting
### Only run this cell if the dataset has not already been split into the `audios/data` folder
We split the data into 80% training and 20% testing.

In [None]:
test_size = 0.2  # 20% testing
random_seed = 42

input_path = 'audios/dataset'
output_path = 'audios/data'

females_path = os.path.join(input_path, 'females')
males_path = os.path.join(input_path, 'males')


def _split_and_copy(source_dir, class_name):
    """Split the files of one class into train/test sets and copy them out.

    Files found directly in `source_dir` are split with `train_test_split`
    (using the module-level `test_size` and `random_seed`) and copied -- not
    moved -- into `<output_path>/TrainingData/<class_name>` and
    `<output_path>/TestingData/<class_name>`.

    Returns the `(train_files, test_files)` lists of source paths.
    """
    train_dir = os.path.join(output_path, 'TrainingData', class_name)
    test_dir = os.path.join(output_path, 'TestingData', class_name)
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting makes the seeded
    # split reproducible. Sub-directories are skipped because shutil.copy only
    # handles regular files.
    # NOTE: if the output folders already hold a split made with a different
    # ordering, clear them first so no file ends up in both sets.
    candidates = sorted(os.path.join(source_dir, f) for f in os.listdir(source_dir))
    files = [path for path in candidates if os.path.isfile(path)]

    train_files, test_files = train_test_split(
        files, test_size=test_size, random_state=random_seed
    )

    for destination, subset in ((train_dir, train_files), (test_dir, test_files)):
        for file_path in subset:
            shutil.copy(file_path, os.path.join(destination, os.path.basename(file_path)))

    return train_files, test_files


# Split and copy the female recordings
females_train, females_test = _split_and_copy(females_path, 'females')

# Split and copy the male recordings
males_train, males_test = _split_and_copy(males_path, 'males')



# Extract Features From Signal Using MFCC

We use the `python_speech_features` library to extract features from the audio.

In [2]:

# function to load the signal,samplerate from audio path

def getSignalnRate(file_path):
    """Load an audio file with librosa and return `(sample_rate, signal)`.

    `sr=None` is forwarded to `librosa.load`; note that the returned tuple is
    in the opposite order to librosa's `(signal, sample_rate)`.
    """
    loaded = librosa.load(file_path, sr=None)
    return loaded[1], loaded[0]

"""
(audio) The audio signal from which to compute features.
(rate) The sample rate of the signal we are working with.
(winlen) The length of the analysis window in seconds. Default is 0.025s (25 milliseconds).
(winstep) The step between successive windows in seconds. Default is 0.01s (10 milliseconds).
(numcep) The number of cepstral coefficients to return. Default is 13.
(nfilt) The number of filters in the filterbank. Default is 26.
(nfft) The FFT size. Default is 512.
(appendEnergy) If true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.

(deltas) Capture the rate of change of the MFCCs over time, providing information about the dynamics of the audio signal.
"""

def extract_features(audio_path):
    """Build the frame-level feature matrix for one audio file.

    The signal is loaded via `getSignalnRate`, converted to MFCCs, scaled with
    `preprocessing.scale`, and then extended with first- and second-order
    deltas. The three matrices are stacked side by side and returned.
    """
    sample_rate, signal = getSignalnRate(audio_path)
    # NOTE(review): with winlen=0.05 the frame exceeds nfft=1024 samples once the
    # sample rate is above 20480 Hz; python_speech_features appears to truncate
    # such frames -- confirm before relying on high-rate recordings.
    cepstra = mfcc(
        signal,
        sample_rate,
        winlen=0.05,
        winstep=0.01,
        numcep=13,
        nfilt=30,
        nfft=1024,
        appendEnergy=True,
    )
    cepstra = preprocessing.scale(cepstra)
    first_order = delta(cepstra, 2)
    second_order = delta(first_order, 2)
    return np.hstack((cepstra, first_order, second_order))

# Model Training

Next, we train the GMM models on the training data and save them so they can be loaded by the SVC for testing.
(If you already have the files `females.gmm` and `males.gmm`, there is no need to train again, as the models are already trained.)

In [None]:

females_training_path = "audios/data/TrainingData/females"
males_training_path   = "audios/data/TrainingData/males"

def Train():
    """Collect per-speaker model means for both genders and pickle them.

    For every training file, features are extracted with `extract_features`,
    a 16-component `hmm.GaussianHMM` is fitted on them, and its `means_` rows
    are appended to the array of the file's gender. The gender key is the
    parent folder name without its last character ("females" -> "female").
    The two arrays are written with `save_gmm` as "females" and "males".

    Files that fail at any step are reported and skipped rather than aborting
    the whole run.
    """
    files = get_files_from_paths(females_training_path,
                                 males_training_path)
    print("Files Loaded For Training ="+ str(len(files)))
    # collect voice features
    features = {"female" : np.asarray(()), "male" : np.asarray(())}

    for file in files:
        print("Training", ":", os.path.basename(file))
        print(features["female"].shape, features["male"].shape)
        try:
            # extract MFCC & delta MFCC features from audio
            vector  = extract_features(file)
            spk_gmm = hmm.GaussianHMM(n_components=16)
            spk_gmm.fit(vector)
            spk_vec = spk_gmm.means_

            gender = os.path.basename(os.path.dirname(file))[:-1]
            print(gender)
            if features[gender].size == 0:
                features[gender] = spk_vec
            else:
                features[gender] = np.vstack((features[gender], spk_vec))

        except Exception as exc:
            # Keep the best-effort behaviour, but say which file was dropped and
            # why; `Exception` (not a bare except) lets Ctrl-C interrupt the run.
            print("Skipping", os.path.basename(file), ":", exc)

    save_gmm(features["female"], "females")
    save_gmm(features["male"],   "males")


# Start the training process
Train()

# Model Testing

We test the model on the testing data and evaluate our SVC model. (If you already have the `svm_model` file, the model has already been tested and saved, and is ready for inference.)

In [None]:

females_testing_path = "audios/data/TestingData/females"
males_testing_path = "audios/data/TestingData/males"

females_model_path = "females.gmm"
males_model_path = "males.gmm"

# load models
# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# .gmm files that you produced yourself with the training cell.
# `with` guarantees the file handles are closed after loading.
with open(females_model_path, 'rb') as females_file:
    females_gmm = pickle.load(females_file)
with open(males_model_path, 'rb') as males_file:
    males_gmm = pickle.load(males_file)

# Data split: stack both feature arrays; label female rows -1 and male rows +1
X_train = np.vstack((females_gmm, males_gmm))
y_train = np.hstack(( -1 * np.ones(females_gmm.shape[0]), np.ones(males_gmm.shape[0])))

# replace NaN values with the mean of their column
df = pd.DataFrame(X_train)
df_new = df.fillna(df.mean())
X_train = df_new

# SVM training on gmm output and gender labels
clf = SVC(kernel = 'rbf', probability=True)
clf.fit(X_train, y_train)

# SVM testing
def Test():
    """Evaluate the fitted SVM `clf` on the testing folders and print accuracy.

    For each test file a 16-component `hmm.GaussianHMM` is fitted on its
    features, every row of `means_` is classified by `clf`, and the file is
    labelled "male" when the predictions sum to more than zero, otherwise
    "female". The expected label is the parent folder name without its last
    character.

    Files that raise during feature extraction, fitting or prediction are
    reported and excluded from the accuracy, instead of being counted as
    correct predictions.
    """
    genders = {-1: "female", 1: "male"}
    error = 0
    total_sample = 0
    failed = 0

    files = get_files_from_paths(females_testing_path, males_testing_path)
    for file in files:
        print(" TESTING", ":", file)

        try:
            # extract features
            vector = extract_features(file)
            # fit the per-file model and classify its component means
            spk_gmm = hmm.GaussianHMM(n_components=16)
            spk_gmm.fit(vector)
            spk_vec = spk_gmm.means_
            sc = 1 if sum(clf.predict(spk_vec)) > 0 else -1
        except Exception as exc:
            failed += 1
            print("Error", ":", exc)
            continue

        # Only files that were actually classified count towards the total.
        total_sample += 1
        winner = genders[sc]
        expected_gender = os.path.basename(os.path.dirname(file))[:-1]
        print(expected_gender)

        print("actual gender",":", expected_gender)
        print("identification", ":", winner)

        if winner != expected_gender:
            error += 1
        print("----------------------------------------------------")

    if failed:
        print("Files skipped due to errors =", failed)
    if total_sample == 0:
        # Nothing was evaluated; avoid dividing by zero.
        print("Model Accuracy = n/a (no files were evaluated)")
        return

    accuracy = ( float(total_sample - error) / float(total_sample) ) * 100
    accuracy_msg = "Model Accuracy = " + str(round(accuracy, 3)) + "%"
    print(accuracy_msg)



# Start The Testing Process
Test()

# Model Saving

Save the model for easy access

In [None]:
import joblib
# Persist the fitted classifier `clf` to 'svm_model.joblib' (relative to the current working directory).
joblib.dump(clf, 'svm_model.joblib')

# Model Inference

Now that the model has been tested and evaluated, we can try it on custom input.

In [7]:
import joblib

# Reload the classifier stored in "svm_model.joblib"; this rebinds the global `clf` that Infer uses.
# NOTE: joblib files are pickle-based, so only load model files you trust.
clf = joblib.load("svm_model.joblib")

# Model Inference With Custom input 
def Infer(path_to_custom_file):
    """Print the predicted gender ("male" or "female") for one audio file.

    A 16-component `hmm.GaussianHMM` is fitted on the file's features and the
    global classifier `clf` predicts a label for each row of `means_`. A
    positive sum of predictions prints "male", anything else prints "female".
    Any exception is caught and printed as "Error: <message>".
    """
    try:
        frames = extract_features(path_to_custom_file)
        speaker_model = hmm.GaussianHMM(n_components=16)
        speaker_model.fit(frames)

        votes = clf.predict(speaker_model.means_)
        gender = "male" if sum(votes) > 0 else "female"
        print(gender)

    except Exception as e:
        print(f"Error: {str(e)}")


# Start the inference process: pass the audio file path (relative to the current directory) as a quoted string.
Infer("custom-audio/test.m4a")

female
