# Gender Detection using Gaussian Mixture Models

## Setup
We first need to import the Python libraries required for the entire analysis.  
Prerequisites required in this implementation are:
```
Python 3.x 
numpy   
scipy
matplotlib
scikit-learn
librosa
```

You can import any required library into your workspace with:  
`import package_name`  

In [1]:
import os
import pickle
import numpy as np
from scipy.io.wavfile import read
from sklearn.mixture import GaussianMixture as GMM
from sklearn import preprocessing
import librosa
import warnings

# Suppress every Python warning for the rest of the session so the cell
# outputs stay uncluttered.
# NOTE(review): this is a blanket filter, so it also hides warnings that may
# point at real problems -- consider narrowing it once the notebook is stable.
warnings.filterwarnings("ignore")

### Load training data

In [2]:
training_path = '../pygender/train_data'


def list_wav_files(root_dir):
    """Return the paths of all files ending in '.wav' under root_dir.

    The directory tree is walked recursively. A missing directory yields an
    empty list, because os.walk silently skips paths it cannot list.
    """
    wav_paths = []
    # r=root, d=directories, f = files
    for r, d, f in os.walk(root_dir):
        # Match on the suffix: a substring test would also pick up names such
        # as 'clip.wav.bak', which are not audio files.
        wav_paths.extend(os.path.join(r, name) for name in f if name.endswith('.wav'))
    return wav_paths


# One list of training recordings per gender, read from the sub-folder of the
# same name.
male_files = list_wav_files(os.path.join(training_path, 'male'))
female_files = list_wav_files(os.path.join(training_path, 'female'))
print('Number of male files = ', len(male_files))
print('Number of female files = ', len(female_files))

Number of male files =  5
Number of female files =  5


### Feature extraction
Now let's extract MFCC features that we will use for training our GMMs.

We will first train a Gaussian model for female speech.

In [3]:
def get_MFCC(sampling_rate,audio):
    """Compute standardized MFCC features for one audio signal.

    Parameters
    ----------
    sampling_rate : int
        Sampling rate of `audio` in Hz. It also sets the analysis window
        (25 ms) and the hop between frames (10 ms), both in samples.
    audio : numpy.ndarray
        Audio samples as floating-point values (callers in this notebook
        pass `audio.astype(float)`).

    Returns
    -------
    numpy.ndarray
        Matrix with one row per frame and 13 columns (one per MFCC), after
        `preprocessing.scale` has standardized it.
    """
    # Pass the signal and rate by keyword: recent librosa releases made them
    # keyword-only, and keywords are accepted by older releases as well.
    features = librosa.feature.mfcc(
        y=audio,
        sr=sampling_rate,
        win_length=int(0.025*sampling_rate),
        hop_length=int(0.01*sampling_rate),
        n_mfcc=13,
        htk=True,
    )
    # librosa returns (n_mfcc, n_frames); the GMMs expect one sample per row.
    features = np.transpose(features)
    features = preprocessing.scale(features)
    return features

In [4]:
# Build the female training matrix: one row per MFCC frame, all files stacked.
female_chunks = []
for f in female_files:
    sampling_rate,audio = read(f)
    female_chunks.append(get_MFCC(sampling_rate,audio.astype(float)))
# Stack once after the loop; calling np.vstack per file re-copies the whole
# growing matrix every time. With no files, keep the original empty array.
features = np.vstack(female_chunks) if female_chunks else np.asarray(())
print('Feature vector shape: ',features.shape)

Feature vector shape:  (30072, 13)


### Fitting a GMM


In [5]:
# Number of Gaussian components per model; 1 means a single Gaussian.
N_gmm_components = 1
# Diagonal-covariance mixture, best of 3 initializations, at most 200 EM steps.
female_gmm = GMM(
    n_components=N_gmm_components,
    covariance_type='diag',
    max_iter=200,
    n_init=3,
)
female_gmm.fit(features)

GaussianMixture(covariance_type='diag', init_params='kmeans', max_iter=200,
                means_init=None, n_components=1, n_init=3, precisions_init=None,
                random_state=None, reg_covar=1e-06, tol=0.001, verbose=0,
                verbose_interval=10, warm_start=False, weights_init=None)

### Saving the trained model
We use the _pickle_ module to save ("pickle") the trained GMM model, which can be used while testing.

In [6]:
# Create the per-configuration model directory together with its "models"
# parent; os.mkdir alone fails when the parent does not exist yet.
model_dir = "models/model_"+str(N_gmm_components)
os.makedirs(model_dir, exist_ok=True)
picklefile = model_dir+"/female.gmm"
# The context manager closes (and flushes) the file even if pickling fails.
with open(picklefile,'wb') as model_file:
    pickle.dump(female_gmm,model_file)

In [7]:
# Sanity check: evaluates to True when the model directory for the current
# component count exists (the notebook displays the value as the cell result).
os.path.isdir("models/model_"+str(N_gmm_components))

True

Similarly, we train a Gaussian model for male speech

In [18]:
# Build the male training matrix with the same MFCC pipeline as the female one.
male_chunks = []
for f in male_files:
    sampling_rate,audio = read(f)
    male_chunks.append(get_MFCC(sampling_rate,audio.astype(float)))
# Stack once after the loop; calling np.vstack per file re-copies the whole
# growing matrix every time. With no files, keep the original empty array.
features = np.vstack(male_chunks) if male_chunks else np.asarray(())

male_gmm = GMM(n_components = N_gmm_components, max_iter = 200, covariance_type='diag',n_init = 3)
male_gmm.fit(features)

# Make sure the target directory exists so this cell does not depend on the
# female-model cell having created it first.
model_dir = "models/model_"+str(N_gmm_components)
os.makedirs(model_dir, exist_ok=True)
picklefile = model_dir+"/male.gmm"
# The context manager closes (and flushes) the file even if pickling fails.
with open(picklefile,'wb') as model_file:
    pickle.dump(male_gmm,model_file)

# Test

Let's load the test files, and the pickled models

In [19]:
# Display the fitted male model's configuration. predict() needs a feature
# matrix, so calling it with no arguments raises a TypeError.
male_gmm

GaussianMixture(covariance_type='diag', init_params='kmeans', max_iter=200,
                means_init=None, n_components=1, n_init=3, precisions_init=None,
                random_state=None, reg_covar=1e-06, tol=0.001, verbose=0,
                verbose_interval=10, warm_start=False, weights_init=None)

In [9]:
test_path = '../pygender/test_data'
model_path = "models/model_"+str(N_gmm_components)
# os.listdir returns entries in arbitrary order, while the evaluation code
# treats index 0 as female and index 1 as male. Sorting the names puts
# 'female.gmm' before 'male.gmm' on every platform.
gmm_files = [os.path.join(model_path,fname) for fname in sorted(os.listdir(model_path)) if fname.endswith('.gmm')]
print(gmm_files)

['models/model_1/female.gmm', 'models/model_1/male.gmm']


Unpickling to retrieve the saved models

In [10]:
# Load every saved model, keeping the order of gmm_files.
# NOTE: unpickling can execute arbitrary code -- only load model files that
# you created yourself.
models = []
for fname in gmm_files:
    # The context manager closes each file as soon as its model is loaded.
    with open(fname,'rb') as model_file:
        models.append(pickle.load(model_file))

In [11]:
# Classify every test recording and tally the results per gender.
# Index 0 is female and index 1 is male throughout.
# NOTE(review): this assumes `models` is ordered [female, male] -- confirm
# that the order of gmm_files matches.
files = [[], []]

true = [0,0]     # correctly classified files per gender
false = [0,0]    # misclassified files per gender
total = [0,0]    # all evaluated files per gender
for j, gen in enumerate(['female', 'male']):
    # r=root, d=directories, f = files
    for r, d, f in os.walk(os.path.join(test_path, gen)):
        for file in f:
            # Suffix test: a substring test would also accept 'x.wav.bak'.
            if file.endswith('.wav'):
                files[j].append(os.path.join(r, file))
    for wav_path in files[j]:
        sampling_rate, audio  = read(wav_path)
        features   = get_MFCC(sampling_rate,audio.astype(float))
        log_likelihood = np.zeros(len(models))
        for i, gmm in enumerate(models):
            # score() already returns one number for the whole recording
            # (the average per-frame log-likelihood).
            log_likelihood[i] = gmm.score(features)
        # The recording is assigned to the model that explains it best.
        winner = np.argmax(log_likelihood)
        if winner == j:
            true[j]+=1
        else:
            false[j]+=1
        total[j]+=1

Calculate and print accuracy

In [12]:
# Overall accuracy: correctly classified files over all evaluated files,
# pooled across both genders.
n_correct = sum(true)
n_evaluated = sum(total)
accuracy = n_correct/n_evaluated
print("Accuracy = ", accuracy*100, "%")

Accuracy =  49.45652173913043 %


Looks like training a single-Gaussian model did not work very well!

Now let's try training an 8-component Gaussian Mixture Model 
(change `n_components` in the GMM function).

TRAINING:

In [14]:
# Train and save one 8-component GMM per gender.
N_gmm_components = 8
# Create the model directory (and its "models" parent) once, up front;
# os.mkdir alone fails when the parent does not exist yet.
model_dir = "models/model_"+str(N_gmm_components)
os.makedirs(model_dir, exist_ok=True)
# Training recordings per gender; built once because it does not change
# between iterations.
files = {'female':female_files, 'male':male_files}
for gen in ['female', 'male']:
    chunks = []
    for f in files[gen]:
        sampling_rate,audio = read(f)
        chunks.append(get_MFCC(sampling_rate,audio.astype(float)))
    # Stack once after the loop; calling np.vstack per file re-copies the
    # whole growing matrix every time. With no files, keep an empty array.
    features = np.vstack(chunks) if chunks else np.asarray(())

    gmm = GMM(n_components = N_gmm_components, max_iter = 200, covariance_type='diag',n_init = 3)
    gmm.fit(features)
    picklefile = model_dir+'/'+gen+".gmm"
    # The context manager closes (and flushes) the file even if pickling fails.
    with open(picklefile,'wb') as model_file:
        pickle.dump(gmm,model_file)
    print('modeling completed for',picklefile)

modeling completed for models/model_8/female.gmm
modeling completed for models/model_8/male.gmm


TEST

In [15]:
# Evaluate the saved models for the current component count on the test set.
test_path = '../pygender/test_data'
model_path = "models/model_"+str(N_gmm_components)
# os.listdir returns entries in arbitrary order, while the loop below treats
# index 0 as female and index 1 as male. Sorting the names puts 'female.gmm'
# before 'male.gmm' on every platform.
gmm_files = [os.path.join(model_path,fname) for fname in sorted(os.listdir(model_path)) if fname.endswith('.gmm')]
print(gmm_files)
# NOTE: unpickling can execute arbitrary code -- only load model files that
# you created yourself.
models = []
for fname in gmm_files:
    # The context manager closes each file as soon as its model is loaded.
    with open(fname,'rb') as model_file:
        models.append(pickle.load(model_file))

files = [[], []]

true = [0,0]     # correctly classified files per gender
false = [0,0]    # misclassified files per gender
total = [0,0]    # all evaluated files per gender
for j, gen in enumerate(['female', 'male']):
    # r=root, d=directories, f = files
    for r, d, f in os.walk(os.path.join(test_path, gen)):
        for file in f:
            # Suffix test: a substring test would also accept 'x.wav.bak'.
            if file.endswith('.wav'):
                files[j].append(os.path.join(r, file))
    for wav_path in files[j]:
        sampling_rate, audio  = read(wav_path)
        features   = get_MFCC(sampling_rate,audio.astype(float))
        log_likelihood = np.zeros(len(models))
        for i, gmm in enumerate(models):
            # score() already returns one number for the whole recording
            # (the average per-frame log-likelihood).
            log_likelihood[i] = gmm.score(features)
        # The recording is assigned to the model that explains it best.
        winner = np.argmax(log_likelihood)
        if winner == j:
            true[j]+=1
        else:
            false[j]+=1
        total[j]+=1
accuracy = (sum(true)/sum(total))
print("Accuracy = ", accuracy*100, "%")

['models/model_8/female.gmm', 'models/model_8/male.gmm']
Accuracy =  83.42391304347827 %


Now try changing the parameters of the GMM to improve the accuracy of gender detection.
You could also try recording your voice, and test if the GMM works!

In [17]:
# Predict the gender of the speaker in a single recording.
audio_file_path = 'manan.wav'
N_gmm_components = 1
model_path = "models/model_"+str(N_gmm_components)
# os.listdir returns entries in arbitrary order, while the label table below
# maps index 0 to female and index 1 to male. Sorting the names puts
# 'female.gmm' before 'male.gmm' on every platform.
gmm_files = [os.path.join(model_path,fname) for fname in sorted(os.listdir(model_path)) if fname.endswith('.gmm')]
print(gmm_files)
# NOTE: unpickling can execute arbitrary code -- only load model files that
# you created yourself.
models = []
for fname in gmm_files:
    # The context manager closes each file as soon as its model is loaded.
    with open(fname,'rb') as model_file:
        models.append(pickle.load(model_file))

# Named `labels` rather than `dict` so the builtin type is not shadowed for
# the rest of the session.
labels = {0:'FEMALE', 1:'MALE'}
sampling_rate, audio  = read(audio_file_path)
features   = get_MFCC(sampling_rate,audio.astype(float))
log_likelihood = np.zeros(len(models))
for i, gmm in enumerate(models):
    # score() already returns one number for the whole recording
    # (the average per-frame log-likelihood).
    log_likelihood[i] = gmm.score(features)
# The recording is assigned to the model that explains it best.
winner = np.argmax(log_likelihood)
print(labels[winner])

['models/model_1/female.gmm', 'models/model_1/male.gmm']
MALE
