# Gender Detection using Gaussian Mixture Models

## Setup
We first need to import the Python libraries required for the entire analysis.  
The prerequisites for this implementation are:
```
Python 3.x 
numpy   
scipy
matplotlib
scikit-learn
librosa
python_speech_features
```

You can import any required library into your workspace with:  
`import package_name`  

In [15]:
import os
import pickle
import numpy as np
from scipy.io.wavfile import read
from sklearn.mixture import GaussianMixture as GMM
import python_speech_features as mfcc
import librosa
from sklearn import preprocessing
import warnings
import argparse

# Suppress every warning for the rest of the session so the notebook output
# stays readable. NOTE: this is a global switch -- it hides warnings from all
# libraries, including ones that may point at real problems.
warnings.filterwarnings("ignore")

### Load training data

In [11]:
training_path = '../pygender/train_data'


def find_wav_files(directory):
    """Return the paths of all WAV files found under *directory*.

    The directory tree is searched recursively. A file counts as a WAV file
    when its name ends with ``.wav`` (case-insensitive), so names that merely
    contain ".wav" somewhere (e.g. ``clip.wav.bak``) are not picked up.

    The result is sorted so the training order does not depend on the order
    in which the filesystem happens to list directory entries. A missing
    directory yields an empty list (``os.walk`` ignores the error by default).
    """
    return sorted(
        os.path.join(root, name)
        for root, _dirs, names in os.walk(directory)
        for name in names
        if name.lower().endswith('.wav')
    )


# One sub-directory per class: <training_path>/male and <training_path>/female.
male_files = find_wav_files(os.path.join(training_path, 'male'))
female_files = find_wav_files(os.path.join(training_path, 'female'))

print('Number of male files = ', len(male_files))
print('Number of female files = ', len(female_files))

Number of male files =  5
Number of female files =  5


### Feature extraction
Now let's extract MFCC features that we will use for training our GMMs

In [26]:
def get_MFCC(sampling_rate,audio):
    """Compute scaled MFCC features for one audio signal.

    Parameters
    ----------
    sampling_rate : sample rate of ``audio`` (as returned by
        ``scipy.io.wavfile.read``).
    audio : the audio samples to analyse.

    Returns
    -------
    The MFCC matrix produced by ``python_speech_features.mfcc`` using a
    0.025 s analysis window, a 0.01 s step and 13 cepstral coefficients
    (without replacing the first coefficient by the frame energy), passed
    through ``sklearn.preprocessing.scale``.

    Note: ``librosa.feature.mfcc`` is a possible alternative extractor.
    """
    cepstra = mfcc.mfcc(
        audio,
        sampling_rate,
        winlen=0.025,
        winstep=0.01,
        numcep=13,
        appendEnergy=False,
    )
    return preprocessing.scale(cepstra)

In [27]:
# Build one feature matrix for the male class by stacking the MFCC matrix of
# every male recording on top of each other (rows from all files are pooled).
# The per-file matrices are collected first and stacked once at the end:
# calling np.vstack inside the loop would re-copy the whole growing matrix on
# every iteration.
per_file_features = []
for f in male_files:
    sampling_rate,audio = read(f)
    vector   = get_MFCC(sampling_rate,audio)
    per_file_features.append(vector)

# With no input files, fall back to the same empty array the loop started
# from, since np.vstack rejects an empty sequence.
features = np.vstack(per_file_features) if per_file_features else np.asarray(())

In [28]:
# Display the shape of the stacked male feature matrix: rows are the feature
# vectors pooled from all male files, columns are the MFCC coefficients.
features.shape

(30169, 13)