In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from os import listdir
import os
import sys
from librosa import display,util,load,feature
import librosa

In [2]:
%matplotlib inline

In [3]:
# Import different models 
import keras
from keras.models import model_from_json
import numpy
import os
import json

In [4]:
def _extract_feat(audiofile_path):
    # load in audio file
    y, sr = load(audiofile_path) # y = audio file, sr = sample rate

    # extract the various features of the audio
    mfcc = np.mean(feature.mfcc(y = y, sr = sr, n_mfcc=40).T, axis = 0)  
    mel = np.mean(feature.melspectrogram(y = y, sr = sr).T, axis = 0)
    stft = np.abs(librosa.stft(y))
    chroma = np.mean(feature.chroma_stft(S = stft, y = y, sr = sr).T, axis = 0)
    contrast = np.mean(feature.spectral_contrast(S = stft, y = y, sr = sr).T, axis = 0)
    tonnetz =  np.mean(feature.tonnetz(y = librosa.effects.harmonic(y), sr = sr).T, axis = 0)

    return mfcc,chroma,mel,contrast,tonnetz # shape: (40,), (12,), (128,), (7,), (6,)

In [5]:
def _read_single_predict_file(audiofile_path):
    # Instatiate a dataframe the train audio features will be in 
    columns =  ['mfcc']*40 + ['chroma']*12 + ['mel']*128 + ['contrast']*7 + ['tonnetz']*6
    audio_df = pd.DataFrame(columns = columns)
    audio_df # 0 rows × 193 columns
    mfcc,chroma,mel,contrast,tonnetz = _extract_feat(audiofile_path)
    features = np.hstack([mfcc,chroma,mel,contrast,tonnetz])
    
    # add id in the front of the features array
    labelled = np.insert(features, 0, axis = 0)
    fill = np.empty((0,193))
    row = np.vstack([fill,labelled]) # shape (0,193)
    
    # put row in a dataframe
    row_df = pd.DataFrame(columns = columns)
    # append row_df into the dataframe
    audio_df = audio_df.append([row_df], ignore_index = True)
    
    audio_df.reset_index(inplace = True, drop = True)
    
    return audio_df

In [6]:
# Show the kernel's working directory; the model and CSV files further down are opened by relative name.
os.getcwd()

'C:\\Users\\rujut\\Desktop\\Sleigh Group'

In [7]:
# Switch to the project folder so the relative file names used below resolve.
# NOTE(review): this absolute path is specific to one machine.
os.chdir(r'C:\Users\rujut\Desktop\Sleigh Group')

In [8]:
# Read the serialized architecture first, then rebuild the (still unweighted) model from it.
with open('model_1201.json', mode = 'r', encoding = 'utf-8') as json_file:
    architecture_json = json_file.read()
model = model_from_json(architecture_json)

In [9]:
# Load the saved weights from the .h5 file into the model rebuilt from JSON,
# then print a confirmation message.
model.load_weights("model_weights_1201.h5")
print("Loaded model from disk")

Loaded model from disk


In [10]:
# Progress marker for interactive runs.
print("Done")

Done


In [11]:
# First sample recording; it is assigned to audiofile_path further down and used for the prediction.
one_example  = "C:/Users/rujut/Desktop/Sleigh Group/Australia/Australia/Adrienne O'Connor/Adrienne O'Connor Calm.wav"
print(one_example)

C:/Users/rujut/Desktop/Sleigh Group/Australia/Australia/Adrienne O'Connor/Adrienne O'Connor Calm.wav


In [12]:
# Second sample recording (not referenced by the other cells visible here).
example_two = "C:/Users/rujut/Desktop/Sleigh Group/Australia/Australia/Adrienne O'Connor/Adrienne O'Connor Energetic.wav"

In [13]:
# Third sample recording (not referenced by the other cells visible here).
example_three = "C:/Users/rujut/Desktop/Sleigh Group/Australia/Australia/Test/rujutavoice.wav"

In [14]:
# Speaker metadata table: add an 'ID' column holding the 'VoxCeleb1 ID'
# string without its first two characters, then order the rows by that column.
star_list = (
    pd.read_csv("vox1_meta.csv")
    .assign(ID = lambda meta: meta['VoxCeleb1 ID'].str.slice(start=2))
    .sort_values('ID')
)

In [15]:
# Select which sample recording the prediction cell will analyse.
audiofile_path = one_example

In [26]:
def _calculate_possible(audiofile_path):
    columns = ['id'] + ['mfcc']*40 + ['chroma']*12 + ['mel']*128 + ['contrast']*7 + ['tonnetz']*6
    audio_df = pd.DataFrame(columns = columns)
    audio_df # 0 rows × 193 columns
    mfcc,chroma,mel,contrast,tonnetz = _extract_feat(audiofile_path)
    features = np.hstack([mfcc,chroma,mel,contrast,tonnetz])
    labelled = np.insert(features, 0, 0, axis = 0)
    fill = np.empty((0,194))
    row = np.vstack([fill,labelled]) # shape (1,193)

    # Put row in a dataframe
    row_df = pd.DataFrame(row, columns = columns)
    
    # Append row_df into the dataframe
    audio_df = audio_df.append(row_df, ignore_index = True)
    pred_df = audio_df.drop('id', axis=1)
    pred_df_array = pred_df.values 
    pred_df_ = np.reshape(pred_df_array, newshape = (1, 1,193))
    pred_df_array = pred_df.values 
    pred_df_ = np.reshape(pred_df_array, newshape = (1, 1,193))
    
    # Use model to predict on testing set
    pred_result = model.predict(pred_df_)
    
    # Convert y_pred from continuous variables to discreet binary (0,1)
    yy = []
    arr_sorted = -np.sort(-pred_result,axis=1)
    top_three = arr_sorted[:,:3]
    for elem in pred_result: # each element is an array
             
        row = []
        for e in elem: # each number in the array
            if e in top_three[:,:1]: # since softmax outputs probability...
                row.append(1) # assign highest prob as 1
            elif e in top_three[:,:2]:
                row.append(2) # assign second highest prob as 2
            elif e in top_three[:,:3]:
                row.append(3) # assign third highest prob as 3
            else:
                row.append(0) # rest all would be 0
        yy.append(row)
    return yy


In [21]:
# Superseded version, kept for reference: it returns the top three nearest voices but does not take into account how close each voice is.
#def _prediction(check_list, star_list):
    #close_star_id = []
    #indices = [i for i, x in enumerate(check_list[0]) if x == 1]
    #print('indices',indices)
    #close_star_id.append(indices) 
    #return star_list.iloc[close_star_id[0]]

In [27]:
#this returns the top three voices that were nearest alongwith the probability of how close the voice is 
def _prediction(check_list, star_list):
    close_star_id1 = check_list[0].index(1)
    close_star_id2 = check_list[0].index(2)
    close_star_id3 = check_list[0].index(3)
    return star_list.iloc[close_star_id1] ,star_list.iloc[close_star_id2],star_list.iloc[close_star_id3]
  

In [28]:
#making prediction
close_star_check = _calculate_possible(audiofile_path)
_prediction(close_star_check, star_list)

(VoxCeleb1 ID         id10588
 VGGFace1 ID     Kat_Dennings
 Gender                     f
 Nationality              USA
 Set                      dev
 ID                     10588
 Name: 587, dtype: object,
 VoxCeleb1 ID           id10331
 VGGFace1 ID     Gemma_Arterton
 Gender                       f
 Nationality                 UK
 Set                        dev
 ID                       10331
 Name: 330, dtype: object,
 VoxCeleb1 ID         id10620
 VGGFace1 ID     Keeley_Hawes
 Gender                     f
 Nationality               UK
 Set                      dev
 ID                     10620
 Name: 619, dtype: object)