# Interact with Google Colab

In [3]:
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once per notebook.

# PyDrive simplifies many common Google Drive API tasks.
!pip install -U -q PyDrive
# -q means quiet

# provide the ability to authenticate to Google APIs using various methods.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)


# Be able to work within google folder
from google.colab import drive
drive.mount('/content/drive')

[?25l[K    1% |▎                               | 10kB 13.5MB/s eta 0:00:01[K    2% |▋                               | 20kB 1.9MB/s eta 0:00:01[K    3% |█                               | 30kB 2.7MB/s eta 0:00:01[K    4% |█▎                              | 40kB 1.8MB/s eta 0:00:01[K    5% |█▋                              | 51kB 2.2MB/s eta 0:00:01[K    6% |██                              | 61kB 2.6MB/s eta 0:00:01[K    7% |██▎                             | 71kB 3.0MB/s eta 0:00:01[K    8% |██▋                             | 81kB 3.4MB/s eta 0:00:01[K    9% |███                             | 92kB 3.8MB/s eta 0:00:01[K    10% |███▎                            | 102kB 2.9MB/s eta 0:00:01[K    11% |███▋                            | 112kB 2.9MB/s eta 0:00:01[K    12% |████                            | 122kB 4.1MB/s eta 0:00:01[K    13% |████▎                           | 133kB 4.1MB/s eta 0:00:01[K    14% |████▋                           | 143kB 7.6MB/s eta 0:00:01[

In [4]:
# Print the current working directory (shell command run through IPython's `!`).
!pwd

/content


In [5]:
# Change directory to the one this notebook is in
import os
os.chdir("drive/My Drive/Colab Notebooks/")
!ls

 audio_df2.pkl	        AudioRecognition.ipynb	'Speaker Recognition.gslides'
 audio_df3.pkl	        demo.ipynb		 test_audio
 audio_df4.pkl	        full_df.pkl		 train_audio
 audio_df.pkl	        model.json		 vox1_meta.csv
 audio_features.ipynb   model_weights.h5


# Load in Model

In [1]:
from keras.models import model_from_json
from keras.optimizers import Adamax
import pandas as pd
from pydub import AudioSegment
from pydub.playback import play
import librosa
import librosa.feature 
import random
from os import listdir
from IPython.display import Audio
import numpy as np

Using TensorFlow backend.


In [2]:
# Read the serialized architecture; the `with` block guarantees the file is
# closed even if reading fails.
with open('model.json', 'r') as json_file:
  loaded_model_json = json_file.read()

# Rebuild the (still untrained) network from its JSON description.
loaded_model = model_from_json(loaded_model_json)

# Load the trained weights into the rebuilt model.
loaded_model.load_weights("model_weights.h5")
print("Loaded model into notebook")

Loaded model into notebook


In [3]:
# Compile the restored model with an explicitly configured Adamax optimizer
# (author's note: Adamax has shown to yield faster learning than Adam and SGD).
opt = Adamax(lr=1e-3, decay=1e-5)
loaded_model.compile(
  optimizer=opt,
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

In [4]:
# Print the layer-by-layer summary (layer type, output shape, parameter count).
loaded_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 1, 128)            49536     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 1, 128)            0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 1, 128)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 1, 256)            65792     
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 1, 256)            0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 1, 256)            0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 1, 256)            131328    
__________

# Load in dataframe

In [5]:
# Load the pickled feature/metadata table and preview its first rows.
# NOTE: unpickling can execute arbitrary code - only load pickle files that
# you created yourself or otherwise trust.
full_df = pd.read_pickle('full_df.pkl')
full_df.head()

Unnamed: 0,id,mfcc,mfcc.1,mfcc.2,mfcc.3,mfcc.4,mfcc.5,mfcc.6,mfcc.7,mfcc.8,...,tonnetz,tonnetz.1,tonnetz.2,tonnetz.3,tonnetz.4,tonnetz.5,name,gender,nationality,set
0,10332,-274.717176,124.096613,-57.174884,47.0918,-31.190586,-3.631755,-34.891319,-4.590279,-0.825808,...,0.004705,0.003553,-0.009748,0.026007,0.001958,-0.009467,Gemma_Atkinson,f,UK,dev
1,10332,-263.282737,110.169695,-37.532975,49.808548,-30.783113,4.530054,-34.272742,-0.396505,2.08819,...,-0.008559,0.00109,-0.052647,0.053715,-0.014433,-0.001219,Gemma_Atkinson,f,UK,dev
2,10332,-255.330134,107.140581,-37.526152,66.718413,-25.090227,5.835736,-34.682863,0.156228,0.196809,...,-0.000381,0.004534,0.003975,0.063643,-0.003111,-0.009005,Gemma_Atkinson,f,UK,dev
3,10332,-238.647293,116.777418,-43.337226,57.847068,-29.790134,4.556106,-29.883005,-1.273415,0.166903,...,-0.015756,-0.00064,0.002584,0.037222,-0.006243,0.002152,Gemma_Atkinson,f,UK,dev
4,10332,-253.325462,110.840374,-47.403615,46.822241,-28.057003,5.674004,-26.317193,-0.719588,1.632264,...,-0.006521,0.009226,0.008907,0.020127,0.001455,0.002741,Gemma_Atkinson,f,UK,dev


In [6]:
# Make speaker names human readable: swap every underscore for a space.
full_df['name'] = full_df['name'].map(lambda raw: raw.replace('_', ' '))


In [0]:
def extract_feat(filename, filepath):
  """Load one audio file and summarise it as five time-averaged feature vectors.

  Parameters
  ----------
  filename : str
      Name of the audio file.
  filepath : str
      Directory containing the file. A trailing separator is optional.

  Returns
  -------
  tuple
      (mfcc, chroma, mel, contrast, tonnetz); per the original author's note
      these have shapes (40,), (12,), (128,), (7,) and (6,).
  """
  import os  # local import: the notebook's import cell only has `from os import listdir`

  def _time_mean(feature):
    # Average the feature matrix over its second axis (one value per row).
    return np.mean(feature.T, axis = 0)

  # os.path.join yields the same path as `filepath + filename` when filepath
  # ends with a separator, and a correct one when it does not.
  y, sr = librosa.load(os.path.join(filepath, filename)) # y = audio samples, sr = sample rate

  # Magnitude of the short-time Fourier transform, shared by chroma and contrast.
  # NOTE(review): librosa documents `S` for chroma_stft as a power spectrogram;
  # the magnitude is kept because the saved model presumably was trained on
  # exactly these features - confirm before changing.
  stft = np.abs(librosa.stft(y))

  mfcc = _time_mean(librosa.feature.mfcc(y = y, sr = sr, n_mfcc=40))
  mel = _time_mean(librosa.feature.melspectrogram(y = y, sr = sr))
  chroma = _time_mean(librosa.feature.chroma_stft(S = stft, y = y, sr = sr))
  contrast = _time_mean(librosa.feature.spectral_contrast(S = stft, y = y, sr = sr))
  # Tonnetz is computed on the harmonic component of the signal only.
  tonnetz = _time_mean(librosa.feature.tonnetz(y = librosa.effects.harmonic(y), sr = sr))

  return mfcc,chroma,mel,contrast,tonnetz
  

In [0]:
def list_names():
  """Return the distinct speaker names found in `full_df`, to choose from."""
  return full_df['name'].unique()

In [0]:
def persons_voice(name):
  """Pick a random training clip for the named speaker and load it.

  Side effects: sets the module-level globals `audiofile` (the chosen file
  name) and `pathname` (its directory, with a trailing '/'), which
  `predict_celeb()` reads.

  Parameters
  ----------
  name : str
      Speaker name as it appears in `full_df.name`.

  Returns
  -------
  The object returned by `AudioSegment.from_wav` for the chosen clip.

  Raises
  ------
  IndexError
      If `name` does not occur in `full_df`.
  """
  global audiofile, pathname

  speaker_ids = full_df[full_df['name'] == name]['id'].unique()
  if len(speaker_ids) == 0:
    # Same exception type the bare `[0]` lookup raised, but with an explanation.
    raise IndexError('no speaker named {!r} in full_df'.format(name))

  # The folder is 'id' + the integer id. Converting through int() handles both
  # float ids (10332.0 -> '10332', what the old `str(...)[:-2]` slicing relied
  # on) and integer ids, which that slicing would have truncated.
  speaker_dir = 'id{}'.format(int(float(speaker_ids[0])))

  # choose a random sub-folder of the speaker's folder
  subfile = random.choice(listdir('train_audio/{}'.format(speaker_dir)))

  # choose a random audio file inside that sub-folder
  clip_dir = 'train_audio/{}/{}'.format(speaker_dir, subfile)
  chosen_clip = random.choice(listdir(clip_dir))

  audiofile = chosen_clip
  pathname = clip_dir + '/'

  return AudioSegment.from_wav(pathname + audiofile)

In [266]:
# Pick a random clip for this speaker; the call also sets the globals
# `audiofile` and `pathname`.
persons_voice('Jessie J')

In [267]:
# Show the file name of the clip chosen by the last persons_voice() call.
audiofile

'00004.wav'

In [0]:
def predict_celeb(filename=None, filepath=None):
  """Predict which speaker in `full_df` a clip belongs to.

  Parameters
  ----------
  filename, filepath : str, optional
      Clip file name and directory, passed straight to `extract_feat`. When
      omitted, the globals `audiofile` / `pathname` set by the most recent
      `persons_voice()` call are used, which was the only mode before.

  Returns
  -------
  numpy.ndarray
      Unique `name` values of the rows whose `id` is the predicted class.

  Raises
  ------
  NameError
      If no clip is passed and `persons_voice()` has not been called yet.
  """
  if filename is None or filepath is None:
    try:
      selected_file, selected_dir = audiofile, pathname
    except NameError:
      raise NameError('no clip selected: call persons_voice(name) first '
                      'or pass filename and filepath') from None
    filename = selected_file if filename is None else filename
    filepath = selected_dir if filepath is None else filepath

  # get features for the audio and concatenate them into one vector
  mfcc,chroma,mel,contrast,tonnetz = extract_feat(filename, filepath)
  features = np.hstack([mfcc,chroma,mel,contrast,tonnetz])

  # Shape (1,1,193) as before; reshape raises if the vector is not 193 long.
  model_input = np.asarray(features, dtype=np.float64).reshape(1, 1, 193)
  y_pred = loaded_model.predict(model_input)

  # Index of the highest score (first one on ties), which is what building a
  # one-hot row and searching it for the 1 used to produce.
  class_index = int(np.argmax(y_pred[0]))

  # Class order is the column order get_dummies gives for the id column.
  # NOTE(review): presumably the same encoding as the training labels - confirm.
  class_ids = pd.get_dummies(full_df['id']).columns
  predicted_id = class_ids[class_index]

  return full_df[full_df['id'] == predicted_id]['name'].unique()

In [273]:
# Predict the speaker of the clip most recently chosen by persons_voice()
# (read from the globals `audiofile` and `pathname`).
predict_celeb()

array(['Jenna Coleman'], dtype=object)

In [272]:
# Load a random clip of the predicted speaker; note that this call also
# replaces the globals `audiofile` and `pathname`.
persons_voice('Jenna Coleman')