In [None]:
# ! pip install --editable ..

# display_audio

In [19]:
%%writefile '/Users/greenapple/project5/src/data/display_audio.py'

import librosa
import librosa.display
import matplotlib.pyplot as plt
from IPython.display import Audio
import pandas as pd
import sklearn

def play_audio(file):
    '''
    Loads an audio file and returns an IPython Audio player for it.

    The file is read from its beginning (offset=0) and resampled to 16 kHz
    by librosa.load; the player is created with that same sample rate.
    '''
    waveform, rate = librosa.load(file, sr=16000, offset=0)
    player = Audio(waveform, rate=rate)
    return player


def display_waveplot(file):
    '''
    Plots the waveform of an audio file on a new 14x5 inch figure.

    The file is read from its beginning and resampled to 16 kHz.
    Returns None; the plot is drawn on the newly created matplotlib figure.
    '''
    audio, sr = librosa.load(file, sr=16000, offset=0)
    plt.figure(figsize=(14, 5))
    # librosa 0.10 removed `waveplot` in favour of `waveshow` (added in 0.9),
    # so use whichever of the two this installation provides.
    draw_wave = getattr(librosa.display, 'waveshow', None)
    if draw_wave is None:
        draw_wave = librosa.display.waveplot
    draw_wave(audio, sr=sr)
    return


def display_spectrogram(file):
    '''
    Plots a standardized dB spectrogram of an audio file with a colorbar.

    The file is read from its beginning and resampled to 16 kHz, transformed
    with an STFT (n_fft=512, hop_length=200), converted to dB and standardized
    along axis 1 before being drawn on a new 14x5 inch figure with a
    log-frequency axis.
    Returns None.
    '''
    # Import the submodule explicitly: a bare `import sklearn` does not
    # guarantee that `sklearn.preprocessing` has been loaded.
    from sklearn.preprocessing import scale

    audio, sr = librosa.load(file, sr=16000, offset=0)
    X = librosa.stft(audio, n_fft=512, hop_length=200)
    Xdb = librosa.amplitude_to_db(abs(X))
    # Scale if needed
    Xdb = scale(Xdb, axis=1, copy=False)

    plt.figure(figsize=(14, 5))
    # Pass the real sample rate: specshow otherwise assumes sr=22050 and
    # labels the time and frequency axes incorrectly for 16 kHz audio.
    librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='log', hop_length=200)
    plt.colorbar()
    return

Overwriting /Users/greenapple/project5/src/data/display_audio.py


# process_files

In [40]:
%%writefile '/Users/greenapple/project5/src/data/process_files.py'

import os
import glob
import random

def files_for_modeling(path, speaker_num):
    '''
    Lists the .flac files of the first `speaker_num` speaker folders in `path`.

    Folders and files are taken in the order glob yields them. Files are
    searched two levels below `path` (path/<speaker>/<subfolder>/*.flac) and
    only those larger than 75000 bytes are kept.
    '''
    speaker_dirs = glob.glob(os.path.join(path, '*'))[:speaker_num]
    selected = []

    for speaker_dir in speaker_dirs:
        pattern = os.path.join(speaker_dir, '*', '*.flac')
        selected.extend(
            candidate for candidate in glob.glob(pattern)
            if os.stat(candidate).st_size > 75000
        )

    return selected


def files_for_modeling_10_audios(path, speaker_start, speaker_stop):
    '''
    Collects up to 10 .flac files per speaker for a slice of speaker folders.

    Speaker folders are the entries of `path` (in glob order) sliced with
    [speaker_start:speaker_stop]. Only files larger than 75000 bytes are kept
    (the original author's proxy for clips of about 5 seconds).
    A progress line is printed for every speaker that reaches 10 files.
    '''
    size_floor = 75000
    per_speaker = 10
    speaker_dirs = glob.glob(os.path.join(path, '*'))[speaker_start:speaker_stop]
    files = []

    for speaker_count, speaker_dir in enumerate(speaker_dirs, start=1):
        count = 0
        for candidate in glob.glob(os.path.join(speaker_dir, '*', '*.flac')):
            if os.stat(candidate).st_size <= size_floor:
                continue
            files.append(candidate)
            count += 1
            if count == per_speaker:
                print('Done with speaker {}, have {} audio files'.format(speaker_count, count))
                break
    return files

def files_for_modeling_10_audios_9_sec(path, speaker_start, speaker_stop):
    '''
    Collects up to 10 longer .flac files per speaker for a slice of speaker folders.

    Same as files_for_modeling_10_audios but with a size threshold of 170000
    bytes (presumably chosen for clips of about 9 seconds, per the function
    name). Speaker folders are the entries of `path` (in glob order) sliced
    with [speaker_start:speaker_stop].
    A progress line is printed for every speaker that reaches 10 files.
    '''
    size_floor = 170000
    per_speaker = 10
    speaker_dirs = glob.glob(os.path.join(path, '*'))[speaker_start:speaker_stop]
    files = []

    for speaker_count, speaker_dir in enumerate(speaker_dirs, start=1):
        count = 0
        for candidate in glob.glob(os.path.join(speaker_dir, '*', '*.flac')):
            if os.stat(candidate).st_size <= size_floor:
                continue
            files.append(candidate)
            count += 1
            if count == per_speaker:
                print('Done with speaker {}, have {} audio files'.format(speaker_count, count))
                break
    return files

def files_for_modeling_3_audios(path, speaker_num):
    '''
    Collects up to 3 .flac files for each of the first `speaker_num` speakers.

    Speaker folders are the entries of `path` in glob order. Only files
    larger than 75000 bytes are kept (the original author's proxy for clips
    of about 5 seconds).
    '''
    per_speaker = 3
    speaker_dirs = glob.glob(os.path.join(path, '*'))[:speaker_num]
    files = []

    for speaker_dir in speaker_dirs:
        kept = 0
        for candidate in glob.glob(os.path.join(speaker_dir, '*', '*.flac')):
            if os.stat(candidate).st_size <= 75000:
                continue
            files.append(candidate)
            kept += 1
            if kept == per_speaker:
                break
    return files


def files_for_modeling_3_audios_random(path, speaker_num):
    '''
    Randomly picks speakers and collects up to 3 .flac files for each of them.

    Parameters: data path, number of speakers.
    Speakers are drawn without replacement from the entries of `path`, so no
    speaker (and no file) is returned twice. If `speaker_num` exceeds the
    number of speaker folders, all folders are used.
    Only files larger than 75000 bytes are kept (the original author's proxy
    for clips of about 5 seconds).
    Returns a list of file paths, at most 3 per speaker.
    '''
    id_folder_list = glob.glob(os.path.join(path, '*'))
    # random.choices samples WITH replacement, which could pick the same
    # speaker several times and raises IndexError on an empty folder;
    # random.sample with a capped k avoids both.
    k = max(0, min(speaker_num, len(id_folder_list)))
    id_folder_list = random.sample(id_folder_list, k)

    files = []
    for id_folder in id_folder_list:
        count = 0
        for file in glob.glob(os.path.join(id_folder, '*', '*.flac')):
            if os.stat(file).st_size > 75000:
                files.append(file)
                count += 1
                if count == 3:
                    break
    return files

Overwriting /Users/greenapple/project5/src/data/process_files.py


# process_audio

In [8]:
%%writefile '/Users/greenapple/project5/src/data/process_audio.py'

import numpy as np
import pandas as pd
import glob
import os
import librosa
import sklearn
from itertools import combinations
import random
from src.data.process_files import files_for_modeling_10_audios


def load_audio_file(file, sample_rate=16000, offset=0.4, duration=3):
    '''
    Loads a segment of an audio file with librosa.

    By default 3 seconds are read starting 0.4 seconds into the file,
    resampled to 16 kHz. Returns the pair (audio, sample_rate) produced by
    librosa.load.
    '''
    signal, rate = librosa.load(
        file,
        sr=sample_rate,
        offset=offset,
        duration=duration,
    )
    return signal, rate

def load_audio_file_9_sec(file, sample_rate=16000, offset=0.4, duration=9):
    '''
    Loads a segment of an audio file with librosa (9 second default).

    By default 9 seconds are read starting 0.4 seconds into the file,
    resampled to 16 kHz. Returns the pair (audio, sample_rate) produced by
    librosa.load.
    '''
    signal, rate = librosa.load(
        file,
        sr=sample_rate,
        offset=offset,
        duration=duration,
    )
    return signal, rate


def fourier_transform(x):
    '''
    Converts an audio signal into a dB-scaled magnitude spectrogram.

    Uses a short-time Fourier transform with n_fft=512 and hop_length=200.
    '''
    spectrum = librosa.stft(x, n_fft=512, hop_length=200)
    magnitude = abs(spectrum)
    return librosa.amplitude_to_db(magnitude)


def rescale(Xdb):
    '''
    Standardizes a spectrogram along axis 1 with scikit-learn's `scale`.

    Each row is centered and scaled to unit variance (scikit-learn defaults).
    Because copy=False is passed, scikit-learn may perform the scaling in
    place, so `Xdb` should be treated as modified after the call.
    '''
    # Import the submodule explicitly: a bare `import sklearn` does not
    # guarantee that `sklearn.preprocessing` has been loaded.
    from sklearn.preprocessing import scale

    Xdb_rescaled = scale(Xdb, axis=1, copy=False)  # Scale
    return Xdb_rescaled


def resize(Xdb_rescaled):
    '''
    Returns the input refilled into a 224 x 224 array with np.resize.

    Note that np.resize works on the flattened data (truncating it, or
    repeating it when the input is too small); it neither crops nor
    interpolates the spectrogram.
    '''
    target_shape = (224, 224)
    return np.resize(Xdb_rescaled, target_shape)

def resize_9_sec(Xdb_rescaled):
    '''
    Returns the input refilled into a 224 x 672 array with np.resize.

    Note that np.resize works on the flattened data (truncating it, or
    repeating it when the input is too small); it neither crops nor
    interpolates the spectrogram.
    '''
    target_shape = (224, 672)
    return np.resize(Xdb_rescaled, target_shape)


def VGG16_resize_9_sec(Xdb_resized):
    '''
    Folds a wide spectrogram into a 3-channel array for VGG16.

    Columns [0, 224), [224, 448) and [448, end) become channels 0, 1 and 2,
    stacked along a new third axis.
    '''
    channels = [
        Xdb_resized[:, 0:224],
        Xdb_resized[:, 224:448],
        Xdb_resized[:, 448:],
    ]
    return np.stack(channels, axis=2)

def VGG16_resize(Xdb_resized):
    '''
    Turns a single-channel spectrogram into a 3-channel array for VGG16.

    The input is repeated three times along a new third axis.
    '''
    channels = [Xdb_resized] * 3
    return np.stack(channels, axis=2)


def speaker_id(file):
    '''
    Returns the speaker ID encoded in an audio file path.

    The ID is the name of the folder two levels above the file
    (<speaker>/<subfolder>/<file>), converted to int. A ValueError is raised
    when that folder name is not an integer.
    '''
    subfolder = os.path.dirname(file)
    speaker_folder = os.path.dirname(subfolder)
    return int(os.path.basename(speaker_folder))

def file_name(file):
    '''
    Returns the last component of a file path (the file name).
    '''
    return os.path.basename(file)


def one_observation_VGG16(file):
    '''
    Builds one observation (VGG16-shaped features, label, file name) from an audio file.

    Pipeline: load audio -> dB spectrogram -> standardize -> resize ->
    replicate into 3 channels. When the speaker ID cannot be parsed from the
    path (ValueError), both the label and the file name are the string 'None'.
    '''
    signal, _ = load_audio_file(file)
    features = VGG16_resize(resize(rescale(fourier_transform(signal))))

    try:
        meta = (speaker_id(file), file_name(file))
    except ValueError:
        meta = ('None', 'None')
    return (features, *meta)

def one_observation_9_sec(file):
    '''
    Builds one observation (3-channel features, label, file name) from a 9 second clip.

    Pipeline: load audio (9 s variant) -> dB spectrogram -> standardize ->
    resize to the wide format -> fold into 3 channels. When the speaker ID
    cannot be parsed from the path (ValueError), both the label and the file
    name are the string 'None'.
    '''
    signal, _ = load_audio_file_9_sec(file)
    wide = resize_9_sec(rescale(fourier_transform(signal)))
    features = VGG16_resize_9_sec(wide)

    try:
        meta = (speaker_id(file), file_name(file))
    except ValueError:
        meta = ('None', 'None')
    return (features, *meta)


def one_observation_spec(file):
    '''
    Builds one observation (resized spectrogram, label, file name) from an audio file.

    Pipeline: load audio -> dB spectrogram -> resize (no standardization and
    no channel axis). When the speaker ID cannot be parsed from the path
    (ValueError), both the label and the file name are the string 'None'.
    '''
    signal, _ = load_audio_file(file)
    features = resize(fourier_transform(signal))

    try:
        meta = (speaker_id(file), file_name(file))
    except ValueError:
        meta = ('None', 'None')
    return (features, *meta)


Overwriting /Users/greenapple/project5/src/data/process_audio.py


# audio_to_features

In [44]:
%%writefile '/Users/greenapple/project5/src/data/audio_to_features.py'

import os
import numpy as np
import pandas as pd
import glob
from src.data import process_files
from src.data import process_audio
from keras.applications.vgg16 import VGG16


def file_to_features(path, speaker_start, speaker_stop, one_observation_func):
    '''
    Extracts features and labels from audio files into a DataFrame.

    Parameters:
        path: data folder handed to process_files.files_for_modeling_10_audios.
        speaker_start, speaker_stop: slice bounds over the speaker folders.
        one_observation_func: callable that takes an audio file path and
            returns (features, speaker_id, file_name), for example
            process_audio.one_observation_spec. The name of such a function
            in process_audio, given as a string, is accepted as well.

    Returns a DataFrame with the columns 'speaker_id', 'features' and
    'file_name', one row per audio file.
    '''
    # The original looked up an attribute literally called
    # `one_observation_func` on process_audio (which does not exist) and so
    # ignored this parameter; resolve the extractor from the argument instead.
    if callable(one_observation_func):
        extract = one_observation_func
    else:
        extract = getattr(process_audio, one_observation_func)

    files = process_files.files_for_modeling_10_audios(path, speaker_start, speaker_stop)

    # Extract features
    Xdb_3D_list = []
    id_list = []
    file_name_list = []

    for file in files:
        # Features and label for one observation = audio file
        Xdb_3D, speaker_id, name_file = extract(file)
        Xdb_3D_list.append(Xdb_3D)
        id_list.append(speaker_id)
        file_name_list.append(name_file)

    data = pd.DataFrame()
    data['speaker_id'] = id_list
    data['features'] = Xdb_3D_list
    data['file_name'] = file_name_list

    return data


def VGG16_features(path, speaker_start, speaker_stop):
    '''
    Builds a DataFrame of spectrogram features and VGG16 embeddings per audio file.

    Files come from process_files.files_for_modeling_10_audios and are
    converted with process_audio.one_observation_VGG16. The returned frame
    has the columns 'speaker_id', 'features', 'file_name' and 'VGG16_embds'
    (output of the ImageNet VGG16 base without its top dense layers).
    '''
    files = process_files.files_for_modeling_10_audios(path, speaker_start, speaker_stop)

    # One (features, speaker id, file name) triple per audio file
    observations = [process_audio.one_observation_VGG16(audio_path) for audio_path in files]

    data = pd.DataFrame()
    data['speaker_id'] = [label for _, label, _ in observations]
    data['features'] = [spec for spec, _, _ in observations]
    data['file_name'] = [name for _, _, name in observations]

    # Batch of inputs for the convolutional base
    batch = np.array(data.features.tolist())

    # VGG16 convolutional base (no top dense layers), all layers frozen
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224,224, 3))
    for conv_layer in base_model.layers:
        conv_layer.trainable = False

    data['VGG16_embds'] = list(base_model.predict(batch))

    return data

def VGG16_features_9_sec(path, speaker_start, speaker_stop):
    '''
    Builds a DataFrame of 9 second clip features and VGG16 embeddings per audio file.

    Files come from process_files.files_for_modeling_10_audios_9_sec and are
    converted with process_audio.one_observation_9_sec. The returned frame
    has the columns 'speaker_id', 'features', 'file_name' and 'VGG16_embds'
    (output of the ImageNet VGG16 base without its top dense layers).
    '''
    files = process_files.files_for_modeling_10_audios_9_sec(path, speaker_start, speaker_stop)

    # One (features, speaker id, file name) triple per audio file
    observations = [process_audio.one_observation_9_sec(audio_path) for audio_path in files]

    data = pd.DataFrame()
    data['speaker_id'] = [label for _, label, _ in observations]
    data['features'] = [spec for spec, _, _ in observations]
    data['file_name'] = [name for _, _, name in observations]

    # Batch of inputs for the convolutional base
    batch = np.array(data.features.tolist())

    # VGG16 convolutional base (no top dense layers), all layers frozen
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224,224, 3))
    for conv_layer in base_model.layers:
        conv_layer.trainable = False

    data['VGG16_embds'] = list(base_model.predict(batch))

    return data



# TODO: move this function out once the src import works

def files_for_modeling_10_audios(path, speaker_start, speaker_stop):
    '''
    Collects up to 10 .flac files per speaker for a slice of speaker folders.

    Speaker folders are the entries of `path` (in glob order) sliced with
    [speaker_start:speaker_stop]. Only files larger than 75000 bytes are kept
    (the original author's proxy for clips of about 5 seconds).
    A progress line is printed for every speaker that reaches 10 files.
    '''
    all_speaker_dirs = glob.glob(os.path.join(path, '*'))
    files = []

    for position, speaker_dir in enumerate(all_speaker_dirs[speaker_start:speaker_stop]):
        speaker_count = position + 1
        count = 0
        for candidate in glob.glob(os.path.join(speaker_dir, '*', '*.flac')):
            if not os.stat(candidate).st_size > 75000:
                continue
            files.append(candidate)
            count += 1
            if count == 10:
                print('Done with speaker {}, have {} audio files'.format(speaker_count, count))
                break
    return files


def file_to_VGG16_features(file):
    '''
    Returns the VGG16 embeddings of a single audio file as a (7, 7, 512) array.

    The file is converted with process_audio.one_observation_VGG16 and passed
    as a batch of one through the ImageNet VGG16 base without its top dense
    layers. NOTE: the VGG16 model is constructed on every call.
    '''
    # Spectrogram features; the label and file name are not needed here
    spectrogram, _, _ = process_audio.one_observation_VGG16(file)

    # Batch of one for the convolutional base
    batch = np.reshape(spectrogram, (1, 224, 224, 3))

    # VGG16 convolutional base (no top dense layers), all layers frozen
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224,224, 3))
    for conv_layer in base_model.layers:
        conv_layer.trainable = False

    prediction = base_model.predict(batch)
    return np.reshape(prediction, (7, 7, 512))



Overwriting /Users/greenapple/project5/src/data/audio_to_features.py


# features_for_model

In [63]:
%%writefile '/Users/greenapple/project5/src/data/features_for_model.py'

import numpy as np
import pandas as pd
import glob
import os
import librosa
import sklearn
from itertools import permutations, combinations 
import random
from imblearn.under_sampling import RandomUnderSampler


def siamese_VGG16_features(data):
    '''
    Builds balanced pair data for a siamese model from VGG16 embeddings.

    Every unordered pair of rows of `data` (columns 'VGG16_embds',
    'speaker_id', 'file_name') becomes one observation whose target is 1 when
    both rows have the same speaker ID and 0 otherwise. Pairs with identical
    file names are dropped and the classes are balanced with
    RandomUnderSampler(random_state=4).

    Returns (X, y, siam_data, X_res_data): the resampled feature array, the
    resampled target array, the full pair table and the resampled pair table.
    '''
    def _same(pair):
        left, right = pair
        return 1 if left==right else 0

    x_columns = ['siam_features',
                 'siam_pairs_ids',
                 'file_names',
                 'siam_same_file',
                 'siam_same_id']

    # All unordered pairs of embeddings, speaker IDs and file names
    embedding_pairs = list(combinations(data.VGG16_embds, 2))
    id_pairs = list(combinations(data.speaker_id, 2))
    name_pairs = list(combinations(data.file_name, 2))

    same_speaker_flags = [_same(pair) for pair in id_pairs]
    same_file_flags = [_same(pair) for pair in name_pairs]

    siam_data = pd.DataFrame()
    for column, values in (('siam_targets', same_speaker_flags),
                           ('siam_features', embedding_pairs),
                           ('siam_pairs_ids', id_pairs),
                           ('file_names', name_pairs),
                           ('siam_same_file', same_file_flags),
                           ('siam_same_id', list(same_speaker_flags))):
        siam_data[column] = values

    # Keep only pairs made of two different files
    different_files = siam_data.loc[siam_data['siam_same_file'] == 0]
    pair_table = different_files[x_columns].copy()
    pair_targets = different_files.siam_targets

    # Undersample target 0 (different speakers)
    sampler = RandomUnderSampler(random_state=4)
    X_res, y_res = sampler.fit_resample(pair_table, pair_targets)
    X_res_data = pd.DataFrame(X_res, columns=x_columns)

    # Arrays for modeling
    X = np.array(X_res_data.siam_features.tolist())
    y = np.array(y_res.tolist())

    return X, y, siam_data, X_res_data


def siamese_spec_features(data):
    '''
    Builds balanced pair data for a siamese model from spectrogram features.

    Every unordered pair of rows of `data` (columns 'features', 'speaker_id',
    'file_name') becomes one observation whose target is 1 when both rows
    have the same speaker ID and 0 otherwise. Pairs with identical file names
    are dropped and the classes are balanced with
    RandomUnderSampler(random_state=4).

    Returns (X, y, siam_data, X_res_data): the resampled feature array, the
    resampled target array, the full pair table and the resampled pair table.
    '''
    def _same(pair):
        left, right = pair
        return 1 if left==right else 0

    x_columns = ['siam_features',
                 'siam_pairs_ids',
                 'file_names',
                 'siam_same_file',
                 'siam_same_id']

    # All unordered pairs of features, speaker IDs and file names
    feature_pairs = list(combinations(data.features, 2))
    id_pairs = list(combinations(data.speaker_id, 2))
    name_pairs = list(combinations(data.file_name, 2))

    same_speaker_flags = [_same(pair) for pair in id_pairs]
    same_file_flags = [_same(pair) for pair in name_pairs]

    siam_data = pd.DataFrame()
    for column, values in (('siam_targets', same_speaker_flags),
                           ('siam_features', feature_pairs),
                           ('siam_pairs_ids', id_pairs),
                           ('file_names', name_pairs),
                           ('siam_same_file', same_file_flags),
                           ('siam_same_id', list(same_speaker_flags))):
        siam_data[column] = values

    # Keep only pairs made of two different files
    different_files = siam_data.loc[siam_data['siam_same_file'] == 0]
    pair_table = different_files[x_columns].copy()
    pair_targets = different_files.siam_targets

    # Undersample target 0 (different speakers)
    sampler = RandomUnderSampler(random_state=4)
    X_res, y_res = sampler.fit_resample(pair_table, pair_targets)
    X_res_data = pd.DataFrame(X_res, columns=x_columns)

    # Arrays for modeling
    X = np.array(X_res_data.siam_features.tolist())
    y = np.array(y_res.tolist())

    return X, y, siam_data, X_res_data


def siamese_VGG16_features_unbal(data):
    '''
    Builds unbalanced pair data for a siamese model from ordered row pairs.

    Every ordered pair of distinct rows of `data` (columns 'features',
    'speaker_id', 'file_name') becomes one observation whose target is 1 when
    both rows have the same speaker ID and 0 otherwise. Pairs with identical
    file names are dropped; no class balancing is applied.
    NOTE(review): despite the function name, the pairs are built from the
    'features' column, not 'VGG16_embds' - confirm this is intended.

    Returns (X, y, siam_data, siam_data_filtered): the feature array, the
    target array, the full pair table and the filtered pair table.
    '''
    def _same(pair):
        left, right = pair
        return 1 if left==right else 0

    # All ordered pairs of features, speaker IDs and file names
    feature_pairs = list(permutations(data.features, 2))
    id_pairs = list(permutations(data.speaker_id, 2))
    name_pairs = list(permutations(data.file_name, 2))

    same_speaker_flags = [_same(pair) for pair in id_pairs]
    same_file_flags = [_same(pair) for pair in name_pairs]

    siam_data = pd.DataFrame()
    for column, values in (('siam_targets', same_speaker_flags),
                           ('siam_features', feature_pairs),
                           ('siam_pairs_ids', id_pairs),
                           ('file_names', name_pairs),
                           ('siam_same_file', same_file_flags),
                           ('siam_same_id', list(same_speaker_flags))):
        siam_data[column] = values

    # Keep only pairs made of two different files
    siam_data_filtered = siam_data.loc[siam_data['siam_same_file'] == 0]

    # Arrays for modeling
    X = np.array(siam_data_filtered.siam_features.tolist())
    y = np.array(siam_data_filtered.siam_targets.tolist())

    return X, y, siam_data, siam_data_filtered


def reshape_for_cnn(X_res_data, y_res):
    '''
    Converts a pair table and its targets into numpy arrays for modeling.

    X is built from the 'siam_features' column of `X_res_data`, y from
    `y_res`. Returns (X, y).
    '''
    feature_rows = X_res_data.siam_features.tolist()
    target_values = y_res.tolist()
    return np.array(feature_rows), np.array(target_values)

Overwriting /Users/greenapple/project5/src/data/features_for_model.py


# one_shot_learning

In [40]:
%%writefile '/Users/greenapple/project5/src/models/one_shot_learning.py'

import pandas as pd
import numpy as np
from src.data import process_audio
import random

def get_samples_random(path, speaker_num):
    '''
    Extracts features and speaker IDs for randomly chosen speakers.

    Files are listed with process_files.files_for_modeling_3_audios_random
    (up to 3 files per speaker). Returns a DataFrame with the columns
    'speaker_id' and 'features', one row per audio file.
    '''
    # The file-listing helpers live in process_files; process_audio does not
    # define them, so the original attribute lookup raised AttributeError.
    from src.data import process_files

    files = process_files.files_for_modeling_3_audios_random(path, speaker_num)

    # Extract features
    Xdb_3D_list = []
    id_list = []

    for file in files:
        # Features and label for one observation = audio file.
        # NOTE(review): the original called process_audio.one_observation,
        # which does not exist; one_observation_VGG16 is assumed to be the
        # intended replacement (it returns the 3-channel features) - confirm.
        # It returns (features, speaker_id, file_name); the name is unused.
        Xdb_3D, speaker_id, _ = process_audio.one_observation_VGG16(file)

        Xdb_3D_list.append(Xdb_3D)
        id_list.append(speaker_id)

    speaker_data = pd.DataFrame()
    speaker_data['speaker_id'] = id_list
    speaker_data['features'] = Xdb_3D_list

    return speaker_data

def get_samples_in_order(path, speaker_num):
    '''
    Extracts features and speaker IDs for the first `speaker_num` speakers.

    Files are listed with process_files.files_for_modeling_3_audios (up to 3
    files per speaker, in folder order). Returns a DataFrame with the columns
    'speaker_id' and 'features', one row per audio file.
    '''
    # The file-listing helpers live in process_files; process_audio does not
    # define them, so the original attribute lookup raised AttributeError.
    from src.data import process_files

    files = process_files.files_for_modeling_3_audios(path, speaker_num)

    # Extract features
    Xdb_3D_list = []
    id_list = []

    for file in files:
        # Features and label for one observation = audio file.
        # NOTE(review): the original called process_audio.one_observation,
        # which does not exist; one_observation_VGG16 is assumed to be the
        # intended replacement (it returns the 3-channel features) - confirm.
        # It returns (features, speaker_id, file_name); the name is unused.
        Xdb_3D, speaker_id, _ = process_audio.one_observation_VGG16(file)

        Xdb_3D_list.append(Xdb_3D)
        id_list.append(speaker_id)

    speaker_data = pd.DataFrame()
    speaker_data['speaker_id'] = id_list
    speaker_data['features'] = Xdb_3D_list

    return speaker_data

def one_shot_set_spec_features(speaker_data, n_way):
    '''
    Returns one shot learning sample set with n_way sample pairs.

    Parameters:
        speaker_data: DataFrame with the columns 'speaker_id' and 'features'.
        n_way: number of pairs, each built from a different speaker.

    Returns (pairs, targets). Every pair is (test_sample, sample); the test
    sample belongs to the speaker of the first pair, so targets is 1 for the
    first pair and 0 for all others. If the first speaker has no other
    distinct recording, the test sample equals the first sample.

    Raises ValueError when n_way is smaller than 1 or larger than the number
    of distinct speakers (the sampling loop could otherwise never finish).
    '''
    if n_way < 1:
        raise ValueError('n_way must be at least 1')
    if speaker_data.speaker_id.nunique() < n_way:
        raise ValueError('n_way exceeds the number of distinct speakers')

    # Randomly select n_way samples, one per distinct speaker
    ids = []
    samples = []
    while len(ids) < n_way:
        row = speaker_data.sample(1, replace=False).iloc[0]
        if row['speaker_id'] not in ids:
            ids.append(row['speaker_id'])
            samples.append(row['features'])

    # Select a different sample of the first speaker as the test sample.
    # Candidates are filtered up front instead of comparing an array with a
    # string sentinel in a loop, which is ambiguous on current NumPy and
    # never terminates when all recordings are identical.
    same_speaker = speaker_data.loc[speaker_data.speaker_id == ids[0]]
    alternatives = [features for features in same_speaker['features']
                    if not np.array_equal(features, samples[0])]
    if alternatives:
        test_sample = alternatives[np.random.randint(len(alternatives))]
    else:
        # Only one (distinct) audio is available for this speaker
        test_sample = samples[0]

    # Make pairs
    pairs = np.array([(test_sample, sample) for sample in samples])

    # Targets
    targets = np.zeros((n_way,))
    targets[0] = 1

    return pairs, targets

def one_shot_set_VGG16_features(speaker_data, n_way):
    '''
    Returns one shot learning sample set with n_way sample pairs.

    Parameters:
        speaker_data: DataFrame with the columns 'speaker_id' and 'VGG16_embds'.
        n_way: number of pairs, each built from a different speaker.

    Returns (pairs, targets). Every pair is (test_sample, sample); the test
    sample belongs to the speaker of the first pair, so targets is 1 for the
    first pair and 0 for all others. If the first speaker has no other
    distinct recording, the test sample equals the first sample.

    Raises ValueError when n_way is smaller than 1 or larger than the number
    of distinct speakers (the sampling loop could otherwise never finish).
    '''
    if n_way < 1:
        raise ValueError('n_way must be at least 1')
    if speaker_data.speaker_id.nunique() < n_way:
        raise ValueError('n_way exceeds the number of distinct speakers')

    # Randomly select n_way samples, one per distinct speaker
    ids = []
    samples = []
    while len(ids) < n_way:
        row = speaker_data.sample(1, replace=False).iloc[0]
        if row['speaker_id'] not in ids:
            ids.append(row['speaker_id'])
            samples.append(row['VGG16_embds'])

    # Select a different sample of the first speaker as the test sample.
    # Candidates are filtered up front instead of comparing an array with a
    # string sentinel in a loop, which is ambiguous on current NumPy and
    # never terminates when all recordings are identical.
    same_speaker = speaker_data.loc[speaker_data.speaker_id == ids[0]]
    alternatives = [embedding for embedding in same_speaker['VGG16_embds']
                    if not np.array_equal(embedding, samples[0])]
    if alternatives:
        test_sample = alternatives[np.random.randint(len(alternatives))]
    else:
        # Only one (distinct) audio is available for this speaker
        test_sample = samples[0]

    # Make pairs
    pairs = np.array([(test_sample, sample) for sample in samples])

    # Targets
    targets = np.zeros((n_way,))
    targets[0] = 1

    return pairs, targets


def one_shot_score_VGG16_features(model, trials, speaker_data, n_way):
    '''
    Returns the percentage of one shot trials the model predicted correctly.

    Each trial draws a fresh n_way sample set; the trial counts as correct
    when the pair with the highest predicted value is the true match. The
    running score is printed after every trial on the same console line.
    '''
    n_correct = 0

    for count in range(1, trials + 1):
        pairs, targets = one_shot_set_VGG16_features(speaker_data, n_way)
        left, right = pairs[:, 0], pairs[:, 1]
        probs = model.predict([left, right])
        n_correct += int(np.argmax(probs) == np.argmax(targets))
        current_score = 100.0 * n_correct / count
        print('Trial {}, current score {}'.format(count, current_score), end='\r')

    return 100.0 * n_correct / trials


Overwriting /Users/greenapple/project5/src/models/one_shot_learning.py


# applications

In [11]:
%%writefile '/Users/greenapple/project5/src/models/applications.py'

import numpy as np
from src.models import one_shot_learning
from src.data import audio_to_features

def sample_set_same_speaker(speaker_data):
    '''
    Returns 5 pairs of audio embeddings for one randomly chosen speaker.

    One audio file of the speaker (the test sample) is paired with 5 other,
    distinct audio files of the same speaker.

    Parameters:
        speaker_data: DataFrame with the columns 'speaker_id', 'file_name'
            and 'VGG16_embds'.

    Returns (pairs, files_pairs): the array of (test embedding, embedding)
    pairs and the matching array of (test file name, file name) pairs.

    Raises ValueError when the chosen speaker has fewer than 6 distinct
    files (the original sampling loop would never finish in that case).
    '''
    n_pairs = 5

    # Randomly select a speaker (.iloc[0] instead of the deprecated
    # int(Series) conversion)
    speaker_id = speaker_data.speaker_id.sample(1, replace=False).iloc[0]
    same_speaker = speaker_data.loc[speaker_data.speaker_id == speaker_id]

    # Draw n_pairs + 1 rows with distinct file names in one step
    distinct = same_speaker.drop_duplicates(subset='file_name')
    if len(distinct) < n_pairs + 1:
        raise ValueError('speaker {} has fewer than {} distinct audio files'.format(
            speaker_id, n_pairs + 1))
    chosen = distinct.sample(n_pairs + 1, replace=False)
    files = list(chosen['file_name'])
    samples = list(chosen['VGG16_embds'])

    # Assign a test sample
    test_sample, samples = samples[0], samples[1:]
    test_file, other_files = files[0], files[1:]

    # Make pairs
    pairs = np.array([(test_sample, sample) for sample in samples])

    # Pair the test file with each other file (the original paired it with
    # the whole `files` list, producing a ragged array)
    files_pairs = np.array([(test_file, name) for name in other_files])

    return pairs, files_pairs


def sample_set_different_speaker(speaker_data):
    '''
    Returns 5 pairs of audio embeddings made from two different speakers.

    5 distinct audio files of one randomly chosen speaker are each paired
    with the same test sample, which is drawn from any other speaker.

    Parameters:
        speaker_data: DataFrame with the columns 'speaker_id', 'file_name'
            and 'VGG16_embds'.

    Returns the array of (test embedding, embedding) pairs.

    Raises ValueError when the chosen speaker has fewer than 5 distinct
    files or when no other speaker exists (the original sampling loops would
    never finish in those cases).
    '''
    n_pairs = 5

    # Randomly select a speaker (.iloc[0] instead of the deprecated
    # int(Series) conversion)
    speaker_id = speaker_data.speaker_id.sample(1, replace=False).iloc[0]
    is_chosen = speaker_data.speaker_id == speaker_id
    same_speaker = speaker_data.loc[is_chosen]
    other_speakers = speaker_data.loc[~is_chosen]

    distinct = same_speaker.drop_duplicates(subset='file_name')
    if len(distinct) < n_pairs:
        raise ValueError('speaker {} has fewer than {} distinct audio files'.format(
            speaker_id, n_pairs))
    if other_speakers.empty:
        raise ValueError('speaker_data contains only one speaker')

    samples = list(distinct.sample(n_pairs, replace=False)['VGG16_embds'])

    # Select a test sample from a different speaker. Drawing directly from
    # the other speakers avoids comparing an array with a string sentinel.
    test_sample = other_speakers.sample(1, replace=False).iloc[0]['VGG16_embds']

    # Make pairs
    pairs = np.array([(test_sample, sample) for sample in samples])

    return pairs


def one_speaker_authentification_1(model, trials, speaker_data, n):
    '''
    Runs same-speaker authentication trials and returns the last trial's result.

    Every trial draws n same-speaker pairs with sample_set_same_speaker_n and
    feeds them to model.predict. Returns (probs, files) of the final trial
    only; results of earlier trials are discarded.
    '''
    for _ in range(trials):
        pairs, files = sample_set_same_speaker_n(speaker_data, n)
        left, right = pairs[:, 0], pairs[:, 1]
        probs = model.predict([left, right])
    return probs, files

def one_speaker_authentification_0(model, trials, speaker_data, n):
    '''
    Runs different-speaker authentication trials and returns the last trial's result.

    Every trial draws n different-speaker pairs with
    sample_set_different_speaker_n and feeds them to model.predict. Returns
    the predictions of the final trial only; earlier trials are discarded.
    '''
    for _ in range(trials):
        pairs = sample_set_different_speaker_n(speaker_data, n)
        left, right = pairs[:, 0], pairs[:, 1]
        probs = model.predict([left, right])
    return probs


def recorded_sample_set(test_file, file_list):
    '''
    Returns pairs of VGG16 embeddings made from a voiceprint and a test sample.

    Parameters:
        test_file: audio file to be checked.
        file_list: audio files of the saved speaker (the voiceprint,
            5 audios according to the original description).

    Returns an array with one (test embedding, voiceprint embedding) pair
    per file in file_list.
    '''
    # Voiceprint VGG16 embeddings:
    samples = [audio_to_features.file_to_VGG16_features(file) for file in file_list]

    # Test sample VGG16 embeddings. The original stored this in `sample` and
    # then referenced an undefined `test_sample`, raising NameError.
    test_sample = audio_to_features.file_to_VGG16_features(test_file)

    # Make pairs
    pairs = np.array([(test_sample, sample) for sample in samples])

    return pairs


def sample_set_same_speaker_n(speaker_data, n):
    '''
    Returns n pairs of audio embeddings for one randomly chosen speaker.

    One audio file of the speaker (the test sample) is paired with n other,
    distinct audio files of the same speaker.

    Parameters:
        speaker_data: DataFrame with the columns 'speaker_id', 'file_name'
            and 'VGG16_embds'.
        n: number of pairs.

    Returns (pairs, files): the array of (test embedding, embedding) pairs
    and the list of the n + 1 file names used, test file first.

    Raises ValueError when the chosen speaker has fewer than n + 1 distinct
    files (the original sampling loop would never finish in that case).
    '''
    # Randomly select a speaker (.iloc[0] instead of the deprecated
    # int(Series) conversion)
    speaker_id = speaker_data.speaker_id.sample(1, replace=False).iloc[0]
    same_speaker = speaker_data.loc[speaker_data.speaker_id == speaker_id]

    # Draw n + 1 rows with distinct file names in one step
    distinct = same_speaker.drop_duplicates(subset='file_name')
    if len(distinct) < n + 1:
        raise ValueError('speaker {} has fewer than {} distinct audio files'.format(
            speaker_id, n + 1))
    chosen = distinct.sample(n + 1, replace=False)
    files = list(chosen['file_name'])
    samples = list(chosen['VGG16_embds'])

    # Assign a test sample
    test_sample, samples = samples[0], samples[1:]

    # Make pairs. The unused `files_pairs` array of the original was dropped:
    # it was ragged (file name paired with the whole list) and its
    # construction fails on current NumPy.
    pairs = np.array([(test_sample, sample) for sample in samples])

    return pairs, files


def sample_set_different_speaker_n(speaker_data, n):
    '''
    Returns n pairs of audio embeddings made from two different speakers.

    n distinct audio files of one randomly chosen speaker are each paired
    with the same test sample, which is drawn from any other speaker.

    Parameters:
        speaker_data: DataFrame with the columns 'speaker_id', 'file_name'
            and 'VGG16_embds'.
        n: number of pairs.

    Returns the array of (test embedding, embedding) pairs.

    Raises ValueError when the chosen speaker has fewer than n distinct
    files or when no other speaker exists (the original sampling loops would
    never finish in those cases).
    '''
    # Randomly select a speaker (.iloc[0] instead of the deprecated
    # int(Series) conversion)
    speaker_id = speaker_data.speaker_id.sample(1, replace=False).iloc[0]
    is_chosen = speaker_data.speaker_id == speaker_id
    same_speaker = speaker_data.loc[is_chosen]
    other_speakers = speaker_data.loc[~is_chosen]

    distinct = same_speaker.drop_duplicates(subset='file_name')
    if len(distinct) < n:
        raise ValueError('speaker {} has fewer than {} distinct audio files'.format(
            speaker_id, n))
    if other_speakers.empty:
        raise ValueError('speaker_data contains only one speaker')

    samples = list(distinct.sample(n, replace=False)['VGG16_embds'])

    # Select a test sample from a different speaker. Drawing directly from
    # the other speakers avoids comparing an array with a string sentinel.
    test_sample = other_speakers.sample(1, replace=False).iloc[0]['VGG16_embds']

    # Make pairs
    pairs = np.array([(test_sample, sample) for sample in samples])

    return pairs


Overwriting /Users/greenapple/project5/src/models/applications.py


# process_audio_siam - remove

In [23]:
%%writefile '/Users/greenapple/project5/src/data/process_audio_siam.py'

import numpy as np
import pandas as pd
import glob
import os
import librosa
import sklearn
from itertools import permutations, combinations
import random
from imblearn.under_sampling import RandomUnderSampler


def files_for_modeling_10_audios(path, speaker_start, speaker_stop):
    '''
    Collects up to 10 .flac files per speaker for a slice of speaker folders.

    Speaker folders are the entries of `path` (in glob order) sliced with
    [speaker_start:speaker_stop]. Only files larger than 75000 bytes are kept
    (the original author's proxy for clips of about 5 seconds).
    A progress line is printed for every speaker that reaches 10 files.
    '''
    wanted = 10
    chosen_dirs = glob.glob(os.path.join(path, '*'))[speaker_start:speaker_stop]
    files = []

    speaker_count = 0
    for speaker_dir in chosen_dirs:
        speaker_count += 1
        count = 0
        flac_pattern = os.path.join(speaker_dir, '*', '*.flac')
        for candidate in glob.glob(flac_pattern):
            big_enough = os.stat(candidate).st_size > 75000
            if not big_enough:
                continue
            files.append(candidate)
            count += 1
            if count == wanted:
                print('Done with speaker {}, have {} audio files'.format(speaker_count, count))
                break
    return files


def files_for_modeling_3_audios(path, speaker_num):
    '''
    Collects up to 3 audio files per speaker for the first speaker_num speakers.

    Speaker folders are the entries matched by path/* (in the order glob returns
    them); only the first speaker_num are used. Within each folder the .flac
    files one directory level down (<speaker>/*/*.flac) are scanned and only
    files larger than 75000 bytes are kept (the original note describes these
    as ~5 sec audios - TODO confirm the size/duration relation).

    Returns a flat list of file paths.
    '''
    files = []
    selected_folders = glob.glob(os.path.join(path, '*'))[:speaker_num]

    for id_folder in selected_folders:
        kept = 0
        for candidate in glob.glob(os.path.join(id_folder, '*', '*.flac')):
            if os.stat(candidate).st_size <= 75000:
                continue  # file is too small, look at the next one
            files.append(candidate)
            kept += 1
            if kept == 3:
                break
    return files


def files_for_modeling_3_audios_random(path, speaker_num):
    '''
    Collects up to 3 audio files per speaker for randomly chosen speakers.

    Parameters:
        path: data folder; its direct children (path/*) are the speaker folders.
        speaker_num: number of speakers to draw.

    Speakers are drawn WITHOUT replacement, so a speaker folder (and its files)
    can appear at most once in the result. If speaker_num is larger than the
    number of speaker folders, all folders are used.

    Within each chosen folder the .flac files one directory level down
    (<speaker>/*/*.flac) are scanned and only files larger than 75000 bytes are
    kept (the original note describes these as ~5 sec audios - TODO confirm).

    Returns a flat list of file paths, at most 3 per speaker.
    '''
    id_folder_list = glob.glob(os.path.join(path, '*'))
    files = []

    # random.choices would sample with replacement and could return the same
    # speaker (and therefore the same files) several times.
    n_speakers = min(speaker_num, len(id_folder_list))
    id_folder_list = random.sample(id_folder_list, k=n_speakers)

    for id_folder in id_folder_list:
        file_path = glob.glob(os.path.join(id_folder, '*', '*.flac'))
        count = 0
        for file in file_path:
            if os.stat(file).st_size > 75000:
                files.append(file)
                count += 1
                if count == 3:
                    break
    return files


def files_for_modeling(path, speaker_num):
    '''
    Returns all sufficiently large audio files for the first speaker_num speakers.

    Speaker folders are the entries matched by path/* (in the order glob returns
    them). For each selected folder every .flac file one directory level down
    (<speaker>/*/*.flac) that is larger than 75000 bytes is collected.
    '''
    files = []
    for id_folder in glob.glob(os.path.join(path, '*'))[:speaker_num]:
        flac_paths = glob.glob(os.path.join(id_folder, '*', '*.flac'))
        files.extend(flac for flac in flac_paths if os.stat(flac).st_size > 75000)
    return files


def load_audio_file(file, sample_rate=16000, offset=0.4, duration=3):
    '''
    Loads an audio file with librosa.load.

    sample_rate is passed as librosa's `sr`; offset and duration are forwarded
    unchanged (per the librosa documentation both are expressed in seconds).

    Returns the (audio, sample_rate) pair produced by librosa.load.
    '''
    signal, rate = librosa.load(
        file,
        sr=sample_rate,
        offset=offset,
        duration=duration,
    )
    return signal, rate

def fourier_transform(x):
    '''
    Returns the dB-scaled magnitude of the short-time Fourier transform of x.

    The STFT is computed with n_fft=512 and hop_length=200; its absolute value
    is then converted with librosa.amplitude_to_db.
    '''
    magnitude = abs(librosa.stft(x, n_fft=512, hop_length=200))
    return librosa.amplitude_to_db(magnitude)


def pre_processing(Xdb):
    '''
    Scales a spectrogram and shapes it into a 224 x 224 x 3 array.

    Steps:
    - sklearn.preprocessing.scale along axis=1; copy=False allows the scaling
      to happen in place, so the array passed in may be modified.
    - np.resize to (224, 224). Note that np.resize does not interpolate: it
      truncates the flattened data or repeats it to fill the new shape.
    - the 2D result is stacked 3 times along a new last axis.
    '''
    scaled = sklearn.preprocessing.scale(Xdb, axis=1, copy=False)
    plane = np.resize(scaled, (224, 224))
    return np.stack([plane] * 3, axis=2)


def speaker_id(file):
    '''
    Returns the speaker id encoded in a file path, as an int.

    The id is the name of the directory two levels above the file:
    .../<speaker_id>/<folder>/<file>. int() raises ValueError if that
    directory name is not an integer.
    '''
    containing_dir = os.path.dirname(file)
    speaker_dir = os.path.dirname(containing_dir)
    return int(os.path.basename(speaker_dir))

def file_name(file):
    '''
    Returns the last component of a file path (the file name without folders).
    '''
    return os.path.basename(file)


def one_observation(file):
    '''
    Builds the features, label and name for one audio file.

    The audio is loaded with load_audio_file (default arguments), transformed
    with fourier_transform and shaped with pre_processing.

    Returns a tuple (features, speaker id, file name).
    '''
    signal, _ = load_audio_file(file)
    features = pre_processing(fourier_transform(signal))
    return features, speaker_id(file), file_name(file)


def audio_to_features(path, speaker_num):
    '''
    Builds siamese-net pairs from up to 3 audio files per speaker.

    Files come from files_for_modeling_3_audios(path, speaker_num); each file is
    turned into (features, speaker id, file name) by one_observation. Every
    ordered pair of files (itertools.permutations of length 2) becomes one row.

    Returns:
        X: np.array built from the feature pairs of the filtered rows.
        y: np.array of targets for the filtered rows (1 = same speaker id,
           0 = different speaker ids).
        siam_data: DataFrame with all pairs.
        siam_data_filtered: siam_data without the rows whose two file names
           are equal.
    '''
    # Per-file features, labels and names
    features_per_file = []
    id_per_file = []
    name_per_file = []
    for audio_path in files_for_modeling_3_audios(path, speaker_num):
        spectrogram, label, short_name = one_observation(audio_path)
        features_per_file.append(spectrogram)
        id_per_file.append(label)
        name_per_file.append(short_name)

    data = pd.DataFrame()
    data['speaker_id'] = id_per_file
    data['features'] = features_per_file
    data['file_name'] = name_per_file

    # Ordered pairs of features / speaker ids / file names
    feature_pairs = list(permutations(data.features, 2))
    id_pairs = list(permutations(data.speaker_id, 2))
    name_pairs = list(permutations(data.file_name, 2))

    same_speaker = [1 if left == right else 0 for left, right in id_pairs]
    same_file = [1 if left == right else 0 for left, right in name_pairs]

    siam_data = pd.DataFrame()
    for column, values in (
        ('siam_targets', same_speaker),
        ('siam_features', feature_pairs),
        ('siam_pairs_ids', id_pairs),
        ('file_names', name_pairs),
        ('siam_same_file', same_file),
    ):
        siam_data[column] = values

    # Keep only the pairs whose two file names differ
    siam_data_filtered = siam_data.loc[siam_data.siam_same_file == 0]

    # Arrays for modeling
    X = np.array(siam_data_filtered.siam_features.tolist())
    y = np.array(siam_data_filtered.siam_targets.tolist())

    return X, y, siam_data, siam_data_filtered


def ten_audio_to_features(path, speaker_start, speaker_stop):
    '''
    Builds siamese-net pairs from up to 10 audio files per speaker.

    Files come from files_for_modeling_10_audios(path, speaker_start,
    speaker_stop); each file is turned into (features, speaker id, file name)
    by one_observation and collected in a DataFrame with the columns
    speaker_id, features and file_name.

    The pairing itself is delegated to audio_data_to_siam_features, which
    contains the same pair-building code this function used to duplicate.

    Returns the tuple produced by audio_data_to_siam_features:
    X, y, siam_data, siam_data_filtered.
    '''
    files = files_for_modeling_10_audios(path, speaker_start, speaker_stop)

    # Extract features
    Xdb_3D_list = []
    id_list = []
    file_name_list = []

    for file in files:
        # Features, label and name for one observation = one audio file
        Xdb_3D, label, name_file = one_observation(file)

        Xdb_3D_list.append(Xdb_3D)
        id_list.append(label)
        file_name_list.append(name_file)

    data = pd.DataFrame()
    data['speaker_id'] = id_list
    data['features'] = Xdb_3D_list
    data['file_name'] = file_name_list

    # Pair up the observations and reshape them for the siamese model
    return audio_data_to_siam_features(data)


def audio_data_to_siam_features(data):
    '''
    Turns a DataFrame of per-file observations into siamese-net pairs.

    data must provide the columns features, speaker_id and file_name. Every
    ordered pair of rows (itertools.permutations of length 2) becomes one row
    of the result.

    Returns:
        X: np.array built from the feature pairs of the filtered rows.
        y: np.array of targets for the filtered rows (1 = same speaker id,
           0 = different speaker ids).
        siam_data: DataFrame with all pairs (columns siam_targets,
           siam_features, siam_pairs_ids, file_names, siam_same_file,
           siam_same_id).
        siam_data_filtered: siam_data without the rows whose two file names
           are equal.
    '''
    # Ordered pairs of features / speaker ids / file names
    feature_pairs = list(permutations(data.features, 2))
    id_pairs = list(permutations(data.speaker_id, 2))
    name_pairs = list(permutations(data.file_name, 2))

    same_speaker = [1 if left == right else 0 for left, right in id_pairs]
    same_file = [1 if left == right else 0 for left, right in name_pairs]

    siam_data = pd.DataFrame()
    for column, values in (
        ('siam_targets', same_speaker),
        ('siam_features', feature_pairs),
        ('siam_pairs_ids', id_pairs),
        ('file_names', name_pairs),
        ('siam_same_file', same_file),
        ('siam_same_id', list(same_speaker)),  # same values as siam_targets
    ):
        siam_data[column] = values

    # Keep only the pairs whose two file names differ
    siam_data_filtered = siam_data.loc[siam_data.siam_same_file == 0]

    # Arrays for modeling
    X = np.array(siam_data_filtered.siam_features.tolist())
    y = np.array(siam_data_filtered.siam_targets.tolist())

    return X, y, siam_data, siam_data_filtered


def dataframe_to_siamese_features_LSTM(data):
    '''
    Turns a DataFrame of per-file observations into class-balanced siamese pairs.

    data must provide the columns features, speaker_id and file_name. Every
    unordered pair of rows (itertools.combinations of length 2) becomes one
    row. Rows whose two file names are equal are dropped, then the classes are
    balanced with RandomUnderSampler(random_state=4); the original comment
    expects target 0 (different speakers) to be the class that is reduced.

    Returns:
        X: np.array built from the feature pairs of the resampled rows.
        y: np.array of the resampled targets (1 = same speaker id, 0 = not).
        siam_data: DataFrame with all pairs before filtering and resampling.
        X_res_data: DataFrame of the resampled rows (all columns but the target).
    '''
    # Unordered pairs of features / speaker ids / file names
    feature_pairs = list(combinations(data.features, 2))
    id_pairs = list(combinations(data.speaker_id, 2))
    name_pairs = list(combinations(data.file_name, 2))

    same_speaker = [1 if left == right else 0 for left, right in id_pairs]
    same_file = [1 if left == right else 0 for left, right in name_pairs]

    siam_data = pd.DataFrame()
    for column, values in (
        ('siam_targets', same_speaker),
        ('siam_features', feature_pairs),
        ('siam_pairs_ids', id_pairs),
        ('file_names', name_pairs),
        ('siam_same_file', same_file),
        ('siam_same_id', list(same_speaker)),  # same values as siam_targets
    ):
        siam_data[column] = values

    # Keep only the pairs whose two file names differ
    siam_data_filtered = siam_data.loc[siam_data.siam_same_file == 0]

    # Everything except the target goes through the sampler as "X"
    feature_columns = ['siam_features',
                       'siam_pairs_ids',
                       'file_names',
                       'siam_same_file',
                       'siam_same_id']
    siam_data_X = siam_data_filtered[feature_columns].copy()
    siam_y = siam_data_filtered.siam_targets

    # Balance the classes by random under-sampling
    sampler = RandomUnderSampler(random_state=4)
    X_res, y_res = sampler.fit_resample(siam_data_X, siam_y)
    X_res_data = pd.DataFrame(X_res, columns=feature_columns)

    # Arrays for modeling
    X = np.array(X_res_data.siam_features.tolist())
    y = np.array(y_res.tolist())

    return X, y, siam_data, X_res_data


Overwriting /Users/greenapple/project5/src/data/process_audio_siam.py


# balance_data - remove

In [8]:
%%writefile '/Users/greenapple/project5/src/data/balance_data.py'

from imblearn.under_sampling import RandomUnderSampler
import pandas as pd

def under_sample(siam_data_filtered):
    '''
    Shuffles the pairs and balances the classes by random under-sampling.

    siam_data_filtered must provide the columns siam_features, siam_pairs_ids,
    file_names, siam_same_file, siam_same_id and siam_targets. The rows are
    shuffled first (sample(frac=1), not seeded here) and the index is reset;
    RandomUnderSampler(random_state=4) then resamples with siam_targets as the
    class label. The original note expects class 0 (different speakers) to be
    the majority class that gets reduced.

    Returns (X_res_data, y_res): the resampled rows as a DataFrame without the
    target column, and the resampled targets.
    '''
    feature_columns = ['siam_features',
                       'siam_pairs_ids',
                       'file_names',
                       'siam_same_file',
                       'siam_same_id']

    # Shuffle the rows before splitting them into X and y
    shuffled = siam_data_filtered.sample(frac=1).reset_index(drop=True)
    siam_data_X = shuffled[feature_columns].copy()
    siam_y = shuffled.siam_targets

    # Balance the classes by random under-sampling
    sampler = RandomUnderSampler(random_state=4)
    X_res, y_res = sampler.fit_resample(siam_data_X, siam_y)

    return pd.DataFrame(X_res, columns=feature_columns), y_res

Overwriting /Users/greenapple/project5/src/data/balance_data.py


# reshape_for_cnn - duplicated - moved to process audio

In [9]:
%%writefile '/Users/greenapple/project5/src/data/reshape.py'

import numpy as np

def reshape_for_cnn(X_res_data, y_res):
    '''
    Converts resampled pairs and targets into numpy arrays for modeling.

    X_res_data must provide a siam_features column; y_res must offer tolist().
    Returns (X, y), each built with np.array from the respective list.
    '''
    feature_pairs = X_res_data.siam_features.tolist()
    targets = y_res.tolist()
    return np.array(feature_pairs), np.array(targets)

Overwriting /Users/greenapple/project5/src/data/reshape.py


# one_shot_learning_VGG16

In [10]:
%%writefile '/Users/greenapple/project5/src/models/one_shot_learning_VGG16.py'

import pandas as pd
import numpy as np
from src.data import process_audio
import random

def get_samples_random(path, speaker_num):
    '''
    Builds a DataFrame of features and speaker ids for randomly chosen speakers.

    Files come from process_audio.files_for_modeling_3_audios_random and each
    one is processed with process_audio.one_observation.

    Returns a DataFrame with the columns speaker_id and features.
    '''
    feature_list = []
    label_list = []

    for audio_path in process_audio.files_for_modeling_3_audios_random(path, speaker_num):
        # NOTE(review): assumes process_audio.one_observation returns a
        # (features, speaker_id) pair - the one_observation written to
        # process_audio_siam.py returns three values; confirm.
        features, label = process_audio.one_observation(audio_path)
        feature_list.append(features)
        label_list.append(label)

    speaker_data = pd.DataFrame()
    speaker_data['speaker_id'] = label_list
    speaker_data['features'] = feature_list

    return speaker_data

def get_samples_in_order(path, speaker_num):
    '''
    Builds a DataFrame of features and speaker ids for the first speakers found.

    Files come from process_audio.files_for_modeling_3_audios and each one is
    processed with process_audio.one_observation.

    Returns a DataFrame with the columns speaker_id and features.
    '''
    feature_list = []
    label_list = []

    for audio_path in process_audio.files_for_modeling_3_audios(path, speaker_num):
        # NOTE(review): assumes process_audio.one_observation returns a
        # (features, speaker_id) pair - the one_observation written to
        # process_audio_siam.py returns three values; confirm.
        features, label = process_audio.one_observation(audio_path)
        feature_list.append(features)
        label_list.append(label)

    speaker_data = pd.DataFrame()
    speaker_data['speaker_id'] = label_list
    speaker_data['features'] = feature_list

    return speaker_data

def one_shot_set(speaker_data, n_way):
    '''
    Returns one shot learning sample set with n_way sample pairs.

    Parameters:
        speaker_data: DataFrame with the columns speaker_id and VGG16_embds.
        n_way: number of pairs; each pair uses a different speaker.

    Returns:
        pairs: np.array of (test_sample, sample) pairs, one per selected speaker.
            The test sample is the same in every pair and belongs to the
            speaker of the first pair.
        targets: np.array of length n_way, 1 for the first pair and 0 elsewhere.

    Raises:
        ValueError: if n_way is larger than the number of distinct speakers
            (the selection loop below could never finish).
    '''
    n_speakers = speaker_data.speaker_id.nunique()
    if n_way > n_speakers:
        raise ValueError(
            'n_way ({}) is larger than the number of distinct speakers ({})'.format(n_way, n_speakers))

    # Randomly select n_way samples, each from a different speaker
    ids = []
    samples = []

    while len(ids) < n_way:
        row = speaker_data.sample(1, replace=False).iloc[0]
        if row['speaker_id'] not in ids:
            ids.append(row['speaker_id'])
            samples.append(row['VGG16_embds'])

    # Select a sample from the same speaker for the first sample.
    # The candidates are filtered up front instead of retrying against a
    # string sentinel: comparing an embedding array with a string is an
    # elementwise comparison whose truth value is ambiguous on recent NumPy
    # versions, and retrying never ends if every embedding equals samples[0].
    same_speaker = speaker_data.loc[speaker_data.speaker_id == ids[0]]
    differs = np.array(
        [not np.array_equal(embd, samples[0]) for embd in same_speaker['VGG16_embds']],
        dtype=bool)
    candidates = same_speaker.loc[differs]

    if candidates.empty:
        # Only one (distinct) audio is available for this speaker
        test_sample = same_speaker.iloc[0]['VGG16_embds']
    else:
        test_sample = candidates.sample(1).iloc[0]['VGG16_embds']

    # Make pairs
    pairs = np.array([(test_sample, sample) for sample in samples])

    # Targets: only the first pair is made of the same speaker
    targets = np.zeros((n_way,))
    targets[0] = 1

    return pairs, targets


def one_shot_score(model, trials, speaker_data, n_way):
    '''
    Returns percent of correctly predicted one shot trials.

    For each trial a fresh set of pairs is drawn with one_shot_set and scored
    with model.predict([first items, second items]); the trial counts as
    correct when the highest prediction is at the position of the target.
    The running score is printed on a single, overwritten line.
    '''
    n_correct = 0

    for trial_number in range(1, trials + 1):
        pairs, targets = one_shot_set(speaker_data, n_way)
        probs = model.predict([pairs[:, 0], pairs[:, 1]])
        n_correct += int(np.argmax(probs) == np.argmax(targets))
        running_score = 100.0 * n_correct / trial_number
        print('Trial {}, current score {}'.format(trial_number, running_score), end='\r')

    return 100.0 * n_correct / trials

Writing /Users/greenapple/project5/src/models/one_shot_learning_VGG16.py


# history

In [12]:
%%writefile '/Users/greenapple/project5/src/models/history.py'

import json, codecs

def save_hist(path, history):
    '''
    Writes history to path as UTF-8 encoded JSON.

    The output is indented by 4 spaces with sorted keys; an existing file is
    overwritten.
    '''
    with codecs.open(path, 'w', encoding='utf-8') as handle:
        json.dump(
            history,
            handle,
            separators=(',', ':'),
            sort_keys=True,
            indent=4,
        )

def load_hist(path):
    '''
    Loads a history that was saved as UTF-8 encoded JSON.

    Returns the decoded content of path, or an empty dict when the file does
    not exist (no history has been saved yet).
    '''
    # The missing-file case is handled by catching FileNotFoundError rather
    # than calling os.path.exists: this module does not import os.
    try:
        with codecs.open(path, 'r', encoding='utf-8') as f:
            return json.loads(f.read())
    except FileNotFoundError:
        return {}  # set history to empty

def append_hist(h1, h2):
    '''
    Appends history h2 to history h1.

    If h1 equals an empty dict, h2 is returned as is. Otherwise a new dict is
    returned with the keys of h1, each value being h1[key] + h2[key]; a key of
    h1 that is missing from h2 raises KeyError, keys only present in h2 are
    ignored.
    '''
    if h1 == {}:
        return h2
    return {key: value + h2[key] for key, value in h1.items()}

Writing /Users/greenapple/project5/src/models/history.py
