In [2]:
import pandas as pd
import numpy as np
import os
import soundfile as sf
import matplotlib.pyplot as plt
import logmelspectr_params as params
from tqdm import tqdm
import itertools

In [None]:
def frame(data, window_length, hop_length):
    """Convert array into a sequence of successive possibly overlapping frames.

    An n-dimensional array of shape (num_samples, ...) is converted into an
    (n+1)-D array of shape (num_frames, window_length, ...), where each frame
    starts hop_length points after the preceding one.

    This is accomplished using stride_tricks, so the original data is not
    copied: the result is a view whose frames alias `data` (and each other
    when they overlap), so writing into it also modifies `data`.  There is no
    zero-padding, so any incomplete frames at the end are not included.

    Args:
      data: np.array of dimension N >= 1.
      window_length: Number of samples in each frame.
      hop_length: Advance (in samples) between each window; must be >= 1.

    Returns:
      (N+1)-D np.array with as many rows as there are complete frames that can
      be extracted.  When `data` holds fewer than `window_length` samples the
      result has zero rows, i.e. shape (0, window_length, ...).

    Raises:
      ValueError: if hop_length is smaller than 1.
    """
    if hop_length < 1:
        # A zero hop divides by zero and a negative one would produce strides
        # that walk outside the buffer, so reject both up front.
        raise ValueError(
            "hop_length must be a positive integer, got %r" % (hop_length,))
    num_samples = data.shape[0]
    # Integer floor division avoids a float round-trip; the clamp keeps inputs
    # shorter than one window from yielding a negative (invalid) frame count.
    num_frames = max(0, 1 + int((num_samples - window_length) // hop_length))
    shape = (num_frames, window_length) + data.shape[1:]
    strides = (data.strides[0] * hop_length,) + data.strides
    return np.lib.stride_tricks.as_strided(data, shape=shape, strides=strides)

In [None]:
# Directory holding the ASVspoof2019 LA protocol text files (train/dev/eval).
# The default is the original absolute location; set
# ASVSPOOF2019_LA_PROTOCOLS_DIR in the environment to run the notebook against
# a copy stored somewhere else.
protocols_dir = os.environ.get(
    "ASVSPOOF2019_LA_PROTOCOLS_DIR",
    "/nas/public/dataset/asvspoof2019/LA/ASVspoof2019_LA_cm_protocols")

train_txt_path = os.path.join(protocols_dir, "ASVspoof2019.LA.cm.train.trn.txt")
dev_txt_path = os.path.join(protocols_dir, "ASVspoof2019.LA.cm.dev.trl.txt")
eval_txt_path = os.path.join(protocols_dir, "ASVspoof2019.LA.cm.eval.trl.txt")


In [None]:
# Root directory of the precomputed feature files, one sub-directory per
# split.  The default is the original absolute location; set
# LOGMELSPECTR_FEATURES_ROOT in the environment to point the notebook at a
# different copy of the features.
features_root = os.environ.get(
    "LOGMELSPECTR_FEATURES_ROOT",
    "/nas/home/cborrelli/tripletloss_bot/features/logmelspectr")

train_feat_root = os.path.join(features_root, "train")
dev_feat_root = os.path.join(features_root, "dev")
eval_feat_root = os.path.join(features_root, "eval")

In [None]:
def _read_protocol(txt_path):
    """Read one space-separated, header-less protocol file into a DataFrame.

    The five columns are named speaker_id, audio_filename, null, system_id and
    label; the `null` column is dropped before the frame is returned.
    """
    table = pd.read_csv(txt_path, sep=" ", header=None)
    table.columns = ["speaker_id", "audio_filename", "null", "system_id", "label"]
    return table.drop(columns="null")


# One table per split, all parsed the same way.
df_train = _read_protocol(train_txt_path)
df_dev = _read_protocol(dev_txt_path)
df_eval = _read_protocol(eval_txt_path)

In [None]:
def _seconds_to_feature_frames(seconds, frames_per_second):
    """Convert a duration in seconds to the nearest whole number of frames."""
    return int(round(seconds * frames_per_second))


# Feature frames per second: the reciprocal of the STFT hop duration.
features_sample_rate = 1.0 / params.STFT_HOP_LENGTH_SECONDS

# Length of one example, and the advance between consecutive examples,
# both expressed in feature frames.
example_window_length = _seconds_to_feature_frames(
    params.EXAMPLE_WINDOW_SECONDS, features_sample_rate)
example_hop_length = _seconds_to_feature_frames(
    params.EXAMPLE_HOP_SECONDS, features_sample_rate)

In [None]:
# Count the training clips that are too short to yield a single complete
# example window.
count = 0
for audio_filename in tqdm(df_train['audio_filename'], total=len(df_train)):
    log_mel = np.load(os.path.join(train_feat_root, audio_filename + '.npy'))
    # After the transpose, axis 0 is the axis that frame() slides over.
    log_mel = log_mel.transpose()
    # A clip yields zero complete frames exactly when it is shorter than one
    # window.  Comparing lengths directly avoids calling frame() on such
    # clips, which fails when the shortfall exceeds one hop.
    if log_mel.shape[0] < example_window_length:
        count += 1

# Display the result as the cell output.
count

    

In [None]:
alg_classes = ['-','A01', 'A02', 'A03', 'A04', 'A05', 'A06']
num_combinations = 500

# First draw of `num_combinations` filenames per class.  The draw is seeded,
# so it is computed once here and reused for both the positive and the
# negative pairs instead of being re-sampled inside every loop iteration.
first_samples = {}
for alg in alg_classes:
    first_samples[alg] = df_train.audio_filename[
        df_train['system_id'] == alg
    ].sample(random_state=2, n=num_combinations)

# Positive pairs: both files come from the same class.
positive_data = {}
for alg in alg_classes:
    alg_samples_set1 = first_samples[alg]

    # The second draw must exclude the files already in the first one.  The
    # exclusion has to be tested on `audio_filename`: testing `system_id`
    # against a list of filenames never matches, and with the same seed the
    # second draw would then be identical to the first.
    same_class = df_train['system_id'] == alg
    already_drawn = df_train['audio_filename'].isin(alg_samples_set1)
    alg_samples_set2 = df_train.audio_filename[
        same_class & ~already_drawn
    ].sample(random_state=2, n=num_combinations)

    positive_data[alg + '_' + alg] = list(
        itertools.product(alg_samples_set1, alg_samples_set2))

# Negative pairs: one entry per unordered pair of distinct classes, keyed
# '<first>_<second>' in the order the classes appear in `alg_classes`.
negative_data = {}
for alg_1, alg_2 in tqdm(list(itertools.combinations(alg_classes, 2))):
    negative_data[alg_1 + '_' + alg_2] = list(
        itertools.product(first_samples[alg_1], first_samples[alg_2]))

In [5]:
# Values of the `system_id` column that pairs are drawn from: '-' followed by
# A01 through A06.
classes_list = ['-'] + ['A%02d' % system_number for system_number in range(1, 7)]


In [None]:
# Class labels that getitem reads as a module-level name when it builds its
# same-class and different-class pairs.
classes_list = '- A01 A02 A03 A04 A05 A06'.split()

In [None]:
def _random_log_mel_example(feature_path, audio_filename, window_length, hop_length):
    """Load one clip's features and return one randomly chosen example window.

    The array stored in '<audio_filename>.npy' is transposed so that axis 0 is
    the axis being framed, zero-padded along that axis when it is shorter than
    one window, cut into windows with `frame`, and one window is picked
    uniformly at random.

    Assumes the stored array is 2-D -- TODO confirm against the feature
    extraction code.

    Returns:
      Array of shape (window_length, n_bands).
    """
    log_mel = np.load(os.path.join(feature_path, audio_filename + '.npy'))
    log_mel = log_mel.transpose()

    if log_mel.shape[0] < window_length:
        # Pad the framed axis (axis 0) up to exactly one window so that
        # frame() always yields at least one complete example.
        pad_len = window_length - log_mel.shape[0]
        log_mel = np.pad(log_mel, ((0, pad_len), (0, 0)), mode='constant')

    examples = frame(log_mel, window_length, hop_length)
    chosen = np.random.randint(low=0, high=examples.shape[0])
    return examples[chosen]


def getitem(batch_index, train_feat_root, dataframe,  dim, n_channels, batch_size = 50):
    """Generate one batch of same-class / different-class example pairs.

    Class pairs are drawn with replacement from the module-level
    `classes_list`: the first `batch_size // 2` pairs repeat one class, the
    remaining ones combine two distinct classes.  For each class in a pair a
    random file of that class is picked from `dataframe`, a random example
    window is cut from its features, and the two windows are stacked along
    axis 0 to form one sample.

    Args:
      batch_index: Not used by the computation; kept so the signature stays
        unchanged for existing callers.
      train_feat_root: Directory containing one '<audio_filename>.npy' file
        per clip.
      dataframe: DataFrame with `system_id` and `audio_filename` columns.
      dim: (rows, cols) of one sample.  `rows` must be twice the example
        window length because two windows are stacked along axis 0.
      n_channels: Size of the trailing channel axis of the batch.
      batch_size: Number of samples in the batch.

    Returns:
      Tuple (X, y).  X has shape (batch_size, *dim, n_channels).  y has shape
      (batch_size,) and holds 0.0 for same-class pairs and 1.0 for
      different-class pairs, in the same order as the rows of X.

    Raises:
      ValueError: if a stacked pair cannot be broadcast to (*dim, n_channels),
        or if `dataframe` has no row for one of the selected classes.
    """
    feature_path = train_feat_root

    # Same-class pairs get the floor half; the rest go to different-class
    # pairs, so an odd batch_size still fills every row of X.
    n_positive = batch_size // 2
    n_negative = batch_size - n_positive

    negative_couples_classes = np.array(list(itertools.combinations(classes_list, r=2)))
    positive_couples_classes = np.array(list(zip(classes_list, classes_list)))

    negative_selected_pairs = negative_couples_classes[
        np.random.choice(negative_couples_classes.shape[0], n_negative, replace=True), :]
    positive_selected_pairs = positive_couples_classes[
        np.random.choice(positive_couples_classes.shape[0], n_positive, replace=True), :]

    # Stack along axis 0 so every row is exactly one (class, class) pair;
    # the labels follow the same positive-then-negative order.
    selected_pairs = np.concatenate((positive_selected_pairs, negative_selected_pairs), axis=0)
    y = np.concatenate((np.zeros(n_positive), np.ones(n_negative)), axis=0)

    features_sample_rate = 1.0 / params.STFT_HOP_LENGTH_SECONDS
    example_window_length = int(round(
        params.EXAMPLE_WINDOW_SECONDS * features_sample_rate))
    example_hop_length = int(round(
        params.EXAMPLE_HOP_SECONDS * features_sample_rate))

    X = np.empty((batch_size, *dim, n_channels))

    for sample_batch_index, pair in enumerate(selected_pairs):
        pair_examples = []
        for alg in pair:
            row = dataframe[dataframe.system_id == alg].sample(n=1)
            # `row` is a one-row DataFrame; take the scalar filename out of it.
            audio_filename = row['audio_filename'].iloc[0]
            example = _random_log_mel_example(
                feature_path, audio_filename, example_window_length, example_hop_length)
            # Append the channel axis: (window, bands) -> (window, bands, 1).
            pair_examples.append(example[:, :, np.newaxis])

        # Two windows stacked along axis 0 give (2 * window, bands, 1), which
        # is broadcast over the channel axis of X.
        X[sample_batch_index] = np.concatenate(pair_examples, axis=0)

    return X, y


In [None]:
# Try the batch generator once on the training split, with the default
# batch size; each sample is 2 * 96 rows by 64 columns with one channel.
getitem(
    batch_index=0,
    train_feat_root=train_feat_root,
    dataframe=df_train,
    dim=(2 * 96, 64),
    n_channels=1,
)

In [None]:
# NOTE(review): `b` is not assigned in any visible cell of this notebook, so
# this cell presumably raises NameError on a fresh kernel (Restart & Run All).
# Confirm it is a leftover scratch cell and remove it.
b

In [12]:
# Small all-ones matrix (5 rows, 3 columns) used to try out np.pad in the
# following cell.
a = np.ones(5 * 3).reshape(5, 3)
a

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [13]:
# Append `pad_len` zero-filled columns on the right of `a`; the rows (axis 0)
# are left untouched.
pad_len = 10
np.pad(a, pad_width=[(0, 0), (0, pad_len)])

array([[1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])