In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import librosa 
import librosa.display as dis
import numpy as np
import fnmatch
import os
import random
import re
import threading
import Queue

import librosa
import numpy as np
import torch
# Regular expression for audio file names that contain "p<digits>_<digits>.wav".
# The first captured number is used as the category id and the second is
# treated as the recording id by the helpers in this notebook.
FILE_PATTERN = r'p([0-9]+)_([0-9]+)\.wav'

In [6]:


def get_category_cardinality(files):
    '''Return the smallest and largest category id found in `files`.

    Each name must match FILE_PATTERN; a name without a match raises
    IndexError. An empty `files` gives (None, None).
    '''
    pattern = re.compile(FILE_PATTERN)
    # findall(...)[0] is the (category, recording) pair of the first match;
    # only the category part takes part in the min/max.
    category_ids = [int(pattern.findall(name)[0][0]) for name in files]
    if not category_ids:
        return None, None
    return min(category_ids), max(category_ids)


def randomize_files(files):
    '''Yield every entry of `files` exactly once, in random order.

    The previous implementation drew len(files) independent random indices,
    i.e. it sampled with replacement: some files were yielded several times
    while others were never yielded at all. Shuffling a copy keeps the same
    number of yielded items but guarantees full coverage.

    Args:
        files: sequence of items (file names). It is not modified.

    Yields:
        The items of `files`, each one once, in shuffled order.
    '''
    shuffled = list(files)  # copy so the caller's list keeps its order
    random.shuffle(shuffled)
    for item in shuffled:
        yield item


def find_files(directory, pattern='*.wav'):
    '''Walk `directory` recursively and list the paths of all files whose
    name matches the shell-style `pattern`.'''
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in fnmatch.filter(names, pattern)
    ]


def load_generic_audio(directory, sample_rate):
    '''Generator over the audio files below `directory`.

    Yields (audio, filename, category_id) where `audio` is the mono waveform
    loaded at `sample_rate` and reshaped into a single column, and
    `category_id` is the first number captured by FILE_PATTERN, or None when
    the file name does not match.
    '''
    paths = randomize_files(find_files(directory))
    id_pattern = re.compile(FILE_PATTERN)
    for path in paths:
        found = id_pattern.findall(path)
        # Each match is a (category, recording) pair; names without a match
        # simply carry no category.
        label = int(found[0][0]) if found else None
        waveform, _ = librosa.load(path, sr=sample_rate, mono=True)
        yield waveform.reshape(-1, 1), path, label


def trim_silence(audio, threshold, frame_length=2048):
    '''Strip leading and trailing silence from `audio`.

    Frames whose RMS energy is above `threshold` count as non-silent; the
    result spans from the first to the last such frame. If no frame is above
    the threshold an empty slice of `audio` is returned.
    '''
    # The analysis window may not be longer than the signal itself.
    window = min(frame_length, audio.size)
    rms = librosa.feature.rmse(audio, frame_length=window)
    loud_frames = np.nonzero(rms > threshold)
    # Row 1 of the converted index tuple holds the frame positions in samples.
    sample_idx = librosa.core.frames_to_samples(loud_frames)[1]

    if not sample_idx.size:
        # Everything was below the threshold.
        return audio[0:0]
    return audio[sample_idx[0]:sample_idx[-1]]


def not_all_have_id(files):
    '''Tell whether at least one name in `files` fails to match FILE_PATTERN,
    i.e. has no category id. Returns False for an empty `files`.'''
    pattern = re.compile(FILE_PATTERN)
    return any(not pattern.findall(name) for name in files)


class AudioReader(object):
    '''Turns a directory of audio files into a "cut / not cut" data set.

    Every source waveform is split into consecutive pieces of
    `receptive_field` samples. Pieces alternate between two kinds, starting
    with a cut one:

    * target 1 -- `cut_len` samples are removed at a random position inside
      the piece and the two remaining parts are joined;
    * target 0 -- the piece is copied unchanged.

    For each source file the concatenated pieces are written as one wav file
    and the per-piece targets as one text file below `output_dir`. Despite
    the method name `thread_main`, nothing here starts a thread or uses a
    queue; the work runs synchronously in the caller.
    '''

    # Number of generated files stored in each numbered sub-directory.
    FILES_PER_DIR = 200
    # A progress line is printed once every LOG_EVERY generated files.
    LOG_EVERY = 100

    def __init__(self,
                 audio_dir,
                 sample_rate,
                 receptive_field,
                 sample_size=None,
                 cut_len = 1000,
                 dtype = np.float32,
                 similarity = 300,
                 output_dir = './new_set'):
        '''Store the settings and check that `audio_dir` contains audio.

        Args:
            audio_dir: directory searched recursively for audio files.
            sample_rate: rate the audio is loaded at and the output wav
                files are written with.
            receptive_field: length, in samples, of every output piece.
            sample_size: stored only; not used by this class.
            cut_len: number of samples removed from every cut piece.
            dtype: dtype the output audio is cast to before writing.
            similarity: stored only; not used by this class (kept so that
                existing calls keep working).
            output_dir: root directory for the generated `wav/` and
                `targets/` trees.

        Raises:
            ValueError: if no audio file is found below `audio_dir`.
        '''
        self.audio_dir = audio_dir
        self.sample_rate = sample_rate
        self.sample_size = sample_size
        self.receptive_field = receptive_field
        self.cut_len = cut_len
        self.similarity = similarity
        self.dtype = dtype
        self.output_dir = output_dir

        # Fail early: discovering an empty directory only once processing
        # has started would be harder to notice.
        files = find_files(audio_dir)
        if not files:
            raise ValueError("No audio files found in '{}'.".format(audio_dir))

    @staticmethod
    def _ensure_dir(path):
        '''Create `path` (with parents) unless it already exists.'''
        if not os.path.isdir(path):
            os.makedirs(path)

    def _cut_pieces(self, audio):
        '''Split one waveform into alternating cut / intact pieces.

        Args:
            audio: 2-D array indexed as (sample, channel).

        Returns:
            (pieces, targets): `pieces` has n * receptive_field rows and the
            same number of columns as `audio`; `targets` is an int32 vector
            of length n holding 1 for cut pieces and 0 for intact ones.
            n is 0 when `audio` is not longer than receptive_field + cut_len.
        '''
        rf = self.receptive_field
        span = rf + self.cut_len
        # A piece is taken while more than `span` samples remain and every
        # piece consumes `rf` samples, so the count is ceil(excess / rf).
        # Integer arithmetic keeps it a valid array shape.
        excess = len(audio) - span
        n_pieces = max(0, int(-(-excess // rf)))

        targets = np.zeros(n_pieces, dtype=np.int32)
        cuted_audio = np.zeros((n_pieces * rf, audio.shape[1]))
        is_cut = True
        for k in range(n_pieces):
            if is_cut:
                # Keep the first `point` samples, skip `cut_len` samples,
                # then fill the rest of the piece with what follows the gap.
                point = np.random.randint(rf)
                piece = np.zeros((rf, audio.shape[1]))
                piece[:point, :] = audio[:point, :]
                piece[point:, :] = audio[point + self.cut_len:span, :]
                targets[k] = 1
            else:
                piece = audio[:rf, :]
            cuted_audio[k * rf:(k + 1) * rf, :] = piece
            # Both kinds of piece advance the read position by `rf` samples.
            audio = audio[rf:, :]
            is_cut = not is_cut
        return cuted_audio, targets

    def thread_main(self):
        '''Process every audio file once and write the generated data set.

        Output `i` (counting from 0) is stored as
        `<output_dir>/wav/<i // 200>/audio<i>.wav` and
        `<output_dir>/targets/<i // 200>/audio<i>.txt`. Files too short to
        yield a single piece are skipped and do not consume a number.
        Existing output directories are reused, so the method can be re-run.
        '''
        iterator = load_generic_audio(self.audio_dir, self.sample_rate)
        count_audio = 0
        for audio, filename, category_id in iterator:
            cuted_audio, targets = self._cut_pieces(audio)
            if targets.size == 0:
                continue

            # Floor division keeps the folder name an integer on Python 3.
            folder = str(count_audio // self.FILES_PER_DIR)
            wav_dir = os.path.join(self.output_dir, 'wav', folder)
            targets_dir = os.path.join(self.output_dir, 'targets', folder)
            self._ensure_dir(wav_dir)
            self._ensure_dir(targets_dir)

            np.savetxt(
                os.path.join(targets_dir, 'audio{}.txt'.format(count_audio)),
                targets, delimiter=" ", fmt="%s")
            # Write with the rate the audio was loaded at instead of a fixed
            # 16000 so other sample rates produce correct files.
            librosa.output.write_wav(
                os.path.join(wav_dir, 'audio{}.wav'.format(count_audio)),
                cuted_audio.astype(self.dtype), self.sample_rate)

            if count_audio % self.LOG_EVERY == 0:
                print('step={}'.format(count_audio))
            count_audio += 1

In [7]:
# Reader over the local VCTK "wav48" folder (machine-specific absolute path);
# audio is loaded at 16 kHz and split into pieces of 637 samples.
reader = AudioReader(
    audio_dir="/home/administrator/workspace/true_model/VCTK-Corpus/wav48",
    sample_rate=16000,
    receptive_field=637,
)

In [8]:
# Runs the whole conversion synchronously: writes the generated wav/target
# files and prints a "step=N" line every 100 processed files.
reader.thread_main()



step=0
step=100
step=200
step=300
step=400
step=500
step=600
step=700
step=800
step=900
step=1000
step=1100
step=1200
step=1300
step=1400
step=1500
step=1600
step=1700
step=1800
step=1900
step=2000
step=2100
step=2200
step=2300
step=2400
step=2500
step=2600
step=2700
step=2800
step=2900
step=3000
step=3100
step=3200
step=3300
step=3400
step=3500
step=3600
step=3700
step=3800
step=3900
step=4000
step=4100
step=4200
step=4300
step=4400
step=4500
step=4600
step=4700
step=4800
step=4900
step=5000
step=5100
step=5200
step=5300
step=5400
step=5500
step=5600
step=5700
step=5800
step=5900
step=6000
step=6100
step=6200
step=6300
step=6400
step=6500
step=6600
step=6700
step=6800
step=6900
step=7000
step=7100
step=7200
step=7300
step=7400
step=7500
step=7600
step=7700
step=7800
step=7900
step=8000
step=8100
step=8200
step=8300
step=8400
step=8500
step=8600
step=8700
step=8800
step=8900
step=9000
step=9100
step=9200
step=9300
step=9400
step=9500
step=9600
step=9700
step=9800
step=9900
step=10000
s

In [9]:
# Completion marker only; it has no effect on the generated data.
print ("WOOOHOOOOOO")

WOOOHOOOOOO
