<a href="https://colab.research.google.com/github/MohannadEhabBarakat/text2audio/blob/main/code/datasets/Lipri_speech_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LibriSpeech Data Pipeline

LibriSpeech is a corpus of approximately 1000 hours of 16kHz read English speech, prepared by Vassil Panayotov with the assistance of Daniel Povey. The data is derived from read audiobooks from the LibriVox project, and has been carefully segmented and aligned. 



### Needed packages

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds

In [None]:
def load_dataset(path):
  """
  Load six LibriSpeech splits with tensorflow_datasets.

  Arguments:
  path -- directory passed to tfds as `data_dir` (where the dataset is
          downloaded / prepared / read from)

  Return:
  train_clean, train_noise, dev_clean, dev_noise, test_clean, test_noise --
  the requested splits, in that order. Because `as_supervised=True`, each
  split yields (input, label) tuples rather than feature dictionaries.
  """
  # "clean" splits map to the *_clean* subsets, "noise" splits to *_other*.
  split_names = ['train_clean360', 'train_other500',
                 'dev_clean', 'dev_other',
                 'test_clean', 'test_other']

  # `path` is the function argument; the previous code referenced an
  # undefined name here. The dataset is addressed by its lowercase
  # registered name.
  train_clean, train_noise, dev_clean, dev_noise, test_clean, test_noise = tfds.load(
      'librispeech',
      split=split_names,
      as_supervised=True,
      data_dir=path)

  return train_clean, train_noise, dev_clean, dev_noise, test_clean, test_noise
  


In [None]:
def concatinate_data(split1, split2):
  """
  Concatenate two dataset splits (or two tensors).

  Arguments:
  split1, split2 -- the two splits to be joined; split2 is appended after
                    split1

  Return:
  concatinated -- if split1 exposes a `concatenate` method (as
                  tf.data.Dataset does), the result of
                  `split1.concatenate(split2)`; otherwise the Tensor
                  produced by `tf.concat([split1, split2], axis=0)`
  """
  # tf.concat only works on tensors; dataset objects have to be chained with
  # their own `concatenate` method, so prefer it when it is available.
  chain = getattr(split1, "concatenate", None)
  if callable(chain):
    return chain(split2)

  # Fallback keeps the original behaviour for plain tensors.
  concatinated = tf.concat([split1, split2], axis=0)
  return concatinated


In [None]:
def dataset_format (dataset_split, text_to_speech = False):
  """
  Optionally swap the two members of a (speech, text) pair.

  Arguments:
  dataset_split -- indexable pair whose item 0 is the speech and item 1 the text
  text_to_speech -- boolean; when true the pair is returned as (text, speech)

  Return:
  the very same object when text_to_speech is false, otherwise a new
  2-tuple with the members in reversed order
  """
  # Speech-to-text keeps the incoming layout untouched.
  if not text_to_speech:
    return dataset_split

  text = dataset_split[1]
  speech = dataset_split[0]
  return (text, speech)


In [None]:
def pipeline (clean_data, noisy_data , clean_only = True, shuffle = True ,
              batch_size = 32, buffer_size= 1000, text_to_speech = False):
  """
  Apply shuffling, formatting, batching and prefetching to the input data.

  Arguments:
  clean_data -- clean split of the data (a tf.data.Dataset of (speech, text) pairs)
  noisy_data -- noisy split of the data; only used when clean_only is false
  clean_only -- boolean, if true the pipeline is applied to the clean split only,
                otherwise the noisy split is appended to the clean one
  shuffle -- boolean, if true the data is shuffled
  batch_size -- integer batch size
  buffer_size -- integer size of the shuffle buffer
  text_to_speech -- boolean, if true each element is reversed from
                    (speech, text) to (text, speech)

  Return:
  data -- the (optionally shuffled) batched and prefetched dataset
  """
  if clean_only:
    data = clean_data
  else:
    # Dataset.concatenate is the tf.data way of chaining two datasets;
    # tf.concat operates on tensors, not on dataset objects.
    data = clean_data.concatenate(noisy_data)

  if shuffle:
    data = data.shuffle(buffer_size)

  # The element itself (not the dataset) must be handed to dataset_format.
  data = data.map(
      lambda speech, text: dataset_format((speech, text),
                                          text_to_speech = text_to_speech))

  # Utterances differ in length, so a plain batch() cannot stack them;
  # padded_batch pads each component to the longest item of the batch and is
  # identical to batch() when all elements already share one shape.
  data = data.padded_batch(batch_size)
  data = data.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

  return data



In [None]:
def dataset_loader_and_formatter (path, clean_only = True, shuffle = True,
              batch_size_train = 32, batch_size_val = 32, batch_size_test = 32, buffer_size= 1000, text_to_speech = False):
  """
  Load every dataset split and run each train/val/test pair through `pipeline`.

  Arguments:
  path -- path forwarded to `load_dataset`
  clean_only -- boolean, forwarded to `pipeline` (use the clean splits only)
  shuffle -- boolean, forwarded to `pipeline` for all three sets
  batch_size_train -- integer batch size for the training set
  batch_size_val -- integer batch size for the validation set
  batch_size_test -- integer batch size for the test set
  buffer_size -- integer shuffle-buffer size, shared by all three sets
  text_to_speech -- boolean, forwarded to `pipeline` (reverse each element
                    from (speech, text) to (text, speech))

  Return:
  train_data -- ready to use dataset
  val_data  -- ready to use dataset
  test_data  -- ready to use dataset
  """
  splits = load_dataset(path)
  train_pair, val_pair, test_pair = splits[0:2], splits[2:4], splits[4:6]

  # Options that are identical for the three sets; only batch_size differs.
  shared_options = {
      "clean_only": clean_only,
      "shuffle": shuffle,
      "buffer_size": buffer_size,
      "text_to_speech": text_to_speech,
  }

  train_data = pipeline(train_pair[0], train_pair[1],
                        batch_size = batch_size_train, **shared_options)
  val_data = pipeline(val_pair[0], val_pair[1],
                      batch_size = batch_size_val, **shared_options)
  test_data = pipeline(test_pair[0], test_pair[1],
                       batch_size = batch_size_test, **shared_options)

  return train_data, val_data, test_data
