<a href="https://colab.research.google.com/github/phrasenmaeher/custom-audio-classification-tf/blob/main/custaudio_tfr_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Code for section 3 of the post at
[TDS/Medium](https://towardsdatascience.com/custom-audio-classification-with-tensorflow-af8c16c38689)

In [None]:
import librosa
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
import tqdm
import argparse
from pathlib import Path

# Output location for all generated files. main() sets it from the
# 'output_path' argument; gen_tfr() and gen_monitoring_sample() read it.
tfr_dir = None

In [None]:
def _bytes_feature(value):
    """Wrap a single string / bytes value in a ``tf.train.Feature`` (bytes_list).

    A value that is an instance of the type returned by ``tf.constant`` is
    first unwrapped with ``.numpy()``.
    """
    tensor_type = type(tf.constant(0))
    raw = value.numpy() if isinstance(value, tensor_type) else value
    bytes_list = tf.train.BytesList(value=[raw])
    return tf.train.Feature(bytes_list=bytes_list)

def _float_feature(value):
  """Wrap a single float / double in a ``tf.train.Feature`` (float_list)."""
  float_list = tf.train.FloatList(value=[value])
  return tf.train.Feature(float_list=float_list)

def _int64_feature(value):
  """Wrap a single bool / enum / int / uint in a ``tf.train.Feature`` (int64_list)."""
  int64_list = tf.train.Int64List(value=[value])
  return tf.train.Feature(int64_list=int64_list)

def serialize_array(array):
  """Return ``array`` serialized with ``tf.io.serialize_tensor``."""
  return tf.io.serialize_tensor(array)

In [None]:
def gen_example(sound_clip, sr, label, shape):
  """Build the feature mapping that describes one audio clip.

  Args:
    sound_clip: The audio data; stored serialized under ``'feature'``.
    sr: Sampling rate, stored as an int64 feature under ``'sr'``.
    label: Class label, stored as an int64 feature under ``'label'``.
    shape: Indexable whose first two entries are stored under ``'x'`` and ``'y'``.

  Returns:
    A dict with the keys ``'x'``, ``'y'``, ``'sr'``, ``'label'`` and ``'feature'``.
  """
  features = {}
  features['x'] = _int64_feature(shape[0])
  features['y'] = _int64_feature(shape[1])
  features['sr'] = _int64_feature(sr)
  features['label'] = _int64_feature(label)

  # The clip itself is serialized first and then stored as a bytes feature.
  encoded_clip = serialize_array(sound_clip)
  features['feature'] = _bytes_feature(encoded_clip)
  return features

In [None]:
def gen_tfr(csv_path, outname, max_files, sample_rate=22050, clip_len=1323000):
  """Write the audio clips listed in a CSV file into sharded TFRecord files.

  The CSV rows are shuffled, cut into consecutive chunks of ``max_files`` rows,
  and every chunk is written to ``<tfr_dir>/<i>of<splits>_<outname>.tfrecords``.
  A clip whose length differs from ``clip_len`` is reported and skipped; it
  still occupies its slot in the chunk, so a shard may hold fewer than
  ``max_files`` records.

  Args:
    csv_path: CSV file whose first column holds the audio file paths and whose
      second column holds the (integer) labels.
    outname: Subset name used in the shard file names and in log messages.
    max_files: Maximum number of CSV rows assigned to one shard (>= 1).
    sample_rate: Sampling rate passed to ``librosa.load``.
    clip_len: Required number of samples per clip. The default, 1323000,
      equals 60 s at the default rate of 22050 Hz.

  Raises:
    ValueError: If ``max_files`` is smaller than 1.
    RuntimeError: If the module-level ``tfr_dir`` has not been set.
  """
  if max_files < 1:
    raise ValueError("max_files must be at least 1, got {}".format(max_files))
  if tfr_dir is None:
    raise RuntimeError("tfr_dir is not set; run main() or assign the output directory first")

  print("Parsing {}".format(csv_path))

  df = pd.read_csv(csv_path)
  # Shuffle before writing so that later batches do not consist of a single label.
  df = df.sample(frac=1).reset_index(drop=True)

  total_rows = len(df)
  splits = -(-total_rows // max_files)  # ceiling division: number of shards needed

  print(f" Using {splits} shard(s) for {len(df)} files, with up to {max_files} samples per shard")
  file_counter = 0
  for i in tqdm.tqdm(range(splits)):
    # Join as a path so the shard lands inside tfr_dir whether or not the
    # configured directory ends with a separator.
    shard_name = "{}of{}_{}.tfrecords".format(i+1, splits, outname)
    filename = str(Path(tfr_dir) / shard_name)

    first = i * max_files
    last = min(first + max_files, total_rows)

    # The context manager closes the writer even if loading a clip fails.
    with tf.io.TFRecordWriter(filename) as writer:
      for index in range(first, last):
        row = df.iloc[index]
        audio_path = row.iloc[0]  # explicit positional access (row[0] is deprecated)

        sound_clip, sr = librosa.load(audio_path, sr=sample_rate)
        sound_clip = np.expand_dims(sound_clip, axis=1)
        if sound_clip.shape[0] != clip_len:  # skip any samples that are not of appropriate shape
          print("{} was not of fit shape: {}".format(audio_path, sound_clip.shape))
          continue

        data = gen_example(sound_clip, sr, row.iloc[1], sound_clip.shape)
        out = tf.train.Example(features=tf.train.Features(feature=data))
        writer.write(out.SerializeToString())
        file_counter += 1

  print("Parsed {} files for {}".format(str(file_counter), outname))


In [None]:
def gen_monitoring_sample(csv_path, outname, num_samples, use_all, sample_rate=22050, clip_len=1323000):
  """Store a subset of a CSV-listed audio set as NumPy arrays for monitoring.

  Writes ``<tfr_dir>/<outname>_x_monitor.npy`` (float32, shape
  ``(n, clip_len, 1)``) and ``<tfr_dir>/<outname>_y_monitor.npy`` (int8 labels).
  Clips whose length differs from ``clip_len`` are reported and left out, the
  same way ``gen_tfr`` treats them.

  Args:
    csv_path: CSV file with a ``label`` column; the first remaining column
      holds the audio file paths.
    outname: Subset name used as the prefix of the saved file names.
    num_samples: Number of rows to draw (stratified by label) when ``use_all``
      is falsy. If it is not smaller than the number of rows, all rows are used.
    use_all: If truthy, use every row instead of drawing a subset.
    sample_rate: Sampling rate passed to ``librosa.load``.
    clip_len: Required number of samples per clip. The default, 1323000,
      equals 60 s at the default rate of 22050 Hz.

  Raises:
    RuntimeError: If the module-level ``tfr_dir`` has not been set.
  """
  if tfr_dir is None:
    raise RuntimeError("tfr_dir is not set; run main() or assign the output directory first")

  print("Parsing {} to enable logging some statistics".format(csv_path))

  df = pd.read_csv(csv_path)
  labels = df.pop('label').to_frame()

  if use_all or num_samples >= len(df):
    # Nothing to subsample: either requested explicitly, or the request
    # covers the whole set (a split fraction >= 1 would be rejected).
    df_X = df
    df_y = labels
  else:
    # Import the submodule explicitly; a bare `import sklearn` does not
    # guarantee that `sklearn.model_selection` is loaded.
    from sklearn.model_selection import train_test_split

    test_size = (100/len(df)) * num_samples
    print(test_size)
    # An integer test_size selects exactly num_samples rows and avoids the
    # rounding of a percent -> fraction round trip.
    _, df_X, _, df_y = train_test_split(df, labels, stratify=labels, test_size=int(num_samples))

  x = np.empty(shape=(len(df_X), clip_len, 1), dtype=np.float32)
  kept_labels = []

  for i in range(len(df_X)):
      audio_path = df_X.iloc[i].iloc[0]  # explicit positional access

      sound_clip, _ = librosa.load(audio_path, sr=sample_rate)
      sound_clip = np.expand_dims(sound_clip, axis=1)
      if sound_clip.shape[0] != clip_len:  # skip any samples that are not of appropriate shape
        print("{} was not of fit shape: {}".format(audio_path, sound_clip.shape))
        continue

      # Fill the next free slot so skipped clips leave no gaps.
      x[len(kept_labels)] = sound_clip
      kept_labels.append(df_y.iloc[i].iloc[0])

  x = x[:len(kept_labels)]  # drop the unused (uninitialized) tail
  y = np.asarray(kept_labels, dtype="int8")
  print("Now saving to dir")
  # Join as paths so the files land inside tfr_dir whether or not the
  # configured directory ends with a separator.
  np.save(str(Path(tfr_dir) / (outname + "_y_monitor.npy")), y)
  np.save(str(Path(tfr_dir) / (outname + "_x_monitor.npy")), x)

In [None]:
def main(args):
  """Generate the TFRecord shards (and optional monitoring arrays) for all subsets.

  Sets the module-level ``tfr_dir``, creates that directory, then runs
  ``gen_tfr`` for the test, validation and train lists (in that order). If
  ``args['use_monitoring']`` is truthy, ``gen_monitoring_sample`` is run for
  the same three lists afterwards.

  Args:
    args: Mapping with the keys ``output_path``, ``test_list``,
      ``validation_list``, ``train_list``, ``test_max``, ``valid_max``,
      ``train_max``, ``use_monitoring``, ``use_all_samples``,
      ``test_monitor``, ``valid_monitor`` and ``train_monitor``.
  """
  global tfr_dir
  import os  # local import: only needed for the path normalization below

  # Guarantee a trailing separator. Code that prefixes file names with
  # tfr_dir would otherwise write next to the directory, not into it.
  tfr_dir = os.path.join(args['output_path'], '')
  Path(tfr_dir).mkdir(parents=True, exist_ok=True)

  # (outname, CSV key, shard-size key, monitoring-size key) per subset.
  subsets = (
      ("test", 'test_list', 'test_max', 'test_monitor'),
      ("valid", 'validation_list', 'valid_max', 'valid_monitor'),
      ("train", 'train_list', 'train_max', 'train_monitor'),
  )

  for outname, list_key, max_key, _ in subsets:
    gen_tfr(csv_path=args[list_key], outname=outname, max_files=args[max_key])

  if args['use_monitoring']:
    for outname, list_key, _, monitor_key in subsets:
      gen_monitoring_sample(csv_path=args[list_key], outname=outname,
                            num_samples=args[monitor_key], use_all=args['use_all_samples'])

In [None]:
# Command-line interface. Every option has a default, so the notebook also
# runs without any arguments.
parser = argparse.ArgumentParser(description='')

# --- Input / output locations ---
parser.add_argument(
    '--output_path',
    dest='output_path',
    default='/content/drive/MyDrive/custaudio/tfr_dir/',
    help='Base path for the dataset',
)
parser.add_argument(
    '--train_list',
    dest='train_list',
    default='/content/drive/MyDrive/custaudio/custom_train.csv',
    help="CSV file that stores the training files",
)
parser.add_argument(
    '--validation_list',
    dest='validation_list',
    default='/content/drive/MyDrive/custaudio/custom_valid.csv',
    help="CSV file that stores the validation files",
)
parser.add_argument(
    '--test_list',
    dest='test_list',
    default='/content/drive/MyDrive/custaudio/custom_test.csv',
    help="CSV file that stores the test files",
)

# --- Shard sizes ---
parser.add_argument(
    '--files_per_train_shard',
    dest='train_max',
    type=int,
    default=50,
    help='Number of files for the TFRecord file',
)
parser.add_argument(
    '--files_per_test_shard',
    dest='test_max',
    type=int,
    default=50,
    help='Number of files for the TFRecord file',
)
parser.add_argument(
    '--files_per_valid_shard',
    dest='valid_max',
    type=int,
    default=50,
    help='Number of files for the TFRecord file',
)

# --- Monitoring samples (the two switches are integer flags) ---
parser.add_argument(
    '--use_monitoring_samples',
    dest='use_monitoring',
    type=int,
    default=1,
    help='Whether to create an additional numpy array that contains samples that can be used to generate live statistics during training',
)
parser.add_argument(
    '--use_all_samples',
    dest='use_all_samples',
    type=int,
    default=0,
    help='For small datasets use all available subset samples to generate monitoring data',
)
parser.add_argument(
    '--num_train_monitor',
    dest='train_monitor',
    type=int,
    default=25,
    help='Number of train samples to store in a numpy array to observe live training statistic on',
)
parser.add_argument(
    '--num_test_monitor',
    dest='test_monitor',
    type=int,
    default=25,
    help='Number of test samples to store in a numpy array to observe live training statistics on',
)
parser.add_argument(
    '--num_valid_monitor',
    dest='valid_monitor',
    type=int,
    default=25,
    help='Number of valid samples to store in a numpy array to observe live training statistics on',
)

# parse_known_args tolerates options that are not declared above; they are
# collected in `unknown` instead of causing an error.
args, unknown = parser.parse_known_args()
# From here on the options are used as a plain dict.
args = vars(args)

In [None]:
# Entry point: run the whole generation pipeline with the parsed argument
# dict `args`.
if __name__ == '__main__':
  main(args)

  0%|          | 0/1 [00:00<?, ?it/s]

Parsing /content/drive/MyDrive/custaudio/custom_test.csv
 Using 1 shard(s) for 50 files, with up to 50 samples per shard


100%|██████████| 1/1 [00:14<00:00, 14.18s/it]


Parsed 50 files for test
Parsing /content/drive/MyDrive/custaudio/custom_valid.csv


  0%|          | 0/1 [00:00<?, ?it/s]

 Using 1 shard(s) for 50 files, with up to 50 samples per shard


100%|██████████| 1/1 [00:13<00:00, 13.67s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Parsed 50 files for valid
Parsing /content/drive/MyDrive/custaudio/custom_train.csv
 Using 1 shard(s) for 50 files, with up to 50 samples per shard


100%|██████████| 1/1 [00:13<00:00, 13.70s/it]


Parsed 50 files for train
Parsing /content/drive/MyDrive/custaudio/custom_test.csv to enable logging some statistics
50.0
Now saving to dir
Parsing /content/drive/MyDrive/custaudio/custom_valid.csv to enable logging some statistics
50.0
Now saving to dir
Parsing /content/drive/MyDrive/custaudio/custom_train.csv to enable logging some statistics
50.0
Now saving to dir
