In [None]:
import librosa
import logging
import importlib
import src.preprocess
import pandas as pd
import tensorflow_io as tfio
import tensorflow as tf
import numpy as np
importlib.reload(src.preprocess)

# Root location of the dataset in Google Cloud Storage.
GS_DATA = "gs://bird-clef-kimmo/data"
# Folder holding the training clips; add_audio reads
# <this folder>/<primary_label>/<filename> for every metadata row.
TRAIN_SHORT_AUDIO_DATA = f"{GS_DATA}/train_short_audio"

# Configure the root logger at DEBUG level; read_file logs every file it
# reads at that level.
logging.basicConfig(level=logging.DEBUG)

# split_audio cuts each recording into pieces of SR * SPLIT_SECS samples.
# Presumably SR is the sample rate in Hz and SPLIT_SECS the segment duration
# in seconds -- TODO confirm that the audio files really are sampled at SR.
SR = 32000
SPLIT_SECS = 5

In [None]:
# List the contents of the data location. IPython substitutes the Python
# variable GS_DATA into the shell command before running it.
!gsutil ls $GS_DATA

In [None]:
# Metadata table describing the training recordings (one row per file).
TRAIN_METADATA_CSV = f"{GS_DATA}/train_metadata.csv"
# Smaller local copy for quick experiments:
# TRAIN_METADATA_CSV = f"../data/train_metadata_small.csv"

# Columns read from the CSV; add_audio needs "primary_label" and "filename".
COLUMNS = [
    "primary_label",
    "secondary_labels",
    "type",
    "latitude",
    "longitude",
    "scientific_name",
    "common_name",
    "date",
    "filename",
    "rating",
    "time",
]

# batch_size=1 keeps one CSV row per element; add_audio squeezes the resulting
# size-1 axis away from the file URL before reading the file.
# NOTE(review): with its default arguments make_csv_dataset shuffles the rows
# and repeats indefinitely, so iterate this dataset through take() only --
# confirm against the TensorFlow documentation.
train_metadata_ds = tf.data.experimental.make_csv_dataset(
    TRAIN_METADATA_CSV,
    batch_size=1,
    select_columns=COLUMNS,
)

In [None]:
def read_file(url):
    """Load the audio file at `url` and return its samples as a tensor.

    The file is opened with tfio.audio.AudioIOTensor, materialised with
    to_tensor(), and every size-1 axis is removed with tf.squeeze.

    Args:
        url: location of the audio file; add_audio passes a scalar string
            tensor through tf.py_function.

    Returns:
        The decoded audio with all size-1 axes squeezed out. Presumably this
        is a 1-D tensor of samples for mono recordings -- TODO confirm the
        layout returned by AudioIOTensor.
    """
    logging.debug(f"Reading file: {url}")
    decoded = tfio.audio.AudioIOTensor(url).to_tensor()
    return tf.squeeze(decoded)

def add_audio(row):
    """Attach the file URL and the decoded audio to one metadata row.

    Args:
        row: dict of column tensors; must contain "primary_label" and
            "filename".

    Returns:
        A new dict with every entry of `row` plus:
          * "file_url": TRAIN_SHORT_AUDIO_DATA/<primary_label>/<filename>
          * "audio": the output of read_file, obtained via tf.py_function.
    """
    label_dir = TRAIN_SHORT_AUDIO_DATA + "/" + row["primary_label"]
    file_url = label_dir + "/" + row["filename"]

    # Tout is deliberately a one-element list, so py_function returns a list.
    # Presumably tf.data packs that list into a tensor with a leading axis of
    # size 1, which is what split_audio asserts on -- keep the list form.
    audio = tf.py_function(read_file, [tf.squeeze(file_url)], [tf.float32])

    augmented = dict(row)
    augmented["file_url"] = file_url
    augmented["audio"] = audio
    return augmented

# Smoke test: run a few metadata rows through add_audio (each one triggers a
# file read in read_file) and print the resulting elements.
for row in train_metadata_ds.map(add_audio).take(4):
    print(row)


In [None]:
def split_audio(audio):
    """Cut a recording into consecutive, non-overlapping fixed-length segments.

    Meant to be called eagerly (split_to_segments invokes it through
    tf.py_function), so the shape of `audio` is fully known here.

    Args:
        audio: tensor whose first axis has size 1 and whose last axis holds
            the samples (asserted below).

    Returns:
        Tensor of shape (n_splits, SR * SPLIT_SECS), where n_splits is the
        number of whole segments that fit into the recording. Trailing samples
        that do not fill a whole segment are discarded. A recording shorter
        than one segment yields a tensor of shape (0, SR * SPLIT_SECS), so the
        result has the same rank whatever the input length.
    """
    assert audio.shape[0] == 1

    samples = tf.squeeze(audio, axis=0)

    segment_len = SR * SPLIT_SECS
    n_splits = audio.shape[-1] // segment_len

    # One truncation plus one reshape replaces a Python loop that sliced the
    # signal once per segment; the values and their order are unchanged.
    usable = samples[: n_splits * segment_len]
    return tf.reshape(usable, (n_splits, segment_len))

def split_to_segments(rows):
    """Expand one element into a dataset with one element per audio segment.

    Intended for Dataset.flat_map: the "audio" entry of `rows` is cut into
    segments by split_audio (run through tf.py_function), and each segment is
    emitted together with a copy of every entry of `rows`.

    Args:
        rows: dict of tensors containing at least "audio".

    Returns:
        A tf.data.Dataset whose elements are dicts holding all entries of
        `rows` plus "segment".
    """
    segments = tf.py_function(split_audio, [rows["audio"]], tf.float32)

    per_segment = tf.data.Dataset.from_tensor_slices({"segment": segments})
    # Endless supply of the same metadata; zip stops as soon as the segment
    # dataset is exhausted, so each segment gets exactly one copy.
    metadata = tf.data.Dataset.from_tensors(rows).repeat()

    paired = tf.data.Dataset.zip((metadata, per_segment))
    return paired.map(lambda meta, seg: {**meta, **seg})
    

def drop_extra_keys(rows):
    """Return a shallow copy of `rows` without the "audio" and "filename" entries.

    The input mapping is left untouched.

    Raises:
        KeyError: if either "audio" or "filename" is missing from `rows`.
    """
    trimmed = rows.copy()
    del trimmed["audio"], trimmed["filename"]
    return trimmed

# Full pipeline: attach the audio to every metadata row, expand each row into
# one element per fixed-length segment, then drop the "audio" and "filename"
# entries from every element.
dataset = train_metadata_ds.map(add_audio).flat_map(split_to_segments).map(drop_extra_keys)

# Sanity check: print the first 15 segment-level elements.
for row in dataset.take(15):
    print(row)