## This file prepares data for recurrent and LSTM models

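Use this notebook when the current data is formatted for single-point prediction with CNNs: it regroups those per-observation tensors into fixed-length, zero-padded sequences (one recording per sequence) for recurrent models.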

In [None]:
# common math imports
import numpy as np
# common torch imports
import torch

In [None]:
# Notebook parameters. Every value defaults to None and must be replaced
# with a real path before the cells below can run.
# NOTE(review): presumably these are injected by a notebook parameterization
# tool or edited by hand -- confirm how this notebook is launched.
y_input = None  # path handed to torch.load for the label tensor
y_target = None  # path where the sequence-formatted labels are saved (torch.save)
X_input = None  # path handed to torch.load for the feature tensor
X_target = None  # path where the sequence-formatted features are saved (torch.save)
params_file = None  # path of the JSON file holding the 'tensorize' / 'recurrent_format' settings
file_names = None  # path of the text file read with np.loadtxt (source file name per observation)

In [None]:
# Read the preprocessing settings shared with the tensorize step.
import json

with open(params_file) as params_handle:
    params = json.load(params_handle)

# Spectrogram geometry comes from the 'tensorize' section.
tensorize_cfg = params['tensorize']
frame_duration = float(tensorize_cfg['frame_duration'])
num_seconds = float(tensorize_cfg['num_seconds'])
n_mels = int(tensorize_cfg['n_mels'])

# Number of observations grouped into one recurrent sequence.
seq_len = int(params['recurrent_format']['seq_len'])

# Frames per observation: clip length divided by frame length, truncated.
# NOTE(review): int() truncates, so float round-off in the division can
# lose one frame; confirm this matches how the input tensors were built.
frames_per_clip = num_seconds / frame_duration
window_size = int(frames_per_clip)

In [None]:
# Load the per-observation tensors and the file name of each observation.
fileX = X_input
fileY = y_input
# NOTE: torch.load unpickles its input; only point these paths at files
# produced by a trusted pipeline step.
X = torch.load(fileX)
y = torch.load(fileY)
# ndmin=1 keeps a one-line file list as a length-1 array. Without it
# np.loadtxt squeezes the result to a 0-d array and len() below fails.
file_list = np.loadtxt(file_names, delimiter=',', dtype=str, ndmin=1)

# Every observation needs exactly one label and one source file name.
assert X.shape[0] == y.shape[0], (
    f"X has {X.shape[0]} observations but y has {y.shape[0]}"
)
assert X.shape[0] == len(file_list), (
    f"X has {X.shape[0]} observations but file_list has {len(file_list)} entries"
)
print(file_list[:5])

['birdclef-2022/train_audio/afrsil1/XC125458.ogg'
 'birdclef-2022/train_audio/afrsil1/XC125458.ogg'
 'birdclef-2022/train_audio/afrsil1/XC125458.ogg'
 'birdclef-2022/train_audio/afrsil1/XC175522.ogg'
 'birdclef-2022/train_audio/afrsil1/XC175522.ogg']


In [None]:
# Regroup single observations into fixed-length, zero-padded sequences.
def _drop_singleton_axes(arr, num_obs, item_shape, label):
    """Return `arr` reshaped to (num_obs, *item_shape), ignoring size-1 axes.

    Unlike a bare squeeze(), the leading observation axis is always kept,
    so a dataset holding a single observation keeps its batch dimension.

    Raises:
        ValueError: if the leading axis is not `num_obs` long or the
            remaining non-singleton axes do not match `item_shape`.
    """
    found = tuple(d for d in arr.shape[1:] if d != 1)
    wanted = tuple(d for d in item_shape if d != 1)
    if arr.shape[:1] != (num_obs,) or found != wanted:
        raise ValueError(
            f"{label} has shape {arr.shape}, expected {num_obs} observations "
            f"of shape {tuple(item_shape)}"
        )
    return arr.reshape((num_obs,) + tuple(item_shape))


def _pack_sequences(X_arr, y_arr, names, seq_len):
    """Chunk each run of identical consecutive names into padded sequences.

    Observations are kept in their original order. A run of consecutive
    observations sharing one file name is cut into chunks of `seq_len`;
    the last chunk of a run is zero-padded (features and labels) so that
    no sequence ever mixes two recordings.

    Returns:
        (X_out, y_out): float16 array of shape
        (num_sequences, seq_len, *X_arr.shape[1:]) and uint16 array of
        shape (num_sequences, seq_len).
    """
    num_obs = names.shape[0]
    # Positions whose file name differs from the previous observation.
    breaks = (np.flatnonzero(names[1:] != names[:-1]) + 1).tolist()
    run_starts = [0] + breaks
    run_stops = breaks + [num_obs]
    # Half-open [lo, hi) index range of every output sequence.
    spans = [
        (lo, min(lo + seq_len, stop))
        for start, stop in zip(run_starts, run_stops)
        for lo in range(start, stop, seq_len)
    ]
    # Preallocating with zeros makes the padding implicit.
    X_out = np.zeros((len(spans), seq_len) + X_arr.shape[1:], dtype=np.float16)
    y_out = np.zeros((len(spans), seq_len), dtype=np.uint16)
    for row, (lo, hi) in enumerate(spans):
        X_out[row, :hi - lo] = X_arr[lo:hi]
        y_out[row, :hi - lo] = y_arr[lo:hi]
    return X_out, y_out


if seq_len < 1:
    raise ValueError(f"seq_len must be a positive integer, got {seq_len}")

names = np.atleast_1d(file_list)
if names.ndim != 1:
    raise ValueError(f"file_list must be one-dimensional, got shape {names.shape}")
num_files = names.shape[0]

X_arr = _drop_singleton_axes(X.numpy(), num_files, (n_mels, window_size), "X")
y_arr = _drop_singleton_axes(y.numpy(), num_files, (), "y")

X_fill, y_fill = _pack_sequences(X_arr, y_arr, names, seq_len)

assert len(X_fill) == len(y_fill), f"X_fill: {len(X_fill)}, y_fill: {len(y_fill)}"

# NOTE(review): the labels are stored as uint16; torch.from_numpy appears to
# accept that dtype only on recent PyTorch releases -- confirm the target
# environment before relying on it.
X_rnn = torch.from_numpy(X_fill)
y_rnn = torch.from_numpy(y_fill)

print(f"X_rnn shape: {X_rnn.shape}")
print(f"y_rnn shape: {y_rnn.shape}")

# save the tensors
torch.save(X_rnn, X_target)
torch.save(y_rnn, y_target)

X_rnn shape: torch.Size([22793, 10, 32, 50])
y_rnn shape: torch.Size([22793, 10])
