In [None]:
import os
import numpy as np
import tensorflow as tf
import soundfile as sf
from temporalcontext import settings
from temporalcontext.functions import read_selmap, read_folds_info, \
    load_annotations, load_audio_as_segments, segments2specgrams, \
    LSTMData, song_section_selections, non_song_section_selections, \
    get_testing_cnn_model, cnn_predict

## Create fold-specific data sets

In [None]:
# Raw recordings and their annotation (selection table) files live in
# separate sub-directories of the same raw data root
raw_root = settings.raw_data_root
audio_root = os.path.join(raw_root, settings.raw_audio_dir)
seltab_root = os.path.join(raw_root, settings.raw_annot_dir)

# Listing of audio files with their annotation files, and the per-fold file
# assignments; both are read from fixed-name files under the raw data root
selmap = read_selmap(os.path.join(raw_root, 'selmap.csv'))
fold_file_idxs = read_folds_info(os.path.join(raw_root, 'folds_info.txt'))

def _process_section(cnn_model, audio_path, output_file, seg_adv,
                     sec_start, sec_end,
                     sel_starts=None, sel_ends=None, selections=None):
    """Run the CNN over one section of an audio file and save the results.

    The section [sec_start, sec_end] of `audio_path` is split into segments of
    `settings.segment_length` advancing by `seg_adv`, converted to
    spectrograms, scored with `cnn_model` and written to `output_file` via
    `LSTMData.write` (the parent directory is created if missing).

    :param cnn_model: Model object accepted by `cnn_predict`.
    :param audio_path: Full path of the audio file to read from.
    :param output_file: Full path of the file to write.
    :param seg_adv: Segment advance passed to `load_audio_as_segments`.
    :param sec_start: Start of the section within the audio file.
    :param sec_end: End of the section within the audio file.
    :param sel_starts: 1-D array of start times of the annotations that belong
        to this section, or None for a section without annotations.
    :param sel_ends: 1-D array of end times matching `sel_starts`.
    :param selections: Passed through unchanged as the last argument of
        `LSTMData.write`.
    :return: Number of segments processed for the section.
    """

    segments, segment_starts, fs = load_audio_as_segments(
        audio_path,
        settings.segment_length, seg_adv,
        extents=[sec_start, sec_end])
    specgrams = segments2specgrams(segments, fs, settings.specgram_params,
                                   settings.bandwidth_extents)

    if sel_starts is None:
        # No annotations in this section; every segment is a negative
        y = np.zeros((specgrams.shape[0], ))
    else:
        # A segment is positive if at least one annotation is 'fully
        # contained' in it. Segment extents are laid out as a column and
        # compared against all annotations at once through broadcasting,
        # giving a (num segments x num annotations) boolean matrix. Unlike
        # stacking per-segment results, this also copes with zero segments.
        seg_s = np.asarray(segment_starts)[:, np.newaxis]
        seg_e = seg_s + settings.segment_length
        y = np.logical_and(seg_s <= sel_starts, seg_e >= sel_ends).any(axis=1)

    # Run CNN model on the section's segments (a trailing axis is added to
    # the spectrograms before prediction)
    cnn_scores, cnn_fcns = cnn_predict(cnn_model,
                                       np.expand_dims(specgrams, axis=3))

    # Save to file
    os.makedirs(os.path.split(output_file)[0], exist_ok=True)
    LSTMData.write(output_file,
                   sec_start, sec_end, cnn_scores, cnn_fcns, y,
                   selections)

    return specgrams.shape[0]


# Loop over each unique (segment advance, time steps) combination
# NOTE(review): outputs are written under a directory keyed by segment advance
# only. If two experiments share a segment advance but differ in time steps,
# they write to the same location - confirm that this is intended.
for seg_adv, time_step in {(ex['segment_advance'], ex['time_steps'])
                           for ex in settings.lstm_experiments}:

    # Per-fold counts of processed sections; column 0 = song sections,
    # column 1 = non-song sections
    tracks_summary = np.zeros((len(fold_file_idxs), 2), dtype=np.uint64)

    for fold_idx in range(len(fold_file_idxs)):

        fold_seg_root = os.path.join(settings.project_root, settings.folds_dir,
                                     'f{:02d}'.format(fold_idx + 1),
                                     'seg_adv_{:.2f}'.format(seg_adv))

        print('Fold {:02d}, segment advance={:.2f} s'.format(fold_idx + 1, seg_adv))

        # Reset Keras' global state before loading the next model
        tf.keras.backend.clear_session()

        # Load the CNN model
        model_path = os.path.join(fold_seg_root, settings.models_dir, 'baseCNN.h5')
        cnn_model = get_testing_cnn_model(model_path)

        # Output root
        output_root = os.path.join(fold_seg_root, settings.lstm_data_dir)

        # Run model on all data files and save the outputs
        for audio_file, pos_annot_file, neg_annot_file in selmap:

            # Per-file values shared by the song and non-song passes
            audio_path = os.path.join(audio_root, audio_file)
            file_stem = os.path.splitext(audio_file)[0]

            # Get audio_file duration
            file_dur = sf.info(audio_path).duration

            # ---------- Song sections ----------
            print('    {:s}: '.format(pos_annot_file), end='')

            selections = load_annotations(os.path.join(seltab_root, pos_annot_file))
            section_start_ends, song_start_end_sel_idxs = \
                song_section_selections(selections, file_dur,
                                        settings.max_call_separation,
                                        settings.min_calls_in_song,
                                        time_step * seg_adv)

            print('{:d} sections'.format(section_start_ends.shape[0]), end='')

            if section_start_ends.shape[0] > 0:

                tracks_summary[fold_idx, 0] += section_start_ends.shape[0]
                total_segs = 0

                for sec_idx, (sec_start, sec_end) in enumerate(section_start_ends):

                    # Indices (inclusive range) of the annotations that make
                    # up this song section
                    sec_seg_idxs = np.arange(song_start_end_sel_idxs[sec_idx, 0],
                                             song_start_end_sel_idxs[sec_idx, 1] + 1)

                    output_file = os.path.join(
                        output_root,
                        file_stem + settings.section_suffixes[0] + '{:02d}.npz'.format(sec_idx + 1))

                    # Positives are scored as 1 & negatives as 0
                    total_segs += _process_section(
                        cnn_model, audio_path, output_file, seg_adv,
                        sec_start, sec_end,
                        sel_starts=selections[sec_seg_idxs, 0],
                        sel_ends=selections[sec_seg_idxs, 1],
                        selections=selections)

                print(', {:d} segments'.format(total_segs))

            else:
                print()

            # ---------- Non-song sections ----------
            print('    {:s}: '.format(neg_annot_file), end='')

            selections = load_annotations(os.path.join(seltab_root, neg_annot_file))
            section_start_ends = non_song_section_selections(selections,
                                                             settings.min_non_song_duration)

            print('{:d} sections'.format(section_start_ends.shape[0]), end='')

            if section_start_ends.shape[0] > 0:

                tracks_summary[fold_idx, 1] += section_start_ends.shape[0]
                total_segs = 0

                for sec_idx, (sec_start, sec_end) in enumerate(section_start_ends):

                    output_file = os.path.join(
                        output_root,
                        file_stem + settings.section_suffixes[1] + '{:02d}.npz'.format(sec_idx + 1))

                    # Everything will be zero; no annotations are stored
                    total_segs += _process_section(
                        cnn_model, audio_path, output_file, seg_adv,
                        sec_start, sec_end)

                print(', {:d} segments'.format(total_segs))

            else:
                print()

        # Drop the reference to this fold's model before moving on
        del cnn_model

        print('------------------------------------------------------------')
    print('Summary for segment advance {:.2f} s'.format(seg_adv))
    for fold_idx in range(len(fold_file_idxs)):
        print('  Fold {:02d}: {:7d} song sections, {:7d} non-song sections'.format(
            fold_idx + 1, tracks_summary[fold_idx, 0], tracks_summary[fold_idx, 1]))
    print('============================================================')
    print()