In [15]:
from random import random

from hvo_sequence.io_helpers import load_HVO_Sequence_from_file

In [16]:
grouping_types = ["AllAvailableStreams"]

In [17]:
import os
taptam_set = "taptamdrum_hvo_output_both_is_or"
root_dir = f"data/triple_streams/{taptam_set}"

# search and compile all folders that end in _{grouping_type} in the root_dir
subfolder_dict = {grouping_type: [] for grouping_type in grouping_types}

# subfolders will be formatted as "data/triple_streams/taptam_merged_hvo/***_hvo"
for grouping_type in grouping_types:
    subfolder = os.path.join(root_dir, f"*")
    subfolder_dict[grouping_type] = [f for f in os.listdir(root_dir) if 'DS_Store' not in f]
    subfolder_dict[grouping_type] = subfolder_dict[grouping_type] #[:10]  # Limit to first 10 for testing
print(subfolder_dict)


{'AllAvailableStreams': ['203_p1_simple', '345_p1_simple', '281_p1_complex', '173_p1_simple', '109_p1_simple', '279_p1_simple', '34_p1_rep1', '90_p2_complex', '17_p2_rep0', '10_p2_rep2', '24_p1_rep1', '83_p1_complex', '7_p4_rep0', '175_p1_complex', '14_p4_rep0', '13_p4_rep2', '308_p1_complex', '56_p1_rep1', '4_p2_rep0', '3_p2_rep2', '46_p1_rep1', '142_p1_complex', '104_p2_complex', '76_p1_simple', '48_p1_rep2', '117_p1_complex', '58_p1_rep2', '120_p1_complex', '133_p2_complex', '167_p1_simple', '20_p3_rep1', '249_p1_complex', '138_p2_simple', '217_p1_simple', '285_p1_simple', '2_p3_rep2', '5_p3_rep0', '196_p1_complex', '12_p1_rep0', '15_p1_rep2', '21_p2_rep1', '146_p1_simple', '236_p1_simple', '119_p2_simple', '237_p1_complex', '222_p1_simple', '152_p1_simple', '200_p1_complex', '73_p2_complex', '323_p1_complex', '1_p1_rep0', '6_p1_rep2', '70_p1_rep0', '67_p1_rep2', '60_p1_rep0', '169_p1_complex', '22_p4_rep1', '314_p1_complex', '128_p1_simple', '255_p1_complex', '262_p1_complex', '11_

In [18]:
# load all files in the dictionary above, use tqdm to show progress
# also the key in the subdictionary is file name without the _{grouping_type}.hvo

# i want the dict to look like this for eg.
# loaded_hvos = {"velocity": {"1_funk-groove1_138_beat_4-4": "filenames": [], "hvo_sequences": []},

import tqdm
from hvo_sequence.io_helpers import load_HVO_Sequence_from_file
loaded_hvos = {grouping_type: {} for grouping_type in grouping_types}

for grouping_type, subfolders in subfolder_dict.items():
    for subfolder in subfolders:
        # remove the _{grouping_type} from the folder name
        subfolder_name = subfolder.replace(f"_{grouping_type}", "")
        loaded_hvos[grouping_type].update({subfolder_name: {
            "filenames": [],
            "hvo_sequences": [],
            "compiled_single_hvos": [],
            "split_n_bar_phrases": None
        }})

# add filenames and hvo_sequences to the dict
for grouping_type, subfolders in subfolder_dict.items():
    print(f"Loading {grouping_type}")
    for subfolder in tqdm.tqdm(subfolders):
        # remove the _{grouping_type} from the folder name
        subfolder_name = subfolder.replace(f"_{grouping_type}", "")
        hvo_files = [f for f in os.listdir(os.path.join(root_dir, subfolder)) if f.endswith('.hvo')]

        for hvo_file in hvo_files:
            hvo_seq = load_HVO_Sequence_from_file(os.path.join(root_dir, subfolder, hvo_file))
            loaded_hvos[grouping_type][subfolder_name]["filenames"].append(hvo_file)
            loaded_hvos[grouping_type][subfolder_name]["hvo_sequences"].append(hvo_seq)



Loading AllAvailableStreams


100%|██████████| 1116/1116 [00:02<00:00, 381.26it/s]


In [19]:
# Dictionary structure of loaded_hvos

def get_loaded_hvos_text_description(loaded_hvos, title="Lack MIDI"):
    text = f'{title}\n\n'
    text += "Dictionary Structure:\n"
    
    for grouping_type, hvo_dict in loaded_hvos.items():
        text += f"{grouping_type}:\n"
        cnt = 0
        for subfolder_name, subfolder_data in hvo_dict.items():
            text += (f"    |-------> {subfolder_name} ({len(subfolder_data['filenames'])} files) \n")
            text += (f"    |          |-------> {subfolder_name} \n")
            text += (f"    |                     |--> 'filenames:' \n")

            for i, filename in enumerate(subfolder_data["filenames"]):
               text += (f"    |                           |-------> {filename} \n")

            text +=(f"    |                     |--> 'hvo_sequences:'\n")
            for i, hvo_seq in enumerate(subfolder_data["hvo_sequences"]):
                text +=(f"    |                           |-------> {hvo_seq.hvo.shape[0]} steps, i.e. {hvo_seq.hvo.shape[0] // 16} bars.\n")

            text += (f"    |                     |--> 'compiled_single_hvos:'\n")
            text += (f"    |                               |-------> {loaded_hvos[grouping_type][subfolder_name]['compiled_single_hvos']}\n")

            text += (f"    |                     |--> 'split_n_bar_phrases:'\n")
            text += (f"    |                               |-------> {loaded_hvos[grouping_type][subfolder_name]['split_n_bar_phrases']}\n")

            text += (f"    |-------> ...\n")
            text += ("\n")

            cnt+= 1
            if cnt > 5:  # Limit to first 5 subfolders for readability
                break
    
    return text

print(get_loaded_hvos_text_description(loaded_hvos))

Lack MIDI

Dictionary Structure:
AllAvailableStreams:
    |-------> 203_p1_simple (4 files) 
    |          |-------> 203_p1_simple 
    |                     |--> 'filenames:' 
    |                           |-------> 203_p1_simple_both.hvo 
    |                           |-------> 203_p1_simple_flattened.hvo 
    |                           |-------> 203_p1_simple_left.hvo 
    |                           |-------> 203_p1_simple_right.hvo 
    |                     |--> 'hvo_sequences:'
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                     |--> 'compiled_single_hvos:'
    |                               |-------> []
    |                     |--> 'split_n_bar_phrases:'
    |                               |-------> None
    |-------> ...

    |-------> 34

In [20]:
# adjust all loaded HVO sequences to the same length
for grouping_type, hvo_dict in loaded_hvos.items():
    for subfolder_name, subfolder_data in hvo_dict.items():
        # Find the maximum length of HVO sequences in this subfolder
        max_length = max(hvo_seq.hvo.shape[0] for hvo_seq in subfolder_data["hvo_sequences"])

        # Adjust each HVO sequence to the maximum length
        for i, hvo_seq in enumerate(subfolder_data["hvo_sequences"]):
            hvo_seq.adjust_length(max(max_length, 32))

# reprint
print(get_loaded_hvos_text_description(loaded_hvos))


Lack MIDI

Dictionary Structure:
AllAvailableStreams:
    |-------> 203_p1_simple (4 files) 
    |          |-------> 203_p1_simple 
    |                     |--> 'filenames:' 
    |                           |-------> 203_p1_simple_both.hvo 
    |                           |-------> 203_p1_simple_flattened.hvo 
    |                           |-------> 203_p1_simple_left.hvo 
    |                           |-------> 203_p1_simple_right.hvo 
    |                     |--> 'hvo_sequences:'
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                     |--> 'compiled_single_hvos:'
    |                               |-------> []
    |                     |--> 'split_n_bar_phrases:'
    |                               |-------> None
    |-------> ...

    |-------> 34

In [21]:
# Get a random grouping type and a random file name from that grouping type
import random
from hvo_sequence.hvo_seq import HVO_Sequence

def compile_four_hvo_streams_into_single_hvo(seperate_hvo_seqs_, filenames_, participant_number):
    voice_mapping = {
    'stream_0': [36],
    'stream_1': [38],
    'stream_2': [42],
    'stream_3': [46]
    }

    time_sigs = seperate_hvo_seqs_[-1].time_signatures
    tempos = seperate_hvo_seqs_[-1].tempos

    # max length of all HVO sequences

    single_compiled_hvo_seq = HVO_Sequence(
        beat_division_factors=[4],
        drum_mapping=voice_mapping
    )

    for time_sig in time_sigs:
        single_compiled_hvo_seq.add_time_signature(time_step=time_sig.time_step, numerator=time_sig.numerator, denominator=time_sig.denominator)
    for tempo in tempos:
        single_compiled_hvo_seq.add_tempo(time_step=tempo.time_step, qpm=tempo.qpm)

    max_length = max(hvo_seq.hvo.shape[0] for hvo_seq in seperate_hvo_seqs_)
    single_compiled_hvo_seq.adjust_length(max_length)

    metadata_ = {}
    metadata_.update(seperate_hvo_seqs_[-1].metadata)
    metadata_.update({"participant_number": participant_number.split("_")[0]})
    metadata_.update({"attempt": participant_number.split("_attempt")[-1]})

    metadata_.update({f'stream_{ix}': filenames_[ix].split("_")[-1].replace(".hvo", "") for ix in range(len(filenames_))})
    n_streams = single_compiled_hvo_seq.hvo.shape[-1] // 3
    for ix, hvo_stream_seq_ in enumerate(seperate_hvo_seqs_):
        hvo_stream_seq_type = filenames_[ix].replace(".hvo", "")
        metadata_[f'stream_{ix}'] = hvo_stream_seq_type
        single_compiled_hvo_seq.hvo[:, ix] = hvo_stream_seq_.hvo[:, 0]
        single_compiled_hvo_seq.hvo[:, ix+n_streams] = hvo_stream_seq_.hvo[:, hvo_stream_seq_.hvo.shape[1] // 3]
        single_compiled_hvo_seq.hvo[:, ix+n_streams*2] = hvo_stream_seq_.hvo[:, 2 * hvo_stream_seq_.hvo.shape[1] // 3]

    single_compiled_hvo_seq.metadata.update(metadata_)

    return single_compiled_hvo_seq

import itertools

def get_combinations_of_four_hvo_streams(seperate_hvo_seqs_, filenames_):
    seperate_hvo_seqs_four_streams = []
    filenames_four_streams = []
    # get all combinations of four HVO sequences (without repetition)
    for combination in itertools.combinations(zip(seperate_hvo_seqs_, filenames_), 4):
        hvo_streams_combination, filenames_combination = zip(*combination)
        if len(hvo_streams_combination) == 4:
            seperate_hvo_seqs_four_streams.append(list(hvo_streams_combination))
            filenames_four_streams.append(list(filenames_combination))
    return seperate_hvo_seqs_four_streams, filenames_four_streams


In [22]:


# populate the compiled_single_hvo for each grouping type (use tqdm to show progress)
#
for grouping_type, hvo_dict in loaded_hvos.items():
    for subfolder_name, subfolder_data in tqdm.tqdm(hvo_dict.items()):
        filenames = subfolder_data["filenames"]
        seperate_hvo_seqs = subfolder_data["hvo_sequences"]
        grouped_hvos, grouped_filenames = get_combinations_of_four_hvo_streams(seperate_hvo_seqs, filenames)
        loaded_hvos[grouping_type][subfolder_name]["compiled_single_hvos"] = []
        for i in range(len(grouped_hvos)):
            seperate_hvo_seqs_ = grouped_hvos[i]
            filenames_ = grouped_filenames[i]
            # compile the HVO sequences into a single HVO sequence
            compiled_single_hvo = compile_four_hvo_streams_into_single_hvo(seperate_hvo_seqs_, filenames_, subfolder_name)
            loaded_hvos[grouping_type][subfolder_name]["compiled_single_hvos"].append(compiled_single_hvo)

# reprint



print(get_loaded_hvos_text_description(loaded_hvos))


100%|██████████| 1116/1116 [00:00<00:00, 7985.28it/s]

Lack MIDI

Dictionary Structure:
AllAvailableStreams:
    |-------> 203_p1_simple (4 files) 
    |          |-------> 203_p1_simple 
    |                     |--> 'filenames:' 
    |                           |-------> 203_p1_simple_both.hvo 
    |                           |-------> 203_p1_simple_flattened.hvo 
    |                           |-------> 203_p1_simple_left.hvo 
    |                           |-------> 203_p1_simple_right.hvo 
    |                     |--> 'hvo_sequences:'
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                     |--> 'compiled_single_hvos:'
    |                               |-------> [<hvo_sequence.hvo_seq.HVO_Sequence object at 0x31516d070>]
    |                     |--> 'split_n_bar_phrases:'
    |                        




In [23]:
# loaded_hvos['AllAvailableStreams']['3ca0f5bf8a30195c5e2f62f10a894c1d']['compiled_single_hvos'][0].to_html_plot(
#     filename="hvo_1.html",
#     save_figure=False,
#     show_figure=True)
# loaded_hvos['AllAvailableStreams']['3ca0f5bf8a30195c5e2f62f10a894c1d']['compiled_single_hvos'][-1].to_html_plot(
#     filename="hvo_1.html",
#     save_figure=False,
#     show_figure=True)

In [24]:
# Split in two bar phrases, with a hop size of 16.

def split_into_n_bar_phrases(hvo_seq, n_bars=2, hop_size=16):
    """
    Split the HVO sequence into n-bar phrases with a hop size.
    Returns a list of HVO sequences.
    """
    n_steps_per_bar = hvo_seq.grid_maker.n_steps_per_beat * 4  # Assuming 4/4 time signature
    n_steps_per_phrase = n_steps_per_bar * n_bars

    phrases = []

    for start in range(0, hvo_seq.hvo.shape[0] - n_steps_per_phrase + 1, hop_size):
        end = start + n_steps_per_phrase
        phrase_hvo = hvo_seq.copy_empty()
        phrase_hvo.adjust_length(n_steps_per_phrase)
        phrase_hvo.hvo[:n_steps_per_phrase, :] = hvo_seq.hvo[start:end, :]
        phrase_hvo.metadata.update({'start_bar': start // n_steps_per_bar})
        phrases.append(phrase_hvo)

    return phrases

for grouping_type, hvo_dict in loaded_hvos.items():
    for subfolder_name, subfolder_data in tqdm.tqdm(hvo_dict.items()):
        loaded_hvos[grouping_type][subfolder_name]["split_n_bar_phrases"] = []
        compiled_single_hvos = subfolder_data["compiled_single_hvos"]
        for compiled_single_hvo in compiled_single_hvos:
            split_n_bar_phrases = split_into_n_bar_phrases(compiled_single_hvo, n_bars=2, hop_size=16)
            loaded_hvos[grouping_type][subfolder_name]["split_n_bar_phrases"].extend(split_n_bar_phrases)

# reprint

import pickle, bz2
# Save the loaded_hvos dictionary to a compressed file
os.makedirs("data/triple_streams/split_2bars", exist_ok=True)
with bz2.BZ2File(f"data/triple_streams/split_2bars/{taptam_set}.pkl.bz2", "wb") as f:
    pickle.dump(loaded_hvos, f)




100%|██████████| 1116/1116 [00:00<00:00, 9550.54it/s]


In [25]:
# save the description text to a file
with open(f"data/triple_streams/split_2bars/{taptam_set}_description.txt", "w") as f:
    f.write(get_loaded_hvos_text_description(loaded_hvos))


print(get_loaded_hvos_text_description(loaded_hvos))

Lack MIDI

Dictionary Structure:
AllAvailableStreams:
    |-------> 203_p1_simple (4 files) 
    |          |-------> 203_p1_simple 
    |                     |--> 'filenames:' 
    |                           |-------> 203_p1_simple_both.hvo 
    |                           |-------> 203_p1_simple_flattened.hvo 
    |                           |-------> 203_p1_simple_left.hvo 
    |                           |-------> 203_p1_simple_right.hvo 
    |                     |--> 'hvo_sequences:'
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                     |--> 'compiled_single_hvos:'
    |                               |-------> [<hvo_sequence.hvo_seq.HVO_Sequence object at 0x31516d070>]
    |                     |--> 'split_n_bar_phrases:'
    |                        

In [26]:
import pickle, bz2
import os
from hvo_sequence.hvo_seq import HVO_Sequence
# Load the loaded_hvos dictionary from a compressed file
def load_loaded_hvos_from_file(file_path):
    with bz2.BZ2File(file_path, "rb") as f:
        loaded_hvos = pickle.load(f)
    return loaded_hvos

loaded_hvos = load_loaded_hvos_from_file(f"data/triple_streams/split_2bars/{taptam_set}.pkl.bz2")
print(get_loaded_hvos_text_description(loaded_hvos))


Lack MIDI

Dictionary Structure:
AllAvailableStreams:
    |-------> 203_p1_simple (4 files) 
    |          |-------> 203_p1_simple 
    |                     |--> 'filenames:' 
    |                           |-------> 203_p1_simple_both.hvo 
    |                           |-------> 203_p1_simple_flattened.hvo 
    |                           |-------> 203_p1_simple_left.hvo 
    |                           |-------> 203_p1_simple_right.hvo 
    |                     |--> 'hvo_sequences:'
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                           |-------> 32 steps, i.e. 2 bars.
    |                     |--> 'compiled_single_hvos:'
    |                               |-------> [<hvo_sequence.hvo_seq.HVO_Sequence object at 0x31653fb20>]
    |                     |--> 'split_n_bar_phrases:'
    |                        

In [29]:
def get_random_set(loaded_hvos):
    """
    Get a random grouping type and a random file name from that grouping type.
    Returns the filenames and HVO sequences.
    """
    random_grouping_type = random.choice(list(loaded_hvos.keys()))
    random_file_name = random.choice(list(loaded_hvos[random_grouping_type].keys()))
    print(loaded_hvos[random_grouping_type][random_file_name]['compiled_single_hvos'][-1].metadata)
    return loaded_hvos[random_grouping_type][random_file_name]

streams_set_dict = get_random_set(loaded_hvos)

from bokeh.io import output_notebook, show

streams_set_dict['split_n_bar_phrases'][0].to_html_plot(
    filename="hvo_1.html",
    save_figure=False,
    show_figure=True)



{'Style': 'rock-halftime', 'Tempo': np.float64(140.0), 'TestNumber': '004', 'Repetition': 'Repetition 0', 'participant_number': '4', 'attempt': '4_p2_rep0', 'stream_0': '4_p2_rep0_flattened', 'stream_1': '4_p2_rep0_right', 'stream_2': '4_p2_rep0_left', 'stream_3': '4_p2_rep0_both'}




In [28]:
streams_set_dict['split_n_bar_phrases'][0].to_html_plot(
    filename="hvo_1.html",
    save_figure=False,
    show_figure=True)

