In [12]:
from random import random

from hvo_sequence.io_helpers import load_HVO_Sequence_from_file

In [13]:
grouping_types = ["fourvoices"]

In [14]:
import os
root_dir = "data/triple_streams/candombe_hvo"

# search and compile all folders that end in _{grouping_type} in the root_dir
subfolder_dict = {grouping_type: [] for grouping_type in grouping_types}

# subfolders will be formatted as "data/triple_streams/groove_hvo/***_hvo"
for grouping_type in grouping_types:
    subfolder = os.path.join(root_dir, f"*_{grouping_type}")
    subfolder_dict[grouping_type] = [f for f in os.listdir(root_dir) if f.endswith(f"_{grouping_type}")]
print(subfolder_dict)


{'fourvoices': ['UC_212_fourvoices', 'UC_231_fourvoices', 'UC_312_fourvoices', 'UC_242_fourvoices', 'UC_222_fourvoices', 'UC_221_fourvoices', 'UC_213_fourvoices', 'UC_321_fourvoices', 'UC_232_fourvoices', 'UC_311_fourvoices', 'UC_241_fourvoices', 'UC_211_fourvoices']}


In [15]:
# load all files in the dictionary above, use tqdm to show progress
# also the key in the subdictionary is file name without the _{grouping_type}.hvo

# i want the dict to look like this for eg.
# loaded_hvos = {"velocity": {"1_funk-groove1_138_beat_4-4": "filenames": [], "hvo_sequences": []},

import tqdm
loaded_hvos = {grouping_type: {} for grouping_type in grouping_types}

for grouping_type, subfolders in subfolder_dict.items():
    for subfolder in subfolders:
        # remove the _{grouping_type} from the folder name
        subfolder_name = subfolder.replace(f"_{grouping_type}", "")
        loaded_hvos[grouping_type].update({subfolder_name: {
            "filenames": [],
            "hvo_sequences": [],
            "compiled_single_hvo": None,
            "split_n_bar_phrases": None
        }})


# add filenames and hvo_sequences to the dict
from hvo_sequence.hvo_seq import HVO_Sequence

for grouping_type, subfolders in tqdm.tqdm(subfolder_dict.items()):
    for subfolder in subfolders:
        # remove the _{grouping_type} from the folder name
        subfolder_name = subfolder.replace(f"_{grouping_type}", "")
        hvo_files = [f for f in os.listdir(os.path.join(root_dir, subfolder)) if f.endswith('.hvo')]

        for hvo_file in hvo_files:
            hvo_seq_ = load_HVO_Sequence_from_file(os.path.join(root_dir, subfolder, hvo_file))
            voice_mapping = {
            'stream_0': [36],
            'stream_1': [38],
            'stream_2': [42],
            'stream_3': [46]
            }

            time_sigs = hvo_seq_.time_signatures
            tempos = hvo_seq_.tempos

            # max length of all HVO sequences

            single_compiled_hvo_seq = HVO_Sequence(
                beat_division_factors=[4],
                drum_mapping=voice_mapping
            )

            for time_sig in time_sigs:
                single_compiled_hvo_seq.add_time_signature(time_step=time_sig.time_step, numerator=time_sig.numerator, denominator=time_sig.denominator)
            for tempo in tempos:
                single_compiled_hvo_seq.add_tempo(time_step=tempo.time_step, qpm=tempo.qpm)

            metadata_ = {}
            metadata_.update(hvo_seq_.metadata)
            metadata_.update({f'stream_{ix}': 'voice_{ix}' for ix in range(4)})

            single_compiled_hvo_seq.metadata.update(metadata_)
            single_compiled_hvo_seq.adjust_length(hvo_seq_.hvo.shape[0])
            single_compiled_hvo_seq.hvo = hvo_seq_.hvo
            loaded_hvos[grouping_type][subfolder_name]["filenames"].append(hvo_file)
            loaded_hvos[grouping_type][subfolder_name]["hvo_sequences"].append(hvo_seq_)
            loaded_hvos[grouping_type][subfolder_name]["compiled_single_hvo"] = single_compiled_hvo_seq

100%|██████████| 1/1 [00:00<00:00, 69.96it/s]


In [16]:
# Dictionary structure of loaded_hvos

def get_loaded_hvos_text_description(loaded_hvos, title="Groove MIDI"):
    text = f'{title}\n\n'
    text += "Dictionary Structure:\n"
    
    for grouping_type, hvo_dict in loaded_hvos.items():
        text += f"{grouping_type}:\n"
        cnt = 0
        for subfolder_name, subfolder_data in hvo_dict.items():
            text += (f"    |-------> {subfolder_name} ({len(subfolder_data['filenames'])} files) \n")
            text += (f"    |          |-------> {subfolder_name} \n")
            text += (f"    |                     |--> 'filenames:' \n")

            for i, filename in enumerate(subfolder_data["filenames"]):
               text += (f"    |                           |-------> {filename} \n")

            text +=(f"    |                     |--> 'hvo_sequences:'\n")
            for i, hvo_seq in enumerate(subfolder_data["hvo_sequences"]):
                text +=(f"    |                           |-------> {hvo_seq.hvo.shape[0]} steps, i.e. {hvo_seq.hvo.shape[0] // 16} bars.\n")

            text += (f"    |                     |--> 'compiled_single_hvo:'\n")
            text += (f"    |                               |-------> {loaded_hvos[grouping_type][subfolder_name]['compiled_single_hvo']}\n")

            text += (f"    |                     |--> 'split_n_bar_phrases:'\n")
            text += (f"    |                               |-------> {loaded_hvos[grouping_type][subfolder_name]['split_n_bar_phrases']}\n")

            text += (f"    |-------> ...\n")
            text += ("\n")

            break
    
    return text

print(get_loaded_hvos_text_description(loaded_hvos))

Groove MIDI

Dictionary Structure:
fourvoices:
    |-------> UC_212 (1 files) 
    |          |-------> UC_212 
    |                     |--> 'filenames:' 
    |                           |-------> UC_212.hvo 
    |                     |--> 'hvo_sequences:'
    |                           |-------> 1344 steps, i.e. 84 bars.
    |                     |--> 'compiled_single_hvo:'
    |                               |-------> <hvo_sequence.hvo_seq.HVO_Sequence object at 0x147ed95e0>
    |                     |--> 'split_n_bar_phrases:'
    |                               |-------> None
    |-------> ...




In [17]:
# Split in two bar phrases, with a hop size of 16.

def split_into_n_bar_phrases(hvo_seq, n_bars=2, hop_size=16):
    """
    Split the HVO sequence into n-bar phrases with a hop size.
    Returns a list of HVO sequences.
    """
    n_steps_per_bar = hvo_seq.grid_maker.n_steps_per_beat * 4  # Assuming 4/4 time signature
    n_steps_per_phrase = n_steps_per_bar * n_bars

    phrases = []

    for start in range(0, hvo_seq.hvo.shape[0] - n_steps_per_phrase + 1, hop_size):
        end = start + n_steps_per_phrase
        phrase_hvo = hvo_seq.copy_empty()
        phrase_hvo.adjust_length(n_steps_per_phrase)
        phrase_hvo.hvo[:n_steps_per_phrase, :] = hvo_seq.hvo[start:end, :]
        phrase_hvo.metadata.update({'start_bar': start // n_steps_per_bar})
        phrases.append(phrase_hvo)

    return phrases

for grouping_type, hvo_dict in loaded_hvos.items():
    for subfolder_name, subfolder_data in tqdm.tqdm(hvo_dict.items()):
        compiled_single_hvo = subfolder_data["compiled_single_hvo"]
        split_n_bar_phrases = split_into_n_bar_phrases(compiled_single_hvo, n_bars=2, hop_size=16)
        loaded_hvos[grouping_type][subfolder_name]["split_n_bar_phrases"] = split_n_bar_phrases

# reprint

import pickle, bz2
# Save the loaded_hvos dictionary to a compressed file
os.makedirs("data/triple_streams/split_2bars", exist_ok=True)
with bz2.BZ2File("data/triple_streams/split_2bars/candombe.pkl.bz2", "wb") as f:
    pickle.dump(loaded_hvos, f)

# save the description text to a file
with open("data/triple_streams/split_2bars/candombe_description.txt", "w") as f:
    f.write(get_loaded_hvos_text_description(loaded_hvos))


print(get_loaded_hvos_text_description(loaded_hvos))


100%|██████████| 12/12 [00:00<00:00, 132.62it/s]


Groove MIDI

Dictionary Structure:
fourvoices:
    |-------> UC_212 (1 files) 
    |          |-------> UC_212 
    |                     |--> 'filenames:' 
    |                           |-------> UC_212.hvo 
    |                     |--> 'hvo_sequences:'
    |                           |-------> 1344 steps, i.e. 84 bars.
    |                     |--> 'compiled_single_hvo:'
    |                               |-------> <hvo_sequence.hvo_seq.HVO_Sequence object at 0x147ed95e0>
    |                     |--> 'split_n_bar_phrases:'
    |                               |-------> [<hvo_sequence.hvo_seq.HVO_Sequence object at 0x147f18430>, <hvo_sequence.hvo_seq.HVO_Sequence object at 0x147f18130>, <hvo_sequence.hvo_seq.HVO_Sequence object at 0x105d42520>, <hvo_sequence.hvo_seq.HVO_Sequence object at 0x147f180d0>, <hvo_sequence.hvo_seq.HVO_Sequence object at 0x147dc6670>, <hvo_sequence.hvo_seq.HVO_Sequence object at 0x147dc68e0>, <hvo_sequence.hvo_seq.HVO_Sequence object at 0x147dc6bb0>, <

In [18]:
import random

def get_random_set(loaded_hvos):
    """
    Get a random grouping type and a random file name from that grouping type.
    Returns the filenames and HVO sequences.
    """
    random_grouping_type = random.choice(list(loaded_hvos.keys()))
    random_file_name = random.choice(list(loaded_hvos[random_grouping_type].keys()))
    print(loaded_hvos[random_grouping_type][random_file_name]['compiled_single_hvo'].metadata)
    return loaded_hvos[random_grouping_type][random_file_name]

streams_set_dict = get_random_set(loaded_hvos)

from bokeh.io import output_notebook, show
output_notebook()

streams_set_dict['split_n_bar_phrases'][0].to_html_plot(
    filename="hvo_1.html",
    save_figure=False,
    show_figure=True)



{'filename': 'My first score', 'date_of_recording': 'Rock', 'source': 'Dataset Candombe with offsets', 'stream_0': 'voice_{ix}', 'stream_1': 'voice_{ix}', 'stream_2': 'voice_{ix}', 'stream_3': 'voice_{ix}'}




In [19]:
streams_set_dict['split_n_bar_phrases'][1].to_html_plot(
    filename="hvo_1.html",
    save_figure=False,
    show_figure=True)

