In [1]:
import os
import pickle, bz2

import tqdm
from bokeh.io import output_notebook, show


In [2]:
# loader
def load_dataset(file_path):
    """Read a bz2-compressed pickle from ``file_path`` and return its content.

    NOTE: unpickling can execute arbitrary code, so only call this on
    dataset files that come from a trusted source.
    """
    with bz2.BZ2File(file_path, 'rb') as compressed_file:
        return pickle.load(compressed_file)

def print_dataset_structure(dataset_dict, name=None):
    """Print the distinct keys found on the first three nesting levels.

    Parameters
    ----------
    dataset_dict : dict
        Nested dictionary to summarise. Values that are not dictionaries
        are not descended into.
    name : optional
        When given, it is included in the header line.

    Only the first ten second-level keys are printed; first- and
    third-level keys are printed in full as sets.
    """
    separator = "----"*10

    first_level_keys = set()
    second_level_keys = set()
    third_level_keys = set()

    for top_key, top_value in dataset_dict.items():
        first_level_keys.add(top_key)
        if not isinstance(top_value, dict):
            continue
        for mid_key, mid_value in top_value.items():
            second_level_keys.add(mid_key)
            if isinstance(mid_value, dict):
                third_level_keys.update(mid_value.keys())

    if name is None:
        header = "Dataset Structure:"
    else:
        header = f"Dataset Structure for Dataset: {name}"
    print(header)

    sections = (
        ("First Level Keys:", first_level_keys),
        # Second-level keys can be numerous, so only a sample is shown.
        ("Second Level Keys:", list(second_level_keys)[:10]),
        ("Third Level Keys:", third_level_keys),
    )
    for title, keys in sections:
        print(separator)
        print(title)
        print(keys)
    print(separator)
    print("\n")

def print_all_datasets_structure(dataset_dir):
    """Print the key structure of every ``.pkl.bz2`` file in ``dataset_dir``.

    Files are visited in the order returned by ``os.listdir``; each one is
    loaded with ``load_dataset`` and summarised with
    ``print_dataset_structure``, using the joined path as the display name.
    """
    archive_paths = []
    for entry in os.listdir(dataset_dir):
        if entry.endswith('.pkl.bz2'):
            archive_paths.append(os.path.join(dataset_dir, entry))

    for archive_path in archive_paths:
        loaded = load_dataset(archive_path)
        print_dataset_structure(loaded, name=archive_path)
        # Drop the reference before loading the next file so that only one
        # dataset is held in memory at a time.
        del loaded

# get all split_n_bar_phrases from last key
def get_split_n_bar_phrases(dataset_path_list, dataset_dir=None):
    """Collect every entry of 'split_n_bar_phrases' from one or more datasets.

    Parameters
    ----------
    dataset_path_list : str or list of str
        One dataset file name, or a list of them. Each is joined onto
        ``dataset_dir`` and loaded with ``load_dataset``.
    dataset_dir : str, optional
        Directory the file names are relative to. When omitted, a
        notebook-level ``data_dir`` variable is used if one exists
        (the previous behaviour); otherwise the entries are used as given.

    Returns
    -------
    list
        All phrases found, in load order. Each phrase's ``metadata`` dict is
        updated in place with 'collection' (the entry with '.pkl.bz2'
        removed) and 'sample_id' (the second-level key it was stored under).
    """
    if not isinstance(dataset_path_list, list):
        dataset_path_list = [dataset_path_list]

    if dataset_dir is None:
        # The original code read an undeclared global `data_dir`. Honour it
        # when it exists so old notebooks keep working, but do not crash
        # with a NameError when it was never defined.
        dataset_dir = globals().get('data_dir', '')

    split_n_bar_phrases = []

    for dataset_pkl_ in dataset_path_list:
        collection = dataset_pkl_.replace('.pkl.bz2', '')
        dataset_dict_ = load_dataset(os.path.join(dataset_dir, dataset_pkl_))
        for value in dataset_dict_.values():
            for sub_key, sub_value in value.items():
                hvo_splits = sub_value['split_n_bar_phrases']
                for hvo_split in hvo_splits:
                    hvo_split.metadata.update({'collection': collection, 'sample_id': sub_key})

                split_n_bar_phrases.extend(hvo_splits)

    return split_n_bar_phrases

import random

def get_randome_phrases(split_n_bar_phrases, n=1):
    """Draw ``n`` distinct phrases at random.

    A non-list argument is treated as a one-element list. Returns a tuple
    ``(phrases, indexes)`` where ``indexes`` are the positions of the drawn
    phrases in the input list, in the order they were drawn.
    """
    if isinstance(split_n_bar_phrases, list):
        pool = split_n_bar_phrases
    else:
        pool = [split_n_bar_phrases]

    chosen_indexes = random.sample(range(len(pool)), n)

    chosen_phrases = []
    for index in chosen_indexes:
        chosen_phrases.append(pool[index])

    return chosen_phrases, chosen_indexes




from bokeh.models import Tabs, TabPanel
import warnings
import logging
from bokeh.util.warnings import BokehUserWarning, BokehDeprecationWarning

def setup_quiet_bokeh():
    """Install warning filters and a log level that keep bokeh output quiet."""
    # Bokeh's own warning categories, then any warning whose message
    # mentions bokeh.
    for bokeh_category in (BokehUserWarning, BokehDeprecationWarning):
        warnings.filterwarnings("ignore", category=bokeh_category)
    warnings.filterwarnings("ignore", message=".*bokeh.*")

    # Let only ERROR and above through the 'bokeh' logger.
    bokeh_logger = logging.getLogger('bokeh')
    bokeh_logger.setLevel(logging.ERROR)

    # Broad categories that also tend to show up alongside bokeh plots.
    # Note that these filters are process-wide, not bokeh-specific.
    for generic_category in (FutureWarning, UserWarning):
        warnings.filterwarnings("ignore", category=generic_category)

def create_multitab_from_figures(hvos,  tab_titles=None, show_tabs= True):
    """
    Plot one or more HVO sequences and arrange the plots as bokeh tabs.

    Parameters:
    -----------
    hvos : object or list/tuple of objects
        A single object, or a list/tuple of objects, each providing
        ``to_html_plot(filename=..., save_figure=..., show_figure=...)``
        that returns the figure placed in the tab.
    tab_titles : str or List[str], optional
        Title for each tab, one per entry of ``hvos``. A single string is
        treated as a one-element list. When omitted or empty, titles default
        to "Tab 1", "Tab 2", ...
    show_tabs : bool, default=True
        Whether to immediately show the tabs

    Returns:
    --------
    Tabs
        Bokeh Tabs object containing all the figures

    Raises:
    -------
    ValueError
        If ``hvos`` is empty, or the number of titles does not match the
        number of entries in ``hvos``.
    """
    # Normalise the inputs first so that invalid calls fail before any
    # notebook output is set up or any figure is rendered.
    if not isinstance(hvos, (list, tuple)):
        hvos = [hvos]
    if len(hvos) == 0:
        raise ValueError("No figures provided to create tabs")

    if tab_titles is None:
        tab_titles = []
    elif isinstance(tab_titles, (list, tuple)):
        # Work on a copy so the caller's sequence is never modified.
        tab_titles = list(tab_titles)
    else:
        tab_titles = [tab_titles]
    if not tab_titles:
        tab_titles = [f"Tab {i+1}" for i in range(len(hvos))]

    if len(hvos) != len(tab_titles):
        raise ValueError("Number of figures must match number of tab titles")

    setup_quiet_bokeh()
    output_notebook()

    panels = []
    for hvo, title in zip(hvos, tab_titles):
        figure = hvo.to_html_plot(
            filename='',
            save_figure=False,
            show_figure=False)
        panels.append(TabPanel(child=figure, title=title))

    tabs = Tabs(tabs=panels)

    if show_tabs:
        show(tabs)

    return tabs


In [None]:
# 1. Load the dataset
# 2. For track in first level
#       For n_bar_split in second
#           Get best four

In [116]:
# Print the key structure of every .pkl.bz2 dataset found in 'lmd_curated/filtered'.
print_all_datasets_structure('lmd_curated/filtered')

Dataset Structure for Dataset: lmd_curated/filtered/lmd_batch_56_filtered.pkl.bz2
----------------------------------------
First Level Keys:
{'AllAvailableStreams'}
----------------------------------------
Second Level Keys:
['3aac1c9c6677d61e42413022bb45a387', '9e37e5c264a24135e19b056b7ac8202a', '313b6893e29a54a5872df5c92199eadf', '6e88e2664704745ce96103e0d4b5ac5f', 'fc383fdba087f2bff11808545b0554d1', 'd27a83e6f6acc3d442d637e1966d8f7e', '5c7500f537a6dca04e0260bfbda17d5a', '016c34827cd2c63f39e4622707fff0f6', 'fa7ba52db0589188f5bf2b9405299d19', 'bd20d8fcc9f3865d587ab38c1e1770a5']
----------------------------------------
Third Level Keys:
{'compiled_single_hvos', 'filenames', 'split_n_bar_phrases', 'hvo_sequences'}
----------------------------------------


Dataset Structure for Dataset: lmd_curated/filtered/lmd_batch_33_filtered.pkl.bz2
----------------------------------------
First Level Keys:
{'AllAvailableStreams'}
----------------------------------------
Second Level Keys:
['18701b5

In [117]:
# Load three batches from the split_2bars/lmd directory.
ds1 = load_dataset('data/triple_streams/split_2bars/lmd/lmd_batch_01.pkl.bz2')
# Fixed: this line used to load lmd_batch_01 again, which made ds2 a duplicate
# of ds1. NOTE(review): confirm lmd_batch_02.pkl.bz2 is the intended file.
ds2 = load_dataset('data/triple_streams/split_2bars/lmd/lmd_batch_02.pkl.bz2')
ds3 = load_dataset('data/triple_streams/split_2bars/lmd/lmd_batch_03.pkl.bz2')

In [4]:
# Load the filtered batch 02 from the split_2bars/mix directory; later cells inspect it as `ds_comp`.
ds_comp = load_dataset('data/triple_streams/split_2bars/mix/lmd_batch_02_filtered.pkl.bz2')


In [5]:
# Summarise the first-, second- and third-level keys of `ds_comp`.
print_dataset_structure(ds_comp)

Dataset Structure:
----------------------------------------
First Level Keys:
{'AllAvailableStreams'}
----------------------------------------
Second Level Keys:
['3caf66d265ed8476bf2d15fbf75a99d5', '76232774460462b1a491b267928746f7', 'ec9accecf61155c316a3c47d7a825090', '5c9c09e492b65c1d67f0418716a043d7', '5296f79c9b2adeddad96c276e67f2fab', 'cac928262e91bfe0cd7219525736a2e4', '88fa4921b95e1b2be457950c512829b5', '1239fcb5f7a81a36b36b3c4752ed3ea5', '74140eddf69501fc1ba0d389cd72009e', '73d01f01a05f53a1f059c9805079b314']
----------------------------------------
Third Level Keys:
{'hvo_sequences', 'filenames', 'split_n_bar_phrases', 'compiled_single_hvos'}
----------------------------------------




<hvo_sequence.hvo_seq.HVO_Sequence object at 0x1045a8af0>
<hvo_sequence.hvo_seq.HVO_Sequence object at 0x15be8f3d0>
<hvo_sequence.hvo_seq.HVO_Sequence object at 0x15be8f850>
<hvo_sequence.hvo_seq.HVO_Sequence object at 0x15be8fb50>
<hvo_sequence.hvo_seq.HVO_Sequence object at 0x15be8fe50>
<hvo_sequence.hvo_seq.HVO_Sequence object at 0x15becf130>
<hvo_sequence.hvo_seq.HVO_Sequence object at 0x15becf5b0>
<hvo_sequence.hvo_seq.HVO_Sequence object at 0x15becf910>
af568aaa79722d52313ec095fa6d4d35_piano.hvo
af568aaa79722d52313ec095fa6d4d35_guitar.hvo
af568aaa79722d52313ec095fa6d4d35_percussive.hvo
af568aaa79722d52313ec095fa6d4d35_bass.hvo
af568aaa79722d52313ec095fa6d4d35_drum.hvo
<hvo_sequence.hvo_seq.HVO_Sequence object at 0x15becfd90>
<hvo_sequence.hvo_seq.HVO_Sequence object at 0x15bed7070>
<hvo_sequence.hvo_seq.HVO_Sequence object at 0x15bed7370>
<hvo_sequence.hvo_seq.HVO_Sequence object at 0x15bed7670>
<hvo_sequence.hvo_seq.HVO_Sequence object at 0x15bed7af0>
<hvo_sequence.hvo_seq.HVO_S

In [12]:
# Print the `velocities` array of every phrase stored under one sample's
# 'split_n_bar_phrases' entry.
# NOTE(review): this dumps each array in full; printing a shape or a slice
# would keep the cell output readable.
for hvo in ds_comp['AllAvailableStreams']['3caf66d265ed8476bf2d15fbf75a99d5']['split_n_bar_phrases']:
    print(hvo.velocities)

[[0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.80314961 0.        ]
 [0.         

In [110]:
# List every second-level key stored under 'AllAvailableStreams'.
# `value` is not used inside the loop.
for key, value in ds_comp['AllAvailableStreams'].items():
    print(key)

f944ee53f978826a99821c5b85f2f12d
922e94b551a25419ffca9c406155a4d2
b0ba3340622c50edaefda1b31cedc269
f17697fc8ae78e53d5e4ddfd336dc2b5
8b6f5245ac42ba3893575d4a79b813b9
f3717b39a1d3deebf68f45537cff992b
35b6ae5b7276b3b21845a608b6c496e7
c6aa5100c431801aa8c201dbc3461baa
2473e18eec6cc55b82c5dddab3bea353
efd337847c64c1adf51c94972cb21085
9c559c1b8c9656e97919fc28f04849d3
be42a7b31e850e135265484a729ceebd
5d94332a3d598667cc05852c751dd0d6
fc37976a88bb5f04d2b9bde4eb86a641
3d93f1b9a0b816c43b7d19b1e915c9fc
58723374401a478df5c5752f95f366d4
3e4b7ef7d95b81006b7a0c46303e6933
302dfe3cc9995aa0a7394c70ede3fef7
01c4be2d928a6078b7f4ab8f6ebc1504
c6cd5ea2281f59e0955eb3eef23c165d
0717e632ef8df73b2d8e55095c5c94cb
1bb43de3d0bb68e0e360519ca37e0fa4
0336dec7122bb359c87871836d3d9d87
e396f50db1aa8b6a2eb1380813a21b5e
677f66b7235fe05c49c35a9831ad63c3
bd7ed9f570f39ca05f1bf551462fea7f
6ef8f45b792130986d43162106d83582
a216b447f754d54eb426ba171d2d03b9
f8f8765f6e92a33fa555a3ac73127f64
5e4d42d54becde45f5d892376a8cb17c
afc4eb534d

In [107]:
# Summarise the key structure of `ds_comp` again (same call as an earlier cell).
# NOTE(review): the recorded second-level keys differ from the earlier run —
# presumably set ordering differs between kernel sessions, or `ds_comp` held a
# different dataset at the time; verify.
print_dataset_structure(ds_comp)

Dataset Structure:
----------------------------------------
First Level Keys:
{'AllAvailableStreams'}
----------------------------------------
Second Level Keys:
['7d35d5306f33d662b9f3a3000f0c1d77', 'f7ba74c3046a016f12c45159c0d231e3', '908e8300a00e0bb241686e2b5a35707c', 'ce9540fabb2fa50dbb8442c20c3ab458', '10ed619ada3ca9b660d1123934369748', '56efac7a91db3f73e7d17a2fce7d7d57', 'a54ed88892fd114b9b79b3d228e9cfdc', 'cfb7d7066154db3c6d6fe6548ecfc4e8', '57c6a83aab7c3004f7527fbc4e928714', 'ba048696cc56f509afc9fcb226582a4d']
----------------------------------------
Third Level Keys:
{'compiled_single_hvos', 'filenames', 'split_n_bar_phrases', 'hvo_sequences'}
----------------------------------------




In [None]:

# Print every track id under 'AllAvailableStreams' followed by its 'filenames'.
# NOTE(review): `ds` is not assigned in any visible cell; it must be loaded
# (e.g. with load_dataset) before this cell can run on a fresh kernel.
# The previous inner loop iterated over the keys of each track entry without
# using the loop variable, so both prints were repeated once per key.
for track, track_entry in ds['AllAvailableStreams'].items():
    print(track)
    print(track_entry['filenames'])

In [95]:
# Pick the 'split_n_bar_phrases' list of one sample from `ds_comp` and of one
# sample from `ds`.
# NOTE(review): `ds` is not assigned in any visible cell — it presumably comes
# from an earlier kernel session or a deleted cell; confirm which dataset it is.
split = ds_comp['AllAvailableStreams']['7d35d5306f33d662b9f3a3000f0c1d77']['split_n_bar_phrases']
split_other = ds['AllAvailableStreams']['3ca0f5bf8a30195c5e2f62f10a894c1d']['split_n_bar_phrases']



In [96]:
# Show the phrase objects contained in `split` (printed as their default repr).
print(split)

[<hvo_sequence.hvo_seq.HVO_Sequence object at 0x319bebc10>, <hvo_sequence.hvo_seq.HVO_Sequence object at 0x319bf1370>, <hvo_sequence.hvo_seq.HVO_Sequence object at 0x319bf1910>, <hvo_sequence.hvo_seq.HVO_Sequence object at 0x319bf1eb0>]


In [97]:
# Print the `hits` array of the first phrase in `split`.
print(split[0].hits)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
