In [8]:
import collections
import os
import fnmatch
import pickle
import plotly.graph_objs as go
from collections import Counter
import torch
from matplotlib import pyplot as plt
from torch.utils.data import Dataset
from tqdm import tqdm
from utils.midi_processing.mid2numpy import read_midi, midi2numpy, get_info

In [11]:
def get_filenames_and_tags(dataset_dir='datasets/Groove_Monkee_Mega_Pack_GM', filter_common_tags=True):
    """Map every ``*.mid`` file found under ``dataset_dir`` to a list of tags.

    Tags are derived from two places:

    * the folder components of the directory that contains the file, with
      the first path component dropped (``'datasets'`` for the default
      relative ``dataset_dir``);
    * the file name without its extension, split on underscores.

    Every piece is then split on whitespace, so ``'Twisted GM'`` contributes
    the two tags ``'Twisted'`` and ``'GM'``.

    Args:
        dataset_dir: Root directory that is walked recursively.
        filter_common_tags: When true (the default), tags that only describe
            the dataset layout (``'..'``, ``'datasets'``,
            ``'Groove_Monkee_Mega_Pack_GM'``, ``'GM'``, ``'Bonus'``) are
            removed. When false, all tags are kept.

    Returns:
        dict: ``{file_path: [tag, ...]}`` where ``file_path`` is
        ``os.path.join(dir_name, fname)`` as produced by ``os.walk``.
        Empty if no matching files exist.
    """
    file_tag_map = {}
    # Tags that carry no information about the groove itself.
    common_tags = {"..", "datasets", "Groove_Monkee_Mega_Pack_GM", "GM", "Bonus"}

    for dir_name, _subdirs, file_list in os.walk(dataset_dir):
        for fname in fnmatch.filter(file_list, '*.mid'):
            # splitext strips only the final extension, so stems that contain
            # dots (e.g. 'groove.v2.mid') are not truncated at the first dot.
            stem = os.path.splitext(fname)[0]
            raw_parts = dir_name.split(os.sep)[1:] + stem.split('_')

            # split() without an argument collapses runs of whitespace and
            # therefore never yields empty-string tags.
            tags = [tag for part in raw_parts for tag in part.split()]

            if filter_common_tags:
                tags = [tag for tag in tags if tag not in common_tags]

            file_tag_map[os.path.join(dir_name, fname)] = tags

    return file_tag_map


In [12]:
# Survey the time signatures present in the dataset and show their
# distribution as a pie chart.
file_name_and_tags = get_filenames_and_tags()
dataset = {}  # Reserved for NumPy arrays and their tags; not filled in this cell
time_signatures = []  # One (numerator, denominator) tuple per successfully read file
unique_time_sig = set()  # Distinct time signatures encountered so far
failed_files = []  # Paths of files whose metadata could not be read

for midi_path, midi_tags in tqdm(file_name_and_tags.items()):
    try:
        drum_track_info = get_info(read_midi(midi_path))
        time_sig = (drum_track_info['time_sig_num'], drum_track_info['time_sig_denom'])
        time_signatures.append(time_sig)
        if time_sig not in unique_time_sig:
            # tqdm.write keeps the progress bar intact; a plain print() would
            # split the bar into fragments every time a message is emitted.
            tqdm.write(f"{time_sig} -> {midi_path} \n")
        unique_time_sig.add(time_sig)
    except Exception as e:
        # Best effort: a single unreadable file must not abort the survey,
        # but it is recorded so the number of skipped files is visible.
        tqdm.write(f"Error processing file {midi_path}: {str(e)}")
        failed_files.append(midi_path)
        continue

if failed_files:
    print(f"Skipped {len(failed_files)} file(s) that could not be processed.")

# Count how often each time signature occurs
time_sig_counter = collections.Counter(time_signatures)
print("Counts of each unique time signature:")
for _time_sig, _count in time_sig_counter.items():
    print(f"Time Signature: {_time_sig}, Count: {_count}")

# Splitting the time signatures and their counts into separate lists for plotting
labels = [f'{num}/{denom}' for num, denom in time_sig_counter.keys()]
values = list(time_sig_counter.values())

# Creating the pie chart
trace = go.Pie(labels=labels, values=values, hoverinfo='label+percent', textinfo='value')

# Displaying the plot; the title lets the figure stand on its own
fig = go.Figure(data=[trace])
fig.update_layout(title_text='Time signature distribution')
fig.show()

  2%|█▉                                                                                       | 698/31075 [00:00<00:04, 6972.87it/s]

(4, 4) -> datasets/Groove_Monkee_Mega_Pack_GM/Twisted GM/Bonus/088 Hip Hop Straight/088 Hip Hop Straight 1 Fill 1.mid 



 13%|███████████▎                                                                            | 4007/31075 [00:01<00:09, 2816.76it/s]

(7, 4) -> datasets/Groove_Monkee_Mega_Pack_GM/Fusion GM/Preview Files/Latin/108 7-4 Samba 01.mid 

(3, 4) -> datasets/Groove_Monkee_Mega_Pack_GM/Fusion GM/Preview Files/Latin/108 3-4 Samba 01.mid 

(5, 4) -> datasets/Groove_Monkee_Mega_Pack_GM/Fusion GM/Preview Files/Latin/108 5-4 Samba 01.mid 

(7, 8) -> datasets/Groove_Monkee_Mega_Pack_GM/Fusion GM/Preview Files/Latin/112 7-8 Songo 01.mid 

(6, 8) -> datasets/Groove_Monkee_Mega_Pack_GM/Fusion GM/Preview Files/Fills/6-8 Fills/200 6-8 Fills.mid 



 26%|███████████████████████▎                                                                | 8228/31075 [00:02<00:04, 5295.12it/s]

(12, 8) -> datasets/Groove_Monkee_Mega_Pack_GM/Variety Pack GM/Blues/070 12-8 Slow Blues Ride.mid 



 32%|████████████████████████████                                                            | 9922/31075 [00:02<00:04, 5132.98it/s]

(2, 4) -> datasets/Groove_Monkee_Mega_Pack_GM/World Beats GM/Bonus/4-4 Bonus/109 Cut Samba 2.mid 

(6, 4) -> datasets/Groove_Monkee_Mega_Pack_GM/World Beats GM/6-4 Grooves/150 Samba 01 Fill 2 (6-4).mid 

(8, 8) -> datasets/Groove_Monkee_Mega_Pack_GM/World Beats GM/Percussion/Conga/4-4 Conga/120 Guaracha 01a.mid 



 35%|███████████████████████████████▏                                                        | 10997/31075 [00:06<00:29, 687.46it/s]

(5, 8) -> datasets/Groove_Monkee_Mega_Pack_GM/Progressive GM/5-8 Grooves/160 5-8 02 F4.mid 

(9, 8) -> datasets/Groove_Monkee_Mega_Pack_GM/Progressive GM/Preview Files/9-8 082 Fills.mid 



100%|███████████████████████████████████████████████████████████████████████████████████████| 31075/31075 [00:11<00:00, 2655.05it/s]

Counts of each unique time signature:
Time Signature: (4, 4), Count: 24973
Time Signature: (7, 4), Count: 242
Time Signature: (3, 4), Count: 1938
Time Signature: (5, 4), Count: 254
Time Signature: (7, 8), Count: 215
Time Signature: (6, 8), Count: 1611
Time Signature: (12, 8), Count: 946
Time Signature: (2, 4), Count: 143
Time Signature: (6, 4), Count: 279
Time Signature: (8, 8), Count: 3
Time Signature: (5, 8), Count: 211
Time Signature: (9, 8), Count: 260



