In [1]:
from collections import Counter
import numpy as np
import scipy.io

# Load the list of sample paths from the SOL description file.
# NOTE(review): the [0][0][-4] indexing presumably picks the file-path field of
# the MATLAB "data" struct -- confirm against the layout of the .mat file.
X = [
    entry[0]
    for entry in scipy.io.loadmat('sol_description.mat')["data"][0][0][-4].ravel()
]
# The first "/"-separated component of each path is the folder name, which
# the cells below interpret as "<instrument>" or "<instrument>-<mute>".
X_folders = [path.split("/")[0] for path in X]
# Number of entries per folder name.
instrmutes_counter = Counter(X_folders)

# Split every folder name into [instrument, mute].
# The mute starts at the first all-lowercase hyphen-separated token
# (e.g. "Trumpet-C-cup" -> ["Trumpet-C", "cup"]); folders without such a
# token have no mute and get the placeholder "NULL".
instrmute_dict = {}
for instrmute_str in instrmutes_counter:
    tokens = instrmute_str.split("-")
    lowercase_positions = [
        position for position, token in enumerate(tokens) if token.islower()]
    if lowercase_positions:
        cut = lowercase_positions[0]
        instrmute_split = ["-".join(tokens[:cut]), "-".join(tokens[cut:])]
    else:
        instrmute_split = [instrmute_str, "NULL"]
    instrmute_dict[instrmute_str] = instrmute_split
    
# Count notes per instrument, merging all mutes of the same instrument.
# The instrument of each folder is looked up in instrmute_dict rather than
# matched with str.startswith: prefix matching would over-count as soon as
# one instrument name is a prefix of another, and it rescans X_folders once
# per instrument. This is a single pass over X_folders.
instr_dict = dict(Counter(instrmute_dict[folder][0] for folder in X_folders))
instr_lines = [
    "  {:s} {:s}".format(str(instr_dict[k]).rjust(5), k)
    for k in sorted(instr_dict.keys())]
print("Notes per instrument (#instruments={}):\n".format(len(instr_dict)) +
      "\n".join(instr_lines) + "\n")

# Count notes per mute; folders without a mute are counted under "NULL".
# The mute of each folder is looked up in instrmute_dict rather than matched
# with str.endswith (which would over-count if one mute name were a suffix of
# another), so the "NULL" count is obtained directly instead of being
# back-computed by subtraction from the instrument totals.
mutes_dict = dict(Counter(instrmute_dict[folder][1] for folder in X_folders))
# Keep the "NULL" row even if every folder happens to carry a mute.
mutes_dict.setdefault("NULL", 0)
mute_lines = [
    "  {:s} {:s}".format(str(mutes_dict[k]).rjust(5), k)
    for k in sorted(mutes_dict.keys())]
print("Notes per mute (#mutes={}):\n".format(len(mutes_dict)) +
      "\n".join(mute_lines) + "\n")

# Report the raw per-folder counts, i.e. one row per instrument-mute pair,
# in alphabetical order of folder name.
pair_header = "Notes per instrument-mute pair (#pairs={}):\n".format(
    len(instrmutes_counter))
pair_rows = []
for pair_str in sorted(instrmutes_counter.keys()):
    pair_count = str(instrmutes_counter[pair_str]).rjust(5)
    pair_rows.append("  {:s} {:s}".format(pair_count, pair_str))
print(pair_header + "\n".join(pair_rows) + "\n")

# For every instrument, count the notes per playing technique (the second
# "/"-separated path component), merging all mutes of that instrument.
# Paths are grouped through instrmute_dict in a single pass; the previous
# str.startswith filter rescanned and re-split every path once per instrument
# and would mix instruments whose names share a prefix.
ipt_dict = {instr_str: Counter() for instr_str in sorted(instr_dict.keys())}
for path in X:
    path_parts = path.split("/")
    ipt_dict[instrmute_dict[path_parts[0]][0]][path_parts[1]] += 1
# Number of distinct technique names across all instruments.
n_techniques = len(set().union(*ipt_dict.values()))
print("Number of playing techniques by instrument (#techniques={}):".format(n_techniques))
print("\n".join(["     " + str(len(ipt_dict[k])) + " " + k for k in ipt_dict]))

Notes per instrument (#instruments=16):
    767 Accordion
   1071 Alto-Sax
    945 Bass-Tuba
    847 Bassoon
   1324 Clarinet-Bb
   2891 Contrabass
   1194 Flute
    636 Guitar
   1318 Harp
   1165 Horn
   1034 Oboe
   1761 Tenor-Trombone
   1027 Trumpet-C
   3380 Viola
   3202 Violin
   2882 Violoncello

Notes per mute (#mutes=7):
  22560 NULL
    126 cup
    131 harmon
   1758 sordina
    391 sordina-piombo
    127 straight
    351 wah

Notes per instrument-mute pair (#pairs=33):
    767 Accordion
   1071 Alto-Sax
    945 Bass-Tuba
    847 Bassoon
   1324 Clarinet-Bb
   2540 Contrabass
    351 Contrabass-sordina
   1194 Flute
    636 Guitar
   1318 Harp
   1078 Horn
     87 Horn-sordina
    998 Oboe
     36 Oboe-sordina
   1385 Tenor-Trombone
     62 Tenor-Trombone-cup
     66 Tenor-Trombone-harmon
     62 Tenor-Trombone-straight
    186 Tenor-Trombone-wah
    668 Trumpet-C
     64 Trumpet-C-cup
     65 Trumpet-C-harmon
     65 Trumpet-C-straight
    165 Trumpet-C-wah
   2875 Viola
 

In [47]:
# Pitch-name vocabulary: twelve pitch classes per octave, octaves 0 to 9,
# ordered from "C0" upwards.
pitch_classes = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
midi_strs = [x + str(n) for n in range(10) for x in pitch_classes]
# Lookup from pitch name to its index in midi_strs ("C0" -> 0, "C#0" -> 1, ...).
# This dictionary was used below without being defined anywhere in the
# notebook, so the cell raised NameError on a fresh kernel. The index mapping
# reproduces the recorded output of this cell.
# NOTE(review): with "C0" -> 0 these numbers are semitone indices, not
# standard MIDI note numbers -- confirm this offset is intended.
midi_dict = {pitch_str: pitch_id for pitch_id, pitch_str in enumerate(midi_strs)}

# For each instrument, (lowest, highest, span) of the pitches found among its
# unmuted "ordinario" notes: the folder must equal the instrument name exactly
# and the technique component must be "ordinario".
pitch_dict = {}
for instr_str in sorted(instr_dict.keys()):
    X_instr = [
        x for x in X
        if x.split("/")[0]==instr_str and
           x.split("/")[1]=="ordinario"]
    # The third hyphen-separated field of the file name is looked up as a
    # pitch name (presumably names look like "<abbr>-<tech>-<pitch>-...").
    instr_midi_list = [
        midi_dict[x.split("/")[2].split("-")[2]] for x in X_instr]
    lowest_pitch = min(instr_midi_list)
    highest_pitch = max(instr_midi_list)
    pitch_dict[instr_str] = (
        lowest_pitch, highest_pitch, highest_pitch - lowest_pitch)
pitch_dict

{'Accordion': (16, 97, 81),
 'Alto-Sax': (37, 69, 32),
 'Bass-Tuba': (18, 53, 35),
 'Bassoon': (22, 63, 41),
 'Clarinet-Bb': (38, 79, 41),
 'Contrabass': (16, 60, 44),
 'Flute': (47, 86, 39),
 'Guitar': (26, 71, 45),
 'Harp': (11, 91, 80),
 'Horn': (19, 65, 46),
 'Oboe': (46, 81, 35),
 'Tenor-Trombone': (10, 70, 60),
 'Trumpet-C': (42, 74, 32),
 'Viola': (36, 84, 48),
 'Violin': (43, 88, 45),
 'Violoncello': (24, 72, 48)}

In [48]:
# Total number of sample paths loaded into X.
len(X)

25444