In [17]:
%matplotlib inline  

import glob
import os
import re

import matplotlib.pyplot as plt
import pandas as pd
import pretty_midi

import config

In [18]:
# total number of files
print('total number of files')
len(glob.glob(config.dir_glob_midi, recursive=True))

total number of files


1150

In [19]:
def pretty_print_dict(input_dict):
    for key, value in input_dict.items():
        print(f'{key} : {value}')

def midifile_iter():
    for filepath in glob.glob(midi_dir, recursive=True):
        yield filepath

def midifile_basename_iter():    
    for filepath in midifile_iter():
        filename = os.path.basename(filepath).replace('.mid','')
        yield filename
        
def pretty_mid_iter():
    for filepath in midifile_iter():
        pm = pretty_midi.PrettyMIDI(filepath)
        yield pm

### genre counts

In [73]:
no_genres = [
    'fill',
    'fast',
    'beat',
    'slow',
    'mediumfast',
    'soft'
]

dict_genre_count= dict()
unique_genre = set()
for filename in midifile_basename_iter():    
    genres = re.findall('[A-Za-z]+', filename)    
    
    for genre in genres:
        if genre not in no_genres:
            dict_genre_count[genre] = dict_genre_count.get(genre,0) + 1    

print(f'Genre counts')
pretty_print_dict(dict_genre_count)

print(f'Total number of genres : {len(dict_genre_count.keys())}')

Genre counts
funk : 233
rock : 362
soul : 63
groove : 40
hiphop : 95
pop : 27
neworleans : 53
secondline : 3
dance : 7
disco : 5
jazz : 101
latin : 99
brazilian : 57
baiao : 32
afrocuban : 60
reggae : 20
halftime : 37
purdieshuffle : 30
samba : 24
shuffle : 9
reggaeton : 1
breakbeat : 2
country : 29
prog : 3
bembe : 30
gospel : 19
rhumba : 19
chacarera : 3
punk : 58
march : 3
folk : 1
afrobeat : 13
bomba : 1
calypso : 2
swing : 11
rockabilly : 1
linear : 2
klezmer : 1
blues : 4
merengue : 3
sambareggae : 3
songo : 5
middleeastern : 1
bossa : 2
maracatu : 2
venezuelan : 3
dominican : 1
joropo : 1
ando : 1
ijexa : 3
sangueo : 1
frevo : 1
chacha : 1
motown : 16
fusion : 11
indie : 10
bounce : 2
highlife : 2
Total number of genres : 58


### Midifile information

In [112]:
# time signiture count
dict_TS_counts = dict()

for pm in pretty_mid_iter():
    numerator = pm.time_signature_changes[0].numerator
    denominator = pm.time_signature_changes[0].denominator
    
    time_signiture = str(numerator) + '/' + str(denominator)    
    dict_TS_counts[time_signiture] = dict_TS_counts.get(time_signiture,0)+1

pretty_print_dict(dict_TS_counts)

4/4 : 1140
5/4 : 1
3/4 : 5
5/8 : 1
6/8 : 5


In [101]:
# play time 
list_end_time = []
for pm in pretty_mid_iter():
    
    list_end_time.append(pm.get_end_time() - pm.get_onsets()[0])

Series_end_time = pd.Series(list_end_time)

plt.figure(figsize = (16,8))
plt.xlim([0, 100])
Series_end_time.hist(bins=1000)

In [6]:
import preprocess
import pprint as pp

pm = preprocess.midi_reader('./1_funk_80_beat_4-4.mid')      
inst = preprocess.get_drum_inst(pm)    

pp.pprint(inst.notes)

[Note(start=2.115625, end=2.176563, pitch=38, velocity=7),
 Note(start=2.176563, end=2.215625, pitch=38, velocity=10),
 Note(start=2.454688, end=2.556250, pitch=38, velocity=56),
 Note(start=2.614063, end=2.715625, pitch=44, velocity=65),
 Note(start=2.628125, end=2.728125, pitch=38, velocity=48),
 Note(start=2.825000, end=2.925000, pitch=38, velocity=47),
 Note(start=2.998438, end=3.098438, pitch=36, velocity=42),
 Note(start=3.007812, end=3.109375, pitch=46, velocity=48),
 Note(start=3.196875, end=3.298438, pitch=42, velocity=25),
 Note(start=3.370313, end=3.471875, pitch=36, velocity=44),
 Note(start=3.385938, end=3.487500, pitch=42, velocity=56),
 Note(start=3.576563, end=3.678125, pitch=42, velocity=21),
 Note(start=3.759375, end=3.859375, pitch=42, velocity=42),
 Note(start=3.768750, end=3.870313, pitch=38, velocity=53),
 Note(start=3.935938, end=4.037500, pitch=42, velocity=39),
 Note(start=3.959375, end=4.059375, pitch=36, velocity=43),
 Note(start=4.120312, end=4.221875, pitch