# WORLD MIDIs Explorer (ver. 1.0)

***

## World MIDIs explorer and statistical analyzer

***

### Project Los Angeles
### Tegridy Code 2021

***

# Setup environment

In [None]:
#@title Install dependencies
# Clone tegridy-tools; the imports cell changes into this checkout to import TMIDI.
!git clone https://github.com/asigalov61/tegridy-tools
# Python packages imported by the cells below.
!pip install pretty_midi
!pip install librosa
!pip install mir_eval
!pip install matplotlib

# FluidSynth itself is installed with apt; midi2audio (imported below for its
# FluidSynth class) is installed with pip.
!apt install fluidsynth #Pip does not work for some reason. Only apt works
!pip install midi2audio

In [None]:
#@title Import needed modules

print('Loading modules...Please wait...')

import os
import sys
import copy
import secrets
import pickle
from pprint import pprint

os.chdir('/content/tegridy-tools/tegridy-tools/')
import TMIDI

import numpy as np

import pretty_midi
import librosa
import librosa.display
import mir_eval
import mir_eval.display
import IPython.display

import matplotlib.pyplot as plt
%matplotlib inline

import tables
import json

import joblib
import glob

import seaborn as sns
sns.set_style('white')
sns.set_context('notebook', font_scale=1.5)
import matplotlib.gridspec
import collections

import tqdm.auto

from midi2audio import FluidSynth
from IPython.display import display, Javascript, HTML, Audio

from google.colab import files

# Create the default dataset folder on first run so the dataset cells below
# have a directory to read from.
default_dataset_dir = '/content/Dataset'
dataset_dir_missing = not os.path.exists(default_dataset_dir)
if dataset_dir_missing:
    print('Creating dataset dir...')
    os.makedirs(default_dataset_dir)

print('Loading complete.')    

# Create or load the dataset

In [None]:
#@title Create a dataset from your MIDIs
full_path_to_dataset_dir = "/content/Dataset" #@param {type:"string"}
dataset_name = "WORLD MIDIs Dataset" #@param {type:"string"}
file_name_to_output_dataset_to = "/content/WORLD_MIDIs_Dataset" #@param {type:"string"}

os.chdir(full_path_to_dataset_dir)

print('WORLD MIDIs Explorer MIDI Processor')

###########

# Not read anywhere in this cell; kept so code relying on these names keeps working.
average_note_pitch = 0
min_note = 127
max_note = 0

# Running totals over the whole dataset.
files_count = 0
ev = 0
notes = 0
tracks = 0

# Value of the most recent 'patch_change' event. 128 matches none of the
# programs tracked below.
# NOTE(review): this is never reset between tracks or files, so notes that
# precede any patch change inherit the previous track's/file's patch - confirm
# that this is intended.
patch = 128

# One row per recorded note: [event[1], event[2], event[4], event[5]] as
# non-negative ints (presumably start time, duration, pitch, velocity in the
# TMIDI score format - TODO confirm against TMIDI).
piano_notes = 0
piano_matrix = []

violin_notes = 0
violin_matrix = []

flute_notes = 0
flute_matrix = []

copyright_events = 0
copyright_matrix = []

song_names = []

rec_song = False
first_note = True

RULE = '==========================================================================================================='


def _print_dataset_report(notes_label, memory_label):
    """Print the running counters, then the sizes of the result lists.

    sys.getsizeof is shallow: it measures each outer list only, not the note
    rows stored inside it.
    """
    print(RULE)

    print(notes_label, notes)
    print('Piano notes', piano_notes)
    print('Violin notes', violin_notes)
    print('Flute notes', flute_notes)
    print('Copyright events:', copyright_events)
    print('Last registered MIDI patch:', patch)
    print('Total MIDI Events:', ev, ' / Tracks:', tracks, ' / Files:', files_count, ' / ', len(song_names))
    print(RULE)

    sized_lists = [('Piano:', piano_matrix),
                   ('Violin:', violin_matrix),
                   ('Flute:', flute_matrix),
                   ('Copyright:', copyright_matrix)]
    print(memory_label)
    for label, stored in sized_lists:
        print(label, str(sys.getsizeof(stored)), 'bytes')
    print('Total:', str(sum(sys.getsizeof(stored) for _, stored in sized_lists)), 'bytes')
    print(RULE)


print('Creating a list of MIDI files...')
filez = os.listdir(full_path_to_dataset_dir)

print('Starting processing...')
for f in tqdm.auto.tqdm(filez):
    files_count += 1

    # `with` guarantees the handle is closed, and a failing open() (for example
    # on a sub-directory) is now reported like any other unreadable file
    # instead of raising NameError from the error handler.
    try:
        with open(os.path.join(full_path_to_dataset_dir, f), 'rb') as midi_file:
            opus = TMIDI.midi2opus(midi_file.read())
    except Exception:
        print('Problematic file. Skipping...')
        print('Problematic file name:', f)
        continue

    fn = os.path.basename(f)

    score1 = TMIDI.to_millisecs(opus)
    score = TMIDI.opus2score(score1)

    # score[0] is the ticks value; every following element is a track.
    for track in score[1:]:
        for event in track:
            ev += 1
            event_type = event[0]

            if event_type == 'patch_change':
                patch = event[3]

            elif event_type == 'note':
                notes += 1

                # Pick the matrix for the instrument family the current patch
                # belongs to; notes of any other patch are only counted.
                target_matrix = None
                if patch in range(0, 1):
                    target_matrix = piano_matrix
                    piano_notes += 1
                elif patch == 40:
                    target_matrix = violin_matrix
                    violin_notes += 1
                elif patch in range(71, 75):
                    target_matrix = flute_matrix
                    flute_notes += 1

                if target_matrix is not None:
                    note_row = [abs(int(event[1])),
                                abs(int(event[2])),
                                abs(int(event[4])),
                                abs(int(event[5]))]
                    if first_note:
                        # NOTE(review): the first recorded note of each file is
                        # stored twice (the same list object), exactly as the
                        # original cell did. The original looks like it meant
                        # to store an all-zero separator row here - confirm
                        # before changing, since it alters downstream stats.
                        target_matrix.append(note_row)
                        first_note = False
                    target_matrix.append(note_row)
                    rec_song = True

            elif event_type == 'copyright_text_event':
                copyright_events += 1
                copyright_matrix.append(event[2])

        tracks += 1

    # Only files that contributed at least one tracked note are listed.
    if rec_song:
        song_names.append(fn)
    rec_song = False
    first_note = True

    if files_count % 100 == 0:
        _print_dataset_report('Notes so far:', 'Current memory consumption:')

print('MIDI files processing complete!')
print('Crunching final dataset statistics...')
_print_dataset_report('Total notes:', 'Final memory footprint:')

print('Creating final dataset lists...')
stats = ['STATS', files_count, tracks, ev, notes, piano_notes, violin_notes, flute_notes, copyright_events]
s_names = ['SONGS FILE NAMES', song_names]
Data = [dataset_name, s_names, stats, piano_matrix, violin_matrix, flute_matrix, copyright_matrix]

print(RULE)
TMIDI.Tegridy_Pickle_File_Writer(Data,file_name_to_output_dataset_to)

In [None]:
#@title Load existing dataset
full_path_to_the_dataset_file = "/content/WORLD_MIDIs_Dataset.pickle" #@param {type:"string"}

print('WORLD MIDIs Explorer Dataset Loader')
print('=' * 70)
print('Reading the dataset file. Please wait...')

# SECURITY: unpickling can execute arbitrary code. Only load dataset files that
# you created yourself or that come from a source you trust.
# The context manager closes the file even if unpickling fails.
with open(full_path_to_the_dataset_file, "rb") as pkl:
    Data = pickle.load(pkl)

print('Restoring and loading the dataset. Please wait...')

# Layout written by the dataset creation cell:
# [name, ['SONGS FILE NAMES', names], ['STATS', ...], piano, violin, flute, copyright]
(dataset_name, s_names, stats,
 piano_matrix, violin_matrix, flute_matrix, copyright_matrix) = Data[:7]

song_names = s_names[1]

# stats[0] is the 'STATS' label; the counters follow in this fixed order.
(files_count, tracks, ev, notes,
 piano_notes, violin_notes, flute_notes, copyright_events) = stats[1:9]

print('Loading complete!')
print('Displaying stats...')
print('=' * 70)
print('Dataset name:', dataset_name)
print('Total number of notes:', notes)
print('Number of piano notes', piano_notes)
print('Number of violin notes', violin_notes)
print('Number of flute notes', flute_notes)
print('Number of copyright events:', copyright_events)
print('Total number of MIDI events:', ev)
print('Total number of tracks:', tracks)
print('Total number of songs:', len(song_names))
print('=' * 70)
print('Task complete! Enjoy! :)')

# Explore the whole processed dataset

In [None]:
#@title Load Plotting and Statistics functions

def list_average(num):
  """Return the arithmetic mean of the values in `num`, truncated to an int.

  Raises ZeroDivisionError when `num` is empty.
  """
  count = len(num)
  total = 0
  for value in num:
    total += value
  return int(total / count)

# Face colour (hex RGB string) that pretty_hist passes as `fc` for the histogram bars.
FC = '#28ABE3'

def plot_hist(data, bins, xlabel=None, ylabel=None, divisor=1000):
    """Histogram `data` into `bins` on a new figure.

    Values above the last bin edge are folded into the last bin. When a few
    bins dwarf the rest, the plot is drawn as a split (broken y-axis) histogram.

    Parameters
    ----------
    data : sequence of numbers
        Values to histogram.
    bins : sequence of numbers
        Bin edges, as accepted by np.histogram.
    xlabel, ylabel : str, optional
        Axis labels; omitted when falsy.
    divisor : number
        The y tick labels show count / divisor.
    """
    fig = plt.figure()
    # Make it so that all points beyond the bin range get put in the last bin
    data = np.array(data)
    data[data > bins[-1]] = bins[-1] - 1e-10
    # Get histogram bin heights
    heights, _ = np.histogram(data, bins)
    # If the largest difference in heights is more than 20x the median
    # distance in heights, use a split histogram
    height_diffs = np.abs(np.diff(heights))
    if np.median(height_diffs) * 20 < height_diffs.max():
        # Find all bin indices which are > .5*highest bin
        highest = max(heights)
        high_bin_indices = [n for n in range(len(heights))
                            if heights[n] > highest / 2.]
        # Forward the caller's divisor; previously the split branch always
        # fell back to split_hist's default of 1000.
        split_hist(heights, bins, high_bin_indices, divisor=divisor)
    else:
        pretty_hist(heights, bins)
        divide_yticklabels(divisor=divisor)
    if xlabel:
        plt.xlabel(xlabel)
    if ylabel:
        # When a split histogram is created, ylabel draws in the middle of the
        # bottom histogram - this standardizes the location regardless
        fig.text(0.05, 0.5, ylabel, ha='center', va='center',
                 rotation='vertical')


def uniform_hist(heights, bins, ax, **kwargs):
    """Draw one unit-width bar per bin on `ax`, ignoring the real bin widths.

    Bar i covers x in [i - 0.5, i + 0.5], which is the range pretty_hist sets
    as x-limits. The bars are therefore edge-aligned explicitly: since
    matplotlib 2.0 the default `align` is 'center', which would shift every
    bar half a bin to the left and clip the first one.

    Parameters
    ----------
    heights : sequence of numbers
        Bar heights, one per bin.
    bins : sequence
        Bin edges; only the count (len(bins) - 1) is used.
    ax : matplotlib axes
        Axes to draw on.
    **kwargs
        Forwarded to ax.bar; an explicit `align` overrides the default here.
    """
    kwargs.setdefault('align', 'edge')
    ax.bar(np.arange(len(bins) - 1) - .5, height=heights,
           width=1, bottom=0, **kwargs)


def pretty_hist(heights, bins, ax=None, title=None):
    """Draw pre-binned counts as a styled bar chart.

    Uses `ax` when given, otherwise the current axes. An optional `title` is
    set as the figure's suptitle.
    """
    axes = plt.gca() if ax is None else ax
    # Bars in the shared face colour, slightly transparent
    uniform_hist(heights, bins, axes, fc=FC, alpha=.7)
    # Strip spines and add a grid along the y axis
    sns.despine()
    axes.yaxis.grid()
    # Bars are one unit wide, so the x range runs half a unit past each end
    half_bin = 1. / 2.
    axes.set_xlim(-half_bin, len(bins) - 1 - half_bin)
    if title is None:
        return
    plt.suptitle(title, verticalalignment='top', y=.95, size='large')


def divide_yticklabels(ax=None, divisor=1000):
    """Relabel the y ticks of `ax` (current axes by default) as tick / divisor.

    Whole-number results are shown as ints, everything else as floats.
    """
    target = ax if ax is not None else plt.gca()
    scaled_labels = []
    for tick in target.get_yticks():
        scaled = float(tick) / divisor
        scaled_labels.append(int(scaled) if scaled.is_integer() else scaled)
    target.set_yticklabels(scaled_labels)


def split_hist(heights, bin_edges, high_bin_indices, divisor=1000):
    """Plot a histogram where one or more bins have very large values.

    The figure is split into two stacked axes that share the same bars: the
    upper one is cropped to the tops of the very large bins, the lower one to
    the remaining bins.

    Parameters
    ----------
    heights : sequence of numbers
        Bar heights, one per bin.
    bin_edges : sequence
        Bin edges; len(bin_edges) - 1 bins.
    high_bin_indices : int or list of int
        Indices of the bins that are shown in the upper, cropped axes.
    divisor : number
        The y tick labels on both axes show count / divisor.
    """
    # Make high_bin_indices a list if an int was passed
    if isinstance(high_bin_indices, int):
        high_bin_indices = [high_bin_indices]
    # Create 2-row, 1-col subplot where the upper subplot is 1/4 the height
    # The upper subplot will be the tops of the very large bins; lower will be
    # the rest
    gs = matplotlib.gridspec.GridSpec(
        2, 1, width_ratios=[1], height_ratios=[1, 4])
    # Set the spacing between subplots to .1
    gs.update(hspace=0.1)
    # Grab axes handles
    ax = plt.subplot(gs[0])
    ax2 = plt.subplot(gs[1])
    # Plot pretty histograms both for the "upper" and "lower" parts of split
    pretty_hist(heights, bin_edges, ax)
    pretty_hist(heights, bin_edges, ax2)
    low_min = 0
    # Compute the height of the largest bin _not_ in high_bin_indices
    low_max = 1.1 * max(heights[n] for n in range(len(bin_edges) - 1)
                        if n not in high_bin_indices)
    # Compute the height of the smallest bin in high_bin_indices
    high_min = .9 * min(heights[n] for n in high_bin_indices)
    # Compute the height of the highest bin in high_bin_indices
    high_max = 1.1 * max(heights[n] for n in high_bin_indices)
    # Set the Y plotting range according to the above.  This will crop things.
    ax.set_ylim(high_min, high_max)
    ax2.set_ylim(low_min, low_max)
    # Hide the spines between ax and ax2
    ax.spines['bottom'].set_visible(False)
    ax2.spines['top'].set_visible(False)
    ax.xaxis.tick_top()
    # Must be a real boolean: the legacy string 'off' is truthy, so current
    # matplotlib would show the top tick labels instead of hiding them.
    ax.tick_params(labeltop=False)
    ax2.xaxis.tick_bottom()

    # Compute the spacing between y-ticks on the lower plot
    lowtick_spacing = np.diff(ax2.get_yticks())[0]
    # Create a single tick on the upper plot, rounded to the same spacing as
    # lower plot
    ax.set_yticks([int(lowtick_spacing) *
                   int((high_min + high_max) /
                   (2 * lowtick_spacing))])
    # X-axis start of clip lines (relative to [0, 1])
    start = -.015
    # Compute proportion of x-axis covered by last high_bin_indices (+ .015)
    end = (high_bin_indices[-1] + 1) / float(len(bin_edges) - 1) + .015
    # Plot the lines, allowing for it to expand outside of the axis
    ax.plot([start, end], [0., 0.],
            transform=ax.transAxes, color='k', clip_on=False)
    ax2.plot([start, end], [1., 1.],
             transform=ax2.transAxes, color='k', clip_on=False)

    # Scale the tick labels on both axes by the divisor
    divide_yticklabels(ax, divisor)
    divide_yticklabels(ax2, divisor)


def compute_statistics(midi_file):
    """
    Given a path to a MIDI file, compute a dictionary of statistics about it

    Parameters
    ----------
    midi_file : str
        Path to a MIDI file.

    Returns
    -------
    statistics : dict or None
        Dictionary reporting the values for different events in the file,
        or None when the file could not be loaded or analysed.
    """
    # Invalid MIDI files raise while loading; any such failure is reported to
    # the caller as None rather than as an exception.
    try:
        pm = pretty_midi.PrettyMIDI(midi_file)
        melodic_programs = [inst.program for inst in pm.instruments
                            if not inst.is_drum]
        summary = dict(
            n_instruments=len(pm.instruments),
            program_numbers=melodic_programs,
            key_numbers=[change.key_number
                         for change in pm.key_signature_changes],
            tempos=list(pm.get_tempo_changes()[1]),
            time_signature_changes=pm.time_signature_changes,
            end_time=pm.get_end_time(),
            lyrics=[lyric.text for lyric in pm.lyrics],
        )
    except Exception:
        return None
    return summary

In [None]:
#@title Basic detailed stats for selected instrument
select_instrument = "Violin" #@param ["Piano", "Violin", "Flute"]

###########

print(dataset_name, 'statistics')
print('=' * 70)
print('Computing data. Please stand-by...')
print('=' * 70)
###########

# One lookup instead of three copy-pasted loops.
instrument_matrices = {'Piano': piano_matrix,
                       'Violin': violin_matrix,
                       'Flute': flute_matrix}
if select_instrument not in instrument_matrices:
    raise ValueError('Unknown instrument: ' + str(select_instrument))

selected_matrix = instrument_matrices[select_instrument]
if not selected_matrix:
    # Fail with a readable message instead of a ZeroDivisionError further down.
    raise ValueError('The dataset has no ' + select_instrument + ' notes to analyze.')

# Each row is [start time, duration, pitch, velocity], times in milliseconds.
start_times = [row[0] for row in selected_matrix]
durations = [row[1] for row in selected_matrix]
pitches = [row[2] for row in selected_matrix]
velocities = [row[3] for row in selected_matrix]

# Sum only the forward jumps between consecutive start times: a backward jump
# (start time going down) contributes nothing.
ms_time_counter = 0
previous_start_time = 0
for start_time in start_times:
    if previous_start_time < start_time:
        ms_time_counter += start_time - previous_start_time
    previous_start_time = start_time

avg_st = list_average(start_times)
avg_dur = list_average(durations)
avg_pts = list_average(pitches)
avg_vel = list_average(velocities)

# Pitch -> number of occurrences, keyed in order of first appearance.
unique_pitches_dictionary = dict(collections.Counter(pitches))

# On ties the pitch that appeared last wins, as in the original loop.
highest_count = max(unique_pitches_dictionary.values())
for pitch_value, occurrences in unique_pitches_dictionary.items():
    if occurrences == highest_count:
        max_ocu = pitch_value


def _print_time_breakdown(total_ms):
    """Print a millisecond total as whole seconds, minutes, hours, days, weeks and months."""
    total_seconds = total_ms / 1000
    print(int(total_seconds), 'seconds /',
          int(total_seconds / 60), 'minutes /',
          int(total_seconds / 60 / 60), 'hours /',
          int(total_seconds / 60 / 60 / 24), 'days /',
          int(total_seconds / 60 / 60 / 24 / 7), 'weeks /',
          int(total_seconds / 60 / 60 / 24 / 30), 'months')


print('Showing statistics for', select_instrument)
print('=' * 70)

print('Average recorded note start time:', avg_st / 1000, 'seconds from the begining')
print('Min. note start-time recorded is', int(min(start_times)) / 1000, 'seconds')
print('Max. note start-time recorded is', int(max(start_times)) / 1000, 'seconds')
print('=' * 70)

print('Average recorded note duration:', avg_dur, 'miliseconds or', avg_dur / 1000, 'seconds')
print('Min. note duraiton recorded is', int(min(durations)) / 1000, 'seconds')
print('Max. note duration recorded is', int(max(durations)) / 1000, 'seconds')
print('=' * 70)

# NOTE(review): 'note #' prints int(pitch / 12), i.e. the octave number plus
# one. If the position inside the octave was meant, this should be pitch % 12.
print('Most frequently occuring pitch is', max_ocu, '= octave C', int((max_ocu / 12) - 1), 'note #', int(max_ocu / 12))
print('Average recorded note pitch:', avg_pts, '= octave C', int((avg_pts / 12) - 1), 'note #', int(avg_pts / 12))
print('Min. note pitch recorded is', int(min(pitches)))
print('Max. note pitch recorded is', int(max(pitches)))
print('=' * 70)

# MIDI velocity tops out at 127, so the share of maximum volume is v / 127.
# The previous formula reported 37% for velocity 64 and went negative below 27.
print('Average recorded note velocity:', avg_vel, 'which is', int(avg_vel / 127 * 100), 'percent of max. volume')
print('Min. note velocity recorded is', int(min(velocities)))
print('Max. note velocity recorded is', int(max(velocities)))
print('=' * 70)

print('Approx. total dataset play-time count:')
_print_time_breakdown(ms_time_counter)

total_duration_ms = sum(durations)

print('=' * 70)
print('Approx. total sum of durations of all notes in the dataset:')
_print_time_breakdown(total_duration_ms)
print('=' * 70)

# Lists rather than dict views, so matplotlib receives plain sequences.
plt.scatter(list(unique_pitches_dictionary.keys()), list(unique_pitches_dictionary.values()))
plt.xlabel('Pitch')
plt.ylabel('Number of occurences')
# The original ended with a bare `plt.plot`, which only references the function.
plt.show()

In [None]:
#@title Compute statistics for the entire dataset
#@markdown This may take as long as the main processing above, so please keep it in mind before running this cell/code.

# Every *.mid file directly inside the dataset folder chosen in the
# dataset creation cell.
midi_paths = glob.glob(os.path.join(full_path_to_dataset_dir, '*.mid'))

# Analyse the files in parallel using all available workers.
run_parallel = joblib.Parallel(n_jobs=-1, verbose=1)
statistics = run_parallel(
    joblib.delayed(compute_statistics)(midi_file) for midi_file in midi_paths)

# compute_statistics returns None for files it could not process; drop those.
statistics = list(filter(lambda s: s is not None, statistics))

In [None]:
#@title Display computed statistics

# NOTE: every plt.xticks call below passes as many labels as tick positions;
# current matplotlib raises ValueError when the two lengths differ.

# Get strings for all time signatures
time_signatures = ['{}/{}'.format(c.numerator, c.denominator)
                   for s in statistics for c in s['time_signature_changes']]
# Only display the n_top top time signatures
n_top = 5
# Get the n_top top time signatures as (signature, count) pairs
top = collections.Counter(time_signatures).most_common(n_top)
# Create a dict mapping an integer index to the time signature string
top_signatures = {n: s[0] for n, s in enumerate(top)}
# Add an additional index for non-top signatures
top_signatures[n_top] = 'Other'
# Compute the number of non-top time signatures
n_other = len(time_signatures) - sum(s[1] for s in top)
# Create a list with each index repeated the number of times
# each time signature appears, to be passed to plot_hist
indexed_time_signatures = [n for n, s in enumerate(top) for _ in range(s[1])]
indexed_time_signatures += [n_top]*n_other

plot_hist([s['end_time'] for s in statistics], range(0, 500, 15),
                   'Length in seconds', 'Thousands of MIDI files')
plt.xticks(np.arange(0, len(range(0, 500, 15)), 4) + .5,
           range(0, 500, 60), rotation=45, ha='right');

plot_hist([s['n_instruments'] for s in statistics], range(22),
                   'Number of instruments', 'Thousands of MIDI files')
# Was 5 positions against 4 labels; the tick at 20 is now labelled too.
plt.xticks(range(0, 22, 5), range(0, 22, 5));

plot_hist([i for s in statistics for i in s['program_numbers']], range(128),
                   'Program number', 'Thousands of occurrences')

plot_hist([len(s['tempos']) for s in statistics], range(1, 12),
                   'Number of tempo changes', 'Thousands of MIDI files')
# Was 13 positions against 10 labels; there are exactly 10 bins to label.
plt.xticks(np.arange(10) + .3, range(1, 11),
           rotation=45, ha='right');

plot_hist([i for s in statistics for i in s['tempos']], range(0, 260, 10),
                   'Tempo', 'Thousands of occurrences')
# Was 9 positions against 8 labels; the last tick is now labelled 240.
plt.xticks(np.arange(0, len(range(0, 260, 10)), 3) + .5, range(0, 270, 30),
           rotation=45, ha='right');

plot_hist([len(s['time_signature_changes']) for s in statistics], range(12),
                   'Number of time signature changes', 'Thousands of MIDI files')
# Was 11 positions against 10 labels.
plt.xticks(range(11), range(11));

plot_hist(indexed_time_signatures, range(n_top + 2),
                   'Time signature', 'Thousands of occurrences')
# Plain lists instead of dict views for both positions and labels
plt.xticks(list(top_signatures.keys()), list(top_signatures.values()), rotation=45);

plot_hist([i for s in statistics for i in s['key_numbers']], range(25),
                   'Key', 'Thousands of occurrences')
plt.xticks([0, 2, 4, 5, 7, 9, 11, 12, 14, 16, 17, 19, 21, 23],
           ['C', 'D', 'E', 'F', 'G', 'A', 'B', 'c', 'd', 'e', 'f', 'g', 'a', 'b']);

# WORLD MIDIs Explorer Tools

In [None]:
#@title Search processed dataset
#@markdown Standard MIDI timings are 400/120(80)

 #@markdown start_index = -1 is random index selection

select_instrument = "Violin" #@param ["Piano", "Violin", "Flute"]
start_index = -1 #@param {type:"number"}
number_of_notes_to_play = 300 #@param {type:"slider", min:10, max:500, step:10}
simulate_velocity = True #@param {type:"boolean"}
number_of_ticks_per_quarter = 432 #@param {type:"slider", min:8, max:1000, step:8}
show_detailed_MIDI_stats = False #@param {type:"boolean"}
download_generated_composition = False #@param {type:"boolean"}

print('WORLD MIDIs Explorer')

output_song = []

print('Selected instrument:', select_instrument)

# Source matrix and the value written to slot 3 of every generated note
# (presumably the channel the TMIDI converter maps to the instrument - TODO
# confirm against TMIDI).
instrument_settings = {'Piano': (piano_matrix, 0),
                       'Violin': (violin_matrix, 3),
                       'Flute': (flute_matrix, 8)}
if select_instrument not in instrument_settings:
    # Previously an unknown name silently reused `output` from an earlier run.
    raise ValueError('Unknown instrument: ' + str(select_instrument))

source_matrix, note_slot_3 = instrument_settings[select_instrument]
if not source_matrix:
    raise ValueError('The dataset has no ' + select_instrument + ' notes to play.')

# The numeric form field may hand over a float; slicing needs an int.
start_index = int(start_index)
if start_index < 0:
    start_index = secrets.randbelow(len(source_matrix))
output = source_matrix[start_index:start_index+number_of_notes_to_play]

print('Song index start-time:', start_index)
print('Number of notes requested:', number_of_notes_to_play)

# Start time of the next generated note.
delta = 0

# Walk consecutive pairs; the last selected note only supplies the gap for its
# predecessor and is not emitted itself. Rows are only read, so no copy is needed.
for current, following in zip(output, output[1:]):
    # Cap the duration at number_of_ticks_per_quarter.
    if current[1] < number_of_ticks_per_quarter:
        duration = abs(current[1])
    else:
        duration = number_of_ticks_per_quarter

    # "Simulated" velocity reuses the pitch value; otherwise the recorded one.
    velocity = current[2] if simulate_velocity else current[3]

    output_song.append(['note', delta, duration, note_slot_3, current[2], velocity])

    # Advance time only for gaps of at most number_of_ticks_per_quarter;
    # larger gaps are dropped entirely.
    gap = abs(following[0] - current[0])
    if gap <= number_of_ticks_per_quarter:
        delta += gap

f_name = '/content/WORLD_MIDIs_Dataset_Composition_'

fname = TMIDI.Tegridy_File_Time_Stamp(f_name)

track_name = select_instrument + ' Index ' + str(start_index)

detailed_MIDI_stats = TMIDI.Tegridy_SONG_to_MIDI_Converter(output_song,
                                                          output_file_name=fname,
                                                          number_of_ticks_per_quarter=number_of_ticks_per_quarter,
                                                          output_signature='WORLD MIDIs Dataset',
                                                          track_name=track_name)

if download_generated_composition:
  files.download(fname + '.mid')

if show_detailed_MIDI_stats:
  print('Detailed MIDI stats:')
  pprint(detailed_MIDI_stats)

In [None]:
#@title Play and display generated composition

fn = os.path.basename(fname + '.mid')
fn1 = fn.split('.')[0]
print('Playing and plotting composition...')

pm = pretty_midi.PrettyMIDI(fname + '.mid')

# Retrieve piano roll of the MIDI file, sampled at piano_roll_fs columns per second
piano_roll_fs = 100
piano_roll = pm.get_piano_roll(fs=piano_roll_fs)

plt.figure(figsize=(16, 5))
# Describe the piano roll to specshow: one column per 1/piano_roll_fs seconds
# and row 0 = MIDI note 0. With specshow's audio defaults both the time axis
# and the note labels were wrong.
librosa.display.specshow(piano_roll, sr=piano_roll_fs, hop_length=1,
                         x_axis='time', y_axis='cqt_note',
                         fmin=pretty_midi.note_number_to_hz(0), cmap=plt.cm.hot)
plt.title(fn1 + ' / Start Index #' + str(start_index))

FluidSynth("/usr/share/sounds/sf2/FluidR3_GM.sf2", 16000).midi_to_audio(str(fname + '.mid'), str(fname + '.wav'))
# sr=None keeps the file's own sample rate. The default would resample to
# 22050 Hz, and playing that at 16000 Hz is too slow and too low.
audio, fs = librosa.load(str(fname + '.wav'), sr=None)
Audio(audio, embed=True, rate=fs)

# Explore individual songs/MIDIs in the original dataset

In [None]:
#@title You must provide the original AND the processed MIDI datasets for this to work.
full_path_to_original_MIDIs_dataset_dir = "/content/Dataset" #@param {type:"string"}
song_index = 3 #@param {type:"integer"}
plots_lengths = 16 #@param {type:"slider", min:1, max:20, step:1}
plots_widths = 5 #@param {type:"slider", min:1, max:10, step:1}

# A name of its own: reusing `fname` here overwrote the generated composition's
# path and broke re-running the "Play and display generated composition" cell.
song_path = os.path.join(full_path_to_original_MIDIs_dataset_dir, song_names[song_index])

print('WORLD MIDIs Dataset Explorer')
print('Song:', song_names[song_index], 'with index #', song_index)
print('Loading song. Please wait...')
pm = pretty_midi.PrettyMIDI(song_path)

# Retrieve piano roll of the MIDI file, sampled at piano_roll_fs columns per second
piano_roll_fs = 100
piano_roll = pm.get_piano_roll(fs=piano_roll_fs)
# Row 0 of the piano roll is MIDI note 0; specshow's default would label it C1.
lowest_note_hz = pretty_midi.note_number_to_hz(0)

print('Plotting MIDI...')
plt.figure(figsize=(plots_lengths, plots_widths))
librosa.display.specshow(piano_roll, y_axis='cqt_note', fmin=lowest_note_hz, cmap=plt.cm.hot)
plt.title('Song: ' + song_names[song_index] + ' / Piano Roll')
plt.figure(figsize=(plots_lengths, plots_widths))
# sr/hop_length describe the piano roll's time resolution so the time axis is in real seconds
librosa.display.specshow(piano_roll, sr=piano_roll_fs, hop_length=1,
                         x_axis='time', y_axis='cqt_note', fmin=lowest_note_hz, cmap=plt.cm.hot)

print('Rendering MIDI to audio...')
# NOTE: the rendered .wav is written next to the source MIDI, inside the dataset folder.
FluidSynth("/usr/share/sounds/sf2/FluidR3_GM.sf2", 16000).midi_to_audio(str(song_path), str(song_path + '.wav'))
# librosa resamples to its default rate here; `fs` is the rate of `audio`.
audio, fs = librosa.load(str(song_path + '.wav'))

print('Plotting constant-Q spectrogram...')
# Compute constant-Q spectrogram
cqt = librosa.amplitude_to_db(np.abs(librosa.cqt(audio)))
# Normalize for visualization
cqt = librosa.util.normalize(cqt)

# Separate figure: the CQT used to be drawn on top of the piano roll above.
plt.figure(figsize=(plots_lengths, plots_widths))
librosa.display.specshow(cqt, x_axis = 'time', y_axis='cqt_note',
                      cmap=plt.cm.hot, vmin=np.percentile(cqt, 25))
plt.title('Song: ' + song_names[song_index] + ' / Audio CQT');

print('Loading audio player...')
# Play at the rate the samples actually have; a hard-coded 16000 played the
# resampled audio too slowly and too low.
Audio(audio, embed=True, rate=fs)

In [None]:
#@title Print instruments/MIDI patch numbers list
# `pm` is the PrettyMIDI object loaded by whichever earlier cell ran last.
loaded_instruments = pm.instruments
instrument_total = len(loaded_instruments)
print('Total number of instruments detected:', instrument_total)
print('Showing program indexes from 0 to', instrument_total - 1)
pprint(loaded_instruments)

In [None]:
#@title Explore a specific instrument from a specific MIDI

#@markdown Enter program index from the cell above here. Please note that you need the index, not the actual program number.

program_index_to_plot = 0 #@param {type:"integer"}

selected_instrument = pm.instruments[program_index_to_plot]

# Retrieve piano roll of one of the instruments
piano_roll = selected_instrument.get_piano_roll()
plt.figure(figsize=(plots_lengths, plots_widths))
# Row 0 of the piano roll is MIDI note 0; specshow's default would label it C1.
librosa.display.specshow(piano_roll, y_axis='cqt_note',
                         fmin=pretty_midi.note_number_to_hz(0), cmap=plt.cm.hot)
# Get the text name of this instrument's program number
program_name = pretty_midi.program_to_instrument_name(selected_instrument.program)
plt.title('Instrument ' + str(program_index_to_plot) + ' ({}) piano roll'.format(program_name));

# pretty_midi also provides direct access to the pitch and start/end time of each note
intervals = np.array([[note.start, note.end] for note in selected_instrument.notes])
# Named note_pitches so the dataset-wide `notes` counter set by the dataset
# cells is not overwritten with an array.
note_pitches = np.array([note.pitch for note in selected_instrument.notes])
plt.figure(figsize=(plots_lengths, plots_widths))
mir_eval.display.piano_roll(intervals, midi=note_pitches, facecolor='black')
plt.title('Instrument ' + str(program_index_to_plot) + ' ({}) piano roll'.format(program_name))
plt.xlabel('Time')
plt.ylabel('MIDI note number');

#@title Key Changes and Lyrics info
# Print out all key changes in the MIDI file
for key_change in pm.key_signature_changes:
    print('Key {} starting at time {:.2f}'.format(
        pretty_midi.key_number_to_key_name(key_change.key_number), key_change.time))


# Get the boundaries of each line in the lyrics: index 0 plus the index of
# every lyric event containing a carriage return
lines = [0] + [n for n, lyric in enumerate(pm.lyrics) if '\r' in lyric.text]
for start, end in zip(lines[:-1], lines[1:]):
    line_lyrics = [lyric for lyric in pm.lyrics[start:end] if lyric.text != '\r']
    # The generator has to sit inside join(): the original parentheses passed
    # it to print(), which printed "<generator object ...>" instead of the row.
    # Print the times of each lyric in the line, delimited by |
    print('|'.join('{:>8.3f}'.format(lyric.time) for lyric in line_lyrics))
    # Print the text of each lyric in the line, delimited by |
    print('|'.join('{:>8}'.format(lyric.text) for lyric in line_lyrics))
