<a href="https://colab.research.google.com/github/spaceBearAmadeus/OUTDATED-studentNeedsNavigator/blob/main/ROSE_Composer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!sudo apt install -y fluidsynth

In [None]:
!pip install --upgrade pyfluidsynth

In [None]:
!pip install pretty_midi

In [1]:
import collections
import datetime
import fluidsynth
import glob
import numpy as np
import pathlib
import pandas as pd
import pretty_midi
import seaborn as sns
import tensorflow as tf

from IPython import display
from matplotlib import pyplot as plt
from typing import Dict, List, Optional, Sequence, Tuple

In [2]:
tf.random.set_seed(42)
np.random.seed(42)

AUDIO_DIR = pathlib.Path("data/maestro-v2.0.0")
ANNOTATIONS_FILE = "/content/data/maestro-v2.0.0.csv"

if not AUDIO_DIR.exists():
  tf.keras.utils.get_file(
      'maestro-v2.0.0-midi.zip',
      origin='https://storage.googleapis.com/magentadata/datasets/maestro/v2.0.0/maestro-v2.0.0-midi.zip',
      extract=True,
      cache_dir='.', cache_subdir='data',
  )

In [169]:
class MaestroDataset: 
  def __init__(self, annotations_file, audio_dir):  
    self.annotations = pd.read_csv(f"{annotations_file}")
    self.audio_dir = audio_dir 
    self._SAMPLING_RATE = 16000

  def __len__(self):
    return len(self.annotations)

  def __getitem__(self, index):
    audio_sample_path = self._get_audio_sample_path(index)
    label = self._get_audio_sample_label(index)
    return audio_sample_path, label 

  def _get_audio_sample_path(self, index):
    filenames = glob.glob(str(self.audio_dir/'**/*.mid*'))
    return filenames[index]

  def _get_audio_sample_label(self, index):
    return self.annotations.iloc[index, 6]
  
  def get_pretty_midi(self, midi_file_index:int):
    '''retrieves pretty_midi object'''
    pm = pretty_midi.PrettyMIDI(self.__getitem__(midi_file_index)[0])
    return pm

  #DATA VISUALIZATION METHODS
  def display_audio(self, pm:pretty_midi.PrettyMIDI, seconds=30):
    '''generates a mini .wav player in the markdown cell'''
    waveform = pm.fluidsynth(fs=self._SAMPLING_RATE)
    # Take a sample of the generated waveform to mitigate kernel resets
    waveform_short = waveform[:seconds*self._SAMPLING_RATE]
    return display.Audio(waveform_short, rate=self._SAMPLING_RATE)

  def fetch_inspect_MIDI_instrument(self, pm:pretty_midi.PrettyMIDI, show:bool=False):
    '''retrieves instrument(s) in song, right now just [0], and shows data if show==True'''
    instrument = pm.instruments[0] 
    instrument_name = pretty_midi.program_to_instrument_name(instrument.program)
    if show:
      print("\n*-------------------------------------------*")
      print(f"{'            '} \033[1;36mINSTRUMENT INFORMATION\033[0m")
      print(f"*-------------------------------------------*")
      print(f"\033[0;34m{' '}NUMBER_OF_INSTRUMENTS:\033[0m {len(pm.instruments)}")
      print("*-------------------------------------------*")
      print(f"\033[0;34m{' '}INSTRUMENT_NAME:\033[0m {instrument_name}") 
      print("*-------------------------------------------*") 
      for i, note in enumerate(instrument.notes[:10]):
        note_name = pretty_midi.note_number_to_name(note.pitch)
        duration = note.end - note.start
        print(f"{' '}{i}: \033[0;31mPITCH\033[0m={note.pitch}, \033[0;32mNOTE_NAME\033[0m={note_name}, \033[0;33mDURATION\033[0m={duration:.4f}\n")
    print("*-------------------------------------------*") 
    print("\n")
    return instrument 

  def midi_to_notes_df(self, prettyMidi:Optional[pretty_midi.PrettyMIDI]=None, 
                       midi_file_index:Optional[int]=None) -> pd.DataFrame:
    
    '''if pm is given, default to the instantiated pm object, else custom index with same method!'''
    try:
      if midi_file_index:
        instrument = self.get_pretty_midi(midi_file_index).instruments[0]
      elif prettyMidi:
        instrument = prettyMidi.instruments[0]

      notes = collections.defaultdict(list)
      sorted_notes = sorted(instrument.notes, key=lambda note: note.start)#<-sort by start time
      prev_start = sorted_notes[0].start

      for note in sorted_notes:
        start = note.start 
        end = note.end
        notes['pitch'].append(note.pitch)
        notes['start'].append(start)
        notes['end'].append(end)
        notes['step'].append(start - prev_start)
        notes['duration'].append(end - start)
        prev_start = start
      
      return pd.DataFrame({name: np.array(value) for name, value in notes.items()})
    
    except: 
      if midi_file_index and prettyMidi:
        print("Cannot have pm AND midi_file_index!")
        return None 


rose = MaestroDataset(ANNOTATIONS_FILE, AUDIO_DIR)

In [170]:
pm = rose.get_pretty_midi(1111) #<-index number of the sample we are inspecting

In [171]:
rose.display_audio(pm)

In [172]:
midi_1 = rose.fetch_inspect_MIDI_instrument(pm, show=True)


*-------------------------------------------*
             [1;36mINSTRUMENT INFORMATION[0m
*-------------------------------------------*
[0;34m NUMBER_OF_INSTRUMENTS:[0m 1
*-------------------------------------------*
[0;34m INSTRUMENT_NAME:[0m Acoustic Grand Piano
*-------------------------------------------*
 0: [0;31mPITCH[0m=75, [0;32mNOTE_NAME[0m=D#5, [0;33mDURATION[0m=0.1120

 1: [0;31mPITCH[0m=63, [0;32mNOTE_NAME[0m=D#4, [0;33mDURATION[0m=0.3516

 2: [0;31mPITCH[0m=72, [0;32mNOTE_NAME[0m=C5, [0;33mDURATION[0m=0.1029

 3: [0;31mPITCH[0m=60, [0;32mNOTE_NAME[0m=C4, [0;33mDURATION[0m=0.0924

 4: [0;31mPITCH[0m=52, [0;32mNOTE_NAME[0m=E3, [0;33mDURATION[0m=0.1107

 5: [0;31mPITCH[0m=68, [0;32mNOTE_NAME[0m=G#4, [0;33mDURATION[0m=1.2057

 6: [0;31mPITCH[0m=80, [0;32mNOTE_NAME[0m=G#5, [0;33mDURATION[0m=0.2253

 7: [0;31mPITCH[0m=72, [0;32mNOTE_NAME[0m=C5, [0;33mDURATION[0m=0.2930

 8: [0;31mPITCH[0m=75, [0;32mNOTE_NAME[0m=D#5, [

In [173]:
rose.midi_to_notes_df(prettyMidi=pm).head()

Unnamed: 0,pitch,start,end,step,duration
0,68,0.959635,2.165365,0.0,1.205729
1,63,1.388021,1.739583,0.428385,0.351562
2,75,1.619792,1.731771,0.231771,0.111979
3,72,1.644531,1.747396,0.02474,0.102865
4,60,1.751302,1.84375,0.106771,0.092448


In [174]:
rose.midi_to_notes_df(midi_file_index=6).head()

Unnamed: 0,pitch,start,end,step,duration
0,67,0.158854,4.994792,0.0,4.835937
1,65,1.002604,1.244792,0.84375,0.242188
2,63,1.647135,1.851562,0.644531,0.204427
3,61,2.291667,2.490885,0.644531,0.199219
4,59,2.897135,3.067708,0.605469,0.170573
