<a href="https://colab.research.google.com/github/olaviinha/MidiTurmoil/blob/main/Transcriber.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#<font face="Trebuchet MS" size="6">Transcriber <font color="#999" size="3">v0.0.1</font><font color="#999" size="4">&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;</font><a href="https://github.com/olaviinha/MidiExperiments" target="_blank"><font color="#999" size="4">Github</font></a><font color="#999" size="4">&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;</font><font size="3" color="#999"><a href="https://inha.se" target="_blank"><font color="#999">O. Inha</font></a></font></font>

Transcriber takes an audio file or a YouTube link, separates it into stems using Deezer Spleeter, and then transcribes the whole track or a selected stem to MIDI using Google Magenta's Onsets and Frames piano transcription model. Decent accuracy is achieved only with piano.

In [None]:
#@title #Setup
#@markdown This cell needs to be run only once. It will mount your Google Drive and set up the prerequisites.

# Set to True to repeat the installation below even when inhagcutils.ipynb
# is already present in /content/.
force_setup = False
# Package lists interpolated into the pip / apt-get commands below.
pip_packages = 'youtube-dl magenta spleeter mido pychord pyfluidsynth midi2audio musthe midiutil'
apt_packages = 'fluidsynth sox'

import os
from google.colab import drive, output

# inhagcutils
# The presence of /content/inhagcutils.ipynb is used as the marker that the
# installation has already been done in this runtime.
if not os.path.isfile('/content/inhagcutils.ipynb') or force_setup == True:
  %cd /content/
  !apt-get install {apt_packages}
  output.clear()
  !pip -q install import-ipynb {pip_packages}
  output.clear()
  # Helper notebook and piano-roll module, downloaded into /content/.
  !curl -s -O https://raw.githubusercontent.com/olaviinha/inhagcutils/master/inhagcutils.ipynb
  !curl -s -O https://raw.githubusercontent.com/olaviinha/MIDIGenerators/main/roll.py
  # Custom Spleeter configuration, stored as /content/cfg.json.
  !gsutil -q -m cp -R gs://neural-research/olaviinha/spleeter-configs/custom-5stems-22kHz-z.json /content/cfg.json
# import_ipynb is imported before inhagcutils, which was downloaded above as a
# .ipynb file — presumably it is what makes that notebook importable.
import import_ipynb
# NOTE(review): the star import is where helpers such as create_dirs, op, c,
# check_input_type, list_audio, basename, rnd_str and audio_player appear to
# come from — none of them is defined in this notebook.
from inhagcutils import *

# Mount Drive
# Skipped when Drive is already mounted, and also when force_setup is set.
if not os.path.isdir('/content/drive') and not force_setup:
  drive.mount('/content/drive')

# Drive symlink
# Gives the Drive root a path without spaces; every Drive path used by this
# notebook is built on top of drive_root.
if not os.path.isdir('/content/mydrive') and not force_setup:
  os.symlink('/content/drive/My Drive', '/content/mydrive')
  drive_root_set = True
drive_root = '/content/mydrive/'

# Working directories (all with a trailing slash, paths are built by plain
# string concatenation throughout the notebook).
tmp = '/content/tmp/'
osf = tmp+'osf/'        # Onsets and Frames checkpoint
splt = tmp+'spleets/'   # Spleeter output
tbe = tmp+'tube/'       # youtube-dl downloads
scribd = tmp+'scribd/'
# Create every directory declared above. Previously tbe and scribd were left
# out, so the download directory read by the transcribe cell could be missing.
create_dirs([tmp, osf, splt, tbe, scribd])
tmp_dir = tmp

whl_installed = False

if not os.path.isdir('/content/sf') and force_setup == False:
  !gsutil -q -m cp -R gs://neural-research/olaviinha/sf/* {tmp}

op(c.title, 'Get checkpoint...\n')
!gsutil -q -m cp -R gs://magentadata/models/onsets_frames_transcription/* {osf}
!unzip -o "{osf}/maestro_checkpoint.zip" -d "{osf}"

output.clear()
op(c.ok, 'Setup finished.')



In [None]:
#@markdown #Transcribe

#@markdown <small>Input may be a YouTube link or an audio file located in your Google Drive.</small>
# NOTE: `input` shadows the Python builtin of the same name for the rest of
# the session.
input = "https://www.youtube.com/watch?v=oUFJJNQGwhk" #@param {type:"string"}
# 'all_stems' skips stem separation and transcribes the input as a whole;
# any other value selects the separated stem of that name.
transcribe = 'piano' #@param ["all_stems", "vocals", "piano", "bass", "other", "drums"]

#@markdown ##Save transcribed MIDI
#@markdown <small>Directory in your Google Drive (path relative to the Drive root) where the transcribed MIDI will be saved. If left empty, the MIDI file will not be saved to your Drive.</small>
# When left empty, the code below falls back to the temporary directory.
output_dir = "" #@param {type:"string"}

#----------------------------------------------------------------------------#

def fix_path(path, add_slash=False):
  """Normalise a directory path, creating the directory when it is missing.

  Args:
    path: Directory path. If it is not an existing directory it is handed to
      create_dirs() first.
    add_slash: Despite its name, when this equals True a single leading '/'
      is *removed* from the result.

  Returns:
    The path with a trailing '/' appended when it refers to an existing
    directory and does not already end with one, and with the leading '/'
    stripped when requested.
  """
  if not os.path.isdir(path):
    create_dirs([path])

  # Re-check after the creation attempt: only real directories get the slash.
  if os.path.isdir(path) and path[-1:] != '/':
    path += '/'

  strip_leading = add_slash == True and path.startswith('/')
  return path[1:] if strip_leading else path

#----------------------------------------------------------------------------#

# Resolve where the transcribed MIDI is written.
# Empty form field -> keep everything in the temporary directory.
if output_dir == '':
  output_dir = tmp
  fixed_output_dir = tmp
else:
  # Build the full Drive path *before* calling fix_path(): it creates the
  # directory it is given when missing, so it has to see the Drive location.
  # Passing the bare relative path (as before) created a stray directory under
  # the current working directory and left the Drive target uncreated.
  fixed_output_dir = fix_path(drive_root+output_dir.lstrip('/'))

#----------------------------------------------------------------------------#

import tensorflow.compat.v1 as tf
import os
from os.path import isfile, join
from glob import glob
from shutil import copyfile

input_type = check_input_type(input)

if input_type == 'unknown':
  input = drive_root+fix_path(input)
  input_type = check_input_type(input)

if input_type == 'youtube':
  !rm {tbe}*
  !youtube-dl --restrict-filenames -x --no-continue --audio-format wav -o "{tbe}%(title)s.%(ext)s" {input}
  output.clear()

if input_type == 'file':
  filename, extension = path.splitext(str(input))

if transcribe == 'all_stems':
  use_stem = input
else:
  op(c.title, 'Separate stems...\n')
  from spleeter.separator import Separator
  from spleeter.audio.adapter import get_default_audio_adapter
  separator = Separator('spleeter:5stems')
  file_list = list_audio(tbe)
  for audiofile in file_list:
    separator.separate_to_file(audiofile, splt)
    use_stem = splt+basename(audiofile)+'/'+transcribe+'.wav'

# Model directory handed to the transcription command: the "train" folder
# inside the checkpoint directory populated by the setup cell.
CHECKPOINT_DIR = '{}train'.format(osf)

import ctypes.util

# Keep a handle on the real lookup so every other library still resolves normally.
orig_ctypes_util_find_library = ctypes.util.find_library

def proxy_find_library(lib):
  """Stand-in for ctypes.util.find_library that pins the fluidsynth library.

  Args:
    lib: Library name as passed to ctypes.util.find_library.

  Returns:
    'libfluidsynth.so.1' for 'fluidsynth'; for any other name, whatever the
    original ctypes.util.find_library returns.
  """
  if lib == 'fluidsynth':
    return 'libfluidsynth.so.1'
  return orig_ctypes_util_find_library(lib)

# Install the proxy. This assignment used to be indented inside the function,
# after both return statements, so it never ran and the patch was never applied.
ctypes.util.find_library = proxy_find_library

output.clear()
op(c.title, 'Transcribe...\n')
!onsets_frames_transcription_transcribe --model_dir="{CHECKPOINT_DIR}" "{use_stem}"

if output_dir != '':
  from shutil import copyfile
  scribd_midi = splt+basename(audiofile)+'/'+transcribe+'.wav.midi'
  final_output_file = fixed_output_dir+basename(audiofile)+'_'+transcribe+'_'+rnd_str(4)+'.mid'
  copyfile(scribd_midi, final_output_file)

output.clear()
op(c.ok, 'Done.\n\n')

# print(final_output_file)

from mido import MidiFile
import librosa
from midi2audio import FluidSynth
from IPython.display import display, Audio
import numpy as np
import matplotlib.pyplot as plt
import roll
from random import shuffle
from musthe import *

from mido import MidiFile

# Soundfont used to render the audio preview.
sf2dir = f'{tmp}base/'
pi = f'{sf2dir}SalC5Light2.sf2'

# Load the transcription that was just written.
mid = MidiFile(final_output_file)

## Remove very short notes
# min_time = 0
# new_msgs = [None] * len(mid.tracks)
# for i, track in enumerate(mid.tracks):
#   new_msgs[i] = []
#   for ii, msg in enumerate(track):
#     if msg.type == 'note_on' and msg.time < min_time:
#       msg.time = 0
#       msg.velocity = 0

def plot_notation(midifile):
  """Draw a piano roll for a MIDI file using the downloaded `roll` module.

  Args:
    midifile: Path of the MIDI file, passed straight to roll.mfplt().
  """
  roll.mfplt(midifile).draw_roll()

# Preview: save a copy of the MIDI under a random name in the temporary
# directory, render it to WAV with the soundfont, then plot and play it.
fname = rnd_str(4)
midi_filename = f'{tmp}{fname}.mid'
audio_preview = f'{tmp}{fname}.wav'

mid.save(midi_filename)
FluidSynth(pi).midi_to_audio(midi_filename, audio_preview)
plot_notation(midi_filename)

# librosa.load returns a tuple; its first element is what gets played.
test = librosa.load(audio_preview, sr=44100, mono=False)
audio_player(test[0])

# Report the MIDI path with the Drive root prefix removed.
op(c.title, '\n\nPath to the generated MIDI file (input for other notebooks):', final_output_file.replace(drive_root,''))

