# Piano-to-MIDI Transcription
## Process outline:
    1. Load data
    2. Preprocess data (optional: remove the left channel, since the piano in the Aebersold recordings is only on the right channel)
    3. Extract piano source with DEMUCS source separator
    4. Transcribe piano with Piano Transcription Inference (PTI)-model to MIDI
    (5. Listen to MIDI-files in ./evaluation.ipynb)


### Install and load Packages

In [1]:
#!conda install mamba --yes
#!mamba install ffmpeg==4.1.3 --yes
# needed for torchaudio to be able to handle mp3 

### Troubleshooting / manual installations (should not be required if you created the environment from the provided requirements.txt)
#!pip install piano_transcription_inference
#!pip install pyfluidsynth
#!conda install mamba --yes # mamba makes installations in conda faster
#!mamba install "numpy<1.24.0" --yes # pretty_midi needs an older numpy version (quotes keep the shell from treating "<" as a redirect); use conda or pip if you don't have mamba
#!mamba install librosa==0.9.2 --yes # required librosa version; restart the kernel if the error persists

### Restart kernel after installing packages! ###

In [2]:
from piano_transcription_utils import *
from IPython.display import Audio
import soundfile as sf
import numpy as np
import scipy
import librosa
from glob import glob
import os
import logging
import shutil

#demucs imports
import torch
import torchaudio
from torchaudio.transforms import Fade
from torchaudio.utils import download_asset

#PTI imports
#PTI will install files locally when first run
from piano_transcription_inference import PianoTranscription, sample_rate, load_audio


DEBUG:matplotlib data path: /home/seb/miniconda3/envs/practical/lib/python3.11/site-packages/matplotlib/mpl-data
DEBUG:CONFIGDIR=/home/seb/.config/matplotlib
DEBUG:interactive is False
DEBUG:platform is linux
DEBUG:CACHEDIR=/home/seb/.cache/matplotlib
DEBUG:Using fontManager instance from /home/seb/.cache/matplotlib/fontlist-v330.json


### Setup 

In [3]:
# Configure the root logger: messages are printed as "LEVEL:message", INFO and above.
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

# Global settings read by the cells below.
PATH_TO_FILES = "audio/" # input folder that is searched for audio files (relative filepath)
OUT_PATH = "audio/out"  # output folder; MIDI files go here, separated audio into its "demucs" subfolder (relative filepath)
RECURSIVE = True # set to <True> to include subfolders
PAN = None  # str:"0" for left channel, str:"1" for right channel (piano in aebersold), leave <None> for no panning (e.g. already separated Filosax data)
FILETYPE = ["wav","mp3"] # file type(s) to look for, e.g. ["wav","mp3"]; they have to be supported by torchaudio
DEVICE = "cpu" # device passed to the source separation; make it str:"cuda" if available
TRIM = True # set to <True> if you want to trim audio (shorter waiting times for testing)
START = 0 # starting point in seconds for trimming, default is 0
STOP = 30 # end point in seconds for trimming, set to int:0 if no end trimming.
KEEP_DEMUCS = False # set to <False> to be asked after transcription whether the folder with the demucs-separated sources should be deleted
FORMATTER = ":03d" # format spec for the running file number appended to each output name; the leading colon is required because it is inserted as "{"+FORMATTER+"}". Use str:":02d" for up to 99 files, str:":03d" for up to 999 files, or <None> for no numbering

In [4]:
# Initialize Demucs for the source-separation step.
# NOTE(review): initialize_demucs is not defined in this notebook; it presumably comes from the
# star import of piano_transcription_utils -- check there for what exactly it sets up.
initialize_demucs()

INFO:Initialized Demucs successfully


### Load files

In [5]:
# Collect the input audio files.
f_list = get_files(PATH_TO_FILES,OUT_PATH,FILETYPE,RECURSIVE)

# Optional filter: keep only files whose path contains this substring (e.g. "Piano").
# Leave as <None> to keep every file that was found.
FILTER_SUBSTRING = None
if FILTER_SUBSTRING:
    f_list = [f for f in f_list if FILTER_SUBSTRING in f]
    logging.info(f"{len(f_list)} audiofiles left after filtering by '{FILTER_SUBSTRING}'.")

f_list # show found files


INFO:Created output folders.
INFO:Found 1 audiofiles.


['audio/jazz_piano.mp3']

### Preprocess and Transcribe audio

In [6]:
# Transcribe every file in f_list: load (and optionally pan/trim) the audio, separate the
# piano source with Demucs, then transcribe the separated audio to a MIDI file in OUT_PATH.
logging.getLogger().setLevel(logging.INFO)

# Folder the separated audio is read back from for transcription.
demucs_dir = os.path.join(OUT_PATH,'demucs')
n = len(f_list)  # loop-invariant, only needed for the progress message

for i,f in enumerate(f_list):
    # File name without its extension; splitext also handles extensions that are
    # not exactly three characters long (e.g. ".flac").
    fname = os.path.splitext(os.path.basename(f))[0]
    if FORMATTER:
        # Format only the running number, so curly braces inside the file name
        # cannot break str.format.
        fname_formatted = fname + ("{"+FORMATTER+"}").format(i+1)
    else:
        fname_formatted = fname
    demucs_file = os.path.join(demucs_dir,f'{fname_formatted}.wav')
    midi_file = os.path.join(OUT_PATH,f'{fname_formatted}.mid')

    logging.info('#'*99)
    logging.info(f"Processing file {i+1}/{n}: {fname} ...")
    # pan audio
    wave,sr = load_and_pan(f,pan=PAN,trim=TRIM,start=START,stop=STOP)
    # separate piano source; it has to be saved to disk because the transcription reads it from there
    source_p, sr = separate_audio(file=wave,sr=sr,filename=f'{fname_formatted}.wav',source='piano',device = DEVICE, save_to_disk = True,out_path=OUT_PATH)
    # make transcription (log the file that is actually transcribed)
    logging.info(demucs_file)
    make_transcription(demucs_file,midi_file)

# delete demucs files if specified
logging.info('#'*99)
if not KEEP_DEMUCS:
    if os.path.isdir(demucs_dir):
        logging.info(f"{demucs_dir} and all its content will be removed, are you sure? y/n ")
        # Anything other than "y" (ignoring case and surrounding whitespace) aborts the removal.
        if input().strip().lower() == "y":
            shutil.rmtree(demucs_dir)
        else:
            logging.info("Aborted removal.")
    else:
        # Avoids a FileNotFoundError from rmtree when the folder is missing.
        logging.info(f"{demucs_dir} does not exist, nothing to remove.")

logging.info("Finished.")
    
    
    

INFO:###################################################################################################
INFO:Processing file 1/1: jazz_piano ...
INFO:The local file (/home/seb/.cache/torch/hub/torchaudio/models/hdemucs_high_trained.pt) exists. Skipping the download.
INFO:Separating track from local data ...
INFO:...finished.
INFO:Saved file to: audio/out/demucs/jazz_piano001.wav .
INFO:audio/demucs/jazz_piano.wav
  y = librosa.core.audio.resample(y, sr_native, sr, res_type=res_type)


Checkpoint path: /home/seb/piano_transcription_inference_data/note_F1=0.9677_pedal_F1=0.9186.pth
Using cpu for inference.
Using CPU.
Segment 0 / 5
Segment 1 / 5
Segment 2 / 5
Segment 3 / 5
Segment 4 / 5
Segment 5 / 5


INFO:Transcribed jazz_piano001.mid and saved to disk.
INFO:###################################################################################################
INFO:audio/out/demucs and all its content will be removed, are you sure? y/n 


Write out to audio/out/jazz_piano001.mid


 y


INFO:Finished.


In [None]:
# Answer the removal prompt of the previous cell with <y> or <n>, so that nothing is deleted unintentionally!