In [21]:
from mido import MidiFile
import soundfile as sf
import numpy as np
import psola

In [2]:
def midi_to_hz(midi):
    """Convert a MIDI note number to an equal-tempered frequency in Hz.

    MIDI note 57 is pinned at 220 Hz and every semitone multiplies the
    frequency by 2**(1/12), so note 69 comes out at 440 Hz.

    Args:
        midi: MIDI note number.

    Returns:
        The corresponding frequency in Hz.
    """
    semitones_from_reference = midi - 57
    return 220 * (2 ** (semitones_from_reference / 12))

In [3]:
def gen_note_times(tracks):
    """Collect the sounding intervals of every MIDI pitch, in ticks.

    Each track is walked with its own running clock. The delta time of
    *every* message advances that clock, not only that of ``note_on``
    messages; otherwise the ticks carried by meta, controller, or
    ``note_off`` messages would be dropped and later notes would land too
    early.

    A note starts on a ``note_on`` with non-zero velocity and ends on either
    a ``note_off`` or a ``note_on`` with velocity 0. Every section that is
    returned is a well-formed ``[start, end]`` pair:

    * a release with no open note for that pitch is ignored,
    * a new onset while the pitch is still open closes the previous section
      at the new onset,
    * notes still open at the end are closed at the end of the longest track.

    Args:
        tracks: Iterable of tracks, each an iterable of messages exposing
            ``type`` and ``time`` (delta ticks), plus ``note`` / ``velocity``
            on note messages and ``tempo`` on ``set_tempo`` messages.

    Returns:
        ``(tempo, note_times)`` where ``tempo`` is the value of the last
        ``set_tempo`` message encountered (500000, the MIDI default of
        120 BPM, if there is none) and ``note_times`` is a 128-element list
        whose entry ``p`` holds the ``[start, end]`` sections of pitch ``p``.
    """
    note_times = [[] for _ in range(128)]
    tempo = 500000  # MIDI default tempo in microseconds per beat
    last_tick = 0

    for track in tracks:
        cur_time = 0
        for msg in track:
            # Delta times apply to every message, whatever its type.
            cur_time += msg.time

            if msg.type == 'set_tempo':
                tempo = msg.tempo
                continue

            if msg.type not in ('note_on', 'note_off'):
                continue

            sections = note_times[msg.note]
            is_open = bool(sections) and len(sections[-1]) == 1
            is_release = msg.type == 'note_off' or msg.velocity == 0

            if is_release:
                if is_open:
                    sections[-1].append(cur_time)
            else:
                if is_open:
                    # Re-trigger: end the previous note where the new one starts.
                    sections[-1].append(cur_time)
                sections.append([cur_time])

        last_tick = max(last_tick, cur_time)

    # Close anything that never received a release.
    for sections in note_times:
        if sections and len(sections[-1]) == 1:
            sections[-1].append(last_tick)

    return tempo, note_times

In [4]:
def gen_cutoffs(note_times, min_gap=200):
    """Build the quantised time grid on which notes start and stop.

    Every start/end tick found in ``note_times`` is a candidate cutoff. The
    candidates are visited in ascending order, and any candidate closer than
    ``min_gap`` ticks to the most recently kept cutoff is merged into it.

    Args:
        note_times: Per-pitch list of sections; only the first two entries of
            each section (start, end) are considered.
        min_gap: Minimum distance, in ticks, between two kept cutoffs.

    Returns:
        A tuple ``(cutoffs_list, cutoffs_dict, cutoffs_map)``:

        * ``cutoffs_list``: the kept cutoffs, ascending;
        * ``cutoffs_dict``: kept cutoff -> its index in ``cutoffs_list``;
        * ``cutoffs_map``: every original tick -> the kept cutoff it was
          merged into (kept cutoffs map to themselves).
    """
    candidate_times = sorted({
        time
        for sections in note_times
        for section in sections
        for time in section[:2]
    })

    cutoffs_list = []
    cutoffs_map = {}
    for time in candidate_times:
        if cutoffs_list and (time - cutoffs_list[-1]) < min_gap:
            # Too close to the previous kept cutoff: snap onto it.
            cutoffs_map[time] = cutoffs_list[-1]
        else:
            cutoffs_list.append(time)
            cutoffs_map[time] = time

    cutoffs_dict = {
        time: index
        for index, time in enumerate(cutoffs_list)
    }
    return (cutoffs_list, cutoffs_dict, cutoffs_map)

In [5]:
def actually_insert_note(time_range, cutoffs_dict, note, lo, hi):
    """Write ``note`` into every slot of a voice between two cutoffs.

    Args:
        time_range: The voice, a list with one entry per grid slot; it is
            modified in place.
        cutoffs_dict: Maps a cutoff tick to its slot index.
        note: Value stored in each covered slot.
        lo: Cutoff tick where the note starts (inclusive slot).
        hi: Cutoff tick where the note ends (exclusive slot).
    """
    first_slot = cutoffs_dict[lo]
    end_slot = cutoffs_dict[hi]
    # A non-positive span yields an empty list, leaving the voice untouched.
    time_range[first_slot:end_slot] = [note] * (end_slot - first_slot)
def check_if_note_fits(time_blocks, cutoffs_dict, note, lo, hi):
    """Tell whether a voice is free between two cutoffs.

    Args:
        time_blocks: A single voice (list with one entry per grid slot);
            0 marks a free slot.
        cutoffs_dict: Maps a cutoff tick to its slot index.
        note: Unused; kept so the signature mirrors ``actually_insert_note``.
        lo: Cutoff tick where the candidate note starts.
        hi: Cutoff tick where the candidate note ends (exclusive).

    Returns:
        True when every slot in the span is 0 (an empty span always fits),
        False otherwise.
    """
    first_slot = cutoffs_dict[lo]
    end_slot = cutoffs_dict[hi]
    for occupant in time_blocks[first_slot:end_slot]:
        if occupant != 0:
            return False
    return True
    
def insert_voice(time_blocks, cutoffs_dict, cutoffs_map, note, lo, hi):
    """Place a note into the first voice that has room, adding one if needed.

    The raw start/end ticks are first snapped onto the grid via
    ``cutoffs_map``. Voices are tried in order; if none is free over the
    snapped span, a new all-zero voice with ``len(cutoffs_dict) - 1`` slots
    is appended and receives the note.

    Args:
        time_blocks: List of voices; modified in place.
        cutoffs_dict: Maps a cutoff tick to its slot index.
        cutoffs_map: Maps a raw tick to the cutoff it was merged into.
        note: Value to store (the MIDI pitch).
        lo: Raw start tick of the note.
        hi: Raw end tick of the note.
    """
    for voice in time_blocks:
        if check_if_note_fits(voice, cutoffs_dict, note, cutoffs_map[lo], cutoffs_map[hi]):
            break
    else:
        # No existing voice can host the note: open a fresh, empty one.
        voice = [0] * (len(cutoffs_dict) - 1)
        time_blocks.append(voice)

    actually_insert_note(voice, cutoffs_dict, note, cutoffs_map[lo], cutoffs_map[hi])

In [6]:
def gen_time_blocks(tracks):
    """Turn MIDI tracks into monophonic voices laid out on a shared grid.

    Args:
        tracks: The MIDI tracks, as accepted by ``gen_note_times``.

    Returns:
        ``(cutoffs_list, cutoffs_dict, time_blocks)``: the grid produced by
        ``gen_cutoffs`` and the list of voices filled by ``insert_voice``.
        Each voice holds one value per grid slot (0 where nothing was
        inserted).
    """
    # The tempo is part of gen_note_times' result but is not needed here.
    _tempo, note_times = gen_note_times(tracks)
    cutoffs_list, cutoffs_dict, cutoffs_map = gen_cutoffs(note_times)

    voices = []
    for midi in range(len(note_times)):
        for section in note_times[midi]:
            onset, release = section[:2]
            insert_voice(voices, cutoffs_dict, cutoffs_map, midi, onset, release)

    return cutoffs_list, cutoffs_dict, voices

In [7]:
def get_length_of_midi(mid, tempo, note_times):
    """Estimate the duration, in seconds, spanned by the notes of a MIDI file.

    The latest tick found in ``note_times`` is converted to beats with
    ``mid.ticks_per_beat`` and then to seconds with ``tempo``.

    Only the first two entries of each section are read, matching the
    ``[:2]`` slicing used by the other helpers, so a section carrying extra
    entries does not break the computation. With no notes at all the result
    is 0 instead of an error.

    Args:
        mid: Object exposing ``ticks_per_beat``.
        tempo: Tempo in microseconds per beat.
        note_times: Per-pitch list of ``[start, end]`` sections, in ticks.

    Returns:
        The estimated length in seconds.
    """
    total_ticks = max(
        (
            time
            for pitch_blocks in note_times
            for section in pitch_blocks
            for time in section[:2]
        ),
        default=0,
    )

    # NOTE(review): the 1.5 and 1.6 factors are unexplained in this file;
    # together they scale the nominal duration by 0.9375. Kept as is to
    # preserve the existing output length -- confirm whether still wanted.
    tempo *= 1.5
    return (tempo / 1_000_000) * (total_ticks / mid.ticks_per_beat) / 1.6

In [8]:
def gen_tone_of_length(seconds, freq=200, sample_rate=44100):
    """Generate a sine tone as a list of samples.

    The number of samples is computed up front as an integer. Stepping a
    float ``np.arange`` up to a float stop can gain or lose a sample through
    rounding and cannot represent ``freq=0`` (zero step).

    Args:
        seconds: Duration of the tone; non-positive values give an empty list.
        freq: Frequency of the sine in Hz.
        sample_rate: Samples per second.

    Returns:
        A list of ``ceil(seconds * sample_rate)`` samples in [-1, 1].
    """
    n_samples = max(0, int(np.ceil(seconds * sample_rate)))
    phase_step = 2*np.pi*freq/sample_rate
    return list(np.sin(np.arange(n_samples) * phase_step))

In [9]:
def gen_f0s_from_blocks(time_blocks, cutoffs_list, cutoffs_dict):
    """Expand grid-based voices into per-tick frequency tracks.

    Args:
        time_blocks: List of voices; each voice holds one note value per grid
            slot.
        cutoffs_list: Ascending cutoff ticks delimiting the grid slots.
        cutoffs_dict: Mapping keyed by cutoff tick; its largest key sets the
            number of columns of the result.

    Returns:
        A 2-D array with one row per voice and one column per tick. Slot
        ``i`` of a voice fills the columns from ``cutoffs_list[i]`` up to
        (excluding) ``cutoffs_list[i + 1]`` with ``midi_to_hz`` of the
        slot's value; columns no slot covers stay 0.
    """
    n_ticks = max(cutoffs_dict.keys())
    f0s = np.zeros([len(time_blocks), n_ticks])

    for voice_index, voice in enumerate(time_blocks):
        row = f0s[voice_index]  # view into f0s, so writes land in the result
        for slot, note in enumerate(voice):
            start_tick = cutoffs_list[slot]
            end_tick = cutoffs_list[slot + 1]
            row[np.arange(start_tick, end_tick)] = midi_to_hz(note)

    return f0s

In [10]:
def gen_f0s(filepath):
    """Load a MIDI file and return its per-voice frequency tracks.

    Args:
        filepath: Path of the MIDI file, opened with ``MidiFile(..., clip=True)``.

    Returns:
        The array built by ``gen_f0s_from_blocks`` from the file's tracks.
    """
    midi_file = MidiFile(filepath, clip=True)
    cutoffs_list, cutoffs_dict, time_blocks = gen_time_blocks(midi_file.tracks)
    return gen_f0s_from_blocks(time_blocks, cutoffs_list, cutoffs_dict)

In [20]:
def vocode(sound, f0s, sample_rate=44100):
    """Pitch-shift ``sound`` along each f0 track and average the results.

    For every track the sound is run through ``psola.vocode`` with that track
    as target pitch. Frames whose target is below 10 Hz are treated as
    silence: the samples they cover are zeroed in that voice before it is
    added to the mix.

    Args:
        sound: The samples to process; ``len(sound)`` sets the output length.
            Assumed to be one-dimensional (mono) -- TODO confirm for files
            read with more than one channel.
        f0s: Sequence of f0 tracks (one per voice), in Hz.
        sample_rate: Sample rate of ``sound`` in Hz.

    Returns:
        Array of ``len(sound)`` samples: the mean of all voices, or all zeros
        when ``f0s`` is empty.
    """
    n_samples = len(sound)
    full = np.zeros(n_samples)

    # Nothing to mix: avoid dividing by zero below.
    if len(f0s) == 0:
        return full

    for f0 in f0s:
        # Partially taken from https://thewolfsound.com/how-to-auto-tune-your-voice-with-python/
        vocoded = psola.vocode(sound,
                     sample_rate=int(sample_rate),
                     target_pitch=f0,
                     fmin=65.41,   # C2 = 65.41 Hz
                     fmax=2093.00) # C7 = 2093.00 Hz

        # Each f0 frame covers `size_ratio` samples of the sound.
        size_ratio = n_samples / len(f0)
        silent_frames = np.flatnonzero(np.asarray(f0) < 10)
        offsets = np.arange(int(size_ratio // 1 + 1))
        zeros_indices = (
            silent_frames[:, None] * size_ratio + offsets
        ).astype(int).ravel()
        # The last frame's span can reach one past the final sample.
        zeros_indices = zeros_indices[zeros_indices < n_samples]

        vocoded[zeros_indices] = 0
        full += vocoded

    full /= len(f0s)
    return full

In [12]:
def add_metronome(vocoded, tempo, sample_rate=44100, click_samples=20):
    """Overlay a click on every beat of a sample buffer, in place.

    A click is a run of ``click_samples`` samples set to 1, starting at
    sample 0 and repeating every beat. A click that starts near the end of
    the buffer is truncated rather than written past the last sample.

    Args:
        vocoded: Mutable sequence of samples (list or array); modified in
            place.
        tempo: Beat length in microseconds.
        sample_rate: Samples per second of ``vocoded``.
        click_samples: Length of each click, in samples.

    Returns:
        The same ``vocoded`` object, for convenience.

    Raises:
        ValueError: If ``tempo`` is so small that a beat spans zero samples.
    """
    skip = int((tempo / 1_000_000) * sample_rate)
    if skip == 0:
        raise ValueError('tempo is too small: a beat must span at least one sample')

    total = len(vocoded)
    for start_index in range(0, total, skip):
        click_end = min(start_index + click_samples, total)
        for index in range(start_index, click_end):
            vocoded[index] = 1

    return vocoded

In [13]:
def gen_tone_file(midi_in, mp3_out):
    """Render a MIDI file as a vocoded sine tone and write it to disk.

    A plain tone as long as the MIDI file is generated, pitch-shifted along
    the file's f0 tracks with ``vocode``, and written at 44100 Hz.

    Args:
        midi_in: Path of the input MIDI file; must end with ``.mid``.
        mp3_out: Path of the audio file to write.

    Raises:
        Exception: If ``midi_in`` does not end with ``.mid``.
    """
    if not midi_in.endswith('.mid'):
        raise Exception('Not a MIDI file')

    midi_file = MidiFile(midi_in, clip=True)
    tempo, note_times = gen_note_times(midi_file.tracks)
    duration = get_length_of_midi(midi_file, tempo, note_times)

    carrier = gen_tone_of_length(duration)
    pitch_tracks = gen_f0s(midi_in)

    rendered = vocode(carrier, pitch_tracks)
    sf.write(mp3_out, rendered, 44100)

In [17]:
def gen_vocoded(midi_in, mp3_in, mp3_out):
    """Vocode a recording along the pitches of a MIDI file and save it.

    Args:
        midi_in: Path of the MIDI file providing the target pitches; must end
            with ``.mid``.
        mp3_in: Path of the recording to process; must end with ``.mp3``.
        mp3_out: Path of the audio file to write.

    Raises:
        Exception: If ``midi_in`` or ``mp3_in`` has the wrong extension.
    """
    extension_checks = (
        (midi_in, '.mid', 'Not a MIDI file'),
        (mp3_in, '.mp3', 'Not an MP3 file'),
    )
    for path, extension, message in extension_checks:
        if not path.endswith(extension):
            raise Exception(message)

    pitch_tracks = gen_f0s(midi_in)

    # NOTE(review): the sample rate reported by the file is read but not
    # used; processing and output both assume 44100 Hz. Confirm that inputs
    # are always 44.1 kHz.
    waveform, _sample_rate = sf.read(mp3_in)

    rendered = vocode(waveform, pitch_tracks)
    sf.write(mp3_out, rendered, 44100)

In [15]:
# Render the MIDI melody as a vocoded sine tone.
gen_tone_file(midi_in='midis/auldlangsyne.mid', mp3_out='audio/auldlangsyne_tone.mp3')

In [18]:
# Vocode the raw recording along the same MIDI melody.
gen_vocoded(
    midi_in='midis/auldlangsyne.mid',
    mp3_in='audio/auldlangsyne_raw.mp3',
    mp3_out='audio/auldlangsyne_vocoded.mp3',
)