# 1. Collect Data

In [109]:
import numpy as np
class Note:
    def __init__(self,s,i,v):
        self.start = s
        self.relative_interval = i
        self.duration = v #duration


class Accompany:
    def __init__(self,num=4,den=4,notelist=[],classjson=None):
        if classjson is None:
            self.numerator = num
            self.denominator = den
            self.notelist = notelist
        else:
            self.numerator = classjson["numerator"]
            self.denominator = classjson["denominator"]
            self.notelist = classjson['notes']
    
    def export_dict(self):
        self.calculate_rhythm()
        tmp = dict()
        tmp['numerator'] = self.numerator
        tmp['denominator'] = self.denominator
        tmp['notes'] = []
        for notes in self.notelist:
            tmp2 = dict()
            tmp2['s'] = notes.start
            tmp2['i'] = notes.relative_interval
            tmp2['d'] = notes.duration
            tmp['notes'].append(tmp2)
        tmp['rhythm'] = self.rhythm
        return tmp

    def add_notes(self,n:Note):
        self.notelist.append(n)
    
    def calculate_rhythm(self):
        total_duration = self.numerator * (0.5 ** (math.log(self.denominator,2)-2)) #number of quarter notes
        self.rhythm = [0 for _ in range(24)]
        interval = total_duration / 24
        rhythmtime = [i*interval for i in range(24)]
        for note in self.notelist:
            onset = note.start
            idx,val = self.find_closest(rhythmtime,onset)
            if val < interval/2:
                self.rhythm[idx] = 1

    def find_closest(self,arr,val):
        newlist = [abs(x-val) for x in arr]
        return np.argmin(newlist),np.min(newlist)

In [110]:
def notes_bar_processing(notes,begin_tick,tpb,num,den):
    #find min pitch
    min_pitch = 128
    for note in notes:
        if note.pitch < min_pitch:
            min_pitch = note.pitch
    accom = Accompany(num=num,den=den,notelist=[])
    for note in notes:
        start = (note.start-begin_tick)/tpb
        rpitch = note.pitch - min_pitch
        dur = (note.end-note.start)/tpb
        accom.add_notes(Note(start,rpitch,dur))
    return accom
    

In [111]:
import glob
from miditoolkit.midi import parser as mid_parser  
from miditoolkit.midi import containers as ct
import math


database = []
for midifile in glob.glob("../data/nice_format/*.mid"): #Replace it with your own directory
    tempdb = []
    mido = mid_parser.MidiFile(midifile)
    # print(mido.time_signature_changes)
    tschanges = dict()
    for ts in mido.time_signature_changes:
        tschanges[ts.time] = (ts.numerator,ts.denominator)
        if ts.numerator == 37:
            print("Crazy",midifile)
    tpb = mido.ticks_per_beat
    numerator = tschanges[0][0]
    denominator = tschanges[0][1]
    del tschanges[0]
    idx = -1
    # if len(mido.instruments) > 2:
    #     print(f"{midifile} has more than two channels, please check")
    #     continue
    for i,inst in enumerate(mido.instruments):
        if inst.name.find("left") != -1 or inst.name.find("Left") != 1:
            idx = i
            break
    if idx == -1:
        print(f"{midifile} may not have left channel, please check.")
        continue
    add_interval = int(tpb*numerator*(0.5 ** (math.log(denominator,2)-2)))
    current_tick = add_interval
    begin_tick = 0    
    notelist = []
    tmp_notelist = []
    for note in mido.instruments[idx].notes:
        if note.start < current_tick:
            if note.end > current_tick:
                tmp_notelist.append(ct.Note(start=current_tick,end=note.end,pitch=note.pitch,velocity=note.velocity))
                notelist.append(ct.Note(start=note.start,end=current_tick,pitch=note.pitch,velocity=note.velocity))
            else:
                notelist.append(note)
        else:
            if notelist != []:
                tempdb.append(notes_bar_processing(notelist,begin_tick,tpb,numerator,denominator))
            notelist = []
            begin_tick = current_tick
            if begin_tick in tschanges:
                numerator,denominator = tschanges[begin_tick]
                add_interval = int(tpb*numerator*(0.5 ** (math.log(denominator,2)-2)))
                del tschanges[begin_tick]
            current_tick += add_interval
            tmp2 = []
            for note2 in tmp_notelist:
                if note2.end > current_tick:
                    tmp2.append(ct.Note(start=current_tick,end=note2.end,pitch=note2.pitch,velocity=note2.velocity))
                    notelist.append(ct.Note(start=note2.start,end=current_tick,pitch=note2.pitch,velocity=note2.velocity))
                else:
                    notelist.append(note2)
            tmp_notelist = tmp2
            if note.end > current_tick:
                tmp_notelist.append(ct.Note(start=current_tick,end=note.end,pitch=note.pitch,velocity=note.velocity))
                notelist.append(ct.Note(start=note.start,end=current_tick,pitch=note.pitch,velocity=note.velocity))
            else:
                notelist.append(note)
    if notelist != []:
        tempdb.append(notes_bar_processing(notelist,begin_tick,tpb,numerator,denominator))
    try:
        assert len(tschanges) == 0
    except AssertionError:
        print(f"{midifile} time signature problem, it will be skipped.")
        #Probably a time signature change in middle of bar, I will ignore it
    else:
        database.extend(tempdb)
print(len(database))

Crazy ../data/nice_format\liz_donjuan.mid
47803


In [112]:
print(database[2].export_dict()) #sanity check, seems fine

{'numerator': 3, 'denominator': 4, 'notes': [{'s': 0.0, 'i': 2, 'd': 0.5}, {'s': 0.5, 'i': 3, 'd': 0.5}, {'s': 1.0, 'i': 5, 'd': 0.5}, {'s': 1.5, 'i': 2, 'd': 0.5}, {'s': 2.0, 'i': 3, 'd': 0.5}, {'s': 2.5, 'i': 2, 'd': 0.25}, {'s': 2.75, 'i': 0, 'd': 0.125}, {'s': 2.875, 'i': 2, 'd': 0.125}], 'rhythm': [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1]}


In [113]:
# Analysis of different TS frequencies
frequency = dict()
for records in database:
    ks = str(records.numerator) +"/" + str(records.denominator)
    if ks in frequency:
        frequency[ks] += 1
    else:
        frequency[ks] = 1
frequency

{'3/4': 11231,
 '2/4': 10283,
 '3/8': 4629,
 '10/4': 6,
 '4/4': 11664,
 '5/8': 149,
 '6/8': 4960,
 '12/8': 309,
 '4/8': 1944,
 '2/8': 182,
 '8/8': 49,
 '6/4': 670,
 '7/4': 11,
 '9/8': 526,
 '9/4': 8,
 '4/16': 2,
 '3/16': 372,
 '5/4': 55,
 '15/16': 1,
 '14/16': 1,
 '19/16': 1,
 '20/16': 1,
 '21/16': 1,
 '28/16': 1,
 '8/4': 247,
 '13/16': 6,
 '17/16': 3,
 '7/8': 12,
 '2/2': 130,
 '12/16': 178,
 '44/32': 3,
 '48/32': 1,
 '37/32': 2,
 '36/32': 1,
 '13/8': 3,
 '11/4': 5,
 '21/8': 1,
 '10/8': 1,
 '12/4': 3,
 '6/16': 126,
 '11/32': 1,
 '14/4': 1,
 '4/2': 17,
 '9/16': 1,
 '3/2': 2,
 '15/32': 1,
 '16/8': 2}

# 2. Extract Rhythm of the piece

In [137]:
piece = "../aligned/aligned/hand_picked_spotify-51/orchestra.mid"

In [138]:
#Find channel with lowest pitch (Now: basically channels using bass clef, any better way?)
mido_obj = mid_parser.MidiFile(piece)
minpitch = 129
chosen_channel = []
for idx, inst in enumerate(mido_obj.instruments):
    if inst.is_drum:
        continue
    total_pitch = 0
    total_note = 0
    for note in inst.notes:
        total_pitch += note.pitch
        total_note += 1
    avg_pitch = total_pitch/total_note
    # print(idx,avg_pitch)
    if avg_pitch <= 54:
        chosen_channel.append(idx)
print(chosen_channel)

[4, 8, 12, 13]


In [139]:
#Aggregate Notes from the selected channels
final_notelist = []
for channel in chosen_channel:
    for note in mido_obj.instruments[channel].notes:
        final_notelist.append(note)
final_notelist = sorted(final_notelist,key = lambda x:x.start)

In [149]:
import numpy as np
class lhMatchInstance:
    def __init__(self,key,chord,starttick,tpb,num,den,rhythm):
        self.chord = chord
        self.key = key
        self.starttick = starttick
        self.tpb = tpb
        self.numerator = num
        self.denominator = den
        self.rhythm = rhythm
    
    def __str__(self):
        return f"{self.key}{self.chord} rhythm {self.rhythm}"
    

def find_closest(arr,val):
    newlist = [abs(x-val) for x in arr]
    return np.argmin(newlist),np.min(newlist)

def rhythm_processing(notes,begin_tick,tpb,numerator,denominator):
    '''
    input:
        notes: list of notes in the miditoolkit.notes class
        bar_length: length of a bar in number of ticks
    returns:
        a 24D vector where each dimension = 1 if the corresponding time has a note onset.
    '''
    rhythm_list = [0 for _ in range(24)]
    bar_length = numerator * (0.5 ** (math.log(denominator,2)-2))
    interval = bar_length/24
    rhythm_tick = [i*interval for i in range(24)]
    for note in notes:
        onset = (note.start-begin_tick)/tpb
        idx,val = find_closest(rhythm_tick,onset)
        if val < interval/2:
            rhythm_list[idx] = 1
    return lhMatchInstance("Cmajor","I",begin_tick,tpb,numerator,denominator,rhythm_list)# TODO change to automatic chord label

tschanges = dict()
for ts in mido_obj.time_signature_changes:
    print(ts)
    tschanges[ts.time] = (ts.numerator,ts.denominator)
if len(tschanges) == 0:
    tschanges[0] = (4,4) #TODO manually add the time signature 
tpb = mido_obj.ticks_per_beat
numerator = tschanges[0][0]
denominator = tschanges[0][1]
add_interval = int(tpb*numerator*(0.5 ** (math.log(denominator,2)-2)))
current_tick = add_interval
begin_tick = 0    
notelist = []
tmp_notelist = []
del tschanges[0]
song_rhythm = []
for note in final_notelist:
    if note.start < current_tick:
        if note.end > current_tick:
            tmp_notelist.append(ct.Note(start=current_tick,end=note.end,pitch=note.pitch,velocity=note.velocity))
            notelist.append(ct.Note(start=note.start,end=current_tick,pitch=note.pitch,velocity=note.velocity))
        else:
            notelist.append(note)
    else:
        if notelist != []:
            song_rhythm.append(rhythm_processing(notelist,begin_tick,tpb,numerator,denominator))
        notelist = []
        begin_tick = current_tick
        if begin_tick in tschanges:
            numerator,denominator = tschanges[begin_tick]
            add_interval = int(tpb*numerator*(0.5 ** (math.log(denominator,2)-2)))
            del tschanges[begin_tick]
        current_tick += add_interval
        tmp2 = []
        for note2 in tmp_notelist:
            if note2.end > current_tick:
                tmp2.append(ct.Note(start=current_tick,end=note2.end,pitch=note2.pitch,velocity=note2.velocity))
                notelist.append(ct.Note(start=note2.start,end=current_tick,pitch=note2.pitch,velocity=note2.velocity))
            else:
                notelist.append(note2)
        tmp_notelist = tmp2
        if note.end > current_tick:
            tmp_notelist.append(ct.Note(start=current_tick,end=note.end,pitch=note.pitch,velocity=note.velocity))
            notelist.append(ct.Note(start=note.start,end=current_tick,pitch=note.pitch,velocity=note.velocity))
        else:
            notelist.append(note)
if notelist != []:
    song_rhythm.append(rhythm_processing(notelist,begin_tick,tpb,numerator,denominator))


In [151]:
#sanity check
print(song_rhythm[0])
print(song_rhythm[1])
print(song_rhythm[2])

Cmajor_I rhythm [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Cmajor_I rhythm [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Cmajor_I rhythm [1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0]


# 3. Extract Chord information

In [None]:
#Use the ipervious built thing
#Since then environment would need to be different, I assume the code is run independently and imported here using csv
import pandas as pd
# df = pd.read_csv("../data/chord.csv")
#get tick and chord information
#TODO now use C major I chord first

# 4. build the rest of the things!

In [None]:
import sys
sys.path.append("../melody_extraction")
from skyline import skyline_melody
import random

In [None]:
def match_rhythm():
    pass

def harmonize():
    pass

In [None]:
'''
for each bar in the selected bass track:
    find a record in db with same time signature and nearest note
    then, do harmonization based on the chord
    then insert the notes to the midi
    then combine with the melody obtained from skyline!! Yeah.
    #Try on self zoked melody first
'''
lh_notelist = []
prev_idx = -1
for bar in song_rhythm:
    idxs = match_rhythm(database,bar.rhythm,bar.numerator,bar.denominator)
    if prev_idx in idxs:
        idx = prev_idx
    else:
        idx = random.choice(idxs)
    lh_notelist.extend(harmonize(database[idx],bar))
rh_notelist = skyline_melody(piece)


mido_out = mid_parser.MidiFile()
mido_out.ticks_per_beat = tpb
track1 = ct.Instrument(program=0,is_drum=False,name='righthand')
track2 = ct.Instrument(program=0,is_drum=False,name='lefthand')
mido_out.instruments = [track1,track2]
for note in rh_notelist:
    mido_out.instruments[0].notes.append(ct.Note(start=note.onset,end=note.offset,pitch=note.pitch,velocity=30))
for note in lh_notelist:
    mido_out.instruments[1].notes.append(ct.Note(start=note.onset,end=note.offset,pitch=note.pitch,velocity=30))
mido_out.dump("result.mid")