In [67]:
import numpy as np
import miditoolkit
import math
import os
from tqdm import tqdm

In [68]:
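# Pipeline overview:
#   1. MIDI folder -> melodic / non-melodic transition matrices -> weight matrix
#   2. MIDI file -> quantize (one box of sounding notes per time step)
#   3. quantized MIDI file -> layered graph -> shortest path (the melody)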

In [69]:
# Example MIDI file used by the single-file cells of this notebook.
# NOTE(review): both paths below are absolute and machine-specific; point
# midi_file at a local copy of the collection before running.
# Alternative location, currently disabled:
#midi_file = "/home/ee/btech/ee1190531/scratch/Clarinet/Data/Midi/Collection/Original Collection/001.mid"
midi_file = "/Users/kshitij1/Documents/GitHub/Clarinet/Data/Midi/Collection/Original Collection/001.mid"

In [70]:
def process_midiFile(midi_file):
    """Parse a MIDI file and return the notes of all its instruments as one list.

    midi_file -- path passed to miditoolkit's MidiFile parser.

    Returns a new list holding every note of every instrument, ordered by
    start time (the sort is stable, so notes with equal starts keep their
    instrument order).
    """
    parsed = miditoolkit.midi.parser.MidiFile(midi_file)
    # Flatten instrument by instrument, then order the merged list by onset.
    merged = [note for instrument in parsed.instruments for note in instrument.notes]
    return sorted(merged, key=lambda note: note.start)

In [72]:
# All notes of the example file, merged across instruments and sorted by start.
notes = process_midiFile(midi_file)

In [73]:
def quantize(notes, planck=1, rest=True):
    """Slice a list of notes into fixed-width time boxes.

    Time is measured relative to the earliest note start. Box ``i`` covers
    ``[i*planck, (i+1)*planck)``; a note is listed in boxes
    ``int(start/planck)`` up to (but excluding) ``int(end/planck)``.

    notes  -- objects exposing ``start``, ``end``, ``pitch`` and ``velocity``;
              they may be given in any order and are NOT modified.
    planck -- width of one box, in the same unit as ``start``/``end``.
    rest   -- if True every box additionally gets the rest marker ``(129, -1)``;
              if False only boxes that would otherwise be empty get it.

    Returns a list of boxes, each a list of ``(pitch, velocity)`` tuples.
    Within a box the notes are ordered by start, then end, then input order.
    An empty ``notes`` list yields an empty result.
    """
    if not notes:
        return []

    # Anchor on the true earliest start instead of notes[0].start: with
    # unsorted input the old anchor produced negative box indices, which
    # Python silently wrapped to the end of the list.
    origin = min(note.start for note in notes)
    horizon = max(note.end for note in notes) - origin

    quantized_notes = [[] for _ in range(int(horizon/planck))]

    # A tuple key on a stable sort gives the same order as the former
    # "sort by end, then sort by start" pair, without touching the caller's list.
    for note in sorted(notes, key=lambda n: (n.start, n.end)):
        first_idx = int((note.start - origin)/planck)
        last_idx = int((note.end - origin)/planck)
        entry = (note.pitch, note.velocity)
        for i in range(first_idx, last_idx):
            quantized_notes[i].append(entry)

    # 129 is the rest pitch index; -1 marks "no velocity".
    for box in quantized_notes:
        if rest or not box:
            box.append((129,-1))
    return quantized_notes

In [74]:
# One box per tick (planck=1); rest defaults to True, so every box also
# carries the (129,-1) rest marker.
qn = quantize(notes, planck=1)

In [75]:
def makeMelodicTransitionMat(midi_folder):
    """Build the row-normalised pitch transition matrix of the first track of each file.

    midi_folder -- iterable of MIDI file paths (despite the name, not a folder).

    For every file the notes of ``instruments[0]`` are quantized with
    ``planck=1, rest=False``; for each pair of consecutive boxes the transition
    "first note of box i -> first note of box i+1" is counted.

    Index layout of the 131x131 result:
        0   -> start
        129 -> rest
        130 -> end
    Other indices are used as raw pitch values.
    NOTE(review): a real note with pitch 0 shares index 0 with "start".

    Returns the matrix with every non-zero row divided by its sum.
    Files without instruments, or whose first track has no notes, are skipped
    instead of raising IndexError.
    """
    mat = np.zeros((131,131))
    for f in tqdm(midi_folder):
        mid_in = miditoolkit.midi.parser.MidiFile(f)
        if not mid_in.instruments:
            continue
        # quantize anchors on the first note, so hand it a start-ordered copy.
        notes = sorted(mid_in.instruments[0].notes, key=lambda n: n.start)
        if not notes:
            continue
        qn = quantize(notes, planck=1, rest=False)
        for box1, box2 in zip(qn, qn[1:]):
            p1 = box1[0][0]
            p2 = box2[0][0]
            mat[p1][p2] += 1

    # Start (0) and end (130) rows/columns are overwritten with the running
    # index j, exactly as the original element-wise loops did.
    # NOTE(review): filling with j (rather than a constant) makes these
    # transitions proportional to the pitch index -- confirm this is intended.
    idx = np.arange(131)
    mat[0, :] = idx
    mat[:, 0] = idx
    mat[130, :] = idx
    mat[:, 130] = idx

    # Turn counts into per-row probabilities; all-zero rows stay zero.
    for i in range(131):
        total = np.sum(mat[i])
        if total != 0:
            mat[i] = mat[i]/total
    return mat
    

In [76]:
def makeNonMelodicTransitionMat(midi_folder, num_channels=3, num_samples=5, seed=None):
    """Build a row-normalised transition matrix from randomly paired notes.

    midi_folder  -- iterable of MIDI file paths (despite the name, not a folder).
    num_channels -- how many leading instruments of each file to use (default 3,
                    the value that used to be hard-coded). Files with fewer
                    instruments contribute what they have instead of raising
                    IndexError.
    num_samples  -- random (note, next-note) pairs drawn per pair of consecutive
                    boxes (default 5, as before).
    seed         -- None keeps using NumPy's global random state (old behaviour);
                    an int makes the sampling reproducible.

    Each selected track is quantized with ``planck=1`` and the default
    ``rest=True``, so every box contains at least the rest marker (index 129).

    Index layout of the 131x131 result: 0 = start, 129 = rest, 130 = end.
    Returns the matrix with every non-zero row divided by its sum.
    """
    # np.random and RandomState both expose randint, so the draws are identical
    # to the original when no seed is given.
    rng = np.random if seed is None else np.random.RandomState(seed)

    mat = np.zeros((131,131))
    for f in tqdm(midi_folder):
        mid_in = miditoolkit.midi.parser.MidiFile(f)
        usable = min(num_channels, len(mid_in.instruments))
        for channel in range(usable):
            # quantize anchors on the first note, so hand it a start-ordered copy.
            notes = sorted(mid_in.instruments[channel].notes, key=lambda n: n.start)
            if not notes:
                continue
            qn = quantize(notes, planck=1)
            for box1, box2 in zip(qn, qn[1:]):
                for _ in range(num_samples):
                    note1 = box1[rng.randint(len(box1))]
                    note2 = box2[rng.randint(len(box2))]
                    mat[note1[0]][note2[0]] += 1

    # Start (0) and end (130) rows/columns are overwritten with the running
    # index j, exactly as the original element-wise loops did.
    # NOTE(review): filling with j (rather than a constant) makes these
    # transitions proportional to the pitch index -- confirm this is intended.
    idx = np.arange(131)
    mat[0, :] = idx
    mat[:, 0] = idx
    mat[130, :] = idx
    mat[:, 130] = idx

    # Turn counts into per-row probabilities; all-zero rows stay zero.
    for i in range(131):
        total = np.sum(mat[i])
        if total != 0:
            mat[i] = mat[i]/total
    return mat
    

In [77]:
def dumpMelodic(midi_folder,output_folder,num_files=5):
    """Compute the melodic transition matrix and save it as ``melodic.npy``.

    midi_folder   -- directory scanned (non-recursively) for ``*.mid`` files.
    output_folder -- directory receiving ``melodic.npy``; created if missing.
    num_files     -- use only the first ``num_files`` files; any value <= 0
                     means "use every file".
    """
    # os.listdir returns entries in arbitrary order; sort so that the
    # "first num_files" subset is the same on every machine and run.
    names = sorted(f for f in os.listdir(midi_folder) if f.endswith(".mid"))
    files = [midi_folder+"/"+f for f in names]
    if num_files>0:
        files = files[:num_files]
    # Make sure the destination exists before the (slow) matrix computation.
    os.makedirs(output_folder, exist_ok=True)
    mat = makeMelodicTransitionMat(files)
    np.save(output_folder+"/melodic.npy",mat)

In [78]:
def dumpNonMelodic(midi_folder,output_folder,num_files=5):
    """Compute the non-melodic transition matrix and save it as ``nonmelodic.npy``.

    midi_folder   -- directory scanned (non-recursively) for ``*.mid`` files.
    output_folder -- directory receiving ``nonmelodic.npy``; created if missing.
    num_files     -- use only the first ``num_files`` files; any value <= 0
                     means "use every file" (same convention as dumpMelodic).
    """
    # os.listdir returns entries in arbitrary order; sort so that the
    # "first num_files" subset is the same on every machine and run.
    names = sorted(f for f in os.listdir(midi_folder) if f.endswith(".mid"))
    files = [midi_folder+"/"+f for f in names]
    # Without this guard a negative num_files sliced from the end
    # (e.g. -1 silently dropped the last file).
    if num_files>0:
        files = files[:num_files]
    # Make sure the destination exists before the (slow) matrix computation.
    os.makedirs(output_folder, exist_ok=True)
    mat = makeNonMelodicTransitionMat(files)
    np.save(output_folder+"/nonmelodic.npy",mat)

In [79]:
# Where the .npy matrices are written (relative to the notebook's working directory).
output_folder = "../../../Data/Numpy/noBERT"
# Folder holding the source .mid files.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
midi_folder = "/Users/kshitij1/Documents/GitHub/Clarinet/Data/Midi/Collection/Original Collection"

In [80]:
# num_files=-1 fails the `num_files>0` test inside dumpMelodic, so no cap is
# applied and every .mid file in the folder is used.
dumpMelodic(midi_folder,output_folder,-1)

100%|██████████| 909/909 [05:44<00:00,  2.64it/s]


In [81]:
# Non-melodic statistics are built from only 5 files here.
dumpNonMelodic(midi_folder,output_folder,5)

100%|██████████| 5/5 [01:58<00:00, 23.67s/it]


In [82]:
def dumpWeight(numpy_folder):
    """Combine the two transition matrices into edge weights and save ``weight.npy``.

    Reads ``melodic.npy`` and ``nonmelodic.npy`` from ``numpy_folder`` and, per
    cell, computes

        ratio  = melodic / nonmelodic      where nonmelodic != 0
                 melodic * 10**5           where nonmelodic == 0
        ratio  = 10**-5                    where the ratio is exactly 0
        weight = -log(ratio)

    so that transitions that are more frequent melodically than non-melodically
    get a lower (possibly negative) weight.

    The matrices may have any shape as long as both shapes agree (the notebook
    uses 131x131).

    Raises ValueError if the shapes differ or if a ratio is negative (the
    logarithm is undefined there).
    """
    melodic = np.asarray(np.load(numpy_folder+"/melodic.npy"), dtype=float)
    nonmelodic = np.asarray(np.load(numpy_folder+"/nonmelodic.npy"), dtype=float)
    if melodic.shape != nonmelodic.shape:
        raise ValueError("melodic and nonmelodic matrices must have the same shape")

    missing_boost = 10.0**5    # stand-in for division by a zero non-melodic count
    ratio_floor = 10.0**-5     # replaces a zero ratio so the log stays finite

    # Start from the fallback everywhere, then overwrite with the true ratio
    # wherever the denominator is non-zero.
    ratio = melodic * missing_boost
    np.divide(melodic, nonmelodic, out=ratio, where=(nonmelodic != 0))
    ratio[ratio == 0] = ratio_floor

    if np.any(ratio < 0):
        raise ValueError("math domain error")

    mat = -np.log(ratio)
    np.save(numpy_folder+"/weight.npy",mat)

In [83]:
# Needs melodic.npy and nonmelodic.npy to already exist in output_folder.
dumpWeight(output_folder)

In [84]:
def loadWeight(numpy_folder):
    """Return the array stored as ``weight.npy`` inside ``numpy_folder``."""
    weight_path = numpy_folder+"/weight.npy"
    return np.load(weight_path)

In [85]:
# Weight matrix written by dumpWeight, indexed as weights[pitch_from][pitch_to].
weights = loadWeight(output_folder)

In [88]:
'''
i -> (i+2), (i+4), (i+5), (i+7), (i+9), (i+11)
7 > 5 > 4 > 2~9 > 11
'''
def check(weights):
    """Print the weight of going from pitch 60 to each of the pitches 61..71.

    One line per target pitch: ``60 <target> <weight>``.
    """
    root = 60
    row = weights[root]
    for target in range(root + 1, root + 12):
        print(root, target, row[target])
# Sanity check: list the learned weights for the intervals above pitch 60.
check(weights)

60 61 5.735667006948658
60 62 3.3852935865695915
60 63 5.62164401674084
60 64 7.543923696512291
60 65 7.4476120529465835
60 66 7.6148182982917225
60 67 6.757105763356936
60 68 7.755368238889199
60 69 6.4051991270777675
60 70 0.4074777480555948
60 71 6.265489131405304


In [69]:
class Graph:
    """First draft of the melody graph: one layer per quantized box of a MIDI file.

    NOTE(review): a second ``class Graph`` is defined further down in this
    notebook; when the cells are run top to bottom that later definition
    replaces this one.
    """

    def __init__(self, midi_file,weight_matrix):
        """Quantize ``midi_file`` with planck=1 and allocate one empty layer per box.

        midi_file     -- path handed to process_midiFile.
        weight_matrix -- 2-D table read as weight_matrix[pitch_from][pitch_to].
        """
        self.midi_file = midi_file
        self.box_list = quantize(process_midiFile(midi_file),planck=1)
        self.weight_matrix = weight_matrix
        self.layers = [[] for i in range(len(self.box_list))]
        # after create_graph, each layer holds entries of the form
        # [(pitch, velocity), [weight to each note of the next box]]
    
    def create_graph(self):
        """Fill ``self.layers`` for every box except the last one.

        For each note of box i an entry ``[note, weight_list]`` is appended to
        layer i, where ``weight_list[k]`` is the weight from that note to the
        k-th note of box i+1. The loop stops at the second-to-last box, so the
        final layer stays empty.
        """
        for i in range(len(self.box_list)-1):
            box1 = self.box_list[i]
            box2 = self.box_list[i+1]
            for note1 in box1:
                weight_list = []
                for note2 in box2:
                    weight_list.append(self.get_weights(note1,note2))
                self.layers[i].append([note1, weight_list])
    
    def shortestPath(self):
        '''
        Forward dynamic-programming pass over the layers built by create_graph.

        Local tables built here:
            layers  -> number of nodes ("neurons") in each layer
            neurons -> per layer, np.arange(number of nodes)
            dist    -> 3D array (layer idx, node in idx^th layer, node in idx+1^th layer)
                       holding the weight_matrix entry for that pair of pitches

        For every node j of layer i the cheapest predecessor k in layer i-1 is
        chosen and the path stored for k is extended by j.

        Returns shortestpaths[0], a list with len(layers)+1 slots.

        NOTE(review): create_graph leaves the last layer empty, so the final
        loop iteration has no nodes and appears to replace the result with
        empty lists -- verify before relying on this method.
        NOTE(review): the path buffers have len(layers)+1 slots but are indexed
        by node position j; a layer with more nodes than that would raise
        IndexError -- confirm.
        '''
        # earlier attempt, kept for reference:
        # neurons=[]
        # for i in range(len(self.layers)):
        #     neurons.append(len(self.layers[i]))
        # dist=np.zeros((len(self.layers),neurons[0],neurons[-1]))

        # node count per layer
        layers = []
        for i in range(len(self.layers)):
            layers.append(len(self.layers[i]))
        
        # index array 0..count-1 for every layer
        neurons = []
        for i in range(len(layers)):
            num_neurons = layers[i]
            neurons.append(np.arange(num_neurons))

        # dist is padded to the widest layer; unused cells stay 0
        maxnum_neurons = max(layers)
        dist = np.zeros((len(layers),maxnum_neurons,maxnum_neurons))

        for i in range(len(layers)-1):
            for j in range(layers[i]):
                for k in range(layers[i+1]):
                    # self.layers[i][j] is [(pitch, velocity), weight_list]; [0][0] is the pitch
                    dist[i][j][k] = self.weight_matrix[self.layers[i][j][0][0]][self.layers[i+1][k][0][0]]

        # shortestpaths[0]: best path ending in each node of the previous layer
        # shortestpaths[1]: best path ending in each node of the current layer
        # (list.copy() is shallow; slots are replaced, not mutated in place)
        emptyarr = [[] for _ in range(len(layers)+1)]
        shortestpaths = [emptyarr.copy() ,emptyarr.copy()]
        shortestpathsdist = []
        for i in range(len(layers)):
            shortestpathsdist.append(np.zeros((layers[i])))
        for i in tqdm(range(len(layers))):
            for j,neuron in enumerate(neurons[i]):
                if i == 0:
                    # every node of the first layer starts its own path at cost 0
                    shortestpaths[0][j] = [j]
                    shortestpathsdist[i][j] = 0
                else:
                    # cost of reaching node j through each node k of the previous layer
                    dummy = [shortestpathsdist[i-1][k] + dist[i-1][k][j] for k in neurons[i-1]]
                    shortestpathsdist[i][j] = np.min(dummy)
                    prevstate = np.argmin(dummy)
                    shortestpaths[1][j] = (np.append(shortestpaths[0][prevstate].copy(), j))
            # roll the buffers: current layer becomes "previous" for the next step
            shortestpaths[0] = shortestpaths[1].copy()
            shortestpaths[1] = emptyarr.copy()
        return shortestpaths[0]
        
    def get_weights(self,note1,note2):
        """Return weight_matrix[pitch of note1][pitch of note2]; notes are (pitch, velocity) tuples."""
        p1 = note1[0]
        p2 = note2[0]
        return self.weight_matrix[p1][p2]
        

In [89]:
class Node:
    """One candidate note inside a layer of the melody graph.

    pitch       -- pitch index of the note
    velocity    -- velocity of the note
    weight_list -- weights towards the notes of the following layer
    layer_idx   -- index of the layer this node belongs to
    parent_idx  -- position of the chosen predecessor in the previous layer
    """

    def __init__(self,pitch,velocity,weight_list,layer_idx,parent_idx):
        # what the note is
        self.pitch, self.velocity = pitch, velocity
        # outgoing edge weights
        self.weight_list = weight_list
        # where the node sits and where its best path comes from
        self.layer_idx, self.parent_idx = layer_idx, parent_idx

In [102]:
class Graph:
    """Layered graph over the quantized boxes of a MIDI file, used to pick a melody.

    Layer 0 holds a single start sentinel (pitch index 0) and the last layer a
    single end sentinel (pitch index 130); every layer in between holds one
    Node per note sounding in the corresponding box (including the rest
    marker, pitch index 129). Edges connect consecutive layers and cost
    ``weight_matrix[pitch_from][pitch_to]``; the melody is the cheapest
    start-to-end path.
    """

    def __init__(self, midi_file,weight_matrix):
        """Quantize ``midi_file`` (planck=1), add the sentinels and allocate empty layers.

        midi_file     -- path handed to process_midiFile.
        weight_matrix -- 2-D table read as weight_matrix[pitch_from][pitch_to].
        """
        self.midi_file = midi_file
        self.box_list = quantize(process_midiFile(midi_file),planck=1)
        self.box_list = [[(0,-1)]] + self.box_list + [[(130,-1)]]
        self.weight_matrix = weight_matrix
        # filled by create_graph with one list of Node objects per box
        self.layers = [[] for _ in range(len(self.box_list))]

    def create_graph(self):
        """(Re)build ``self.layers`` from ``self.box_list``.

        Every note becomes a Node whose ``weight_list`` holds the weights to the
        notes of the next box (empty for the final layer). The layers are reset
        first, so calling this method again does not duplicate nodes.
        """
        self.layers = [[] for _ in range(len(self.box_list))]
        last = len(self.box_list) - 1
        for i, box in enumerate(self.box_list):
            next_box = self.box_list[i+1] if i < last else []
            for note in box:
                weight_list = [self.get_weights(note, nxt) for nxt in next_box]
                self.layers[i].append(
                    Node(pitch=note[0], velocity=note[1], weight_list=weight_list,
                         layer_idx=i, parent_idx=-1)
                )

    def get_weights(self,note1,note2):
        """Return weight_matrix[pitch of note1][pitch of note2]; notes are (pitch, velocity) tuples."""
        p1 = note1[0]
        p2 = note2[0]
        return self.weight_matrix[p1][p2]

    def shortestPath(self):
        """Run the forward shortest-path pass and return the pitches along the best path.

        Sets ``parent_idx`` on every node of layers 1..last to the position of
        its cheapest predecessor, then delegates to getPath. Builds the graph
        first if create_graph has not been called yet.
        """
        if not self.layers[-1]:
            self.create_graph()

        # Distances start at +inf (not 0) so the first candidate always wins the
        # comparison; with a 0 start only negative path costs were ever recorded.
        best = [[float("inf")] * len(layer) for layer in self.layers]
        best[0] = [0.0] * len(self.layers[0])

        # The final (end) layer is relaxed too, so getPath starts from a real parent.
        for i in range(1, len(self.layers)):
            for j, node in enumerate(self.layers[i]):
                for k, prev in enumerate(self.layers[i-1]):
                    newdist = best[i-1][k] + self.weight_matrix[prev.pitch][node.pitch]
                    if newdist < best[i][j]:
                        best[i][j] = newdist
                        node.parent_idx = k
        return self.getPath()

    def getPath(self):
        """Follow ``parent_idx`` back from the end node and return the pitches in time order.

        The result contains the pitch of one node for each of layers
        1..len(layers)-2, followed by the pitch index of the end node (130 for
        graphs built by __init__); the start sentinel is not included.
        """
        path = [self.layers[-1][0]]
        for i in range(len(self.layers)-2,0,-1):
            parent = path[-1].parent_idx
            path.append(self.layers[i][parent])
        path.reverse()
        return [node.pitch for node in path]

    def melody(self):
        """Return the shortest-path pitches with consecutive repeats collapsed."""
        melody = []
        for pitch in self.shortestPath():
            if not melody or pitch != melody[-1]:
                melody.append(pitch)
        return melody
                    

In [103]:
# Build the layered graph for the example file with the learned weights.
G = Graph(midi_file,weights)
G.create_graph()

In [104]:
# Shortest-path pitches with consecutive repeats collapsed.
melody = G.melody()