In [1]:
import io
import re
import string
import tqdm
import os
import numpy as np
import string
from nltk.corpus import stopwords
from nltk import Text

from music21 import *

np.random.seed(42)

In [2]:
def transpose_notes(notes):
    midi_stream = stream.Stream(notes)
    ori_key = midi_stream.analyze('key')
    k_interval = interval.Interval(ori_key.tonic, key.Key('C').tonic)
    new_stream = midi_stream.transpose(k_interval)
    return new_stream

def softmax(vec):
    return np.exp(vec) / np.exp(vec).sum()

In [3]:
file_list = os.listdir(r"./scores/beethoven")
mxl_file_list = [file for file in file_list if '.mxl' in file]

beethoven_list = []

In [4]:
for mxl_file in mxl_file_list:
    beethoven = corpus.parse(f'beethoven/{mxl_file}')
    if 'major' in str(beethoven.analyze('key')):
        sb = beethoven.getElementsByClass('Part')
        for part in sb:
            if 'Violin' in str(part):
                sp_ind = [str(soprano) for soprano in sb].index(str(part))
                sp = sb[sp_ind]
                tn = transpose_notes(sp)
                note_list = [(str(notes.pitches)[22:-3],notes.beat) for notes in tn.recurse().notes if 'music' not in str(notes.pitches)[22:-3]]
                beethoven_list.append(note_list)

In [5]:
note_vertex, index = {}, 0  # start indexing from 1
for i in range(len(beethoven_list)):
    for beethoven_note in beethoven_list[i]:
        if beethoven_note not in note_vertex:
            note_vertex[beethoven_note] = index
            index += 1
vertex_size = len(note_vertex)
print(note_vertex)

{('C4', 1.0): 0, ('C4', 2.0): 1, ('D4', 2.5): 2, ('C4', 2.75): 3, ('B3', 3.0): 4, ('C4', 3.5): 5, ('G3', 1.0): 6, ('A3', 1.0): 7, ('C5', 1.0): 8, ('C5', 2.0): 9, ('D5', 2.5): 10, ('C5', 2.75): 11, ('B4', 3.0): 12, ('C5', 3.5): 13, ('D5', 1.0): 14, ('F4', 3.0): 15, ('E4', 1.0): 16, ('A4', 3.0): 17, ('F4', 3.75): 18, ('D4', 3.0): 19, ('E5', 1.0): 20, ('D5', 3.0): 21, ('D5', 2.0): 22, ('E5', 2.5): 23, ('D5', 2.75): 24, ('C#5', 3.0): 25, ('D5', 3.5): 26, ('F5', 1.0): 27, ('E5', 3.0): 28, ('E5', 2.0): 29, ('F5', 2.5): 30, ('E5', 2.75): 31, ('E5', 3.5): 32, ('G5', 1.0): 33, ('C5', 3.0): 34, ('A5', 2.5): 35, ('G5', 2.75): 36, ('F#5', 3.0): 37, ('G5', 3.5): 38, ('A5', 1.0): 39, ('B-5', 2.5): 40, ('A5', 2.75): 41, ('G#5', 3.0): 42, ('A5', 3.5): 43, ('B5', 1.0): 44, ('B5', 2.0): 45, ('B5', 3.0): 46, ('C6', 1.0): 47, ('E4', 2.5): 48, ('E4', 3.0): 49, ('D4', 1.0): 50, ('G4', 1.0): 51, ('F4', 2.5): 52, ('B3', 1.0): 53, ('F4', 1.0): 54, ('A4', 1.0): 55, ('G4', 2.5): 56, ('D4', 3.5): 57, ('C5', 2.5):

In [6]:
beethoven_list_total = []
for i in range(len(beethoven_list)):
    for j in range(len(beethoven_list[i])):
        beethoven_list_total.append(beethoven_list[i][j])
len(beethoven_list_total)

23703

In [7]:
text = Text(beethoven_list_total)
text.plot(20)
plt.show()

ValueError: The plot function requires matplotlib to be installed.See http://matplotlib.org/

In [None]:
str_note_vertex = [str(note_v) for note_v in note_vertex]
df = pd.DataFrame(0, index=str_note_vertex, columns=str_note_vertex)
for i in range(len(beethoven_list)):
    for j in range(len(beethoven_list[i])-1):
        df.loc[str(beethoven_list[i][j]),str(beethoven_list[i][j+1])] += 1
df = df.sort_index()
df = df.reindex(sorted(df.columns), axis=1)
df

In [None]:
plt.pcolor(df)

In [None]:
class Skipgram:
    X_train = []
    y_train = []
    
    def __init__(self, d, window_size, alpha, epochs):
        self.d = d    # dimension of embedding vector
        self.window_size = window_size    # window_size
        self.alpha = alpha    # learning rate
        self.epochs = epochs
        
    def preprocessing(self, sentences):
        self.words = {}    # 모든 단어 dict
        # sentence : 1개의 곡(선율)
        for sentence in sentences:
            # word : 1개의 음표
            for word in sentence:
                if word not in self.words:
                    self.words[word] = 1
                else:
                    self.words[word] += 1

        self.n = len(self.words)   # n : 모든 단어(음표)의 수
        self.words = sorted(list(self.words.keys()))
        self.word_dict = {word : i for i,word in enumerate(self.words)}

        for sentence in sentences:
            for i in range(len(sentence)):
                # 원핫인코딩
                center_word = np.zeros(self.n)
                center_word[self.word_dict[sentence[i]]] = 1
                context = np.zeros(self.n)
                for j in range(max(0, i-self.window_size), min(len(sentence)-1, i+self.window_size)):
                    context[self.word_dict[sentence[j]]] += 1
                self.X_train.append(center_word)
                self.y_train.append(context)
        
    def feedforward(self, X):
        self.h = np.dot(self.W1.T,X)
        self.u = np.dot(self.W2.T,self.h)
        self.y = softmax(self.u)
        return self.y
    
    def backpropagate(self,x,t):
        error_mat = np.zeros((int(t.sum()), self.n))
        for i, context_note in enumerate(np.where(t)[0]):
            y_true = np.zeros(self.n)
            y_true[context_note] = 1
            error = self.y - y_true
            error_mat[i] = error.T
        dedw2 = np.zeros(self.W2.shape)
        ei = error_mat.sum(axis=0)
        for i in range(self.d):
            for j in range(self.n):
                dedw2[i][j] = ei[j] * self.h[i]
        eh = ei*self.W2
        self.W2 -= self.alpha*dedw2
        self.W1 -= self.alpha*eh.T
        
    def fit(self, sentences):
        self.preprocessing(sentences)
        self.W1 = np.random.uniform(-0.8, 0.8, (self.n, self.d))
        self.W2 = np.random.uniform(-0.8, 0.8, (self.d, self.n))
        self.prevloss = int(1e9)
        for epoch in range(self.epochs):
            self.loss = 0
            for j in range(len(self.X_train)):
                self.feedforward(self.X_train[j])
                self.backpropagate(self.X_train[j],self.y_train[j])
                for m in np.where(self.y_train[j])[0]:
                    self.loss += -1*self.u[m]
                self.loss += int(self.y_train[j].sum())*np.log(np.sum(np.exp(self.u)))
            print("epoch ", epoch+1, " loss = ",self.loss)
            if self.loss > self.prevloss:
                print("Callback executed")
                break
            self.alpha *= 1/(1+self.alpha*epoch)
        
    def predict(self,word,number_of_predictions):
        if word in self.words:
            index = self.word_dict[word]
            X = [0 for i in range(self.n)]
            X[index] = 1
            prediction = self.feedforward(X)
            output = {}
            for i in range(self.n):
                output[prediction[i]] = i

            top_context_words = []
            for k in sorted(output,reverse=True):
                top_context_words.append(self.words[output[k]])
                if(len(top_context_words)>=number_of_predictions):
                    break

            return top_context_words
        else:
            print("Word not found in dictionary")

In [None]:
sg = Skipgram(4, 2, 0.001, 80)
sg.fit(beethoven_list)

In [None]:
plt.plot(range(80),sg.loss_list)

In [None]:
print(sg.predict(('C4', 1.0),10))
print(sg.predict(('C4', 2.0),10)) #2
print(sg.predict(('B4', 3.0),10)) #3
print(sg.predict(('C5', 3.5),10)) #3

print(sg.predict(('G4', 1.0),10)) #6
print(sg.predict(('C5', 2.5),10)) #4
print(sg.predict(('B4', 2.75),10)) #3
print(sg.predict(('A4', 3.0),10)) #6

print(sg.predict(('G4', 1.0),10)) #4
print(sg.predict(('G4', 2.0),10)) #6
print(sg.predict(('C5', 2.5),10)) #1
print(sg.predict(('B4', 2.75),10)) #3
print(sg.predict(('A4', 3.0),10)) #6
print(sg.predict(('F5', 3.5),10)) #9

print(sg.predict(('A4', 1.0),10))  #1

In [None]:
# plot embedded matrix and color points with respect to beat

N = len(df.columns)
beats = ['others','1.0','2.0','3.0','4.0']
labels = np.zeros(N)
for i in range(N):
    if '1.0' in df.columns[i]:
        labels[i] = 1
    if '2.0' in df.columns[i]:
        labels[i] = 2
    if '3.0' in df.columns[i]:
        labels[i] = 3
    if '4.0' in df.columns[i]:
        labels[i] = 4

import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

cdict = {0: 'black', 1:'red', 2: 'green', 3:'orange', 4:'blue'}

model = TSNE()
transformed = model.fit_transform(sg.W1)

xs = transformed[:,0]
ys = transformed[:,1]
fig, ax = plt.subplots()
for g in np.unique(labels):
    ix = np.where(labels == g)
    ax.scatter(xs[ix], ys[ix], c = cdict[g], label = beats[int(g)], s = 10)
ax.legend()
plt.show()

In [None]:
# plot embedded matrix and color points with respect to pitch


labels = np.zeros(N)
tonics = ['others','C4','E4','G4']
for i in range(N):
    if 'C4' in df.columns[i]:
        labels[i] = 1
    if 'E4' in df.columns[i]:
        labels[i] = 2
    if 'G4' in df.columns[i]:
        labels[i] = 3

import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

cdict = {0: 'black', 1:'red', 2: 'green', 3:'orange', 4:'blue'}

model = TSNE()
transformed = model.fit_transform(sg.W1)

xs = transformed[:,0]
ys = transformed[:,1]
fig, ax = plt.subplots()
for g in np.unique(labels):
    ix = np.where(labels == g)
    ax.scatter(xs[ix], ys[ix], c = cdict[g], label = tonics[int(g)], s = 10)
ax.legend()
plt.show()