In [33]:
import os
import pandas as pd 
import torch.nn as nn
import torch
import torch.nn.functional as F
from music21 import *
import random

# Data Pre-Processing

### Transforming the data into a more readable input for the model

In [250]:
def encode_song(song):
    """Flatten a four-voice song table into a token list.

    `song` is iterated with `iterrows()` and must expose the columns
    'note0'..'note3'. The result starts with 'START' and ends with 'END'.
    Each row contributes four `(pitch, tied)` tuples (one per voice, in
    column order) followed by the '|||' separator. `tied` is 1 when the
    pitch equals the pitch the same voice had on the previous row, else 0;
    every voice starts with a previous pitch of -1.
    """
    voices = ('note0', 'note1', 'note2', 'note3')
    last_pitch = dict.fromkeys(voices, -1)
    tokens = ['START']

    for _, row in song.iterrows():
        for voice in voices:
            pitch = row[voice]
            tokens.append((pitch, int(pitch == last_pitch[voice])))
            last_pitch[voice] = pitch
        tokens.append('|||')

    tokens.append('END')
    return tokens

In [251]:
folder_path = 'Data/'
test_set = []
train_set = []
validation_set = []

# Directory name under `folder_path` -> list that collects its encoded songs.
split_targets = {'test': test_set, 'train': train_set, 'valid': validation_set}

# os.listdir returns entries in arbitrary order; sorting makes positional
# look-ups such as train_set[:10] or test_set[3] repeatable across machines.
for dirname in sorted(os.listdir(folder_path)):
    split_dir = os.path.join(folder_path, dirname)
    # Skip stray files (.DS_Store, ...) and directories that are not a known split.
    if dirname not in split_targets or not os.path.isdir(split_dir):
        continue
    for filename in sorted(os.listdir(split_dir)):
        file_path = os.path.join(split_dir, filename)
        # Skip hidden entries and sub-directories such as .ipynb_checkpoints.
        if filename.startswith('.') or not os.path.isfile(file_path):
            continue
        df = pd.read_csv(file_path)
        song = encode_song(df)
        split_targets[dirname].append(song)

In [252]:
class Model(nn.Module):
    """Token-level LSTM that processes a song one token at a time.

    Pipeline per step: embedding -> LSTM -> dropout -> linear layer producing
    `vocab_size` logits. The model always runs with a batch size of 1.

    Args:
        embedding_dim: size of each token embedding.
        hidden_dim: LSTM hidden size.
        vocab_size: number of distinct token ids (also the logit width).
        num_layers: number of stacked LSTM layers.
        droput_rate: dropout probability applied to the LSTM output
            (the misspelled name is kept so existing keyword callers work).
    """

    def __init__(self, embedding_dim=128, hidden_dim=256, vocab_size=259, num_layers=2, droput_rate=.3):
        super(Model, self).__init__()
        # Sub-module creation order is kept as-is: it determines how the
        # global torch RNG is consumed during weight initialisation.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.dropout = nn.Dropout(droput_rate)
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.batch_size = 1

    def init_hidden_state(self):
        """Return a zero `(h0, c0)` pair matching the model's device and dtype."""
        reference = next(self.parameters())
        shape = (self.num_layers, self.batch_size, self.hidden_dim)
        # new_zeros inherits device/dtype, so the state stays valid after
        # model.to(device) or model.double().
        return (reference.new_zeros(shape), reference.new_zeros(shape))

    def forward(self, song, mask, hidden=None, teacher_forcing_ratio=None):
        """Run the model over `song`, one token per step.

        The input at step 0 is `song[0]`; the input at every later step is
        the token selected at the end of the previous step.

        Args:
            song: non-empty sequence of integer token ids.
            mask: sequence of flags, at least as long as `song`. Where
                `mask[i]` is truthy, the step's logits are replaced by a
                one-hot vector on `song[i]` and `song[i]` becomes the next input.
            hidden: optional `(h, c)` LSTM state; zeros are used when None.
            teacher_forcing_ratio: on unmasked steps, the probability of
                feeding `song[i]` as the next input instead of the model's
                own argmax prediction. Falsy values disable teacher forcing.

        Returns:
            `(output, hidden)` where `output` has shape
            `(len(song), vocab_size)` and `hidden` is the final LSTM state.

        Raises:
            ValueError: if `song` is empty or `mask` is shorter than `song`.
        """
        if len(song) == 0:
            raise ValueError("song must contain at least one token")
        if len(mask) < len(song):
            raise ValueError(
                f"mask has {len(mask)} entries but song has {len(song)} tokens")

        if hidden is None:
            hidden = self.init_hidden_state()

        device = self.embedding.weight.device
        output = []
        x = song[0]

        for i in range(len(song)):
            token = torch.tensor(x, dtype=torch.long, device=device)
            # (embedding_dim,) -> (batch=1, seq=1, embedding_dim)
            embedded = self.embedding(token).unsqueeze(0).unsqueeze(0)
            curr, hidden = self.lstm(embedded, hidden)
            logits = self.fc(self.dropout(curr))

            if mask[i]:
                # Known token: emit a fixed one-hot row and feed the truth forward.
                clamped_output = torch.zeros_like(logits)
                clamped_output[0, 0, song[i]] = 1.0
                output.append(clamped_output)
                x = song[i]
            else:
                output.append(logits)
                # Short-circuit keeps random.random() from being called when
                # teacher forcing is disabled.
                if teacher_forcing_ratio and random.random() < teacher_forcing_ratio:
                    x = song[i]
                else:
                    x = logits.argmax(dim=2).item()

        output = torch.cat(output, dim=1).view(-1, self.fc.out_features)
        return output, hidden

In [261]:
def compute_mask_training(song):
    """Build the per-token clamp mask used during training.

    A position is True when it is the first or last index of `song`, or
    when `index % 5` is 0 or 1; every other position is False.

    The returned list always has exactly `len(song)` entries, including for
    songs of length 0 or 1.
    """
    last_index = len(song) - 1
    return [
        i == 0 or i == last_index or i % 5 in (0, 1)
        for i in range(len(song))
    ]

def compute_mask_testing(song):
    """Build the per-token clamp mask used for validation and testing.

    A position is True only when `index % 5 == 1` and it is not the final
    index of `song`; all other positions are False. The result has one
    entry per element of `song`.
    """
    final_index = len(song) - 1
    return [
        position % 5 == 1 and position != final_index
        for position in range(len(song))
    ]

def embedding_dictionary():
    """Return the token -> integer id mapping.

    Ids 0..127 are `(pitch, 0)` tokens, ids 128..255 are `(pitch, 1)`
    tokens (id = pitch + 128), and the special tokens "START", "END" and
    "|||" map to 256, 257 and 258.
    """
    mapping = {"START": 256, "END": 257, "|||": 258}
    for pitch in range(128):
        for tied in (0, 1):
            mapping[(pitch, tied)] = pitch + 128 * tied
    return mapping

def harmonies_to_zero(song):
    """Return a copy of `song` with positions where `index % 5` is 2, 3 or 4 replaced by -1.

    Elements at all other positions are carried over unchanged.
    """
    return [
        -1 if idx % 5 >= 2 else token
        for idx, token in enumerate(song)
    ]
        
# Module-level token -> id lookup, built once; read by train_model and
# process_sequence as a global.
token_dictionary = embedding_dictionary()

def train_model(model, optimizer, criterion, num_epochs, max_songs=10):
    """Train `model` song by song, reporting validation loss after each song.

    For every song in `train_set[:max_songs]` the model is optimised for
    `num_epochs` passes over that single song. The LSTM state returned by
    one pass is detached and fed in as the initial state of the next pass.
    The teacher-forcing rate decays linearly from 0.5 towards 0 over the
    epochs. After each song the average loss over `validation_set` is printed.

    Reads the module-level `train_set`, `validation_set` and
    `token_dictionary`. The model is left in eval mode when the function
    returns (after at least one song has been processed).

    Args:
        model: network called as `model(tokens, mask, hidden, tf_rate)`.
        optimizer: optimizer over `model`'s parameters.
        criterion: loss taking `(logits, target_ids)`.
        num_epochs: number of optimisation passes per song.
        max_songs: how many songs from the start of `train_set` to use;
            None uses the whole set. Defaults to 10.

    NOTE(review): positions clamped by the mask are included in the loss;
    their logits are constant one-hot rows, so they presumably add a fixed
    offset to the reported loss without contributing gradient — confirm
    whether that is intended.
    """
    for index, song in enumerate(train_set[:max_songs]):
        model.train()
        melody_mask = compute_mask_training(song)
        embeded_song = [token_dictionary[token] for token in song]
        # The target is the same for every epoch, so build it once per song.
        target = torch.tensor(embeded_song)
        hidden = None
        for epoch in range(num_epochs):
            teacher_forcing_rate = max(0.5 * (1 - epoch / num_epochs), 0)
            optimizer.zero_grad()
            output, hidden = model(embeded_song, melody_mask, hidden, teacher_forcing_rate)
            loss = criterion(output, target.to(output.device))
            loss.backward()
            # Cut the graph so the next pass does not backpropagate into this one.
            hidden = tuple(h.detach() for h in hidden)
            optimizer.step()
            if (epoch + 1) % 10 == 0:
                print(f"Song {index + 1}, Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}")

        mean_val_loss = _mean_validation_loss(model, criterion)
        if mean_val_loss is None:
            print('Validation loss: n/a (validation set is empty)')
        else:
            print(f'Validation loss: {mean_val_loss}')


def _mean_validation_loss(model, criterion):
    """Return the average loss over `validation_set`, or None when it is empty."""
    if not validation_set:
        return None
    with torch.no_grad():
        model.eval()
        total_val_loss = 0
        for val_song in validation_set:
            val_embeded_song = [token_dictionary[token] for token in val_song]
            val_melody_mask = compute_mask_testing(val_song)
            # Hide the non-melody positions from the model's input.
            val_input_song = harmonies_to_zero(val_embeded_song)
            val_output, _ = model(val_input_song, val_melody_mask)
            val_target = torch.tensor(val_embeded_song).to(val_output.device)
            total_val_loss += criterion(val_output, val_target).item()
    return total_val_loss / len(validation_set)

In [262]:
# Seed both RNGs used by the model: torch (weight init, dropout) and
# `random` (teacher-forcing coin flips), so a fresh Run All is repeatable.
SEED = 0
random.seed(SEED)
torch.manual_seed(SEED)

criterion = nn.CrossEntropyLoss()
num_epochs = 100

model = Model()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)
train_model(model, optimizer, criterion, num_epochs)

Song 1, Epoch 10/100, Loss: 5.138826847076416
Song 1, Epoch 20/100, Loss: 5.098159313201904
Song 1, Epoch 30/100, Loss: 4.977606773376465
Song 1, Epoch 40/100, Loss: 4.549571990966797
Song 1, Epoch 50/100, Loss: 4.185001850128174
Song 1, Epoch 60/100, Loss: 4.035775661468506
Song 1, Epoch 70/100, Loss: 3.970472812652588
Song 1, Epoch 80/100, Loss: 3.9295005798339844
Song 1, Epoch 90/100, Loss: 3.9066169261932373
Song 1, Epoch 100/100, Loss: 3.896476984024048
Validation loss: 5.312666281675681
Song 2, Epoch 10/100, Loss: 4.404332160949707
Song 2, Epoch 20/100, Loss: 4.199508190155029
Song 2, Epoch 30/100, Loss: 4.06709623336792
Song 2, Epoch 40/100, Loss: 4.0031914710998535
Song 2, Epoch 50/100, Loss: 3.930015802383423
Song 2, Epoch 60/100, Loss: 3.895909309387207
Song 2, Epoch 70/100, Loss: 3.8473329544067383
Song 2, Epoch 80/100, Loss: 3.8211450576782227
Song 2, Epoch 90/100, Loss: 3.7989022731781006
Song 2, Epoch 100/100, Loss: 3.781846046447754
Validation loss: 4.908000921591734
Son

In [239]:
import numpy as np
import torch
from music21 import stream, note

def midi_to_note(part):
    """Convert a sequence of per-step pitches into a music21 Part.

    Each value is rounded, and every run of consecutive equal pitches
    becomes a single Note whose `quarterLength` is `run_length / 4`.
    An empty `part` yields an empty Part instead of raising.

    Args:
        part: iterable of numeric pitch values, one per time step.

    Returns:
        A `stream.Part` containing one `note.Note` per run.
    """
    from itertools import groupby

    result = stream.Part()
    for pitch, run in groupby(round(value) for value in part):
        steps = sum(1 for _ in run)
        result.append(note.Note(pitch, quarterLength=steps / 4))
    return result

def process_sequence(sequence, delimiter_token="|||"):
    """Split a sequence of token ids into four per-voice pitch lists.

    The ids are mapped back to tokens through the module-level
    `token_dictionary`. The first and last tokens are dropped, and the rest
    is read in strides of five: four note tokens followed by one slot that
    is skipped without being inspected.

    Args:
        sequence: iterable of integer token ids.
        delimiter_token: currently unused; kept for interface compatibility.

    Returns:
        `(melody, alto, tenor, bass)`, each a list of pitches (the first
        element of every `(pitch, tied)` token).

    Raises:
        ValueError: if a note slot holds a non-note token (e.g. "START"),
            or the final group has fewer than four note slots.
        KeyError: if an id is not present in `token_dictionary`.
    """
    index_to_token = {v: k for k, v in token_dictionary.items()}
    tokens = [index_to_token[int(token_id)] for token_id in sequence][1:-1]

    voices = ([], [], [], [])
    for start in range(0, len(tokens), 5):
        group = tokens[start:start + 4]
        if len(group) < 4:
            raise ValueError(
                f"incomplete time step at token offset {start + 1}: "
                f"expected 4 note tokens, found {len(group)}")
        for slot, (voice, token) in enumerate(zip(voices, group)):
            # Special tokens are plain strings; indexing them would silently
            # yield a single character instead of a pitch.
            if not isinstance(token, tuple):
                raise ValueError(
                    f"expected a note token at offset {start + slot + 1}, got {token!r}")
            voice.append(token[0])

    melody, alto, tenor, bass = voices
    return melody, alto, tenor, bass

def output_to_sheet_music(result, output_path='output.xml'):
    """Render model logits as a four-part score, play it and save it.

    The most likely token per step is taken with argmax over the last
    dimension, decoded into four voices via `process_sequence`, and each
    voice is converted to a Part with `midi_to_note`.

    Args:
        result: logits tensor whose last dimension indexes token ids; a
            leading dimension of size 1 is squeezed away.
        output_path: destination of the MusicXML file. Defaults to
            'output.xml'.

    Side effects:
        Opens the score with `score.show('midi')` and writes MusicXML to
        `output_path`.
    """
    predicted = torch.argmax(result, dim=-1).squeeze(0)
    # detach + cpu makes the conversion work for tensors on any device.
    token_ids = predicted.detach().cpu().tolist()
    voices = process_sequence(token_ids)

    score = stream.Score()
    for voice in voices:
        # Insert every part at offset 0 so the voices start together;
        # Stream.append would place each part at the current end of the score.
        score.insert(0, midi_to_note(voice))

    score.show('midi')
    score.write('musicxml', output_path)

In [270]:
test_song = test_set[3]
embedded_test = [token_dictionary[token] for token in test_song]
test_mask = compute_mask_testing(test_song)
input_embedded_test = harmonies_to_zero(embedded_test)

# Inference only: switch off dropout explicitly (rather than relying on the
# mode a previous cell left the model in) and skip building the autograd graph.
model.eval()
with torch.no_grad():
    output, _ = model(input_embedded_test, test_mask)
output_to_sheet_music(output)

[64, 52, 52, 45, 45, 45, 45, 45, 45, 61, 45, 45, 64, 64, 61, 61, 45, 64, 68, 68, 68, 68, 45, 64, 64, 61, 61, 61, 45, 52, 52, 52, 61, 45, 45, 59, 59, 45, 64, 61, 61, 61, 61, 61, 45, 45, 45, 45, 45, 57, 57, 57, 62, 59, 59, 59, 59, 61, 61, 61, 45, 45, 52, 52, 52, 64, 52, 45, 45, 59, 59, 59, 59, 59, 52, 45, 64, 64, 61, 61, 45, 64, 68, 68, 68, 68, 45, 64, 61, 61, 61, 61, 45, 52, 52, 52, 52, 59, 52, 45, 64, 64, 61, 61, 61, 61, 61, 61, 45, 45, 45, 45, 45, 57, 57, 57, 62, 59, 59, 59, 59, 61, 61, 61, 45, 45, 52, 52, 52, 64, 52, 45, 45, 64, 64, 64, 68, 68, 68, 68, 45, 45, 45, 56, 68, 68, 68, 64, 45, 64, 68, 68, 68, 45, 45, 45, 45, 45, 68, 68, 68, 68, 57, 57, 57, 66, 66, 66, 66, 66, 66, 68, 68, 52, 52, 52, 45, 61, 61, 61, 57, 68, 68, 68, 68, 64, 61, 61, 45, 45, 59, 59, 59, 59, 52, 45, 45, 45, 45, 45, 45, 45, 45, 59, 59, 59, 59, 59, 52, 45, 45, 45, 45, 45, 45, 52, 52, 68, 64, 45, 45, 45, 45, 52, 52, 52, 52, 52, 45, 61, 45, 45, 59, 59, 59, 59, 59, 45, 61, 61, 45, 64, 68, 68, 68, 68, 45, 64, 61, 61,

In [None]:
import torch
import torch.nn as nn

# Stand-alone demonstration of CrossEntropyLoss on a multi-voice output.
# Random logits shaped (batch_size, seq_len, 3, 127): 127 classes for each
# of 3 predicted voices at every step.
output = torch.randn(5, 10, 3, 127)
print(output)

# Random class indices in [0, 126], shaped (batch_size, seq_len, 3), standing
# in for the alto, tenor and bass targets.
target = torch.randint(0, 127, (5, 10, 3))
print(target)

# CrossEntropyLoss takes (N, C) logits and (N,) targets, so collapse the
# batch, sequence and voice dimensions into a single N before calling it.
criterion = nn.CrossEntropyLoss()
flat_logits = output.flatten(0, 2)
flat_target = target.flatten()
loss = criterion(flat_logits, flat_target)
print(loss)