In [1]:
import os
import pandas as pd 
import torch.nn as nn
import torch
import torch.nn.functional as F
from music21 import *
import random

# Data Pre-Processing

### Transforming the data into a more readable input for the model

In [2]:
def encode_song(song):
    """Flatten a four-voice song table into a list of tokens.

    Every row of ``song`` (walked with ``DataFrame.iterrows``) contributes one
    ``(pitch, tied)`` tuple for each of the columns ``note0``..``note3``,
    followed by the row separator ``'|||'``. The list is wrapped in the
    markers ``'START'`` and ``'END'``.

    ``tied`` is 1 when a voice's pitch equals that voice's pitch in the
    previous row and 0 otherwise; the first row is compared against -1.

    Args:
        song: DataFrame with columns ``note0``, ``note1``, ``note2``, ``note3``.

    Returns:
        list: the token sequence described above.
    """
    voices = ('note0', 'note1', 'note2', 'note3')
    last_pitch = dict.fromkeys(voices, -1)
    tokens = ['START']

    for _, row in song.iterrows():
        for voice in voices:
            current = row[voice]
            # A repeated pitch in the same voice is marked as tied.
            tokens.append((current, int(bool(current == last_pitch[voice]))))
            last_pitch[voice] = current
        tokens.append('|||')

    tokens.append('END')
    return tokens

In [9]:
# Load every song CSV found under Data/<split>/ and encode it with encode_song.
# NOTE(review): encode_song returns plain token lists, while train_model
# indexes each song like a tensor (song[:, 0, :]); the conversion step between
# the two is not visible in this part of the notebook — confirm it exists.
folder_path = 'Data/'
test = []
train = []
validation = []

# Sub-directory name -> list that collects the encoded songs of that split.
split_lists = {'test': test, 'train': train, 'valid': validation}

# os.listdir returns entries in arbitrary order; sorting makes the song order
# (and therefore slices such as train[:25]) reproducible across machines.
for dirname in sorted(os.listdir(folder_path)):
    split_dir = os.path.join(folder_path, dirname)
    # Skip stray files such as .DS_Store and folders that are not a known split.
    if dirname not in split_lists or not os.path.isdir(split_dir):
        continue
    for filename in sorted(os.listdir(split_dir)):
        file_path = os.path.join(split_dir, filename)
        # Skip hidden entries (.ipynb_checkpoints, .DS_Store) and sub-folders.
        if filename.startswith('.') or not os.path.isfile(file_path):
            continue
        df = pd.read_csv(file_path)
        song = encode_song(df)
        split_lists[dirname].append(song)

['START', (66, 0), (61, 0), (57, 0), (54, 0), '|||', (66, 1), (61, 1), (57, 1), (54, 1), '|||', (68, 0), (61, 1), (59, 0), (54, 1), '|||', (68, 1), (61, 1), (59, 1), (54, 1), '|||', (69, 0), (66, 0), (61, 0), (54, 1), '|||', (69, 1), (66, 1), (61, 1), (56, 0), '|||', (69, 1), (66, 1), (61, 1), (57, 0), '|||', (69, 1), (66, 1), (61, 1), (59, 0), '|||', (68, 0), (65, 0), (61, 1), (61, 0), '|||', (68, 1), (65, 1), (61, 1), (61, 1), '|||', (68, 1), (65, 1), (59, 0), (49, 0), '|||', (68, 1), (65, 1), (59, 1), (49, 1), '|||', (66, 0), (66, 0), (57, 0), (50, 0), '|||', (66, 1), (66, 1), (57, 1), (50, 1), '|||', (66, 1), (66, 1), (57, 1), (50, 1), '|||', (66, 1), (66, 1), (57, 1), (50, 1), '|||', (66, 1), (66, 1), (59, 0), (50, 1), '|||', (66, 1), (66, 1), (59, 1), (50, 1), '|||', (68, 0), (66, 1), (59, 1), (50, 1), '|||', (68, 1), (66, 1), (59, 1), (50, 1), '|||', (69, 0), (66, 1), (61, 0), (49, 0), '|||', (69, 1), (66, 1), (61, 1), (49, 1), '|||', (69, 1), (66, 1), (61, 1), (47, 0), '|||', (

In [4]:
class Model(nn.Module):
    """LSTM encoder followed by three parallel linear heads (alto, tenor, bass).

    Args:
        input_size: Number of features per time step fed to the LSTM.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of logits produced by each head.
        num_harmony_parts: Accepted but not used; three heads are always built.
        num_layers: Number of stacked LSTM layers.
        dropout_rate: Dropout between LSTM layers; only passed on when
            ``num_layers > 1``, otherwise the LSTM gets a dropout of 0.
    """

    def __init__(self, input_size=2, hidden_size=128, output_size=127, num_harmony_parts=3, num_layers=1, dropout_rate=0.5):
        super().__init__()

        lstm_dropout = 0
        if num_layers > 1:
            lstm_dropout = dropout_rate
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=lstm_dropout,
        )

        # One classifier per harmony voice, all reading the same LSTM features.
        self.fc_alto = nn.Linear(hidden_size, output_size)
        self.fc_tenor = nn.Linear(hidden_size, output_size)
        self.fc_bass = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the LSTM and stack the three head outputs along dimension 2.

        For a 3-D batch-first input the result has shape
        ``(batch, seq_len, 3, output_size)``, with the voices ordered
        alto, tenor, bass.
        """
        features, _ = self.lstm(x)
        heads = (self.fc_alto, self.fc_tenor, self.fc_bass)
        return torch.stack([head(features) for head in heads], dim=2)
    
class HarmonyLSTM(nn.Module):
    """Batch-first LSTM with separate alto, tenor and bass output layers.

    Unlike a stateless forward pass, ``forward`` accepts and returns the LSTM
    state so a caller can feed a sequence in several pieces.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of outputs produced by each voice layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
        )
        # Independent projections of the shared LSTM features, one per voice.
        self.alto_fc = nn.Linear(hidden_size, output_size)
        self.tenor_fc = nn.Linear(hidden_size, output_size)
        self.bass_fc = nn.Linear(hidden_size, output_size)

    def forward(self, melody, hidden_state=None):
        """Return ``(alto, tenor, bass, hidden_state)`` for the given melody.

        ``hidden_state`` is forwarded to the LSTM unchanged (``None`` lets the
        LSTM start from its default state) and the LSTM's new state is
        returned as the last tuple element.
        """
        features, next_state = self.lstm(melody, hidden_state)
        voice_layers = (self.alto_fc, self.tenor_fc, self.bass_fc)
        voice_outputs = tuple(layer(features) for layer in voice_layers)
        return (*voice_outputs, next_state)
    
    

    
    
    
# import torch.nn as nn
# import torch

# class Model(nn.Module):
#     def __init__(self, vocab_size, embedding_dim=64, hidden_size=128, output_size=127, num_harmony_parts=3, num_layers=1, dropout_rate=0.5):
#         super(Model, self).__init__()
        
#         # Embedding layer for tokenized chords
#         self.embedding = nn.Embedding(vocab_size, embedding_dim)
        
#         # LSTM with input size set to embedding_dim
#         self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers, batch_first=True, dropout=dropout_rate if num_layers > 1 else 0)
        
#         # Fully connected layers for each harmony part
#         self.fc_alto = nn.Linear(hidden_size, output_size)
#         self.fc_tenor = nn.Linear(hidden_size, output_size)
#         self.fc_bass = nn.Linear(hidden_size, output_size)

#     def forward(self, x):
#         # Convert token indices to embeddings
#         x = self.embedding(x)  # Shape: (batch_size, sequence_length, embedding_dim)
        
#         # LSTM layer
#         lstm_out, _ = self.lstm(x)  # Shape: (batch_size, sequence_length, hidden_size)
        
#         # Linear layers for each harmony part
#         output_alto = self.fc_alto(lstm_out)
#         output_tenor = self.fc_tenor(lstm_out)
#         output_bass = self.fc_bass(lstm_out)
        
#         # Stack outputs for alto, tenor, and bass
#         output = torch.stack([output_alto, output_tenor, output_bass], dim=2)  # Shape: (batch_size, sequence_length, 3, output_size)
#         return output

In [None]:
import random

def _split_melody_and_harmonies(song):
    """Split one song tensor into the model input and the harmony targets.

    Index 0 of dimension 1 is used as the melody (all features kept); the
    remaining entries of dimension 1 are the harmony voices, of which only
    feature 0 of the last dimension is kept. Both results get a leading batch
    dimension of size 1 and are converted to float.

    Assumes ``song`` is a 3-D tensor laid out as (time step, voice, feature)
    — TODO confirm against the step that builds the song tensors.
    """
    melody = song[:, 0, :].unsqueeze(0).float()
    harmonies = song[:, 1:, 0].unsqueeze(0).float()
    return melody, harmonies


def train_model(model, optimizer, criterion, num_epochs, teacher_forcing_ratio=0.7, max_songs=25):
    """Train ``model`` song by song and report the validation loss after each song.

    Reads the module-level lists ``train`` and ``validation``. For each of the
    first ``max_songs`` training songs the model is optimised for
    ``num_epochs`` steps on that single song (gradient norm clipped to 1),
    then evaluated on every validation song without gradients.

    Args:
        model: Module mapping a melody batch to logits whose last dimension is
            the number of classes.
        optimizer: Optimizer over ``model``'s parameters.
        criterion: Loss taking ``(N, C)`` logits and ``(N,)`` class indices.
        num_epochs: Optimisation steps performed on each song.
        teacher_forcing_ratio: Accepted for interface compatibility; not used
            by the current implementation.
        max_songs: Number of leading songs of ``train`` to use (``None`` uses
            all of them).

    Returns:
        None. Progress is printed; ``model`` is left in training mode.
    """
    model.train()
    for song_index, song in enumerate(train[:max_songs]):
        print(f"Training on song {song_index + 1}")

        melody, harmonies = _split_melody_and_harmonies(song)
        targets = harmonies.reshape(-1).long()

        for epoch in range(num_epochs):
            optimizer.zero_grad()
            output = model(melody)

            # Flatten every leading dimension; the class count is taken from
            # the model output instead of being hard-coded.
            loss = criterion(output.reshape(-1, output.size(-1)), targets)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            optimizer.step()

            if (epoch + 1) % 10 == 0:
                print(f"Song {song_index + 1}, Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}")
                print(f"Targets    : {harmonies}")
                print(f"Predictions: {output.argmax(dim=-1)}")

        # Without validation songs there is nothing to average (and dividing
        # by len(validation) would raise ZeroDivisionError).
        if not validation:
            print(f"Validation skipped after song {song_index + 1}: no validation songs")
            continue

        model.eval()
        total_val_loss = 0
        with torch.no_grad():
            for val_song in validation:
                val_melody, val_harmonies = _split_melody_and_harmonies(val_song)
                val_output = model(val_melody)
                val_loss = criterion(
                    val_output.reshape(-1, val_output.size(-1)),
                    val_harmonies.reshape(-1).long(),
                )
                total_val_loss += val_loss.item()

        average_val_loss = total_val_loss / len(validation)
        print(f"Validation Loss after song {song_index + 1}: {average_val_loss}")

        model.train()


In [None]:
# Training configuration: cross-entropy over the logits of each harmony head.
criterion = nn.CrossEntropyLoss()
num_epochs = 100

# NOTE(review): the second positional argument of Model is hidden_size, so this
# builds an LSTM whose hidden state has width 3 — confirm that is intended.
model = Model(input_size=2, hidden_size=3)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.0001,
    weight_decay=1e-5,
)
train_model(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    num_epochs=num_epochs,
)

In [None]:
import numpy as np
import torch
from music21 import stream, note

def midi_to_note(part):
    """Convert a sequence of per-step pitch values into a music21 part.

    Each value is rounded, and every run of consecutive equal rounded values
    becomes a single ``note.Note`` whose ``quarterLength`` is the run length
    divided by 4 (each entry contributes 0.25 quarter lengths).

    Args:
        part: Iterable of numeric pitch values, one per time step.

    Returns:
        stream.Part: the notes in order; empty when ``part`` has no entries
        (previously an empty input raised ``IndexError``).
    """
    from itertools import groupby  # local import keeps this cell self-contained

    result = stream.Part()
    # groupby merges consecutive equal pitches, i.e. a run-length encoding.
    for pitch, run in groupby(round(value) for value in part):
        run_length = sum(1 for _ in run)
        result.append(note.Note(pitch, quarterLength=run_length / 4))
    return result

def output_to_sheet_music(melody, result, output_path='output.xml'):
    """Build a four-part score from a melody and model logits, play and save it.

    Feature 0 of ``melody`` is taken as the melody pitch per step. ``result``
    is reduced with ``argmax`` over its last dimension, and columns 0, 1 and 2
    of the outcome are used as the alto, tenor and bass pitches. Each voice is
    converted with ``midi_to_note``, printed, and appended to a
    ``stream.Score`` that is shown as MIDI and written as MusicXML.

    Args:
        melody: Tensor indexed as ``[batch, step, feature]`` with batch size 1.
        result: Model output for that melody; assumed to be shaped
            (1, steps, 3, classes) — TODO confirm against the model in use.
        output_path: Destination of the MusicXML file (default ``'output.xml'``).
    """
    # squeeze(0) removes only the batch dimension; a bare squeeze() would also
    # collapse a one-step melody to a 0-d tensor that cannot be iterated.
    melody = melody[:, :, 0].squeeze(0)
    melody_part = midi_to_note(melody.numpy())
    print("melody part is: ", melody)

    result = torch.argmax(result, dim=-1)
    result = result.squeeze(0)

    score = stream.Score()
    score.append(melody_part)

    # Columns of the arg-maxed result, in the order the heads were stacked.
    for column, voice in enumerate(("alto", "tenor", "bass")):
        voice_notes = result[:, column].numpy()
        voice_part = midi_to_note(voice_notes)
        print(f"{voice} part is: ", voice_notes)
        score.append(voice_part)

    score.show('midi')
    score.write('musicxml', output_path)

In [None]:
# Harmonise one held-out song and render it as sheet music.
test_song = test[2]

melody = test_song[:, 0, :].unsqueeze(0).float()

# Inference only: put the model in evaluation mode and skip autograd
# bookkeeping (train_model leaves the model in training mode).
model.eval()
with torch.no_grad():
    result = model(melody)
output_to_sheet_music(melody, result)

In [None]:
import torch
import torch.nn as nn

# Shape check for the loss computation. The model output is assumed to be
# (batch_size, seq_len, 3, 127): one 127-way set of logits per harmony voice.
output = torch.randn(5, 10, 3, 127)  # random logits standing in for a model
print(output)

# Matching targets: one class index in [0, 126] for every
# (batch, step, voice) position, i.e. shape (batch_size, seq_len, 3).
target = torch.randint(0, 127, (5, 10, 3))
print(target)

# CrossEntropyLoss takes (N, C) logits and (N,) class indices, so all leading
# dimensions are collapsed into N before the loss is computed.
criterion = nn.CrossEntropyLoss()
flat_logits = output.reshape(-1, output.size(-1))
flat_targets = target.reshape(-1)
loss = criterion(flat_logits, flat_targets)
print(loss)