In [317]:
# Include necessary imports
import os
import torch 
import torch.nn as nn
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from music21 import *
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from torch.nn.utils.rnn import pad_sequence
from transformers import T5ForConditionalGeneration, T5Tokenizer
import random

In [318]:
# Preprocess the data

def detect_tonic(df):
    bass_notes = df.iloc[3].values
    unique, counts = np.unique(bass_notes, return_counts=True)
    
    most_frequent_note = unique[np.argmax(counts)]
    return most_frequent_note

def key_transposition(df):
    tonic_note = detect_tonic(df)
    
    transpose_val = 48 - tonic_note
    df += transpose_val
    
    df = df.clip(lower=0, upper=127)
    return df

# Min-Max normalization technique
def normalize_df(df):
    X_std = df / 127
    return X_std

folder_path = 'Data/'
test = []
train = []
validation = []
for dirname in os.listdir(folder_path):
    if dirname != '.DS_Store':
        for filename in os.listdir(folder_path + dirname):
            if filename != '.ipynb_checkpoints':
                df = pd.read_csv(folder_path + dirname + '/' + filename)
                transposed_df = key_transposition(df.transpose())
                normalized_df = normalize_df(transposed_df)
                if dirname == 'test':
                    test.append(normalized_df)
                if dirname == 'train':
                    train.append(normalized_df)
                if dirname == 'valid':
                    validation.append(normalized_df)

# Model

In [319]:
class Model(torch.nn.Module):
    def __init__(self, input_size, output_size, hidden_dim=40, n_layers=2, dropout_rate=.3):
        super(Model, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.lstm = torch.nn.LSTM(input_size, hidden_dim, n_layers, batch_first=True, dropout=dropout_rate)
        self.fc = torch.nn.Linear(hidden_dim, output_size * 128)
        self.dropout = torch.nn.Dropout(dropout_rate)

    def init_hidden(self, batch_size):
        return (torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(next(self.parameters()).device),
                torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(next(self.parameters()).device))

    def forward(self, x, target=None, hidden=None, teacher_forcing=.7):
        batch_size, seq_len, _ = x.size()
        if hidden is None:
            hidden = self.init_hidden(batch_size)
        lstm_input = x[:, 0:1, :]

        outputs = []

        for t in range(seq_len):
            lstm_output, hidden = self.lstm(lstm_input, hidden)
            lstm_output_step = lstm_output[:, -1, :] 
            model_output_step = self.fc(lstm_output_step)
            model_output_step = model_output_step.view(batch_size, 3, 128)

            outputs.append(model_output_step.unsqueeze(1))
            
            if target is not None and random.random() < teacher_forcing:
                next_harmony = target[:, t:t+1, :]
            else:
                next_harmony = model_output_step.argmax(dim=-1).unsqueeze(1)

            next_harmony = next_harmony.view(batch_size, 1, 3)

            if t + 1 < seq_len:
                melody_at_next_step = x[:, t + 1:t + 2, :1]  
                lstm_input = torch.cat((melody_at_next_step, next_harmony), dim=2) 

        output = torch.cat(outputs, dim=1)
        return output

# Train

In [330]:
def train_model(model, optimizer, criterion, num_epochs):
    model.train()
    for song_index, song in enumerate(train[:10]):
        print(f"Training on song {song_index + 1}")
        
        melody = torch.tensor(song.iloc[0].values.reshape(-1, 1), dtype=torch.float32).unsqueeze(0).reshape(1, song.shape[1], 1)
        harmonies = torch.tensor(song.iloc[1:].values.T, dtype=torch.float32).unsqueeze(0)
        harmonies_with_zero = torch.zeros(1, song.shape[1], 3)
        melody_with_empty_harmonies = torch.cat((melody, harmonies_with_zero), dim = -1)
        harmonies_for_loss = harmonies_to_class(harmonies)
       
        
        for epoch in range(num_epochs):
            optimizer.zero_grad()
            output = model(melody_with_empty_harmonies, harmonies)
            output = output.reshape(-1, 128)
            harmonies_for_loss = harmonies_for_loss.reshape(-1)
            loss = criterion(output, harmonies_for_loss)
            loss.backward()
            optimizer.step()

            if (epoch + 1) % 10 == 0:
                print(f"Song {song_index + 1}, Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}")
                if torch.isnan(loss):
                    print("harmonies: ", harmonies)
                    print("melody:" , melody)
                    print("output: ", output)
                    

        model.eval()
        total_val_loss = 0
        
        # validation songs
        with torch.no_grad():
            for val_song in validation:
                val_melody = torch.tensor(val_song.iloc[0].values.reshape(-1, 1), dtype=torch.float32).unsqueeze(0).reshape(1, val_song.shape[1], 1)
                val_harmonies = torch.tensor(val_song.iloc[1:].values.T, dtype=torch.float32).unsqueeze(0)
                val_harmonies = harmonies_to_class(val_harmonies)
                val_harmonies_with_zero = torch.zeros(1, val_song.shape[1], 3)
                val_melody_with_empty_harmonies = torch.cat((val_melody, val_harmonies_with_zero), dim=-1)
                
                val_output = model(val_melody_with_empty_harmonies)
                val_output = val_output.reshape(-1, 128)
                val_harmonies = val_harmonies.reshape(-1)
                
                val_loss = criterion(val_output, val_harmonies)
                total_val_loss += val_loss.item()

        average_val_loss = total_val_loss / len(validation)
        print(f"Validation Loss after song {song_index + 1}: {average_val_loss}")

In [331]:
def harmonies_to_class(harmonies):
    harmonies_classes = torch.round(harmonies * 127).long()  
    return harmonies_classes


criterion = torch.nn.CrossEntropyLoss()
num_epochs = 100

model = Model(4, 3)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
train_model(model, optimizer, criterion, num_epochs)

Training on song 1
Song 1, Epoch 10/100, Loss: 4.768758296966553
Song 1, Epoch 20/100, Loss: 4.4057745933532715
Song 1, Epoch 30/100, Loss: 3.623469829559326
Song 1, Epoch 40/100, Loss: 2.835541009902954
Song 1, Epoch 50/100, Loss: 2.3465893268585205
Song 1, Epoch 60/100, Loss: 2.1647231578826904
Song 1, Epoch 70/100, Loss: 2.1007566452026367
Song 1, Epoch 80/100, Loss: 2.075883388519287
Song 1, Epoch 90/100, Loss: 2.0496203899383545
Song 1, Epoch 100/100, Loss: 2.0485832691192627
Validation Loss after song 1: 4.21611886146741
Training on song 2
Song 2, Epoch 10/100, Loss: 3.9056408405303955
Song 2, Epoch 20/100, Loss: 3.2147433757781982
Song 2, Epoch 30/100, Loss: 2.757871389389038
Song 2, Epoch 40/100, Loss: 2.4775021076202393
Song 2, Epoch 50/100, Loss: 2.287827253341675
Song 2, Epoch 60/100, Loss: 2.1536097526550293
Song 2, Epoch 70/100, Loss: 2.097165584564209
Song 2, Epoch 80/100, Loss: 2.0674352645874023
Song 2, Epoch 90/100, Loss: 2.051413059234619
Song 2, Epoch 100/100, Loss: 

# Hyperparameter Tuning

In [None]:
learning = [0.01, .001]
n_layers= [1,2,3]
hidden_dim = [20, 40, 50]
epochs= [5000, 10000]
best_loss = float('inf')
best_params = {}

for LR in learning:
    for n_layer in n_layers:
        for epoch in epochs:
            for dims in hidden_dim:
                print(f"Training with LR={LR} and n_layers={n_layer} and epochs={epoch} and hidden_dims={dims}")
                model = Model(input_size=1, output_size=harmonies.shape[2], n_layers=n_layer, hidden_dim=dims)
                optimizer = torch.optim.Adam(model.parameters(), lr=LR)
                train_model(model, melody, harmonies, optimizer, criterion, epoch)
                with torch.no_grad():
                    output = model(melody)
                    loss = criterion(output, harmonies)
                    print(f"Final Loss: {loss.item()}")        
                # Keep track of the best model (with lowest loss)
                if loss.item() < best_loss:
                    best_loss = loss.item()
                    best_params = {'learning': LR, 'n_layers': n_layer, 'epochs': epoch, 'hidden_dim': dims}
print("BEST: ", best_params)

In [328]:
def inverse_df(df):
    X_scaled = df * 127
    return X_scaled

def midi_to_note_melody(part):
    result = stream.Part()
    count = 1
    prev = round(part[0])
    for i in range(1, len(part)):
        pitch = part[i]
        curr = round(pitch)
        if curr == prev:
            count += 1
        else:
            this_note = note.Note(prev, quarterLength=count/4)
            result.append(this_note)
            count = 1
        prev = curr
    this_note = note.Note(prev, quarterLength=count/4)
    result.append(this_note)
    return result

def midi_to_note_harmonies(part):
    probabilities = torch.softmax(torch.tensor(part), dim=-1).numpy()
    print(probabilities)
    result = stream.Part()
    count = 1
    predicted_notes = np.argmax(part, axis=1)
    print(predicted_notes)
    prev = predicted_notes[0]
    for i in range(1, len(part)):
        curr = predicted_notes[i]
        if curr == prev:
            count += 1
        else:
            this_note = note.Note(prev, quarterLength=count/4)
            result.append(this_note)
            count = 1
        prev = curr
    this_note = note.Note(prev, quarterLength=count/4)
    result.append(this_note)
    return result

def output_to_sheet_music(melody, result):
    result_numpy = result.squeeze(0).detach().numpy()
    melody = inverse_df(melody).squeeze()
    inversed = result_numpy.transpose(1, 0, 2) 
    
    score = stream.Score()
    melody_part = midi_to_note_melody(melody.numpy())
    
    alto_notes = inversed[0]
    tenor_notes = inversed[1]
    bass_notes = inversed[2]  
    
    alto_part = midi_to_note_harmonies(alto_notes)
    tenor_part = midi_to_note_harmonies(tenor_notes)
    bass_part = midi_to_note_harmonies(bass_notes)

    score.append(melody_part)
    score.append(alto_part)
    score.append(tenor_part)
    score.append(bass_part)
    score.show('midi')
    score.write('musicxml', 'output.xml')

In [332]:
test_song = test[0]
melody = test_song.iloc[0]

melody = torch.tensor(test_song.iloc[0].values.reshape(-1,1), dtype=torch.float32).unsqueeze(0).reshape(1, test_song.shape[1], 1)
harmonies = torch.tensor(test_song.iloc[1:].values.T, dtype=torch.float32).unsqueeze(0)
harmonies_with_zero = torch.zeros(1, test_song.shape[1], 3)

model_input = torch.cat((melody, harmonies_with_zero), dim = -1)
result = model(model_input)
output_to_sheet_music(melody, result)

[[1.1552622e-03 1.1503743e-03 9.8342227e-04 ... 1.1710112e-03
  1.3715901e-03 1.1881663e-03]
 [3.4769218e-05 2.6771555e-05 2.1490961e-05 ... 3.8753926e-05
  4.4865825e-05 3.5381920e-05]
 [1.6395101e-05 1.1469007e-05 8.9925252e-06 ... 1.8158784e-05
  2.0702908e-05 1.5664140e-05]
 ...
 [1.3433211e-05 9.0677722e-06 6.8864083e-06 ... 1.5123687e-05
  1.6168968e-05 1.2281536e-05]
 [1.3432962e-05 9.0674830e-06 6.8862405e-06 ... 1.5123321e-05
  1.6168606e-05 1.2281203e-05]
 [1.3432710e-05 9.0672102e-06 6.8860595e-06 ... 1.5122966e-05
  1.6168273e-05 1.2280880e-05]]
[64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 6

In [218]:
# Finetune (hyperparameters, move around test data (refer to notes), etc)

In [None]:
# Test with new data + evaluate

In [None]:
# Make any other changes

In [None]:
# Sheet music + audio (musicAI)

In [None]:
# Create new models if time permits (follow steps 3 - 7)

In [None]:
# Compare models

In [None]:
# Front end ** if time permits
# - Interactive sheet music
# - musescore front end??

In [None]:
# real one and generated compare
# train on all songs + test on a different song
# measure the test loss not just the training loss

In [None]:
[60, 0, 0, 0] -> [60, 70, 70, 70] -> [61, ]