In [154]:
import os
import pandas as pd
import torch.nn as nn
import torch
import torch.nn.functional as F

# Data Pre-Processing

### Transfroming the data into more readable input to the model

In [155]:
def encode_song(song):
    result = []
    prev = {'note0': -1, 'note1': -1, 'note2': -1, 'note3': -1} # COME BACKKKK
    for index, row in song.iterrows():
        frame = []
        for voice in ['note0', 'note1', 'note2', 'note3']:
            pitch = row[voice]
            previous_pitch = prev[voice]
            
            tied = 1 if pitch == previous_pitch else 0
            frame.append((int(pitch), tied))
            prev[voice] = pitch
        result.append(frame)
    tensor = torch.tensor(result, dtype=torch.int64)
    return tensor

In [166]:
folder_path = 'Data/'
test = []
train = []
validation = []
for dirname in os.listdir(folder_path):
    if dirname != '.DS_Store':
        for filename in os.listdir(folder_path + dirname):
            if filename != '.ipynb_checkpoints':
                df = pd.read_csv(folder_path + dirname + '/' + filename)
                song = encode_song(df)
                if dirname == 'test':
                    test.append(song)
                if dirname == 'train':
                    train.append(song)
                if dirname == 'valid':
                    validation.append(song)

In [185]:
import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dim=100, num_layers=2, dropout=0.3):
        super(Model, self).__init__()
        self.encoder = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout)
        self.decoder = nn.LSTM(hidden_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.input_proj = nn.Linear(6, hidden_dim)

    def forward(self, melody, target=None, teacher_forcing_ratio=0.5):
        batch_size = melody.size(0)
        seq_length = melody.size(1)

        encoder_outputs, (hidden, cell) = self.encoder(melody)
        decoder_input = torch.zeros(batch_size, 1, hidden.size(2))
        
        outputs = torch.zeros(batch_size, seq_length, self.fc.out_features)

        for t in range(seq_length):
            decoder_output, (hidden, cell) = self.decoder(decoder_input, (hidden, cell))
            
            output = self.fc(decoder_output.squeeze(1))
            outputs[:, t, :] = output

            use_teacher_forcing = True if torch.rand(1).item() < teacher_forcing_ratio else False
            
            if target is not None and use_teacher_forcing:   
                flattened_target = target[:, t, :].view(batch_size, -1)
                projected_target = self.input_proj(flattened_target).unsqueeze(1)
                decoder_input = projected_target
            else:
                ecoder_input = output.unsqueeze(1)
                
        probabilities = F.softmax(output, dim=-1)
        print("probabilities are: ", probabilities)
        predicted_harmony = torch.argmax(probabilities, dim=-1)
        print("predicted harmony: ", predicted_harmony)
        return outputs


In [186]:
def train_model(model, optimizer, criterion, num_epochs):
    model.train()

    for song_index, song in enumerate(train[:5]):
        print(f"Training on song {song_index + 1}")
        
        melody = song[:, 0, :].unsqueeze(0).float()
        print("Melody is: ", melody)
        harmonies = song[:, 1:, :].permute(1, 0, 2).float()
        print("Harmonies are: ", harmonies)

        harmonies_for_loss = harmonies_to_class(harmonies[:, :, 0])
        print("Harmonies for loss: ", harmonies_for_loss)

        for epoch in range(num_epochs):
            optimizer.zero_grad()
            
            ratio = 0.9 - (0.9 - 0.1) * (epoch / num_epochs)
            ratio = max(0.1, ratio)
            
            output = model(melody, target=harmonies, teacher_forcing_ratio=ratio)
            print("Output of the model is: ", output)
            
            output = output.view(-1, 128)
            harmonies_for_loss = harmonies_for_loss.view(-1) 
            
            loss = criterion(output, harmonies_for_loss)
            loss.backward()
            optimizer.step()
            
            if (epoch + 1) % 10 == 0:
                print(f"Song {song_index + 1}, Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}")
        
        model.eval()
        total_val_loss = 0
        with torch.no_grad():
            for val_song in validation:
                val_melody = val_song[:, 0, :].unsqueeze(0).float()
                val_harmonies = val_song[:, 1:, :].permute(1, 0, 2).float() 
                val_harmonies_for_loss = harmonies_to_class(val_harmonies[:, :, 0])

                val_output = model(val_melody)
                val_output = val_output.view(-1, 128)
                val_harmonies_for_loss = val_harmonies_for_loss.view(-1)
                
                val_loss = criterion(val_output, val_harmonies_for_loss)
                total_val_loss += val_loss.item()

        average_val_loss = total_val_loss / len(validation)
        print(f"Validation Loss after song {song_index + 1}: {average_val_loss}")

        model.train()

In [187]:
def harmonies_to_class(harmonies):
    harmonies_classes = torch.round(harmonies * 127).long()  
    return harmonies_classes

criterion = torch.nn.CrossEntropyLoss()
num_epochs = 100

model = Model(2, 3)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
train_model(model, optimizer, criterion, num_epochs)

Training on song 1
Melody is:  tensor([[[66.,  0.],
         [66.,  1.],
         [68.,  0.],
         [68.,  1.],
         [69.,  0.],
         [69.,  1.],
         [69.,  1.],
         [69.,  1.],
         [68.,  0.],
         [68.,  1.],
         [68.,  1.],
         [68.,  1.],
         [66.,  0.],
         [66.,  1.],
         [66.,  1.],
         [66.,  1.],
         [66.,  1.],
         [66.,  1.],
         [68.,  0.],
         [68.,  1.],
         [69.,  0.],
         [69.,  1.],
         [69.,  1.],
         [69.,  1.],
         [68.,  0.],
         [68.,  1.],
         [68.,  1.],
         [68.,  1.],
         [66.,  0.],
         [66.,  1.],
         [66.,  1.],
         [66.,  1.],
         [73.,  0.],
         [73.,  1.],
         [73.,  1.],
         [73.,  1.],
         [71.,  0.],
         [71.,  1.],
         [71.,  1.],
         [71.,  1.],
         [69.,  0.],
         [69.,  1.],
         [69.,  1.],
         [69.,  1.],
         [68.,  0.],
         [68.,  1.],
   

RuntimeError: shape '[-1, 128]' is invalid for input of size 540