# Setup

In [1]:
!wget https://github.com/ageron/handson-ml2/raw/master/datasets/jsb_chorales/jsb_chorales.tgz
!tar xzvf jsb_chorales.tgz
!pip install midiutil

--2022-04-21 19:11:13--  https://github.com/ageron/handson-ml2/raw/master/datasets/jsb_chorales/jsb_chorales.tgz
Resolving github.com (github.com)... 140.82.121.3
Connecting to github.com (github.com)|140.82.121.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/jsb_chorales/jsb_chorales.tgz [following]
--2022-04-21 19:11:13--  https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/jsb_chorales/jsb_chorales.tgz
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 117661 (115K) [application/octet-stream]
Saving to: ‘jsb_chorales.tgz.2’


2022-04-21 19:11:14 (634 KB/s) - ‘jsb_chorales.tgz.2’ saved [117661/117661]

x test/
x test/chorale_359.csv
x test/ch

Loading data

In [2]:
import os
import pandas as pd
import numpy as np
import torch


TRAIN_FOLDER = "train"
TEST_FOLDER = "test"
VALIDATION_FOLDER = "valid"

file_list = [os.path.join(TRAIN_FOLDER, x) for x in os.listdir(TRAIN_FOLDER)]\
 + [os.path.join(TEST_FOLDER, x) for x in os.listdir(TEST_FOLDER)]\
 + [os.path.join(VALIDATION_FOLDER, x) for x in os.listdir(VALIDATION_FOLDER)]

notes_data = [pd.read_csv(curr_file).to_numpy() for curr_file in file_list]
longest_music_length = max([curr_data.shape[0] for curr_data in notes_data])
final_notes_data = np.zeros((len(notes_data), longest_music_length, notes_data[0].shape[1]))
for idx, curr_note_data in enumerate(notes_data):
    final_notes_data[idx, :curr_note_data.shape[0], :] = curr_note_data
final_notes_data = torch.tensor(final_notes_data, dtype=torch.float)

In [3]:
max_val = final_notes_data.max()
final_notes_data /= max_val
lowest_val = final_notes_data.reshape([1, 1, final_notes_data.shape[0] * final_notes_data.shape[1] * final_notes_data.shape[2]]).unique().kthvalue(2)[0]

# LSTM Test

In [4]:
from torch import nn, optim
import torch


class MusicNet(nn.Module):
    def __init__(self, input_size=4, hidden_size=100, num_layers=1, dropout=0.1) -> None:
        super().__init__()
        self.hidden = [None, None]
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, dropout=dropout, batch_first=True)
        self.lin1 = nn.Linear(hidden_size, hidden_size) # Input Size = Output input_size
        self.relu = nn.ReLU()
        self.lin2 = nn.Linear(hidden_size, input_size)
    
    def forward(self, x):
        ini1, self.hidden = self.lstm(x) if self.hidden[0] is None else self.lstm(x, self.hidden)
        if torch.nan in ini1:
            print("AHHH!")
        return self.relu(self.lin2(self.relu(self.lin1(ini1))))


In [None]:
model = MusicNet(hidden_size=10)
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [None]:

from tqdm import trange


MAX_TRAIN_INPUT = 32
NUM_EPOCHS = 20
MAX_BATCH_SIZE = 1
# curr_song = torch.tensor([final_notes_data[0]], dtype=torch.float)
# print(curr_song.shape)
curr_data = final_notes_data[:MAX_BATCH_SIZE, :, :]
target = curr_data[:, :-MAX_TRAIN_INPUT, :]
# print(target.shape)
last_loss = None

with trange(NUM_EPOCHS, position=0) as progress_bar:
    for _ in progress_bar:
        pred = torch.zeros(target.shape)
        optimizer.zero_grad()

        last_slice_idx = curr_data.shape[1] - MAX_TRAIN_INPUT
        for i in range(last_slice_idx):
            curr_segment = curr_data[:, i: i + MAX_TRAIN_INPUT, :]
            # curr_segment = curr_song[:, i: i + MAX_TRAIN_INPUT, :]
            predicted_notes = model(curr_segment)
            # print(predicted_notes.shape)
            pred[:, i, :] = predicted_notes[:, 0, :]
            # pred.append(predicted_notes[:, 0, :])
            progress_bar.set_postfix(loss=last_loss, itr=f"{i}/{last_slice_idx}")

        # stacked_pred = torch.stack(pred).unsqueeze(dim=0)
        # print(pred)
        loss = criterion(target, pred)
        loss.backward()
        optimizer.step()
        model.hidden = [None, None]

        last_loss = float(loss.data)


In [None]:
# Music Generation

music_input = final_notes_data[MAX_BATCH_SIZE, :MAX_TRAIN_INPUT, :].unsqueeze(dim=0)
pred = torch.zeros([1, final_notes_data.shape[1], final_notes_data.shape[2]])
pred[:, :MAX_TRAIN_INPUT, :] = music_input
last_slice_idx = pred.shape[1] - MAX_TRAIN_INPUT

for i in range(last_slice_idx):
    curr_slice = pred[:, i: i + MAX_TRAIN_INPUT, :]
    predicted_notes = model(curr_slice)
    pred[:, i + MAX_TRAIN_INPUT, :] = predicted_notes[:, 0, :]

In [None]:
pred[pred > 1] = 1
lowest_val = final_notes_data.reshape([1, 1, final_notes_data.shape[0] * final_notes_data.shape[1] * final_notes_data.shape[2]]).unique().kthvalue(2)[0]
pred[pred < lowest_val / 2] = 0

In [None]:
pred *= max_val
pred = torch.round(pred)

# Feed Forward

In [12]:
from torch import nn, optim
import torch

class LinearMusicNet(nn.Module):
    def __init__(self, input_size=4, hidden_size=100, num_layers=1, dropout=0.1) -> None:
        super().__init__()
        self.lin1 = nn.Linear(input_size, hidden_size)
        self.lin2 = nn.Linear(hidden_size, hidden_size)
        self.lin3 = nn.Linear(hidden_size, input_size)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        return self.relu(self.lin3(self.relu(self.lin2(self.relu(self.lin1(x))))))

In [13]:
model = LinearMusicNet(hidden_size=10)
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [15]:
from tqdm import trange


MAX_TRAIN_INPUT = 32
NUM_EPOCHS = 20
MAX_BATCH_SIZE = 1
# curr_song = torch.tensor([final_notes_data[0]], dtype=torch.float)
# print(curr_song.shape)
curr_data = final_notes_data[:MAX_BATCH_SIZE, :, :]
target = curr_data[:, :-MAX_TRAIN_INPUT, :]
# print(target.shape)
last_loss = None

with trange(NUM_EPOCHS, position=0) as progress_bar:
    for _ in progress_bar:
        pred = torch.zeros(target.shape)
        optimizer.zero_grad()

        last_slice_idx = curr_data.shape[1] - MAX_TRAIN_INPUT
        for i in range(last_slice_idx):
            curr_segment = curr_data[:, i: i + MAX_TRAIN_INPUT, :]
            predicted_notes = model(curr_segment)
            pred[:, i, :] = predicted_notes[:, 0, :]
            progress_bar.set_postfix(loss=last_loss, itr=f"{i}/{last_slice_idx}")

        loss = criterion(target, pred)
        loss.backward()
        optimizer.step()
        model.hidden = [None, None]

        last_loss = float(loss.data)


100%|██████████| 20/20 [00:07<00:00,  2.83it/s, itr=607/608, loss=0.137]


In [16]:
# Music Generation

music_input = final_notes_data[MAX_BATCH_SIZE, :MAX_TRAIN_INPUT, :].unsqueeze(dim=0)
pred = torch.zeros([1, final_notes_data.shape[1], final_notes_data.shape[2]])
pred[:, :MAX_TRAIN_INPUT, :] = music_input
last_slice_idx = pred.shape[1] - MAX_TRAIN_INPUT

for i in range(last_slice_idx):
    curr_slice = pred[:, i: i + MAX_TRAIN_INPUT, :]
    predicted_notes = model(curr_slice)
    pred[:, i + MAX_TRAIN_INPUT, :] = predicted_notes[:, 0, :]

In [17]:
pred[pred > 1] = 1
lowest_val = final_notes_data.reshape([1, 1, final_notes_data.shape[0] * final_notes_data.shape[1] * final_notes_data.shape[2]]).unique().kthvalue(2)[0]
pred[pred < lowest_val / 2] = 0

In [18]:
pred *= max_val
pred = torch.round(pred)

# CRNN with GRU
TODO: Implement

## Generate a MIDI File

In [19]:
from midiutil import MIDIFile

degrees  = [60, 62, 64, 65, 67, 69, 71, 72]  # MIDI note number
track    = 0
channel  = 0
time     = 0    # In beats
duration = 1    # In beats
tempo    = 60   # In BPM
volume   = 100  # 0-127, as per the MIDI standard

MyMIDI = MIDIFile(1)  # One track
MyMIDI.addTempo(track, time, tempo)

numpy_vals = pred.detach().numpy()[0, :, :]

for curr_channel in range(numpy_vals.shape[1]):
    print(numpy_vals[:, curr_channel])
    for i, pitch in enumerate(numpy_vals[:, curr_channel].tolist()):
        # print(pitch)
        if pitch != 0:
            MyMIDI.addNote(track, curr_channel, int(pitch), time + i, duration, volume)

with open("test-file.mid", "wb") as output_file:
    MyMIDI.writeFile(output_file)

[64. 64. 64. 64. 64. 64. 64. 64. 64. 64. 64. 64. 71. 71. 71. 71. 71. 71.
 71. 71. 71. 71. 71. 71. 69. 69. 69. 69. 71. 71. 71. 71.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0