In [2]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch import tensor

from pathlib import Path

import pennylane as qml
from pennylane import numpy as np
from pennylane.templates import embeddings as emb
from pennylane.templates import layers as lay

from typing import Union

import matplotlib.pyplot as plt

from music21 import converter, instrument, note, chord, stream
import glob, pickle, time

In [20]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [4]:
!wget https://github.com/theerfan/Maqenta/raw/main/data/notes.pk

--2021-10-06 01:18:29--  https://github.com/theerfan/Maqenta/raw/main/data/notes.pk
Resolving github.com (github.com)... 140.82.114.4
Connecting to github.com (github.com)|140.82.114.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/theerfan/Maqenta/main/data/notes.pk [following]
--2021-10-06 01:18:29--  https://raw.githubusercontent.com/theerfan/Maqenta/main/data/notes.pk
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 250124 (244K) [application/octet-stream]
Saving to: ‘notes.pk’


2021-10-06 01:18:29 (8.32 MB/s) - ‘notes.pk’ saved [250124/250124]



In [22]:
# Midi.py

notes_dir = "notes.pk"

class Midi:
    def __init__(self, seq_length, device):
        self.seq_length = seq_length
        self.device = device

        if Path(notes_dir).is_file():
            self.notes = pickle.load(open(notes_dir, "rb"))
        else:
            self.notes = self.get_notes()
            pickle.dump(self.notes, open(notes_dir, "wb"))

        self.network_input, self.network_output = self.prepare_sequences(self.notes)
        print(f"Input shape: {self.network_input.shape}")
        print(f"Output shape: {self.network_output.shape}")

    def get_notes(self):
        """Get all the notes and chords from the midi files in the ./midi_songs directory"""
        # This is assuming that every interval between notes is the same (0.5)
        notes = []

        for file in glob.glob("midi_songs/*.mid"):
            midi = converter.parse(file)

            print("Parsing %s" % file)

            notes_to_parse = None

            try:  # file has instrument parts
                s2 = instrument.partitionByInstrument(midi)
                notes_to_parse = s2.parts[0].recurse()
            except:  # file has notes in a flat structure
                notes_to_parse = midi.flat.notes

            for element in notes_to_parse:
                if isinstance(element, note.Note):
                    notes.append(str(element.pitch))
                elif isinstance(element, chord.Chord):
                    notes.append(".".join(str(n) for n in element.normalOrder))

        with open(notes_dir, "wb") as filepath:
            pickle.dump(notes, filepath)

        return notes

    def prepare_sequences(self, notes):
        """Prepare the sequences used by the Neural Network"""
        self.n_vocab = len(set(notes))

        # get all pitch names
        pitchnames = sorted(set(item for item in notes))

        # create a dictionary to map pitches to integers
        self.note_to_int = {note: number for number, note in enumerate(pitchnames)}
        self.int_to_note = {number: note for number, note in enumerate(pitchnames)}

        network_input = []
        network_output = []

        # create input sequences and the corresponding outputs
        for i in range(len(self.notes) - self.seq_length):
            sequence_in = self.notes[i : i + self.seq_length]
            sequence_out = self.notes[i + self.seq_length]
            network_input.append([self.note_to_int[char] for char in sequence_in])
            network_output.append(self.note_to_int[sequence_out])

        n_patterns = len(network_input)

        # reshape the input into a format compatible with LSTM layers
        # So this is actuallyt (number of different inputs, sequence length, number of features)
        network_input = np.reshape(network_input, (n_patterns, self.seq_length, 1))
        # normalize input
        network_input = network_input / float(self.n_vocab)

        # network_output = to_categorical(network_output)

        return (tensor(network_input, device=self.device), tensor(network_output, device=self.device))

    def create_midi_from_model(self, prediction_output, filename):
        """ convert the output from the prediction to notes and create a midi file
        from the notes """
        offset = 0
        output_notes = []

        # create note and chord objects based on the values generated by the model
        for pattern in prediction_output:
            # pattern is a chord
            if ('.' in pattern) or pattern.isdigit():
                notes_in_chord = pattern.split('.')
                notes = []
                for current_note in notes_in_chord:
                    new_note = note.Note(int(current_note))
                    new_note.storedInstrument = instrument.Piano()
                    notes.append(new_note)
                new_chord = chord.Chord(notes)
                new_chord.offset = offset
                output_notes.append(new_chord)
            # pattern is a note
            else:
                new_note = note.Note(pattern)
                new_note.offset = offset
                new_note.storedInstrument = instrument.Piano()
                output_notes.append(new_note)

            # increase offset each iteration so that notes do not stack
            offset += 0.5

        midi_stream = stream.Stream(output_notes)

        midi_stream.write('midi', fp=filename)


In [23]:
# QLSTM.py

Embedding = Union[emb.AngleEmbedding, emb.AmplitudeEmbedding, emb.BasisEmbedding]
Layer = Union[
    lay.BasicEntanglerLayers,
    lay.ParticleConservingU1,
    lay.ParticleConservingU2,
    lay.RandomLayers,
    lay.StronglyEntanglingLayers,
]


class QLSTMCell(nn.Module):
    def quantum_op(
        self,
        wires,
        embedding: Embedding = emb.AngleEmbedding,
        layer: Layer = lay.RandomLayers,
    ):
        def circuit_part(inputs, weights):
            embedding(inputs, wires=wires)
            if layer == lay.RandomLayers:
              seed = np.random.randint(1, 2**12)
              layer(weights, wires=wires, seed=seed)
            else:
              layer(weights, wires=wires)
            return [qml.expval(qml.PauliZ(wires=w)) for w in wires]

        return circuit_part

    def __init__(
        self,
        input_size,
        hidden_size,
        n_qubits=4,
        n_qlayers=1,
        dropout=0,
        batch_first=True,
        return_sequences=False,
        return_state=True,
        backend="default.qubit",
        device="cpu"
    ):
        super(QLSTMCell, self).__init__()
        self.n_inputs = input_size
        self.hidden_size = hidden_size
        self.concat_size = self.n_inputs + self.hidden_size
        self.n_qubits = n_qubits
        self.n_qlayers = n_qlayers
        self.backend = backend  # "default.qubit", "qiskit.basicaer", "qiskit.ibm"
        self.device = device # "cpu", "cuda"
        self.dropout = dropout

        self.batch_first = batch_first
        self.return_sequences = return_sequences
        self.return_state = return_state

        # self.dev = qml.device("default.qubit", wires=self.n_qubits)
        # self.dev = qml.device('qiskit.basicaer', wires=self.n_qubits)
        # self.dev = qml.device('qiskit.ibm', wires=self.n_qubits)
        # use 'qiskit.ibmq' instead to run on hardware

        self.wires_forget = [f"wire_forget_{i}" for i in range(self.n_qubits)]
        self.wires_input = [f"wire_input_{i}" for i in range(self.n_qubits)]
        self.wires_update = [f"wire_update_{i}" for i in range(self.n_qubits)]
        self.wires_output = [f"wire_output_{i}" for i in range(self.n_qubits)]

        self.dev_forget = qml.device(self.backend, wires=self.wires_forget)
        self.dev_input = qml.device(self.backend, wires=self.wires_input)
        self.dev_update = qml.device(self.backend, wires=self.wires_update)
        self.dev_output = qml.device(self.backend, wires=self.wires_output)

        self.qlayer_forget = qml.QNode(
            self.quantum_op(self.wires_forget), self.dev_forget, interface="torch"
        )

        self.qlayer_input = qml.QNode(
            self.quantum_op(self.wires_input), self.dev_input, interface="torch"
        )

        self.qlayer_update = qml.QNode(
            self.quantum_op(self.wires_update), self.dev_update, interface="torch"
        )

        self.qlayer_output = qml.QNode(
            self.quantum_op(self.wires_output), self.dev_output, interface="torch"
        )

        weight_shapes = {"weights": (n_qlayers, n_qubits)}
        print(f"weight_shapes = (n_qlayers, n_qubits) = ({n_qlayers}, {n_qubits})")

        self.clayer_in = torch.nn.Linear(self.concat_size, n_qubits)
        self.VQC = {
            "forget": qml.qnn.TorchLayer(self.qlayer_forget, weight_shapes).to(device),
            "input": qml.qnn.TorchLayer(self.qlayer_input, weight_shapes).to(device),
            "update": qml.qnn.TorchLayer(self.qlayer_update, weight_shapes).to(device),
            "output": qml.qnn.TorchLayer(self.qlayer_output, weight_shapes).to(device),
        }
        self.clayer_out = torch.nn.Linear(self.n_qubits, self.hidden_size)
        # self.clayer_out = [torch.nn.Linear(n_qubits, self.hidden_size) for _ in range(4)]

    def forward(self, x, init_states=None):
        """
        x.shape is (batch_size, seq_length, feature_size)
        recurrent_activation -> sigmoid
        activation -> tanh
        """
        # Automatically assumes single batch
        x = x.to(self.device)
        if len(x.shape) == 2:
            x = x.reshape(1, x.shape[0], x.shape[1])
        
        if self.batch_first is True:
            batch_size, seq_length, features_size = x.size()
        else:
            seq_length, batch_size, features_size = x.size()

        hidden_seq = []
        if init_states is None:
            h_t = torch.zeros(batch_size, self.hidden_size, device=self.device) # hidden state (output)
            c_t = torch.zeros(batch_size, self.hidden_size, device=self.device) # cell state
        else:
            # for now we ignore the fact that in PyTorch you can stack multiple RNNs
            # so we take only the first elements of the init_states tuple init_states[0][0], init_states[1][0]
            h_t, c_t = init_states
            # h_t = h_t[0]
            # c_t = c_t[0]

        for t in range(seq_length):
            # get features from the t-th element in seq, for all entries in the batch
            x_t = x[:, t, :]

            # Concatenate input and hidden state
            v_t = torch.cat((h_t, x_t), dim=1).float().to(device)

            # match qubit dimension
            y_t = self.clayer_in(v_t).to(self.device)
            
            f_t = torch.sigmoid(
                self.clayer_out(self.VQC["forget"](y_t).to(self.device))
            ).to(self.device)  # forget block
            i_t = torch.sigmoid(self.clayer_out(self.VQC["input"](y_t).to(self.device)))  # input block
            g_t = torch.tanh(self.clayer_out(self.VQC["update"](y_t).to(self.device)))  # update block
            o_t = torch.sigmoid(
                self.clayer_out(self.VQC["output"](y_t).to(self.device)).to(self.device)
            ).to(self.device)  # output block

            c_t = (f_t * c_t) + (i_t * g_t)
            h_t = o_t * torch.tanh(c_t)

            hidden_seq.append(h_t.unsqueeze(0))

        hidden_seq = torch.cat(hidden_seq, dim=0)
        hidden_seq = hidden_seq.transpose(0, 1).contiguous()

        # Wow, such pseudo-keras!
        h_t, c_t = h_t.float(), c_t.float()

        if self.dropout:
          F.dropout(h_t, self.dropout, inplace=True)

        if self.return_state:
            if self.return_sequences:
                return hidden_seq, (h_t, c_t)
            else:
                return (h_t, c_t)
        else:
            if self.return_sequences:
                return hidden_seq
            else:
                return h_t
    
    def predict(self, x, init_states=None):
        return self.forward(x, init_states)

In [24]:
class QLSTM(nn.Module):

  def __init__(self, input_size: int, hidden_size: int, n_layers: int, n_qubits: list, n_qlayers: list, dropouts: list, backend="default.qubit", device="cpu"):
    super(QLSTM, self).__init__()
    self.models = nn.ModuleList()
    self.n_layers = n_layers
    for i in range(self.n_layers):
      self.models.append(
          QLSTMCell(input_size, hidden_size, n_qubits[i], n_qlayers[i], dropouts[i], backend=backend, device=device)
      )
    
  def forward(self, note_sequences):
    # A tuple of (h_t, c_t)
    outputs = []
    h_t_c_t = None
    for i in range(self.n_layers):
      h_t_c_t = self.models[i](note_sequences[i], h_t_c_t)
      # Only output c_t
      outputs.append(h_t_c_t[1])
    
    # print(outputs)
    return torch.stack(outputs)

  def predict(self, note_sequences):
    return self.forward(note_sequences)


In [26]:
# LSTMusic.py

# import torch
# import torch.nn as nn
# from torch import optim
# import torch.nn.functional as F

# from QLTSM.qlstm import QLSTM

import numpy as np


class LSTMusic(nn.Module):
    def __init__(
        self,
        n_qlayers=1,
        n_layers=1,
        dropout=0.3,
        n_vocab=None,
        input_dim=1,
        hidden_dim=512,
        n_qubits=4,
        backend="default.qubit",
        device="cpu",
    ):
        super(LSTMusic, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        if n_qubits > 0:
            print(f"Generator will use Quantum LSTM running on backend {backend}")
            r_n = range(n_layers)
            n_qubits = [n_qubits for _ in r_n]
            n_qlayers = [n_qlayers for _ in r_n]
            dropouts = [dropout for _ in r_n]
            self.model = QLSTM(input_dim, hidden_dim, n_layers, n_qubits, n_qlayers, dropouts, device=device).to(device)

            # self.model = QLSTM(
            #     input_dim,
            #     hidden_dim,
            #     n_qubits=n_qubits,
            #     backend=backend,
            #     return_state=True,
            #     device=device,
            # ).to(device)
        else:
            print("Generator will use Classical LSTM")
            self.model = nn.LSTM(input_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space

    def forward(self, note_sequences):
        ct_list = self.model(note_sequences)
        scores = []
        for c_t in ct_list:
          scores.append(F.log_softmax(c_t, dim=1))
        # (h_t, c_t) = self.model(note_sequences)
        # c_t = self.model(note_sequence)
        # scores = F.log_softmax(c_t, dim=1)
        ct_list = torch.stack(scores)
        return ct_list.reshape(ct_list.shape[0], ct_list.shape[2])

        # embeds = self.word_embeddings(sentence)
        # lstm_out, _ = self.model(embeds.view(len(sentence), 1, -1))
        # tag_logits = self.hidden2tag(lstm_out.view(len(sentence), -1))
        # tag_scores = F.log_softmax(tag_logits, dim=1)
        # return tag_scores

    def train(
        self,
        mode=True,
        inputs=None,
        outputs=None,
        n_epochs=None,
        cutoff: int = None,
        learning_rate=0.1,
    ):
        # Same as categorical cross entropy, who would've thought?!
        if mode == False:
            return
        loss_function = nn.NLLLoss()
        optimizer = optim.SGD(self.model.parameters(), lr=learning_rate)

        if cutoff:
            inputs = inputs[:cutoff]
            outputs = outputs[:cutoff]

        history = {"loss": []}

        midi_data = list(zip(inputs, outputs))

        for epoch in range(n_epochs):
            counter = 0
            losses = []

            for i in range(0, len(midi_data) - self.n_layers, self.n_layers):
              data = midi_data[i:i+self.n_layers]
              note_seqs = [datum[0] for datum in data]
              next_notes = torch.stack([datum[1] for datum in data])
              self.zero_grad()
              c_t_list = self(note_seqs)
              # c_t_list = c_t_list.reshape(c_t_list.shape[0], c_t_list.shape[2])
              # print(c_t_list.shape, next_notes.reshape(self.n_layers).shape)
              loss = loss_function(c_t_list, next_notes.reshape(self.n_layers).long())
              loss.backward()
              optimizer.step()
              losses.append(float(loss))
              if counter % 5 == 0:
                print(f"On datapoint #{counter} out of {cutoff}")
              counter += 1

            avg_loss = np.mean(losses)
            history["loss"].append(avg_loss)
            print("Epoch {} / {}: Loss = {:.3f}".format(epoch + 1, n_epochs, avg_loss))
        return history

    def generate_notes(self, network_input, int_to_note, n_vocab, n_notes):
        """Generate notes from the neural network based on a sequence of notes"""
        # pick a random sequence from the input as a starting point for the prediction
        with torch.no_grad():
            start = np.random.randint(0, len(network_input) - 1)

            pattern = network_input[start]
            prediction_output = []

            # generate 500 notes
            for _ in range(n_notes):
                prediction_input = pattern.clone().detach().reshape(1, len(pattern), 1)
                # prediction_input = prediction_input / float(n_vocab)

                (h_t, prediction) = self.model.predict(prediction_input)

                index = prediction.argmax()
                result = int_to_note[int(index)]
                prediction_output.append(result)

                added_index = (index / n_vocab).reshape(1, 1)

                pattern = torch.cat((pattern, added_index), 0)
                # pattern.append(index)
                pattern = pattern[1 : len(pattern)]

            return prediction_output


In [27]:
seq_length = 100
n_epochs = 1
cutoff = 200
n_qubits = 4
n_layers = 4
n_qlayers = 2

model_name = f"lstm{n_layers}-seq{seq_length}-cut{cutoff}-epcs{n_epochs}-qu{n_qubits}-nq{n_qlayers}"
model_str = f"{model_name}.pt"

In [28]:
print("Initialized Midi")
midi = Midi(seq_length, device)

Initialized Midi
Input shape: torch.Size([44756, 100, 1])
Output shape: torch.Size([44756])


In [29]:
print("Initialized LSTM")
lstm = LSTMusic(n_qlayers=n_qlayers, n_layers=n_layers, hidden_dim=midi.n_vocab, device=device).to(device)

# TODO: Separate input data to test/train

if Path(model_str).is_file():
    print("Loading model")
    lstm.load_state_dict(torch.load(model_str))
    lstm.eval()
    # lstm = torch.load(model_str)
else:
    print("Training LSTM")
    train_history = lstm.train(
        True, midi.network_input, midi.network_output, n_epochs=n_epochs, cutoff=cutoff
    )
    torch.save(lstm.state_dict(), model_str)

Initialized LSTM
Generator will use Quantum LSTM running on backend default.qubit
weight_shapes = (n_qlayers, n_qubits) = (2, 4)
weight_shapes = (n_qlayers, n_qubits) = (2, 4)
weight_shapes = (n_qlayers, n_qubits) = (2, 4)
weight_shapes = (n_qlayers, n_qubits) = (2, 4)
Training LSTM
On datapoint #0 out of 200
On datapoint #5 out of 200
On datapoint #10 out of 200
On datapoint #15 out of 200
On datapoint #20 out of 200
On datapoint #25 out of 200
On datapoint #30 out of 200
On datapoint #35 out of 200
On datapoint #40 out of 200
On datapoint #45 out of 200
Epoch 1 / 1: Loss = 5.674


In [30]:
def generate_notes(self, network_input, int_to_note, n_vocab, n_notes, n_layers):
        """Generate notes from the neural network based on a sequence of notes"""
        # pick a random sequence from the input as a starting point for the prediction
        with torch.no_grad():
            req_size = n_notes//n_layers
            start = np.random.randint(0, len(network_input) - req_size)

            # pattern = network_input[start]
            prediction_output = []

            # generate n_notes
            for i in range(start, start + n_notes, n_layers):
                # print(network_input[i:i+n_layers].shape)
                prediction_input = network_input[i:i+n_layers]
                # prediction_input = pattern.clone().detach().reshape(1, len(pattern), 1)
                # prediction_input = prediction_input / float(n_vocab)

                ct_list = self.model.predict(prediction_input)

                for prediction in ct_list:
                  index = prediction.argmax()
                  result = int_to_note[int(index)]
                  prediction_output.append(result)

                # added_index = (index / n_vocab).reshape(1, 1)
                # pattern = torch.cat((pattern, added_index), 0)
                # pattern.append(index)
                # pattern = pattern[1 : len(pattern)]

            return prediction_output

In [32]:
print("Generating notes")
notes = generate_notes(
    lstm, midi.network_input, midi.int_to_note, midi.n_vocab, n_notes=20, n_layers=4
)
notes

Generating notes


['8.1',
 'A4',
 'C6',
 'E5',
 '8.1',
 'A4',
 'C6',
 'E5',
 '8.1',
 'A4',
 'C6',
 'E5',
 '8.1',
 'A4',
 'C6',
 'E5',
 '8.1',
 'A4',
 'C6',
 'E5']

In [18]:
print("Saving as MIDI file.")
midi.create_midi_from_model(notes, f"{model_name}_generated.mid")

Saving as MIDI file.
