In [49]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from music21 import chord, note, stream, clef, meter
from data_cleaning import Open
import os

In [2]:
# Resolve both reference progressions relative to the current working directory.
mid_file1, mid_file2 = (
    os.path.join(os.getcwd(), file_name)
    for file_name in ("progression1.mid", "progression2.mid")
)

# Parse them (in order) with the project's MIDI loader.
midi_data1, midi_data2 = map(Open.open_midi, (mid_file1, mid_file2))

# Shared accumulator for chord objects; starts out empty and is filled by
# open_midi_files().
concatenated_array = list()

In [13]:
def open_midi_files(dest_directory):
    """Collect every chord found in the files under ``dest_directory``.

    Walks ``dest_directory`` recursively, parses each file with
    ``Open.open_midi`` and appends every ``chord.Chord`` element found in any
    part of the parsed result to the module-level ``concatenated_array``.

    Directories and files are visited in sorted order. ``os.walk`` otherwise
    yields entries in an arbitrary, filesystem-dependent order, which would
    make the order of ``concatenated_array`` (and anything derived from it by
    position, such as a head/tail train/validation split) differ between
    machines and runs.

    Every file is handed to ``Open.open_midi`` regardless of its extension.

    Args:
        dest_directory: Root directory to search. A directory that does not
            exist yields nothing (``os.walk`` ignores the error by default).

    Returns:
        None. Results are appended to ``concatenated_array``; calling the
        function again appends again.
    """
    for root, dirs, files in os.walk(dest_directory):
        # Sorting in place steers the top-down walk so that sub-directories
        # are entered in a deterministic order.
        dirs.sort()
        for file in sorted(files):
            midi_data = Open.open_midi(os.path.join(root, file))
            for part in midi_data.parts:
                # recurse() visits nested elements; keep only the chords.
                concatenated_array.extend(
                    element
                    for element in part.recurse()
                    if isinstance(element, chord.Chord)
                )


# Gather the chords from every file under ./sample_rock_set into
# concatenated_array. NOTE: the list is created in a different cell, so
# re-running only this cell appends the same chords a second time.
open_midi_files(os.path.join(os.getcwd(), "sample_rock_set"))

[<music21.chord.Chord C3 G3 C4 D4 E4>, <music21.chord.Chord G2 G3 B3 D4 E4>, <music21.chord.Chord A2 G3 A3 C4 E4>, <music21.chord.Chord F2 C4 F4 G4>, <music21.chord.Chord C3 G3 C4 D4>, <music21.chord.Chord A2 A3 C4 E4>, <music21.chord.Chord F2 A3 C4 F4>, <music21.chord.Chord G2 G3 B3 E4>, <music21.chord.Chord C3 G3 B3 E4 G4>, <music21.chord.Chord F2 A3 C4 E4 G4>]


In [17]:
# Sanity check: number of chord objects collected so far.
len(concatenated_array)

6724

In [59]:
# One inner list per collected chord, holding the `.pitch.ps` value of each
# of its notes.
all_chords = [[_note.pitch.ps for _note in _chord] for _chord in concatenated_array]

# Every pitch of every chord, truncated to int, in encounter order.
flattened_list = []
for each_chord in all_chords:
    flattened_list.extend(int(each_note) for each_note in each_chord)

# Vocabulary: the distinct pitches in ascending order receive ids 1..N, and
# id 0 is reserved for ".", the token encoder() emits after each chord.
notes = sorted(set(flattened_list))
n_to_i = dict(zip(notes, range(1, len(notes) + 1)))
n_to_i["."] = 0

# Reverse lookup: id -> pitch (or ".").
i_to_n = {index: token for token, index in n_to_i.items()}

# A music21 Note for each integer 0..127
# (presumably MIDI note numbers -- TODO confirm against music21's Note API).
mn_to_nn = {number: note.Note(number) for number in range(128)}

# Number of distinct tokens, separator included.
vocab_size = len(i_to_n)

In [63]:
def encoder(twod_chord_list):
    """Flatten a list of chords into a single list of vocabulary ids.

    Each chord contributes the id of every one of its pitches, followed by
    the id of the "." separator token.

    The vocabulary ``n_to_i`` is keyed by ``int(pitch)``, so pitches are
    truncated with ``int()`` before the lookup. That keeps the encoder
    consistent with the vocabulary for fractional pitch values, which would
    otherwise raise ``KeyError`` even though their truncated value is known.

    Args:
        twod_chord_list: Iterable of chords; each chord is an iterable of
            numeric pitches (lists, tuples, ... are all accepted). A literal
            "." inside a chord is passed through as the separator.

    Returns:
        list[int]: Flat list of token ids.

    Raises:
        KeyError: If a (truncated) pitch is not in ``n_to_i``.
    """
    separator = n_to_i["."]
    encoded = []
    for _chord_list in twod_chord_list:
        for n in _chord_list:
            encoded.append(separator if n == "." else n_to_i[int(n)])
        encoded.append(separator)
    return encoded


def decoder(list_of_keys):
    """Translate token ids back into vocabulary entries.

    Args:
        list_of_keys: Iterable of ids present in ``i_to_n``.

    Returns:
        list: The entry stored in ``i_to_n`` for each id, in input order.

    Raises:
        KeyError: If an id is not in ``i_to_n``.
    """
    decoded = []
    for key in list_of_keys:
        decoded.append(i_to_n[key])
    return decoded

In [64]:
# Spot-check the encoding on the first two chords; each chord's ids are
# followed by 0, the id of the "." separator.
encoder(all_chords[:2])

[18, 25, 30, 32, 34, 0, 13, 25, 29, 32, 34, 0]

In [65]:
# Encode the whole corpus into one flat tensor of 64-bit token ids.
encoded_chords = torch.tensor(encoder(all_chords), dtype=torch.int64)
print(f"{encoded_chords.shape} {encoded_chords.dtype}")

torch.Size([26985]) torch.int64


In [67]:
# Positional split (no shuffling): the first 90% of the token stream is used
# for training, the remainder for validation.
n = int(len(encoded_chords) * 0.9)
train_data, val_data = encoded_chords[:n], encoded_chords[n:]

In [75]:
# Fix torch's RNG so the batches sampled below are repeatable.
torch.manual_seed(3489764398)

# batch_size: number of sequences drawn per batch.
# block_size: number of tokens in each sequence.
batch_size, block_size = 4, 8


def get_batch(split):
    """Sample a random batch of input/target sequences.

    Args:
        split: "train" selects ``train_data``; any other value selects
            ``val_data``.

    Returns:
        tuple: ``(x, y)``, each stacked from ``batch_size`` slices of length
        ``block_size``. ``y`` holds the same windows as ``x`` shifted one
        position ahead, i.e. the next-token targets.
    """
    if split == "train":
        source = train_data
    else:
        source = val_data

    # Start offsets are drawn so that the shifted target window still fits.
    starts = torch.randint(len(source) - block_size, (batch_size,))

    inputs, targets = [], []
    for start in starts:
        stop = start + block_size
        inputs.append(source[start:stop])
        targets.append(source[start + 1:stop + 1])

    return torch.stack(inputs), torch.stack(targets)

In [81]:
# Draw one training batch: Xb are the inputs, Yb the next-token targets.
Xb, Yb = get_batch("train")

In [82]:
# Display the batch; each row of Yb is the matching row of Xb shifted one
# position ahead in the token stream.
Xb, Yb

(tensor([[29, 24, 17,  0, 22, 17, 10,  0],
         [22, 15,  0, 27, 22, 15,  0, 27],
         [24, 17,  0, 26, 19,  0, 24, 17],
         [ 0, 11, 18, 23,  0, 11, 18, 23]]),
 tensor([[24, 17,  0, 22, 17, 10,  0, 22],
         [15,  0, 27, 22, 15,  0, 27, 22],
         [17,  0, 26, 19,  0, 24, 17,  0],
         [11, 18, 23,  0, 11, 18, 23,  0]]))

In [88]:
import torch
import torch.nn as nn
from torch.nn import functional as F
# Re-seed torch's RNG before the model is built so that its randomly
# initialised parameters are repeatable.
torch.manual_seed(1337)


class BigramLanguageModel(nn.Module):
    """Bigram model: the logits for the next token depend only on the current token.

    The single parameter is a ``vocab_size x vocab_size`` embedding table
    whose row ``i`` holds the next-token logits for token ``i``.
    """

    def __init__(self, vocab_size):
        """
        Args:
            vocab_size: Number of distinct tokens; used for both the number
                of embedding rows and the size of each logit vector.
        """
        super().__init__()
        # each token directly reads off the logits for the next token from a lookup table
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)

    def forward(self, idx, targets=None):
        """Compute next-token logits and, if targets are given, the loss.

        Args:
            idx: (B, T) integer tensor of token ids.
            targets: Optional (B, T) integer tensor of target token ids.

        Returns:
            tuple: ``(logits, loss)``.
            With ``targets``: logits are flattened to (B*T, C) and ``loss``
            is the mean cross-entropy over all B*T positions.
            Without ``targets``: logits keep their (B, T, C) shape and
            ``loss`` is ``None``.
        """
        logits = self.token_embedding_table(idx)  # (B,T,C)

        if targets is None:
            # Inference: nothing to score against.
            return logits, None

        # F.cross_entropy expects (N, C) logits and (N,) class indices.
        B, T, C = logits.shape
        logits = logits.view(B*T, C)
        targets = targets.view(B*T)
        loss = F.cross_entropy(logits, targets)

        return logits, loss

    @torch.no_grad()
    def generate(self, idx, max_new_tokens):
        """Extend ``idx`` by sampling ``max_new_tokens`` tokens, one at a time.

        Args:
            idx: (B, T) integer tensor holding the current context.
            max_new_tokens: Number of tokens to append to every row.

        Returns:
            (B, T + max_new_tokens) integer tensor: the context followed by
            the sampled tokens.
        """
        for _ in range(max_new_tokens):
            # get the predictions
            logits, _loss = self(idx)
            # focus only on the last time step
            logits = logits[:, -1, :]  # becomes (B, C)
            # apply softmax to get probabilities
            probs = F.softmax(logits, dim=-1)  # (B, C)
            # sample from the distribution
            idx_next = torch.multinomial(probs, num_samples=1)  # (B, 1)
            # append sampled index to the running sequence
            idx = torch.cat((idx, idx_next), dim=1)  # (B, T+1)
        return idx


# Build the model with one embedding row (and one logit) per vocabulary entry.
m = BigramLanguageModel(vocab_size)

# Forward pass on the sampled batch: returns the logits flattened to
# (B*T, C) together with the cross-entropy loss against Yb.
logits, loss = m(Xb, Yb)

# Display both as the cell output.
logits, loss
# Disabled scratch code kept for reference:
# logits, loss = m(Xb, Yb)
# print(logits.shape)
# print(loss)

# Disabled sampling example; it requires the model to expose a `generate` method.
# print(decoder(m.generate(idx = torch.zeros((1, 1), dtype=torch.long), max_new_tokens=100)[0].tolist()))

(tensor([[ 1.2089, -0.7427,  0.1015,  ...,  0.6750,  1.5664, -0.9238],
         [-1.7823,  0.1339, -2.0973,  ..., -0.2614,  0.9901,  0.6409],
         [-0.0755,  0.4162,  0.5739,  ..., -1.0974, -0.0379,  1.5241],
         ...,
         [-0.4325, -0.2694, -2.5596,  ...,  0.2330, -0.2205, -1.4869],
         [ 0.2110,  1.3096, -0.6448,  ..., -0.1489, -0.7810,  0.5884],
         [ 1.6347, -0.0518,  0.4996,  ...,  0.3608,  0.3161,  0.3504]],
        grad_fn=<ViewBackward0>),
 tensor(4.6719, grad_fn=<NllLossBackward0>))