In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from music21 import chord, note, stream, clef, meter
from data_cleaning import Open
import os

In [2]:
# Example (disabled): loading individual MIDI files by hand instead of walking
# a whole directory.
# mid_file1 = os.path.join(os.getcwd(), "progression1.mid")
# midi_data1 = Open.open_midi(mid_file1)
# mid_file2 = os.path.join(os.getcwd(), "progression2.mid")
# midi_data2 = Open.open_midi(mid_file2)

# Module-level accumulator: open_midi_files() appends every chord it finds here.
concatenated_array = []

In [3]:
def open_midi_files(dest_directory):
    """Collect every chord found under ``dest_directory`` into ``concatenated_array``.

    Walks the directory tree, opens each file with ``Open.open_midi`` and
    appends every ``chord.Chord`` element of every part to the module-level
    ``concatenated_array`` list, in traversal order.

    Args:
        dest_directory: Root directory to walk. Every file below it is opened,
            regardless of extension.

    Returns:
        None. The result is the in-place growth of ``concatenated_array``.
    """
    for folder, _subdirs, filenames in os.walk(dest_directory):
        for filename in filenames:
            score = Open.open_midi(os.path.join(folder, filename))
            for part in score.parts:
                # Keep only chord elements; anything else yielded by recurse() is skipped.
                concatenated_array.extend(
                    element
                    for element in part.recurse()
                    if isinstance(element, chord.Chord)
                )


# open_midi_files() appends to the global list, so empty it first: re-running
# this cell then reloads the corpus instead of duplicating it.
concatenated_array.clear()
open_midi_files(os.path.join(os.getcwd(), "sample_rock_set"))

In [4]:
# Sanity check: number of chords collected from the corpus.
len(concatenated_array)

6538

In [5]:
# One list of pitch numbers per chord. `pitch.ps` is truncated to int right
# here so the sequences use exactly the same keys as the vocabulary built
# below; previously only the vocabulary was truncated, so a fractional pitch
# value would have raised KeyError inside encoder().
all_chords = [
    [int(_note.pitch.ps) for _note in _chord] for _chord in concatenated_array
]

# Every pitch occurrence in the corpus, in order (kept for later inspection).
flattened_list = [
    each_note for each_chord in all_chords for each_note in each_chord
]

# Vocabulary: distinct pitches in ascending order get indices 1..N;
# index 0 is reserved for the "." token that encoder() puts after each chord.
notes = sorted(set(flattened_list))
n_to_i = {s: i + 1 for i, s in enumerate(notes)}
n_to_i["."] = 0

# Inverse mapping: index -> pitch number (or ".").
i_to_n = {value: key for key, value in n_to_i.items()}

# Lookup from the integers 0..127 to a music21 Note built from that integer.
mn_to_nn = {n: note.Note(n) for n in range(128)}

vocab_size = len(i_to_n)

In [6]:
def encoder(twod_chord_list):
    """Flatten a list of chords into one list of vocabulary indices.

    Each chord (a list of pitch numbers) is mapped through ``n_to_i`` and
    followed by the index of the "." separator token.
    """
    encoded = []
    for _chord_list in twod_chord_list:
        for symbol in _chord_list + ["."]:
            encoded.append(n_to_i[symbol])
    return encoded


def decoder(list_of_keys):
    """Map a sequence of vocabulary indices back to pitch numbers / "." via ``i_to_n``."""
    decoded = []
    for index in list_of_keys:
        decoded.append(i_to_n[index])
    return decoded

In [7]:
# Spot check: encode the first two chords; each chord ends with the separator index 0.
encoder(all_chords[:2])

[31, 26, 19, 0, 31, 26, 19, 0]

In [8]:
# Encode the entire corpus into a single 1-D tensor of token indices
# (int64, as required for embedding lookups and cross-entropy targets).
encoded_chords = torch.tensor(encoder(all_chords), dtype=torch.long)
print(encoded_chords.shape, encoded_chords.dtype)

torch.Size([25994]) torch.int64


In [9]:
# Sequential 90/10 split: the first 90% of tokens train the model, the rest validate it.
n = int(0.9 * len(encoded_chords))
train_data, val_data = encoded_chords[:n], encoded_chords[n:]

In [10]:
# Seed torch's global RNG so the random window offsets drawn by get_batch are repeatable.
torch.manual_seed(3489764398)
batch_size = 4  # number of windows sampled per batch
block_size = 8  # number of tokens in each window


def get_batch(split):
    """Sample a random batch of input/target windows.

    Args:
        split: ``"train"`` selects ``train_data``; any other value selects
            ``val_data``.

    Returns:
        A pair ``(x, y)`` of stacked windows, ``batch_size`` rows of
        ``block_size`` tokens each. ``y`` is ``x`` shifted one position to the
        right, i.e. ``y[b, t]`` is the token that follows ``x[b, t]``.
    """
    source = train_data if split == "train" else val_data
    # Upper bound leaves room for the one-step-ahead target window.
    starts = torch.randint(len(source) - block_size, (batch_size,))

    inputs, targets = [], []
    for start in starts:
        inputs.append(source[start: start + block_size])
        targets.append(source[start + 1: start + block_size + 1])

    return torch.stack(inputs), torch.stack(targets)

In [11]:
# Draw one training batch to inspect and to smoke-test the model with later.
Xb, Yb = get_batch("train")

In [12]:
# Inputs and targets side by side: each target row is its input row shifted by one token.
Xb, Yb

(tensor([[32, 25, 22, 17, 10,  0, 34, 32],
         [10,  0, 17, 10,  0, 17, 10,  0],
         [29, 25,  0, 20, 13,  0, 34, 30],
         [20, 15,  8,  0, 25, 20, 15,  8]]),
 tensor([[25, 22, 17, 10,  0, 34, 32, 25],
         [ 0, 17, 10,  0, 17, 10,  0, 17],
         [25,  0, 20, 13,  0, 34, 30, 25],
         [15,  8,  0, 25, 20, 15,  8,  0]]))

In [None]:
import torch
import torch.nn as nn
from torch.nn import functional as F
# Re-seed so the model's initial weights and the sampling below are repeatable.
torch.manual_seed(54865)


class BigramLanguageModel(nn.Module):
    """Bigram model: the logits for the next token depend only on the current token.

    The single parameter is a ``vocab_size x vocab_size`` embedding table whose
    row ``i`` holds the next-token logits for token ``i``.
    """

    def __init__(self, vocab_size):
        super().__init__()
        # each token directly reads off the logits for the next token from a lookup table
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)

    def forward(self, idx, targets=None):
        """Compute next-token logits and, if targets are given, the loss.

        Args:
            idx: (B, T) tensor of integer token indices.
            targets: optional (B, T) tensor of integer token indices.

        Returns:
            ``(logits, loss)``. Without targets, ``logits`` is (B, T, C) and
            ``loss`` is ``None``. With targets, ``logits`` is flattened to
            (B*T, C) and ``loss`` is the cross-entropy against the flattened
            targets.
        """
        logits = self.token_embedding_table(idx)  # (B,T,C)

        # Identity check: `== None` would dispatch to the tensor's overloaded
        # equality operator instead of simply testing for "no targets".
        if targets is None:
            return logits, None

        # cross_entropy wants (N, C) logits and (N,) targets, so merge batch and time.
        B, T, C = logits.shape
        logits = logits.view(B * T, C)
        targets = targets.view(B * T)
        loss = F.cross_entropy(logits, targets)

        return logits, loss

    def generate(self, idx, max_new_tokens):
        """Extend ``idx`` by sampling ``max_new_tokens`` tokens autoregressively.

        Args:
            idx: (B, T) tensor of indices forming the current context.
            max_new_tokens: number of tokens to append.

        Returns:
            (B, T + max_new_tokens) tensor: the context followed by the samples.
        """
        # Sampling never needs gradients; skip building the autograd graph.
        with torch.no_grad():
            for _ in range(max_new_tokens):
                # get the predictions
                logits, _loss = self(idx)
                # focus only on the last time step
                logits = logits[:, -1, :]  # becomes (B, C)
                # apply softmax to get probabilities
                probs = F.softmax(logits, dim=-1)  # (B, C)
                # sample from the distribution
                idx_next = torch.multinomial(probs, num_samples=1)  # (B, 1)
                # append sampled index to the running sequence
                idx = torch.cat((idx, idx_next), dim=1)  # (B, T+1)
        return idx


# Build the model over the chord vocabulary.
m = BigramLanguageModel(vocab_size)

# Smoke test: one forward pass on the sample batch, with loss.
logits, loss = m(Xb, Yb)

# Sample 100 tokens from the freshly initialised model, starting from a single
# index-0 (".") token, and map the indices back to pitch numbers.
decoder(
    m.generate(idx=torch.zeros((1, 1), dtype=torch.long), max_new_tokens=100)[0].tolist()
)

In [20]:
# AdamW over all model parameters (here: the embedding table) with learning rate 1e-3.
optimizer = torch.optim.AdamW(m.parameters(), lr=1e-3)

In [26]:
# Rebinds the global that get_batch reads, so training uses batches of 32 windows.
batch_size = 32

# 10,000 optimisation steps, each on a freshly sampled training batch.
for steps in range(10000):
    xb, yb = get_batch('train')

    # forward pass: logits plus cross-entropy loss
    logits, loss = m(xb,yb)
    # drop gradients left over from the previous step before backpropagating
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

# Loss of the last batch only (a single noisy sample, not an average).
print(loss.item())

1.7502195835113525


In [30]:
# Sample 16 new tokens from the trained model, starting from a single index-0
# (".") token, and decode the indices back to pitch numbers / ".".
x = decoder(
    m.generate(idx=torch.zeros((1, 1), dtype=torch.long), max_new_tokens=16)[0].tolist()
)

In [33]:
# Decoded sequences contain the "." separator, which has no Note entry; give it
# a printable placeholder so lookups on decoded output do not raise KeyError.
mn_to_nn["."] = "___"

In [None]:
# Print the generated sequence one symbol per line, via the mn_to_nn lookup.
for mn in x:
    print(mn_to_nn[mn])

# Self Attention

In [3]:
# Fixed seed so the toy tensor below is the same on every run.
torch.manual_seed(654651)

# Toy dimensions, named after the (B, T, C) shape comments used in the model.
B, T, C = 4, 8, 2
X = torch.randn((B, T, C))
X.shape

torch.Size([4, 8, 2])

In [5]:
# xboc[b, t] is the mean of X[b, 0..t]: every position averages itself and all
# earlier positions of the same sequence, never later ones.
xboc = torch.zeros((B, T, C))
for b in range(B):
    for t in range(T):
        xprev = X[b, : t + 1]  # (t+1, C): positions up to and including t
        xboc[b, t] = xprev.mean(dim=0)


In [8]:
# Compare the first sequence with its running mean: row 0 matches, later rows are averages.
X[0], xboc[0]

(tensor([[-0.8342,  1.3786],
         [-0.0693, -0.2892],
         [ 1.0316,  1.5584],
         [ 0.9983,  0.1137],
         [-1.6205,  0.7124],
         [-1.4671, -0.6002],
         [-0.1404, -0.9244],
         [-0.5072, -0.1338]]),
 tensor([[-0.8342,  1.3786],
         [-0.4518,  0.5447],
         [ 0.0427,  0.8826],
         [ 0.2816,  0.6904],
         [-0.0988,  0.6948],
         [-0.3269,  0.4790],
         [-0.3002,  0.2785],
         [-0.3261,  0.2269]]))