### Generating music using the model trained

In [21]:
import torch
ckpt = torch.load(r"C:\Users\shash\Downloads\large_model_trained_8.pt", map_location="cpu")
print(ckpt.keys() if isinstance(ckpt, dict) else type(ckpt))


dict_keys(['model', 'cfg', 'vocab_size'])


So we loaded the trained large model that we had, now, we will get the exact model on which it was trained

In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GPTConfig:
    def __init__(self, vocab_size, block_size,
                 n_layer, n_head, n_embd):
        self.vocab_size = vocab_size
        self.block_size = block_size
        self.n_layer = n_layer
        self.n_head = n_head
        self.n_embd = n_embd

class CausalSelfAttention(nn.Module):
    def __init__(self, config: GPTConfig):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        self.n_head = config.n_head
        self.head_dim = config.n_embd // config.n_head
        self.scale = 1.0 / (self.head_dim ** 0.5)

        self.qkv = nn.Linear(config.n_embd, 3 * config.n_embd)
        self.proj = nn.Linear(config.n_embd, config.n_embd)
        self.register_buffer("mask",
            torch.tril(torch.ones(config.block_size, config.block_size))
            .view(1, 1, config.block_size, config.block_size)
        )

    def forward(self, x):
        B, T, C = x.size()

        qkv = self.qkv(x)
        q, k, v = qkv.split(C, dim=2)
        q = q.view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        k = k.view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        v = v.view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        att = (q @ k.transpose(-2, -1)) * self.scale
        att = att.masked_fill(self.mask[:, :, :T, :T] == 0, float("-inf"))
        att = att.softmax(dim=-1)

        out = att @ v
        out = out.transpose(1, 2).contiguous().view(B, T, C)
        out = self.proj(out)
        return out

class FeedForward(nn.Module):
    def __init__(self, config: GPTConfig):
        super().__init__()
        self.fc1 = nn.Linear(config.n_embd, 4 * config.n_embd)
        self.fc2 = nn.Linear(4 * config.n_embd, config.n_embd)

    def forward(self, x):
        return self.fc2(F.gelu(self.fc1(x)))


class Block(nn.Module):
    def __init__(self, config: GPTConfig):
        super().__init__()
        self.ln1 = nn.LayerNorm(config.n_embd)
        self.attn = CausalSelfAttention(config)
        self.ln2 = nn.LayerNorm(config.n_embd)
        self.ff = FeedForward(config)

    def forward(self, x):
        x = x + self.attn(self.ln1(x))
        x = x + self.ff(self.ln2(x))
        return x


class GPT(nn.Module):
    def __init__(self, vocab_size, block_size, n_layer, n_head, n_embd):
        super().__init__()
        cfg = GPTConfig(vocab_size, block_size, n_layer, n_head, n_embd)
        self.cfg = cfg

        self.token_emb = nn.Embedding(cfg.vocab_size, cfg.n_embd)
        self.pos_emb = nn.Parameter(torch.zeros(1, cfg.block_size, cfg.n_embd))
        self.drop = nn.Dropout(0.1)

        self.blocks = nn.ModuleList([Block(cfg) for _ in range(cfg.n_layer)])
        self.ln_f = nn.LayerNorm(cfg.n_embd)
        self.head = nn.Linear(cfg.n_embd, cfg.vocab_size, bias=False)

    def forward(self, idx, targets=None):
        B, T = idx.size()
        assert T <= self.cfg.block_size, "Sequence is too long for training"

        tok = self.token_emb(idx)
        pos = self.pos_emb[:, :T, :]
        x = self.drop(tok + pos)

        for block in self.blocks:
            x = block(x)

        x = self.ln_f(x)
        logits = self.head(x)

        loss = None
        if targets is not None:
            loss = F.cross_entropy(
                logits.view(-1, logits.size(-1)),
                targets.view(-1)
            )
        return logits, loss


In [23]:
print(torch.load(r"C:\Users\shash\Downloads\large_model_trained_8.pt", map_location="cpu").keys())


dict_keys(['model', 'cfg', 'vocab_size'])


Our next step is to get the vocabulary data and analyse it.

In [24]:
import json

data = json.load(open(r"C:\Users\shash\Downloads\vocab (2).json"))

vocab_list = data["vocab"]
print("Type:", type(vocab_list))
print("Length:", len(vocab_list))
print("First 30 tokens:", vocab_list[:30])


Type: <class 'list'>
Length: 8002
First 30 tokens: ['<PAD>', '<UNK>', '-', '|', 'z', '\\', '2', '/', '496', '[', ']', '8', 'B', 'b', '428', '232', 'C,,', 'z119', 'C', 'D', '244', '500', 'A', 'G,', 'A,', 'G', 'E', '504', 'F', 'z8']


In [25]:
vocab = data["vocab"]

itos = vocab
stoi = {tok: i for i, tok in enumerate(vocab)}


In [26]:
cfg = ckpt["cfg"]
vocab_size = len(vocab)
block_size = cfg["block_size"]
n_layer = cfg["n_layer"]
n_head = cfg["n_head"]
n_embd = cfg["n_embd"]
model = GPT(
    vocab_size,
    block_size,
    n_layer,
    n_head,
    n_embd
)

model.load_state_dict(ckpt["model"])
model.eval()
print("Model loaded successfully!")

Model loaded successfully!


In [27]:
def sample_next(logits, temperature=1.0, top_k=50, pad_id=None):
    logits = logits[:, -1, :] / temperature
    if top_k is not None:
        values, indices = torch.topk(logits, top_k)
        filtered = torch.full_like(logits, float('-inf'))
        filtered.scatter_(1, indices, values)
        logits = filtered
    if pad_id is not None:
        logits[0, pad_id] = float('-inf')

    probs = torch.softmax(logits, dim=-1)
    next_id = torch.multinomial(probs, 1)
    return next_id


In [10]:
generated = []
seed_tok = "K:C"
idx = torch.tensor([[stoi[seed_tok]]], dtype=torch.long)
N = 500
for i in range(N):
    if idx.size(1) > block_size:
        idx = idx[:, -block_size:]

    logits, _ = model(idx)

    next_id = sample_next(
        logits,
        temperature=1.0,
        top_k=50,
        pad_id=pad_id
    )

    idx = torch.cat([idx, next_id], dim=1)
    tok = itos[next_id.item()]
    if tok != "<PAD>":
        generated.append(tok)

    if (i + 1) % 100 == 0:
        print(f"Generated {i + 1}/{N} tokens")


NameError: name 'pad_id' is not defined

Here we are able to generate the tokens and are able to visulize them, We would need to clean this data for some random noise.

In [42]:
generated[:20]

['D,',
 '2',
 '8',
 'b',
 'F,,',
 '|',
 '|',
 'z2',
 '/',
 'b',
 'z',
 '/',
 '-',
 '\\',
 '-',
 '-',
 'z',
 '\\',
 '/',
 '232']

In [11]:
!pip install torch music21




In [28]:
import json
import torch
import os

CKPT_PATH = r"C:\Users\shash\Downloads\large_model_trained_8.pt"
VOCAB_PATH = r"C:\Users\shash\Downloads\vocab (2).json"
def sample_next(logits, temperature=1.0, top_k=50, pad_id=None):
    logits = logits[:, -1, :] / max(1e-8, temperature)

    if top_k is not None and top_k > 0:
        values, indices = torch.topk(logits, top_k, dim=-1)
        filtered = torch.full_like(logits, float("-inf"))
        filtered.scatter_(1, indices, values)
        logits = filtered

    if pad_id is not None:
        logits[:, pad_id] = float("-inf")

    probs = torch.softmax(logits, dim=-1)
    return torch.multinomial(probs, num_samples=1)
def main():
    NUM_FILES = 5
    TOKENS_PER_FILE = 500
    with open(VOCAB_PATH, "r", encoding="utf8") as f:
        data = json.load(f)

    vocab_list = data["vocab"] if isinstance(data, dict) else data
    itos = list(vocab_list)
    stoi = {t: i for i, t in enumerate(itos)}

    print("Vocab size:", len(itos))
    ckpt = torch.load(CKPT_PATH, map_location="cpu")
    cfg = ckpt["cfg"]

    GPT_Class = GPT
    model = GPT_Class(
        vocab_size=len(itos),
        block_size=cfg["block_size"],
        n_layer=cfg["n_layer"],
        n_head=cfg["n_head"],
        n_embd=cfg["n_embd"],
    )

    model.load_state_dict(ckpt["model"])
    model.eval()

    pad_id = stoi.get("<PAD>", None)
    possible_seeds = ["X:1", "K:C", "C", "G", "A", "B"]
    seed_tok = next((s for s in possible_seeds if s in stoi), itos[0])
    print("Seed token:", seed_tok)
    for file_idx in range(1, NUM_FILES + 1):
        print(f"\n Generating piece {file_idx}/{NUM_FILES}")

        temperature = 0.9 + 0.1 * file_idx
        idx = torch.tensor([[stoi[seed_tok]]], dtype=torch.long)
        generated = []

        for i in range(TOKENS_PER_FILE):
            if i % 10 == 0:
                print(f"token {i}", flush=True)
            idx = idx[:, -cfg["block_size"]:]
            logits, _ = model(idx)

            next_id = sample_next(
                logits,
                temperature=temperature,
                top_k=60,
                pad_id=pad_id
            )

            idx = torch.cat([idx, next_id], dim=1)
            tok = itos[next_id.item()]

            if tok != "<PAD>":
                generated.append(tok)

            if tok == "||":
                break
        bars, bar = [], []
        for tok in generated:
            bar.append(tok)
            if tok == "|":
                bars.append("".join(bar))
                bar = []

        body = "\n".join(bars)

        abc_text = "\n".join([
            "X:1",
            f"T:Generated Piece {file_idx}",
            "M:4/4",
            "L:1/8",
            "Q:1/4=120",
            "K:C",
            body
        ])

        abc_path = f"generated_{file_idx}.abc"
        mid_path = f"generated_{file_idx}.mid"

        with open(abc_path, "w", encoding="utf8") as f:
            f.write(abc_text)

        print("Saved:", abc_path)
        try:
            from music21 import converter
            score = converter.parse(abc_path)
            score.write("midi", fp=mid_path)
            print("Saved:", mid_path)
        except Exception as e:
            print("MIDI conversion failed:", e)
if __name__ == "__main__":
    main()


Vocab size: 8002
Seed token: K:C

 Generating piece 1/5
token 0
token 10
token 20
token 30
token 40
token 50
token 60
token 70
token 80
token 90
token 100
token 110
token 120
token 130
token 140
token 150
token 160
token 170
token 180
token 190
token 200
token 210
token 220
token 230
token 240
token 250
token 260
token 270
token 280
token 290
token 300
token 310
token 320
token 330
token 340
token 350
token 360
token 370
token 380
token 390
token 400
token 410
token 420
token 430
token 440
token 450
token 460
token 470
token 480
token 490
Saved: generated_1.abc
MIDI conversion failed: Bad chord indicator: [/--|
-z119232ez--2496\|
---/C,,/F,,,^A--\-\|
z262496-|
|
|
6^a\-z\|
|
-[496z1194-\--z[-/: no closing bracket found.

 Generating piece 2/5
token 0
token 10
token 20
token 30
token 40
token 50
token 60
token 70
token 80
token 90
token 100
token 110
token 120
token 130
token 140
token 150
token 160
token 170
token 180
token 190
token 200
token 210
token 220
token 230
token 240
token 25

In [29]:
import music21


In [87]:
try:
        from music21 import converter
        score = converter.parse(OUT_ABC)
        score.write("midi", fp=OUT_MID)
        print("Saved MIDI to", OUT_MID)
except Exception as e:
        print("music21 conversion failed:", e)
        print("You can inspect", OUT_ABC, "and convert with abc2midi or music21 manually.")

if __name__ == "__main__":
    main()

music21 conversion failed: invalid literal for int() with base 10: "1 T:Generated by LLM M:4/4 L:1/8 Q:1/4=120 K:C g 2 - z - d - - | C,, - <UNK> [ 232 | - | - z - z - 8 [ | | ^A ^A,, z3 ^A \\ - z z3 z119 b z | - \\ - / \\ z3 4 z | <UNK> G, [ - - - ^a C,,, | - 8 2 e |
You can inspect generated.abc and convert with abc2midi or music21 manually.
Vocab size: 8002
PAD id: 0
Using seed token: K:C
Generated 100/500 tokens
Generated 200/500 tokens
Generated 300/500 tokens
Generated 400/500 tokens
Generated 500/500 tokens
Saved ABC to generated.abc
ABC preview:
 X:1 T:Generated by LLM M:4/4 L:1/8 Q:1/4=120 K:C - 244 | 496 [ 2 / - z123 ^F, 8 2 8 | | - z2 3 4 \ - ^a c z / - - E, - z \ - / - - | ^C,, | | - \ D,, [ 4 ^D, | / ^A,,, - \ | - z \ | | - / 428 b \ - - - 2 c z \ - | =C,, A, \ 2 [ - 428 / - - z =A, z e | - / e' z2 [ - - - - - - ^D, ^A 500 504 / 496 program | | - d' - 232 ^A | C,, / - \ - - - ( | | 232 ^A [ - [ \ - z6 z119 | G, z z8 ^A F \ | | ^A - d  ...



Now we would be cleaning this data to get a proper music octaves

In [75]:
import re

def clean_abc_file(input_abc, output_abc):
    with open(input_abc, "r", encoding="utf8") as f:
        tokens = f.read().split()

    headers = []
    cleaned_body = []
    header_pattern = re.compile(r'^[A-Z]:')
    valid_music_pattern = re.compile(r"^[A-Ga-gzZ\^=_,'0-9/\|\-]+$")
    for tok in tokens:
        tok = tok.strip()
        if header_pattern.match(tok):
            headers.append(tok)
            continue
        if tok in ["<UNK>", "<PAD>", "MIDI", "n", "y", "x", "/", "//", "\\", "-", "--", "---", "'", "''", ":", "[", "]", "(", ")"]:
            continue
        if tok.isdigit() and int(tok) > 16:
            continue
        if re.search(r"[A-Ga-gz]\d{3,}", tok):
            tok = re.sub(r"\d+", "", tok)
        if valid_music_pattern.match(tok):
            cleaned_body.append(tok)
    if not any(h.startswith("X:") for h in headers):
        headers.insert(0, "X:1")
    cleaned_text = "\n".join(headers) + "\n" + " ".join(cleaned_body)
    with open(output_abc, "w", encoding="utf8") as f:
        f.write(cleaned_text)
    print(f"Cleaned file saved as {output_abc}")
input_file = "generated_1.abc"
output_file = "cleaned_generated_1.abc"
clean_abc_file(input_file, output_file)


Cleaned file saved as cleaned_generated_1.abc


In [76]:
for i in range(1, 6):
    input_abc = f"generated_{i}.abc"
    output_abc = f"generated_{i}_cleaned.abc"
    clean_abc_file(input_abc, output_abc)
    print(f"Cleaned {input_abc}")


Cleaned file saved as generated_1_cleaned.abc
Cleaned generated_1.abc
Cleaned file saved as generated_2_cleaned.abc
Cleaned generated_2.abc
Cleaned file saved as generated_3_cleaned.abc
Cleaned generated_3.abc
Cleaned file saved as generated_4_cleaned.abc
Cleaned generated_4.abc
Cleaned file saved as generated_5_cleaned.abc
Cleaned generated_5.abc


This generated our cleaned music and now we would be converting the abc files to mid files using abc2midi

In [77]:
def abc_to_midi(abc_path, midi_path, abc2midi_path):
    command = [abc2midi_path, abc_path, "-o", midi_path]
    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True
        )
        print(f"âœ” MIDI created: {midi_path}")
    except subprocess.CalledProcessError as e:
        print(f"âœ– abc2midi failed for {abc_path}")
        print(e.stderr)


In [78]:
abc2midi_path = r"C:\Users\shash\Downloads\abcmidi\abc2midi.exe"

NUM_FILES = 5

for i in range(1, NUM_FILES + 1):
    abc_file = f"generated_{i}_cleaned.abc"
    midi_file = f"generated_{i}.mid"

    if not os.path.exists(abc_file):
        print(f"Skipping missing file: {abc_file}")
        continue

    print(f"\nðŸŽ¼ Converting {abc_file} â†’ {midi_file}")
    abc_to_midi(abc_file, midi_file, abc2midi_path)



ðŸŽ¼ Converting generated_1_cleaned.abc â†’ generated_1.mid
âœ” MIDI created: generated_1.mid

ðŸŽ¼ Converting generated_2_cleaned.abc â†’ generated_2.mid
âœ” MIDI created: generated_2.mid

ðŸŽ¼ Converting generated_3_cleaned.abc â†’ generated_3.mid
âœ” MIDI created: generated_3.mid

ðŸŽ¼ Converting generated_4_cleaned.abc â†’ generated_4.mid
âœ” MIDI created: generated_4.mid

ðŸŽ¼ Converting generated_5_cleaned.abc â†’ generated_5.mid
âœ” MIDI created: generated_5.mid


The next step is to get the music player running for our output mid file generated.

In [79]:
from music21 import converter

NUM_FILES = 5

for i in range(1, NUM_FILES + 1):
    midi_path = f"generated_{i}.mid"
    print(f"\nðŸŽµ Playing {midi_path}")
    score = converter.parse(midi_path)
    score.show('midi')



ðŸŽµ Playing generated_1.mid



ðŸŽµ Playing generated_2.mid



ðŸŽµ Playing generated_3.mid



ðŸŽµ Playing generated_4.mid



ðŸŽµ Playing generated_5.mid


We are able to get a music running in our player which have some tunes looking good. Although more cleaning and training a better transformer could have given us a better results and is a scope for improvement in future.