### Generating music using the model trained

In [2]:
import torch
ckpt = torch.load(r"C:\Users\shash\Downloads\large_model_trained_8.pt", map_location="cpu")
print(ckpt.keys() if isinstance(ckpt, dict) else type(ckpt))


dict_keys(['model', 'cfg', 'vocab_size'])


So we loaded the trained large model that we had, now, we will get the exact model on which it was trained

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GPTConfig:
    def __init__(self, vocab_size, block_size,
                 n_layer, n_head, n_embd):
        self.vocab_size = vocab_size
        self.block_size = block_size
        self.n_layer = n_layer
        self.n_head = n_head
        self.n_embd = n_embd

class CausalSelfAttention(nn.Module):
    def __init__(self, config: GPTConfig):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        self.n_head = config.n_head
        self.head_dim = config.n_embd // config.n_head
        self.scale = 1.0 / (self.head_dim ** 0.5)

        self.qkv = nn.Linear(config.n_embd, 3 * config.n_embd)
        self.proj = nn.Linear(config.n_embd, config.n_embd)
        self.register_buffer("mask",
            torch.tril(torch.ones(config.block_size, config.block_size))
            .view(1, 1, config.block_size, config.block_size)
        )

    def forward(self, x):
        B, T, C = x.size()

        qkv = self.qkv(x)
        q, k, v = qkv.split(C, dim=2)
        q = q.view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        k = k.view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        v = v.view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        att = (q @ k.transpose(-2, -1)) * self.scale
        att = att.masked_fill(self.mask[:, :, :T, :T] == 0, float("-inf"))
        att = att.softmax(dim=-1)

        out = att @ v
        out = out.transpose(1, 2).contiguous().view(B, T, C)
        out = self.proj(out)
        return out

class FeedForward(nn.Module):
    def __init__(self, config: GPTConfig):
        super().__init__()
        self.fc1 = nn.Linear(config.n_embd, 4 * config.n_embd)
        self.fc2 = nn.Linear(4 * config.n_embd, config.n_embd)

    def forward(self, x):
        return self.fc2(F.gelu(self.fc1(x)))


class Block(nn.Module):
    def __init__(self, config: GPTConfig):
        super().__init__()
        self.ln1 = nn.LayerNorm(config.n_embd)
        self.attn = CausalSelfAttention(config)
        self.ln2 = nn.LayerNorm(config.n_embd)
        self.ff = FeedForward(config)

    def forward(self, x):
        x = x + self.attn(self.ln1(x))
        x = x + self.ff(self.ln2(x))
        return x


class GPT(nn.Module):
    def __init__(self, vocab_size, block_size, n_layer, n_head, n_embd):
        super().__init__()
        cfg = GPTConfig(vocab_size, block_size, n_layer, n_head, n_embd)
        self.cfg = cfg

        self.token_emb = nn.Embedding(cfg.vocab_size, cfg.n_embd)
        self.pos_emb = nn.Parameter(torch.zeros(1, cfg.block_size, cfg.n_embd))
        self.drop = nn.Dropout(0.1)

        self.blocks = nn.ModuleList([Block(cfg) for _ in range(cfg.n_layer)])
        self.ln_f = nn.LayerNorm(cfg.n_embd)
        self.head = nn.Linear(cfg.n_embd, cfg.vocab_size, bias=False)

    def forward(self, idx, targets=None):
        B, T = idx.size()
        assert T <= self.cfg.block_size, "Sequence is too long for training"

        tok = self.token_emb(idx)
        pos = self.pos_emb[:, :T, :]
        x = self.drop(tok + pos)

        for block in self.blocks:
            x = block(x)

        x = self.ln_f(x)
        logits = self.head(x)

        loss = None
        if targets is not None:
            loss = F.cross_entropy(
                logits.view(-1, logits.size(-1)),
                targets.view(-1)
            )
        return logits, loss


In [5]:
print(torch.load(r"C:\Users\shash\Downloads\large_model_trained_8.pt", map_location="cpu").keys())


dict_keys(['model', 'cfg', 'vocab_size'])


Our next step is to get the vocabulary data and analyse it.

In [11]:
import json

data = json.load(open(r"C:\Users\shash\Downloads\vocab (2).json"))

vocab_list = data["vocab"]           # this is a list
print("Type:", type(vocab_list))
print("Length:", len(vocab_list))
print("First 30 tokens:", vocab_list[:30])


Type: <class 'list'>
Length: 8002
First 30 tokens: ['<PAD>', '<UNK>', '-', '|', 'z', '\\', '2', '/', '496', '[', ']', '8', 'B', 'b', '428', '232', 'C,,', 'z119', 'C', 'D', '244', '500', 'A', 'G,', 'A,', 'G', 'E', '504', 'F', 'z8']


In [12]:
vocab = data["vocab"]

itos = vocab
stoi = {tok: i for i, tok in enumerate(vocab)}


In [16]:
cfg = ckpt["cfg"]
vocab_size = len(vocab)
block_size = cfg["block_size"]
n_layer = cfg["n_layer"]
n_head = cfg["n_head"]
n_embd = cfg["n_embd"]
model = GPT(
    vocab_size,
    block_size,
    n_layer,
    n_head,
    n_embd
)

model.load_state_dict(ckpt["model"])
model.eval()
print("Model loaded successfully!")

Model loaded successfully!


In [26]:
def sample_next(logits, temperature=1.0, top_k=50, pad_id=None):
    logits = logits[:, -1, :] / temperature

    # top-k filtering
    if top_k is not None:
        values, indices = torch.topk(logits, top_k)
        filtered = torch.full_like(logits, float('-inf'))
        filtered.scatter_(1, indices, values)
        logits = filtered

    # Suppress PAD entirely
    if pad_id is not None:
        logits[0, pad_id] = float('-inf')

    probs = torch.softmax(logits, dim=-1)
    next_id = torch.multinomial(probs, 1)
    return next_id


In [41]:
generated = []
seed_tok = "K:C"
idx = torch.tensor([[stoi[seed_tok]]], dtype=torch.long)
N = 500
for i in range(N):
    if idx.size(1) > block_size:
        idx = idx[:, -block_size:]

    logits, _ = model(idx)

    next_id = sample_next(
        logits,
        temperature=1.0,
        top_k=50,
        pad_id=pad_id
    )

    idx = torch.cat([idx, next_id], dim=1)
    tok = itos[next_id.item()]
    if tok != "<PAD>":
        generated.append(tok)

    if (i + 1) % 100 == 0:
        print(f"Generated {i + 1}/{N} tokens")


Generated 100/500 tokens
Generated 200/500 tokens
Generated 300/500 tokens
Generated 400/500 tokens
Generated 500/500 tokens


Here we are able to generate the tokens and are able to visulize them, We would need to clean this data for some random noise.

In [42]:
generated[:20]

['D,',
 '2',
 '8',
 'b',
 'F,,',
 '|',
 '|',
 'z2',
 '/',
 'b',
 'z',
 '/',
 '-',
 '\\',
 '-',
 '-',
 'z',
 '\\',
 '/',
 '232']

In [33]:
!pip install torch music21


Collecting music21
  Downloading music21-9.9.1-py3-none-any.whl.metadata (5.2 kB)
Collecting jsonpickle (from music21)
  Downloading jsonpickle-4.1.1-py3-none-any.whl.metadata (8.1 kB)
Collecting webcolors>=1.5 (from music21)
  Downloading webcolors-25.10.0-py3-none-any.whl.metadata (2.2 kB)
Downloading music21-9.9.1-py3-none-any.whl (20.1 MB)
   ---------------------------------------- 0.0/20.1 MB ? eta -:--:--
   ------- -------------------------------- 3.9/20.1 MB 25.3 MB/s eta 0:00:01
   -------------------------- ------------- 13.4/20.1 MB 35.9 MB/s eta 0:00:01
   ---------------------------------------- 20.1/20.1 MB 37.2 MB/s eta 0:00:00
Downloading webcolors-25.10.0-py3-none-any.whl (14 kB)
Downloading jsonpickle-4.1.1-py3-none-any.whl (47 kB)
Installing collected packages: webcolors, jsonpickle, music21

   -------------------------- ------------- 2/3 [music21]
   -------------------------- ------------- 2/3 [music21]
   -------------------------- ------------- 2/3 [music21]
  

In [85]:
import json
import torch
import os
from collections import Counter
CKPT_PATH = r"C:\Users\shash\Downloads\large_model_trained_8.pt"
VOCAB_PATH = r"C:\Users\shash\Downloads\vocab (2).json"
OUT_ABC = "generated.abc"
OUT_MID = "generated.mid"
def sample_next(logits, temperature=1.0, top_k=50, pad_id=None):
    """
    logits: (B, T, V)
    returns: next_id tensor of shape (B, 1)
    """
    logits = logits[:, -1, :] / max(1e-8, temperature)

    if top_k is not None and top_k > 0:
        values, indices = torch.topk(logits, top_k, dim=-1)
        filtered = torch.full_like(logits, float("-inf"))
        filtered.scatter_(1, indices, values)
        logits = filtered

    if pad_id is not None:
        logits[:, pad_id] = float("-inf")

    probs = torch.softmax(logits, dim=-1)
    next_id = torch.multinomial(probs, num_samples=1)
    return next_id
def main():
    data = json.load(open(VOCAB_PATH, "r", encoding="utf8"))
    if isinstance(data, dict) and "vocab" in data:
        vocab_list = data["vocab"]
    elif isinstance(data, list):
        vocab_list = data
    else:
        raise RuntimeError("Unexpected vocab.json structure. Should be list or { 'vocab': [...] }")

    itos = list(vocab_list)
    stoi = {tok: i for i, tok in enumerate(itos)}
    print("Vocab size:", len(itos))
    ckpt = torch.load(CKPT_PATH, map_location="cpu")
    assert "model" in ckpt and "cfg" in ckpt, "checkpoint missing expected keys"

    cfg = ckpt["cfg"]
    vocab_size = len(itos)
    block_size = cfg["block_size"]
    n_layer = cfg["n_layer"]
    n_head = cfg["n_head"]
    n_embd = cfg["n_embd"]
    try:
        from model import GPT as GPT_Class
    except Exception:
        if "GPT" in globals():
            GPT_Class = globals()["GPT"]
        else:
            raise RuntimeError("GPT class not found. Put your GPT class in model.py or paste it above this script.")
    model = GPT_Class(vocab_size, block_size, n_layer, n_head, n_embd)
    model.load_state_dict(ckpt["model"])
    model.eval()

    pad_token = "<PAD>"
    pad_id = stoi.get(pad_token, None)
    print("PAD id:", pad_id)
    possible_seeds = ["X:1","K:C","C","^C","G","A","B"]
    seed_tok = next((s for s in possible_seeds if s in stoi), None)
    if seed_tok is None:
        seed_tok = next((t for t in itos if t != pad_token), itos[0])
    print("Using seed token:", seed_tok)
    idx = torch.tensor([[stoi[seed_tok]]], dtype=torch.long)
    generated = []
    idx = torch.tensor([[stoi[seed_tok]]], dtype=torch.long)
    temperature=1.2
    for i in range(N):
        if idx.size(1) > block_size:
            idx = idx[:, -block_size:]
    
        logits, _ = model(idx)
    
        next_id = sample_next(
            logits,
            temperature=temperature,
            top_k=60,
            pad_id=pad_id
        )
    
        idx = torch.cat([idx, next_id], dim=1)
        tok = itos[next_id.item()]
        if tok != "<PAD>":
            generated.append(tok)
    
        if (i + 1) % 100 == 0:
            print(f"Generated {i + 1}/{N} tokens")

    body = " ".join(generated)
    header_lines = [
        "X:1",
        "T:Generated by LLM",
        "M:4/4",
        "L:1/8",
        "Q:1/4=120",
        "K:C"
    ]
    abc_text = "\n".join(header_lines) + "\n" + body + "\n"
    abc_text = abc_text.replace("%", "")
    abc_text = " ".join(abc_text.split())
    with open(OUT_ABC, "w", encoding="utf8") as f:
        f.write(abc_text)
    print("Saved ABC to", OUT_ABC)
    print("ABC preview:\n", abc_text[:400], "...\n")

In [86]:
import music21


In [87]:
try:
        from music21 import converter
        score = converter.parse(OUT_ABC)
        score.write("midi", fp=OUT_MID)
        print("Saved MIDI to", OUT_MID)
except Exception as e:
        print("music21 conversion failed:", e)
        print("You can inspect", OUT_ABC, "and convert with abc2midi or music21 manually.")

if __name__ == "__main__":
    main()

music21 conversion failed: invalid literal for int() with base 10: "1 T:Generated by LLM M:4/4 L:1/8 Q:1/4=120 K:C g 2 - z - d - - | C,, - <UNK> [ 232 | - | - z - z - 8 [ | | ^A ^A,, z3 ^A \\ - z z3 z119 b z | - \\ - / \\ z3 4 z | <UNK> G, [ - - - ^a C,,, | - 8 2 e |
You can inspect generated.abc and convert with abc2midi or music21 manually.
Vocab size: 8002
PAD id: 0
Using seed token: K:C
Generated 100/500 tokens
Generated 200/500 tokens
Generated 300/500 tokens
Generated 400/500 tokens
Generated 500/500 tokens
Saved ABC to generated.abc
ABC preview:
 X:1 T:Generated by LLM M:4/4 L:1/8 Q:1/4=120 K:C - 244 | 496 [ 2 / - z123 ^F, 8 2 8 | | - z2 3 4 \ - ^a c z / - - E, - z \ - / - - | ^C,, | | - \ D,, [ 4 ^D, | / ^A,,, - \ | - z \ | | - / 428 b \ - - - 2 c z \ - | =C,, A, \ 2 [ - 428 / - - z =A, z e | - / e' z2 [ - - - - - - ^D, ^A 500 504 / 496 program | | - d' - 232 ^A | C,, / - \ - - - ( | | 232 ^A [ - [ \ - z6 z119 | G, z z8 ^A F \ | | ^A - d  ...



Now we would be cleaning this data to get a proper music octaves

In [88]:
import re
tokens = """M:4/4 L:1/8 Q:1/4=120 K:C g 2 - z - d - - | C,, - <UNK> [ 232 | - | - z - z - 8 [ | | ^A ^A,, z3 ^A \ - z z3 z119 b z | - \ - / \ z3 4 z | <UNK> G, [ - - - ^a C,,, | - 8 2 e | - | 2 z2 =A z \ - - 4 z3 6 e | | - z [ - \ - - z b z ^D,, \ - - z4 e z b z - - ^a z - - 2 A d 232 - A,, ^d 428 x 8 - 8 8 8 8 6 ^D, | - 8 <UNK> | B,,, - <UNK> | - [ - - \ | | - z a ^a =D,, [ - [ | - ^A,,, ( | - 232 ^F,, z119 - - 2 8 e' \ | - | D | C,,, | =A, z119 z2 C,,, 492 \ | d \ - - - 2 =D - z8 0 - - e | - C,,, b z119 / G < - - z8 504 C,,, | 232 z123 | | | - 2 ( | ^D \ | | - \ - \ 4 8 ^F,, e' C,,, | d [ \ - \ - 232 MIDI A, [ =D z123 - 6 3 | 8 z g 500 =D, [ 3 / 428 - - - z4 - - z3 428 g \ | | - C,,, | - [ <UNK> 496 =A, ( | | - =A, E,,, [ : - n - =G, ^d [ / 428 3 - - =F, | - - - =a - - g B,,, D, n - - - - =F, 428 ' z119 =A | | - - ( | ] ] | G d ' B \ - 4 496 e' ^A - - 4 8 ^F,, 8 <UNK> D,, - / D, - ^A,,, < - - - - - - \ | | | - - - - z119 =B, =D, / 428 3 ^A 2 [ 6 8 z119 - 6 6 8 8 a E,,, [ | - ^A, b [ - | | - - - y | - - - ^A, g' [ - | - 6 3 \ | | - [ =F,, | - y A | ^F,, ^D, | | f [ [ ^D, | | - ^A,, =A, \ ] - 4 2 ^A,, ^D,,, z8 2 c z8 <UNK> B z8 - - - - - - [ / =D - ^A, 496 =G, a | - - - - - - - 232 \ - - - - - - - - - - - - g \ | =D, | D,, | =A, ^D,,, \ \ | - - - - g 8 8 - - - - - /""".split()

def clean_and_fix(token_list):
    headers = []
    cleaned_body = []
    header_pattern = re.compile(r'^[A-Z]:\S+')
    valid_music_pattern = re.compile(r"^[A-Ga-gzZ\^=_,'0-9/\|\[\]-]+$")

    for tok in token_list:
        tok = tok.strip()
        if header_pattern.match(tok):
            headers.append(tok)
            continue
        if tok in ["<UNK>", "<PAD>", "MIDI", "n", "y", "x", "/", "//", "\\", "-", "--", "---", "'", "''", ":", "[", "]", "(", ")"]:
            continue
        if tok.isdigit() and int(tok) > 16:
            continue
        if re.search(r"[A-Ga-gz]\d{3,}", tok):
            tok = re.sub(r"\d+", "", tok)

        if valid_music_pattern.match(tok):
            cleaned_body.append(tok)
    final_headers = []
    has_x = any(h.startswith("X:") for h in headers)
    if not has_x:
        final_headers.append("X:1") 
    
    final_headers.extend(headers)

    return "\n".join(final_headers) + "\n" + " ".join(cleaned_body)
cleaned_abc = clean_and_fix(tokens)

with open("cleaned_music.abc", "w") as f:
    f.write(cleaned_abc)

print("File fixed and saved as 'cleaned_music.abc'")

File fixed and saved as 'cleaned_music.abc'


This generated our cleaned music and now we would be converting the abc files to mid files using abc2midi

In [89]:
import os
import subprocess
abc2midi_path = r"C:\Users\shash\Downloads\abcmidi\abc2midi.exe"
abc_filename = "cleaned_music.abc"
midi_filename = "output.mid"
with open(abc_filename, "w") as f:
    f.write(cleaned_abc)

print(f"Saved '{abc_filename}'")
command = [abc2midi_path, abc_filename, "-o", midi_filename]

try:
    result = subprocess.run(command, capture_output=True, text=True, check=True)
    print("Conversion Successful!")
    print(result.stdout)
except subprocess.CalledProcessError as e:
    print("Error during conversion:")
    print(e.stderr)
    print(e.stdout)
except FileNotFoundError:
    print(f"Could not find the program at: {abc2midi_path}")
    print("Please check that abc2midi.exe is actually in that folder.")


Saved 'cleaned_music.abc'
Conversion Successful!
4.49 February 21 2021 abc2midi
Error in line-char 6-2 : Unrecognized character: 2
Error in line-char 6-22 : Unrecognized character: 8
Error in line-char 6-58 : Unrecognized character: 4
Error in line-char 6-77 : Unrecognized character: 8
Error in line-char 6-79 : Unrecognized character: 2
Error in line-char 6-87 : Unrecognized character: 2
Error in line-char 6-97 : Unrecognized character: 4
Error in line-char 6-102 : Unrecognized character: 6
Error in line-char 6-139 : Unrecognized character: 2
Error in line-char 6-152 : Unrecognized character: 8
Error in line-char 6-154 : Unrecognized character: 8
Error in line-char 6-156 : Unrecognized character: 8
Error in line-char 6-158 : Unrecognized character: 8
Error in line-char 6-160 : Unrecognized character: 8
Error in line-char 6-162 : Unrecognized character: 6
Error in line-char 6-170 : Unrecognized character: 8
Error in line-char 6-214 : Unrecognized character: 2
Error in line-char 6-216 : 

The next step is to get the music player running for our output mid file generated.

In [90]:
from music21 import converter, midi
mf = converter.parse("output.mid")
print("Here is your player:")
mf.show('midi')

Here is your player:


We are able to get a music running in our player which have some tunes looking good. Although more cleaning and training a better transformer could have given us a better results and is a scope for improvement in future.