## Generating chords with GPT2

We now use the model trained in `11-gpt-model-chords.ipynb` to generate chords.

In [1]:
from transformers import GPT2LMHeadModel, GPT2TokenizerFast, GPT2Config

import torch
from torch.utils.data import Dataset
from tqdm import tqdm
from pathlib import Path
import numpy as np
import os
import re
from music21 import stream, chord, duration

### Load and prepare model

In [2]:
# Relative directories used by this notebook: the saved tokenizer, the saved
# language model, and the chord text files.
TOKENIZER_SAVEDIR, LM_MODEL_SAVEDIR, TXT_FILES = map(Path, (
    'tokenizers/chord-augmented-tokenizer/',
    'models/gpt-chords-augmented/',
    'chords-txt-augmented/',
))

In [3]:
# Load the saved tokenizer, declaring which strings act as the beginning-of-
# sequence, end-of-sequence and unknown-token markers.
special_tokens = {
    'bos_token': "<start>",
    'eos_token': "</start>",
    'unk_token': "<unk>",
}
tokenizer = GPT2TokenizerFast.from_pretrained(TOKENIZER_SAVEDIR, **special_tokens)

In [4]:
# Look up the ids of the padding and end-of-sequence markers. The encoding must
# yield exactly two ids, because it is unpacked into two names below.
special_token_ids = tokenizer.encode('<pad> </start>')
pad_token_id, eos_token_id = special_token_ids

In [5]:
# GPT-2 configuration whose vocabulary size matches the tokenizer, with
# `n_head` set to 12.
config = GPT2Config(vocab_size=tokenizer.vocab_size, n_head=12)

In [6]:
# `from_pretrained` is a classmethod that builds the model from the checkpoint
# saved in LM_MODEL_SAVEDIR. Calling it on the class avoids first constructing
# a randomly initialised model that would be discarded immediately.
model = GPT2LMHeadModel.from_pretrained(str(LM_MODEL_SAVEDIR))

### Define helper functions

In [10]:
# Pitch-class names in ascending semitone order, starting at C.
notes = ['C', 'D-', 'D', 'E-', 'E', 'F', 'G-', 'G', 'A-', 'A', 'B-', 'B']


def abridged_index_to_note(abridged_index):
    ''' Return the note name for an abridged index, where index 0 is 'C3'.

    The pitch class is the index modulo 12 and the octave number is the
    integer quotient by 12 plus 3, e.g. 13 -> 'D-4'.
    '''
    octave_offset, pitch_class = divmod(abridged_index, 12)
    return f'{notes[pitch_class]}{octave_offset + 3}'

In [11]:
def token_to_notes(token):
    ''' Decode a string of digits into space-separated note names.

    Every position whose digit equals 1 is converted with
    `abridged_index_to_note`; the names are joined in ascending position order.
    '''
    bits = [int(char) for char in token]
    active_positions = [position for position, bit in enumerate(bits) if bit == 1]
    return ' '.join(abridged_index_to_note(position) for position in active_positions)

In [12]:
def note_to_abdridged_index(note):
    ''' Return the abridged index of a note name such as 'B-3'.

    The last character is read as a single-digit octave number and everything
    before it as the pitch-class name, which is looked up in `notes`.
    'C3' maps to 0 and each octave adds 12.
    '''
    octave = int(note[-1])
    pitch_class = notes.index(note[:-1])
    return pitch_class + 12 * (octave - 3)

In [13]:
def notes_to_token(notes):
    ''' Encode space-separated note names as a 36-character binary token.

    Args:
        notes: string of note names separated by whitespace, e.g.
            'E3 A-3 E-4'. (Inside this function the parameter shadows the
            module-level `notes` list; `note_to_abdridged_index` still reads
            the module-level list.)

    Returns:
        A string of 36 characters, '1' at the abridged index of every given
        note and '0' elsewhere.

    Raises:
        IndexError: if a note falls outside the 36 representable positions
            (C3 through B5).
    '''
    arr = np.zeros(36, dtype=np.uint8)
    for note in notes.split():
        index = note_to_abdridged_index(note)
        # NumPy wraps negative indices around, so a note below C3 would
        # silently set the bit of a different pitch; reject it explicitly.
        if not 0 <= index < len(arr):
            raise IndexError(
                f'note {note!r} is outside the supported range C3-B5')
        arr[index] = 1
    return ''.join(str(bit) for bit in arr)

Sanity-check the conversions from token to notes and from notes to token:

In [14]:
# Positions 10, 13, 16 and 22 are set, so this should decode to 'B-3 D-4 E4 B-4'.
token_to_notes('000000000010010010000010000000000000')

'B-3 D-4 E4 B-4'

In [15]:
# Encode three voicings and print the token produced for each, one per line.
for voicing in ('E3 A-3 B-3 E-4', 'E3 A-3 E-4', 'E3 B-3 E-4'):
    print(notes_to_token(voicing))

000010001010000100000000000000000000
000010001000000100000000000000000000
000010000010000100000000000000000000


### Generate example outputs

We'll build a seed from the chord progression `C6 A7b9 Dm7`:

In [16]:
# One space-separated note string per seed chord, in playing order.
seed_voicings = ['G3 A3 C4 E4', 'G3 B-3 D-4 E4', 'A3 C4 D4 F4']

# The seed starts with the '<start>' marker, followed by one token per chord.
seed_tokens = ['<start>'] + [notes_to_token(voicing) for voicing in seed_voicings]
seed = ' '.join(seed_tokens)

In [17]:
# Generation samples from the model (do_sample=True), so fix the RNG seed to
# make the generated progression reproducible across kernel restarts.
GENERATION_SEED = 42
torch.manual_seed(GENERATION_SEED)

# Despite its name, `input_str` holds the encoded seed as a tensor of token ids.
input_str = tokenizer.encode(seed, return_tensors="pt")

# Generate up to 256 tokens and keep the first returned sequence.
generated_ids = model.generate(input_str,
                               pad_token_id=pad_token_id,
                               eos_token_id=eos_token_id,
                               temperature=1,
                               max_length=256,
                               do_sample=True,
                               num_beams=5)[0]

# Decode back to text, split on whitespace, and drop marker tokens such as
# '<start>' so that only chord tokens remain.
decoded_tokens = tokenizer.decode(generated_ids).split()
output_tokens = [t for t in decoded_tokens if not t.startswith('<')]

In [87]:
def arr_to_midi(arr, savepath):
    ''' Write a chord array to a MIDI file that can be opened in a DAW.

    Each row of `arr` is one timestep. Every column equal to 1 becomes a note
    of a quarter-note chord whose offset is the row number. Column indices are
    shifted by 27 before being converted with `pitch_index_to_pitch_str`.

    NOTE(review): because of the +27 shift, column 0 is treated as pitch
    index 27. Confirm that callers pass arrays laid out that way.
    '''
    score = stream.Score()
    for step, row in enumerate(arr):
        active_columns = np.nonzero(row == 1)[0]
        pitch_names = [pitch_index_to_pitch_str(column + 27)
                       for column in active_columns]
        quarter_chord = chord.Chord(pitch_names)
        quarter_chord.duration = duration.Duration('quarter')
        quarter_chord.offset = step
        score.append(quarter_chord)
    score.write('midi', savepath)

In [88]:
def output_to_midi(tokens, savepath):
    ''' Convert output from GPT to MIDI.

    Args:
        tokens: sequence of token strings. Tokens starting with '<' are
            ignored; every remaining token must consist of 36 binary digits.
        savepath: destination path, passed through to `arr_to_midi`.

    Raises:
        ValueError: if no chord tokens remain after filtering, or if the
            chord tokens are not all exactly 36 digits long.
    '''
    chord_tokens = [t for t in tokens if not t.startswith('<')]
    if not chord_tokens:
        raise ValueError('no chord tokens to convert to MIDI')

    token_arr = np.array([[int(digit) for digit in t] for t in chord_tokens])
    if token_arr.ndim != 2 or token_arr.shape[1] != 36:
        raise ValueError('every chord token must contain exactly 36 digits')

    # arr_to_midi already shifts every column index by 27, so it must receive
    # the 36-wide array directly. Placing the tokens at columns 27:63 of an
    # 88-wide array first applied that shift twice and transposed every chord
    # up by 27 semitones.
    arr_to_midi(token_arr, savepath)

In [18]:
# Pitch-class names in ascending semitone order, starting at C.
index_to_step = ['C', 'D-', 'D', 'E-', 'E', 'F', 'G-', 'G', 'A-', 'A', 'B-', 'B', ]


def pitch_index_to_pitch_str(pitch_index):
    ''' Return the note name (pitch class plus octave number) for a pitch index.

    The index is shifted up by 9 so that index 0 maps to 'A0'; the shifted
    value modulo 12 selects the pitch class and its quotient by 12 is the
    octave number.
    '''
    octave, step = divmod(pitch_index + 9, 12)
    return f'{index_to_step[step]}{octave}'

In [103]:
# Write the generated chord tokens to 'test.mid' (relative to the working directory).
output_to_midi(output_tokens, 'test.mid')

### Save embeddings to file

In [19]:
# Create the output directory for the exported embeddings. `parents=True` also
# creates the intermediate 'embeddings/' directory, so this works on a fresh
# checkout; `exist_ok=True` makes the cell safe to re-run.
Path('embeddings/gpt-chords/').mkdir(parents=True, exist_ok=True)

In [20]:
# Token-embedding matrix of the model: one row per vocabulary entry.
embeddings = model.transformer.wte.weight

# Convert the whole matrix to nested Python floats in one call instead of
# calling `.item()` on every element of a gradient-tracking parameter. The
# written values are identical, only the conversion is much cheaper.
embedding_rows = embeddings.detach().cpu().tolist()

# One line per embedding row, columns separated by tabs.
with open('embeddings/gpt-chords/embedding.tsv', 'w') as f:
    for row in tqdm(embedding_rows):
        f.write('\t'.join(map(str, row)) + "\n")

100%|███████████████████████████████████████████████████████████████████████████████████████████████| 6382/6382 [00:10<00:00, 605.80it/s]


In [21]:
# Write the decoded text of every token id, one per line, in the same order
# as the rows of `embeddings`.
with open('embeddings/gpt-chords/vocab.tsv', 'w') as vocab_file:
    for token_id in range(len(embeddings)):
        print(tokenizer.decode([token_id]), file=vocab_file)