In [26]:
# I followed the instructions on the MIDI-BERT GitHub repository 
# for setting up the model and loading the weights from the pretraining checkpoint.
# Then after investigating the model parameters I converted it 
# into a sequence classification model compatible with run_glue.py

!git clone https://github.com/wazenmai/MIDI-BERT.git
%cd MIDI-BERT
!pip install -r requirements.txt
%cd MidiBERT
%cd CP


Cloning into 'MIDI-BERT'...
remote: Enumerating objects: 774, done.[K
remote: Counting objects: 100% (167/167), done.[K
remote: Compressing objects: 100% (135/135), done.[K
remote: Total 774 (delta 78), reused 81 (delta 32), pack-reused 607[K
Receiving objects: 100% (774/774), 75.34 MiB | 7.86 MiB/s, done.
Resolving deltas: 100% (392/392), done.
Checking out files: 100% (153/153), done.
/content/MIDI-BERT/MidiBERT/CP/MIDI-BERT/MidiBERT/CP/MIDI-BERT/MidiBERT/CP/MIDI-BERT
/content/MIDI-BERT/MidiBERT/CP/MIDI-BERT/MidiBERT/CP/MIDI-BERT/MidiBERT/CP/MIDI-BERT/MidiBERT
/content/MIDI-BERT/MidiBERT/CP/MIDI-BERT/MidiBERT/CP/MIDI-BERT/MidiBERT/CP/MIDI-BERT/MidiBERT/CP


In [27]:
# Mount Google Drive in the Colab runtime, then copy the pretraining checkpoint
# from Drive into the current working directory.
# NOTE(review): the current directory is presumably MIDI-BERT/MidiBERT/CP, since
# a later cell opens 'pretrain_model.ckpt' by relative path — confirm.
from google.colab import drive
drive.mount('/content/drive')
!cp /content/drive/MyDrive/Colab\ Data/Music/pretrain_model.ckpt .

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [28]:
import pickle
import os
import random

from torch.utils.data import DataLoader
import torch
from transformers import BertConfig, BertForSequenceClassification

from model import MidiBert
from finetune_trainer import FinetuneTrainer
from finetune_dataset import FinetuneDataset

# Seed every random number generator used here so the run is repeatable.
seed = 2021
torch.manual_seed(seed)             # cpu
torch.cuda.manual_seed(seed)        # current gpu
torch.cuda.manual_seed_all(seed)    # all gpu
random.seed(seed)

print("Loading Dictionary")
# The pickle holds the two vocabulary mappings that MidiBert is built with.
# NOTE: pickle.load can execute arbitrary code; only load a dictionary file
# that comes from a trusted source (here, the cloned repository).
with open('../../dict/CP.pkl', 'rb') as f:
    e2w, w2e = pickle.load(f)

print("\nBuilding BERT model")
configuration = BertConfig(max_position_embeddings=512,
                           position_embedding_type='relative_key_query',
                           hidden_size=768)

midibert = MidiBert(bertConfig=configuration, e2w=e2w, w2e=w2e)
# The model is never moved off the CPU in this notebook, so map the checkpoint
# tensors to CPU as well. This lets the cell run on a runtime without a GPU and
# avoids allocating GPU memory just to copy the weights.
checkpoint = torch.load('pretrain_model.ckpt', map_location='cpu')
# Kept as the last expression so the key-matching report is shown as cell output.
midibert.load_state_dict(checkpoint['state_dict'])

Loading Dictionary

Building BERT model


<All keys matched successfully>

In [29]:
# List each MidiBERT parameter by name together with its tensor shape.
for param_name, tensor in midibert.named_parameters():
    print(f'{param_name}: {tensor.shape}')

bert.embeddings.word_embeddings.weight: torch.Size([30522, 768])
bert.embeddings.position_embeddings.weight: torch.Size([512, 768])
bert.embeddings.token_type_embeddings.weight: torch.Size([2, 768])
bert.embeddings.LayerNorm.weight: torch.Size([768])
bert.embeddings.LayerNorm.bias: torch.Size([768])
bert.encoder.layer.0.attention.self.query.weight: torch.Size([768, 768])
bert.encoder.layer.0.attention.self.query.bias: torch.Size([768])
bert.encoder.layer.0.attention.self.key.weight: torch.Size([768, 768])
bert.encoder.layer.0.attention.self.key.bias: torch.Size([768])
bert.encoder.layer.0.attention.self.value.weight: torch.Size([768, 768])
bert.encoder.layer.0.attention.self.value.bias: torch.Size([768])
bert.encoder.layer.0.attention.self.distance_embedding.weight: torch.Size([1023, 64])
bert.encoder.layer.0.attention.output.dense.weight: torch.Size([768, 768])
bert.encoder.layer.0.attention.output.dense.bias: torch.Size([768])
bert.encoder.layer.0.attention.output.LayerNorm.weight: t

In [36]:
# Write the MidiBERT weights to a single state-dict file that from_pretrained
# is then pointed at.
torch.save(midibert.state_dict(), "/oldModel")

# Convert to a sequence classification model. The config repeats the settings
# used to build MidiBERT so the shared `bert.*` weights line up.
# NOTE(review): from_pretrained reports the MidiBERT-specific tensors
# (word_emb.*.lut.weight, in_linear.*) as unused and the classifier head as
# newly initialized — confirm that dropping those input layers is intended.
config = BertConfig(max_position_embeddings=512, position_embedding_type='relative_key_query', hidden_size=768)
model = BertForSequenceClassification.from_pretrained("/oldModel", config=config)

# Display the parameters of the converted model (not the original midibert),
# so the new classifier head and the carried-over layers can be inspected.
for name, para in model.named_parameters():
    print('{}: {}'.format(name, para.shape))

Some weights of the model checkpoint at /oldModel were not used when initializing BertForSequenceClassification: ['word_emb.1.lut.weight', 'word_emb.2.lut.weight', 'in_linear.weight', 'in_linear.bias', 'word_emb.3.lut.weight', 'word_emb.0.lut.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /oldModel and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it f

In [39]:
# Sanity check of the weight transfer: print the first parameter (the input
# embedding layer) of the converted model and then of the original MidiBERT,
# so the two tensors can be compared side by side.
for net in (model, midibert):
    first_entry = next(iter(net.named_parameters()), None)
    if first_entry is not None:
        name, para = first_entry
        print('{}: {}'.format(name, para.shape))
        print(para)

bert.embeddings.word_embeddings.weight: torch.Size([30522, 768])
Parameter containing:
tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0026,  0.0093,  0.0173,  ..., -0.0044,  0.0002, -0.0008],
        [-0.0297,  0.0087, -0.0348,  ...,  0.0274, -0.0368,  0.0207],
        ...,
        [ 0.0083, -0.0441,  0.0109,  ...,  0.0121, -0.0516,  0.0148],
        [-0.0381,  0.0194,  0.0050,  ...,  0.0062, -0.0129,  0.0059],
        [ 0.0405,  0.0071,  0.0143,  ..., -0.0107,  0.0205, -0.0196]],
       requires_grad=True)
bert.embeddings.word_embeddings.weight: torch.Size([30522, 768])
Parameter containing:
tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0026,  0.0093,  0.0173,  ..., -0.0044,  0.0002, -0.0008],
        [-0.0297,  0.0087, -0.0348,  ...,  0.0274, -0.0368,  0.0207],
        ...,
        [ 0.0083, -0.0441,  0.0109,  ...,  0.0121, -0.0516,  0.0148],
        [-0.0381,  0.0194,  0.0050,  ...,  0.0062, -0.0129,  0.0059],
  

In [38]:
# Persist the converted classifier twice: once on the local runtime disk and
# once in the mounted Google Drive folder.
for destination in ("/newModel", "/content/drive/MyDrive/Colab Data/MusicBuild/"):
    model.save_pretrained(destination)