<a href="https://githubtocolab.com/minyeamer/groove_midi/blob/main/music_vae.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab"/></a>

# MusicVAE
- MIDI 파일을 TFRecord 파일로 변환합니다.
- Custom CONFIG_MAP을 생성합니다.
- [Magenta 사용법](https://github.com/magenta/magenta/tree/main/magenta/models/music_vae#how-to-use)을 참고하여 `music_vae_train.py` 스크립트를 수행해 학습합니다.
- [Magenta 사용법](https://github.com/magenta/magenta/tree/main/magenta/models/music_vae#how-to-use)을 참고하여 `music_vae_generate.py` 스크립트를 수행해 샘플을 생성합니다.
- 개별적으로 알아보고 싶은 함수들을 실행하여 반환값을 확인합니다.

In [None]:
from IPython.display import clear_output
!pip install magenta==2.1.4 # latest
clear_output()

In [None]:
from google.colab import drive
from pathlib import Path
import os

# Mount Google Drive and make a project folder on it the working directory;
# later cells use paths relative to this folder.
drive.mount("/content/drive")

root_path = Path("/content/drive/My Drive/groove_midi")
os.makedirs(root_path, exist_ok=True)
os.chdir(root_path)

Mounted at /content/drive


## Load Data

In [None]:
from zipfile import ZipFile
import urllib.request

url = "https://storage.googleapis.com/magentadata/datasets/groove/groove-v1.0.0-midionly.zip"
data_path = root_path/'data'
data_path.mkdir(parents=True, exist_ok=True)
zip_file = data_path/url.split('/')[-1]

# Skip the download when the archive is already present, so re-running the
# cell does not fetch it again. Delete the file to force a fresh download.
if not zip_file.exists():
    urllib.request.urlretrieve(url, zip_file)

# The context manager closes the archive handle once extraction is done.
with ZipFile(zip_file) as archive:
    archive.extractall(data_path)

In [None]:
midi_path = data_path/'groove'

# os.walk is lazy: take only its first entry (the top-level directory) instead
# of materialising the walk of the whole dataset tree. Prints the tuple
# (sub-directories, files) found directly under midi_path.
print(next(os.walk(midi_path))[1:])

(['drummer3', 'drummer5', 'drummer4', 'drummer2', 'drummer7', 'drummer6', 'drummer10', 'drummer1', 'drummer8', 'drummer9'], ['README', 'info.csv', 'Icon\r', 'LICENSE'])


In [None]:
import pandas as pd

# Load the metadata table shipped with the dataset and preview its first rows.
info_csv = midi_path / 'info.csv'
info = pd.read_csv(info_csv)
info.head()

Unnamed: 0,drummer,session,id,style,bpm,beat_type,time_signature,midi_filename,audio_filename,duration,split
0,drummer1,drummer1/eval_session,drummer1/eval_session/1,funk/groove1,138,beat,4-4,drummer1/eval_session/1_funk-groove1_138_beat_...,drummer1/eval_session/1_funk-groove1_138_beat_...,27.872308,test
1,drummer1,drummer1/eval_session,drummer1/eval_session/10,soul/groove10,102,beat,4-4,drummer1/eval_session/10_soul-groove10_102_bea...,drummer1/eval_session/10_soul-groove10_102_bea...,37.691158,test
2,drummer1,drummer1/eval_session,drummer1/eval_session/2,funk/groove2,105,beat,4-4,drummer1/eval_session/2_funk-groove2_105_beat_...,drummer1/eval_session/2_funk-groove2_105_beat_...,36.351218,test
3,drummer1,drummer1/eval_session,drummer1/eval_session/3,soul/groove3,86,beat,4-4,drummer1/eval_session/3_soul-groove3_86_beat_4...,drummer1/eval_session/3_soul-groove3_86_beat_4...,44.716543,test
4,drummer1,drummer1/eval_session,drummer1/eval_session/4,soul/groove4,80,beat,4-4,drummer1/eval_session/4_soul-groove4_80_beat_4...,drummer1/eval_session/4_soul-groove4_80_beat_4...,47.9875,test


## MIDI to TFRecord

In [None]:
import warnings
# Silence Python warnings before Magenta is imported, so that warnings raised
# during the import and the conversion are not shown.
warnings.filterwarnings("ignore")

from magenta.scripts.convert_dir_to_note_sequences import convert_directory

# Convert the MIDI files under midi_path into a single TFRecord file.
tfrec_file = data_path / 'midi.tfrecord'
convert_directory(
    str(midi_path),
    str(tfrec_file),
    recursive=True,
)
clear_output()

In [None]:
!git clone https://github.com/magenta/magenta

Cloning into 'magenta'...
remote: Enumerating objects: 15877, done.[K
remote: Counting objects: 100% (30/30), done.[K
remote: Compressing objects: 100% (23/23), done.[K
remote: Total 15877 (delta 7), reused 21 (delta 5), pack-reused 15847[K
Receiving objects: 100% (15877/15877), 36.41 MiB | 10.66 MiB/s, done.
Resolving deltas: 100% (12063/12063), done.
Checking out files: 100% (491/491), done.


## Custom Config

In [None]:
# Source text of a new CONFIG_MAP entry; a later cell appends it verbatim to
# Magenta's configs.py so the config can be selected by name.
# The TFRecord path is interpolated with !r so that it is written as a
# correctly quoted and escaped Python string literal, whatever characters the
# path contains.
custom_config = f"""
CONFIG_MAP['hierdec-drums_4bar_small'] = Config(
    model=MusicVAE(
        lstm_models.BidirectionalLstmEncoder(),
        lstm_models.HierarchicalLstmDecoder(
            lstm_models.CategoricalLstmDecoder(),
            level_lengths=[16, 4],
            disable_autoregression=True)),
    hparams=merge_hparams(
        lstm_models.get_default_hparams(),
        HParams(
            batch_size=512,
            max_seq_len=64,  # 4 bars w/ 16 steps per bar
            z_size=256,
            enc_rnn_size=[512, 512],
            dec_rnn_size=[256, 256],
            free_bits=48,
            max_beta=0.2,
            sampling_schedule='inverse_sigmoid',
            sampling_rate=1000,
        )),
    note_sequence_augmenter=None,
    data_converter=data.DrumsConverter(
        max_bars=100,  # Truncate long drum sequences before slicing.
        slice_bars=4,
        steps_per_quarter=4,
        roll_input=True),
    train_examples_path={str(tfrec_file)!r},
)"""

In [None]:
config_file = root_path/"magenta/magenta/models/music_vae/configs.py"

# Append the custom entry only when this exact text is not in the file yet.
# An unconditional append would add another copy on every re-run of the cell.
# A modified custom_config is still appended; being later in the file, its
# assignment to the same CONFIG_MAP key overrides the earlier one.
if custom_config not in config_file.read_text():
    with open(config_file, 'a') as file:
        file.write(custom_config)

In [None]:
!pip install -e magenta
clear_output()

## Train

In [None]:
# Run Magenta's MusicVAE training script from the local checkout with the
# custom config, for 10 steps, using saved/checkpoints/drums_4bar as run_dir.
# All paths are relative to the working directory set with os.chdir earlier.
!python3 magenta/magenta/models/music_vae/music_vae_train.py \
  --config=hierdec-drums_4bar_small \
  --run_dir=saved/checkpoints/drums_4bar \
  --num_steps=10 \
  --mode=train

Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit
  _resample_loop_p(x, t_out, interp_win, interp_delta, num_table, scale, y)
Instructions for updating:
non-resource variables are not supported in the long term
2022-08-20 10:03:57.032396: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
INFO:tensorflow:Building MusicVAE model with BidirectionalLstmEncoder, HierarchicalLstmDecoder, and hparams:
{'max_seq_len': 64, 'z_size': 256, 'free_bits': 48, 'max_beta': 0.2, 'beta_rate': 0.0, 'batch_size': 512,

## Generate

In [None]:
# Run Magenta's MusicVAE generation script in sample mode with the custom
# config and the step-10 checkpoint from the training run_dir, requesting
# 5 outputs written to the `generated` directory.
!python3 magenta/magenta/models/music_vae/music_vae_generate.py \
  --config=hierdec-drums_4bar_small \
  --checkpoint_file=saved/checkpoints/drums_4bar/train/model.ckpt-10 \
  --mode=sample \
  --num_outputs=5 \
  --output_dir=generated

Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit
  _resample_loop_p(x, t_out, interp_win, interp_delta, num_table, scale, y)
Instructions for updating:
non-resource variables are not supported in the long term
2022-08-20 10:23:34.755883: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
INFO:tensorflow:Loading model...
I0820 10:23:34.760323 139780004046720 music_vae_generate.py:149] Loading model...
INFO:tensorflow:Building MusicVAE model with BidirectionalLstmEncoder, HierarchicalLstmDecoder, and

## Debug

In [None]:
import collections
from magenta.common import merge_hparams
from magenta.contrib import training as contrib_training
from magenta.models.music_vae import configs
from magenta.models.music_vae import data
from magenta.models.music_vae import lstm_models
from magenta.models.music_vae import music_vae_train
from magenta.models.music_vae.base_model import MusicVAE
from magenta.models.music_vae.trained_model import TrainedModel
import tensorflow.compat.v1 as tf
import note_seq

In [None]:
# Shorthand for `contrib_training.HParams`, which the config definition in
# this cell calls to declare its hyperparameters.
HParams = contrib_training.HParams

# Field layout of a config. The defaults tuple (all None) is attached to the
# namedtuple constructor that the Config subclass inherits, which makes every
# field optional.
_ConfigFields = collections.namedtuple(
    'Config',
    ['model', 'hparams', 'note_sequence_augmenter', 'data_converter',
     'train_examples_path', 'eval_examples_path', 'tfds_name'])
_ConfigFields.__new__.__defaults__ = (None,) * len(_ConfigFields._fields)


class Config(_ConfigFields):
  """Named bundle of the settings that make up one configuration entry."""

  def values(self):
    """Return the fields as a mapping from field name to value."""
    return self._asdict()

CONFIG_MAP = configs.CONFIG_MAP


def _build_drums_4bar_small_config():
    """Build the 'hierdec-drums_4bar_small' config inside the notebook.

    Mirrors the entry text held in `custom_config`, but constructs the objects
    directly so they can be inspected in the cells that follow.
    """
    encoder = lstm_models.BidirectionalLstmEncoder()
    decoder = lstm_models.HierarchicalLstmDecoder(
        lstm_models.CategoricalLstmDecoder(),
        level_lengths=[16, 4],
        disable_autoregression=True)

    hparams = merge_hparams(
        lstm_models.get_default_hparams(),
        HParams(
            batch_size=512,
            max_seq_len=64,  # 4 bars w/ 16 steps per bar
            z_size=256,
            enc_rnn_size=[512, 512],
            dec_rnn_size=[256, 256],
            free_bits=48,
            max_beta=0.2,
            sampling_schedule='inverse_sigmoid',
            sampling_rate=1000,
        ))

    converter = data.DrumsConverter(
        max_bars=100,  # Truncate long drum sequences before slicing.
        slice_bars=4,
        steps_per_quarter=4,
        roll_input=True)

    return Config(
        model=MusicVAE(encoder, decoder),
        hparams=hparams,
        note_sequence_augmenter=None,
        data_converter=converter,
        train_examples_path=str(tfrec_file),
    )


CONFIG_MAP['hierdec-drums_4bar_small'] = _build_drums_4bar_small_config()

### Print Input Tensors

In [None]:
config = CONFIG_MAP['hierdec-drums_4bar_small']
tf_file_reader = tf.data.TFRecordDataset


def dataset_fn():
    """Return the training dataset for `config`, read with `tf_file_reader`."""
    return data.get_dataset(
        config, tf_file_reader=tf_file_reader, is_training=True)


# Turn one dataset into the dict of input tensors inspected in the next cells.
training_dataset = dataset_fn()
input_tensors = music_vae_train._get_input_tensors(training_dataset, config)

In [None]:
# List the entries of the input_tensors mapping.
input_tensors.keys()

dict_keys(['input_sequence', 'output_sequence', 'control_sequence', 'sequence_length'])

In [None]:
# Shape of the `input_sequence` entry, followed by its first element.
print(input_tensors['input_sequence'].shape)
print(input_tensors['input_sequence'][0])

(512, 64, 10)
tf.Tensor(
[[ True False  True False False False False False False False]
 [False False False False False False False False False  True]
 [False False  True False False False False False False False]
 [False False False False False False False False False  True]
 [ True  True False False False False False False False False]
 [False False  True False False False False False False False]
 [False False  True False False False False False False False]
 [False False False False False False False False False  True]
 [ True False False False False False False False False False]
 [False False  True False False False False False False False]
 [False  True False False False False False False False False]
 [False False False False False False False False False  True]
 [ True False False  True False False False False False False]
 [False  True False False False False False False False False]
 [False  True False False False False False False False False]
 [False False False False Fals

In [None]:
# Shape of the `output_sequence` entry, followed by element [0][0]
# (the first row of its first item).
print(input_tensors['output_sequence'].shape)
print(input_tensors['output_sequence'][0][0])

(512, 64, 512)
tf.Tensor(
[False False False False False  True False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False 

In [None]:
# Show the `control_sequence` entry (None in the recorded run).
print(input_tensors['control_sequence'])

None


In [None]:
# Show the `sequence_length` entry.
print(input_tensors['sequence_length'])

tf.Tensor(
[64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 

### Print Encoder Output

In [None]:
# Build the (untrained) model held by the config, then show its two parts.
model = config.model
model.build(
    config.hparams,
    config.data_converter.output_depth,
    is_training=True,
)
for component in (model._encoder, model._decoder):
    print(component)

<magenta.models.music_vae.lstm_models.BidirectionalLstmEncoder object at 0x7f0f93900450>
<magenta.models.music_vae.lstm_models.HierarchicalLstmDecoder object at 0x7f0f9390ac50>


In [None]:
def encode(sequence, sequence_length, control_sequence=None):
    """Encode a batch of sequences into latent-distribution parameters.

    Relies on the notebook globals `config` (for `hparams.z_size`) and `model`
    (for its encoder).

    Args:
        sequence: Input sequences; cast to float32 before encoding.
        sequence_length: Sequence lengths, passed through to the encoder.
        control_sequence: Optional control sequences. When given, they are
            cast to float32 and concatenated onto `sequence` along the last
            axis.

    Returns:
        A `(mu, sigma)` tuple: two dense projections of the encoder output,
        each with `z_size` units; `sigma` uses a softplus activation.
    """
    z_size = config.hparams.z_size

    # tf.to_float is deprecated; tf.cast(..., tf.float32) is its replacement.
    sequence = tf.cast(sequence, tf.float32)
    if control_sequence is not None:
        control_sequence = tf.cast(control_sequence, tf.float32)
        sequence = tf.concat([sequence, control_sequence], axis=-1)
    encoder_output = model.encoder.encode(sequence, sequence_length)

    mu = tf.layers.dense(
        encoder_output,
        z_size,
        name='encoder/mu',
        kernel_initializer=tf.random_normal_initializer(stddev=0.001))
    sigma = tf.layers.dense(
        encoder_output,
        z_size,
        activation=tf.nn.softplus,
        name='encoder/sigma',
        kernel_initializer=tf.random_normal_initializer(stddev=0.001))

    return mu, sigma

In [None]:
# Encode the batch prepared earlier; no control sequence is passed.
sequence_length = input_tensors['sequence_length']
input_sequence = input_tensors['input_sequence']
mu, sigma = encode(sequence=input_sequence, sequence_length=sequence_length)

In [None]:
# Display `mu`, the first value returned by encode().
mu

<tf.Tensor: shape=(512, 256), dtype=float32, numpy=
array([[ 3.5985588e-04,  3.8989724e-04, -1.6418722e-05, ...,
        -6.8864450e-05,  1.0071536e-04,  4.9026031e-04],
       [ 8.6927379e-04,  5.5203756e-04, -2.2198050e-04, ...,
         1.7228586e-04, -5.6761462e-04, -5.9401459e-04],
       [ 3.4629149e-04,  2.4729094e-04,  6.9202331e-05, ...,
         1.9021984e-04,  4.2038097e-05,  3.0057502e-04],
       ...,
       [ 6.5475103e-04,  4.4984886e-04, -2.8081797e-04, ...,
        -3.6981446e-04, -2.6055065e-04,  5.4954039e-04],
       [ 5.8174646e-04,  4.9551873e-04,  5.5557757e-07, ...,
         6.3289778e-04, -7.1067177e-04, -1.1163799e-03],
       [ 2.0881589e-04,  3.7205539e-04, -1.8555974e-04, ...,
        -7.9960830e-04, -8.8741077e-04,  1.4579445e-03]], dtype=float32)>

In [None]:
# Display `sigma`, the second value returned by encode().
sigma

<tf.Tensor: shape=(512, 256), dtype=float32, numpy=
array([[0.6930088 , 0.69302344, 0.69288254, ..., 0.6932787 , 0.6929438 ,
        0.69354165],
       [0.692941  , 0.6932618 , 0.6928269 , ..., 0.69337744, 0.6930389 ,
        0.6931819 ],
       [0.69289094, 0.69286317, 0.69311476, ..., 0.6930538 , 0.69307935,
        0.69363165],
       ...,
       [0.6929597 , 0.69288903, 0.6928437 , ..., 0.69341576, 0.69294107,
        0.6936339 ],
       [0.6930789 , 0.693032  , 0.69323415, ..., 0.6928626 , 0.6933422 ,
        0.69322896],
       [0.69316   , 0.692927  , 0.69258964, ..., 0.693778  , 0.6930011 ,
        0.69339   ]], dtype=float32)>

### Plot MusicVAE Output

In [None]:
num_outputs = 5
checkpoint_file = root_path/"saved/checkpoints/drums_4bar/train/model.ckpt-10"
checkpoint_dir_or_path = os.path.expanduser(checkpoint_file)

# Clear the per-item tensor limit on the shared data converter before the
# trained model is created.
config.data_converter.max_tensors_per_item = None

# Note: this rebinds `model`, which held the untrained `config.model` so far.
model = TrainedModel(
    config,
    batch_size=num_outputs,
    checkpoint_dir_or_path=checkpoint_dir_or_path,
)

In [None]:
temperature = 0.5

# Reuse the values defined earlier instead of repeating the literals 5 and 64,
# so the sample count matches the batch size given to TrainedModel and the
# length follows the config's max_seq_len.
samples = model.sample(
    n=num_outputs,
    length=config.hparams.max_seq_len,
    temperature=temperature)

# Plot every generated sequence.
for sequence in samples:
    note_seq.plot_sequence(sequence)