In [1]:
from model import MusicTransformer
import custom
from custom.metrics import *
from custom.criterion import SmoothCrossEntropyLoss, CustomSchedule
from custom.config import config
from data import Data
from midi_processor.processor import encode_midi, decode_midi
from extra import *

import os
from preprocess import preprocess_midi_files_under
from progress.bar import Bar
import pickle

import utils
import datetime
import time

import torch
import torch.optim as optim
from tensorboardX import SummaryWriter


In [8]:
midi_folder = os.path.join('dataset', 'midi')
preprocess_folder = os.path.join('dataset', 'preprocess')

preprocess_midi_files_under(midi_folder, preprocess_folder)

 [dataset\midi\MIDI-UNPROCESSED_01-03_R1_2014_MID--AUDIO_01_R1_2014_wav--1.midi]

 [dataset\midi\MIDI-UNPROCESSED_01-03_R1_2014_MID--AUDIO_01_R1_2014_wav--2.midi] [dataset\midi\MIDI-UNPROCESSED_01-03_R1_2014_MID--AUDIO_01_R1_2014_wav--3.midi] [dataset\midi\MIDI-UNPROCESSED_01-03_R1_2014_MID--AUDIO_01_R1_2014_wav--5.midi] [dataset\midi\MIDI-UNPROCESSED_01-03_R1_2014_MID--AUDIO_02_R1_2014_wav--1.midi] [dataset\midi\MIDI-UNPROCESSED_01-03_R1_2014_MID--AUDIO_02_R1_2014_wav--2.midi] [dataset\midi\MIDI-UNPROCESSED_01-03_R1_2014_MID--AUDIO_02_R1_2014_wav--4.midi] [dataset\midi\MIDI-UNPROCESSED_01-03_R1_2014_MID--AUDIO_02_R1_2014_wav--5.midi] [dataset\midi\MIDI-UNPROCESSED_01-03_R1_2014_MID--AUDIO_03_R1_2014_wav--2.midi] [dataset\midi\MIDI-UNPROCESSED_01-03_R1_2014_MID--AUDIO_03_R1_2014_wav--3.midi] [dataset\midi\MIDI-UNPROCESSED_01-03_R1_2014_MID--AUDIO_03_R1_2014_wav--4.midi] [dataset\midi\MIDI-UNPROCESSED_01-03_R1_2014_MID--AUDIO_03_R1_2014_wav--5.midi] [dataset\midi\MIDI-UNPROCESSED_01-03_R1_2014_MID--AUDIO_03_R1_2014_wav--6.midi] [dataset\midi\MIDI-Unprocessed_01_R1_20

In [10]:
get_config(config, ["config/thor_basic.yml"])

CONFIG_FILE_NAME = save.yml
batch_size = 8
debug = true
device = cuda
dropout = 0.1
embedding_dim = 128
epochs = 500
event_dim = 388
experiment = embedding256-layer6
fp16 = None
l_r = 0.01
label_smooth = 0.1
load_path = None
max_seq = 256
num_layers = 6
pad_token = 388
pickle_dir = dataset/preprocess
token_eos = 390
token_sos = 389
vocab_size = 391

In [11]:
# load data
dataset = Data(config.pickle_dir)
print(dataset)

<class Data has "1008" files>


In [None]:
# load data
dataset = Data(config.pickle_dir)
print(dataset)


# load model
learning_rate = config.l_r

# define model
mt = load_model('models/accuracy-0.018-epoch-3,pth', config, new=True)

mt.to(config.device)
opt = optim.Adam(mt.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9)
scheduler = CustomSchedule(config.embedding_dim, optimizer=opt)

# multi-GPU set
if torch.cuda.device_count() > 1:
    single_mt = mt
    mt = torch.nn.DataParallel(mt, output_device=torch.cuda.device_count()-1)
else:
    single_mt = mt

# init metric set
metric_set = MetricsSet({
    'accuracy': CategoricalAccuracy(),
    'loss': SmoothCrossEntropyLoss(config.label_smooth, config.vocab_size, config.pad_token),
    'bucket':  LogitsBucketting(config.vocab_size)
})

print(mt)
print('| Summary - Device Info : {}'.format(torch.cuda.device))

# define tensorboard writer
current_time = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
train_log_dir = 'logs/'+config.experiment+'/'+current_time+'/train'
eval_log_dir = 'logs/'+config.experiment+'/'+current_time+'/eval'

train_summary_writer = SummaryWriter(train_log_dir)
eval_summary_writer = SummaryWriter(eval_log_dir)

best_accuracy = 0

# Train Start
print(">> Train start...")
idx = 0
for e in range(config.epochs):
    print(">>> [Epoch was updated]")
    for b in range(len(dataset.files) // config.batch_size):
        scheduler.optimizer.zero_grad()
        try:
            batch_x, batch_y = dataset.slide_seq2seq_batch(config.batch_size, config.max_seq)
            batch_x = torch.from_numpy(batch_x).contiguous().to(config.device, non_blocking=True, dtype=torch.int)
            batch_y = torch.from_numpy(batch_y).contiguous().to(config.device, non_blocking=True, dtype=torch.int)
        except IndexError:
            continue

        start_time = time.time()
        mt.train()
        sample = mt.forward(batch_x)
        metrics = metric_set(sample, batch_y)
        loss = metrics['loss']
        loss.backward()
        scheduler.step()
        end_time = time.time()

        if config.debug:
            print("[Loss]: {}".format(loss))

        train_summary_writer.add_scalar('loss', metrics['loss'], global_step=idx)
        train_summary_writer.add_scalar('accuracy', metrics['accuracy'], global_step=idx)
        train_summary_writer.add_scalar('learning_rate', scheduler.rate(), global_step=idx)
        train_summary_writer.add_scalar('iter_p_sec', end_time-start_time, global_step=idx)

        # result_metrics = metric_set(sample, batch_y)
        if b % 100 == 0:
            single_mt.eval()
            eval_x, eval_y = dataset.slide_seq2seq_batch(2, config.max_seq, 'eval')
            eval_x = torch.from_numpy(eval_x).contiguous().to(config.device, dtype=torch.int)
            eval_y = torch.from_numpy(eval_y).contiguous().to(config.device, dtype=torch.int)

            eval_prediction, weights = single_mt.forward(eval_x)

            eval_metrics = metric_set(eval_prediction, eval_y)

            if b == 0:
                train_summary_writer.add_histogram("target_analysis", batch_y, global_step=e)
                train_summary_writer.add_histogram("source_analysis", batch_x, global_step=e)
                for i, weight in enumerate(weights):
                    attn_log_name = "attn/layer-{}".format(i)
                    utils.attention_image_summary(
                        attn_log_name, weight, step=idx, writer=eval_summary_writer)

            eval_summary_writer.add_scalar('loss', eval_metrics['loss'], global_step=idx)
            eval_summary_writer.add_scalar('accuracy', eval_metrics['accuracy'], global_step=idx)
            eval_summary_writer.add_histogram("logits_bucket", eval_metrics['bucket'], global_step=idx)

            print('\n====================================================')
            print('Epoch/Batch: {}/{}'.format(e, b))
            print('Train >>>> Loss: {:6.6}, Accuracy: {}'.format(metrics['loss'], metrics['accuracy']))
            print('Eval >>>> Loss: {:6.6}, Accuracy: {}'.format(eval_metrics['loss'], eval_metrics['accuracy']))
            if eval_metrics['accuracy'] > best_accuracy:
                torch.save(single_mt.state_dict(), 'models/accuracy-{:.3f}-epoch-{}.pth'.format(eval_metrics['accuracy'], e))
                best_accuracy = eval_metrics['accuracy']
        torch.cuda.empty_cache()
        idx += 1

        # switch output device to: gpu-1 ~ gpu-n
        sw_start = time.time()
        if torch.cuda.device_count() > 1:
            mt.output_device = idx % (torch.cuda.device_count() -1) + 1
        sw_end = time.time()
        if config.debug:
            print('output switch time: {}'.format(sw_end - sw_start) )

torch.save(single_mt.state_dict(), 'models/final.pth'.format(idx))
eval_summary_writer.close()
train_summary_writer.close()

<class Data has "213" files>
MusicTransformer(
  (Decoder): Encoder(
    (embedding): Embedding(391, 128)
    (pos_encoding): DynamicPositionEmbedding()
    (enc_layers): ModuleList(
      (0-5): 6 x EncoderLayer(
        (rga): RelativeGlobalAttention(
          (Wq): Linear(in_features=128, out_features=128, bias=True)
          (Wk): Linear(in_features=128, out_features=128, bias=True)
          (Wv): Linear(in_features=128, out_features=128, bias=True)
          (fc): Linear(in_features=128, out_features=128, bias=True)
        )
        (FFN_pre): Linear(in_features=128, out_features=64, bias=True)
        (FFN_suf): Linear(in_features=64, out_features=128, bias=True)
        (layernorm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
        (layernorm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (

In [10]:
model_path = "models/train-0.14697265625.pth"
midi_path = "output"
dataset = Data(config.pickle_dir)
metric_set = MetricsSet({
    'accuracy': CategoricalAccuracy(),
    'loss': SmoothCrossEntropyLoss(config.label_smooth, config.vocab_size, config.pad_token),
    'bucket':  LogitsBucketting(config.vocab_size)
})
mt = load_model(model_path, config)
mt.to(config.device)
mt.train()
batch_x, batch_y = dataset.slide_seq2seq_batch(1, config.max_seq)
batch_x = torch.from_numpy(batch_x).contiguous().to(config.device, non_blocking=True, dtype=torch.int)
batch_y = torch.from_numpy(batch_y).contiguous().to(config.device, non_blocking=True, dtype=torch.int)
sample = mt.forward(batch_x)
metrics = metric_set(sample, batch_y)
metrics['bucket'].shape
output = torch.reshape(metrics['bucket'], (batch_x.shape))
np_arr = output.tolist()

decode_midi(np_arr[0], file_path=f"midi_output\\{midi_path}.midi")
decode_midi(batch_x[0].tolist(), file_path=f"midi_output\\{midi_path}-original.midi")

info removed pitch: 42
info removed pitch: 26
info removed pitch: 67
info removed pitch: 63
info removed pitch: 29
info removed pitch: 26
info removed pitch: 56
info removed pitch: 41
info removed pitch: 74
info removed pitch: 52
info removed pitch: 67
info removed pitch: 7
info removed pitch: 56
info removed pitch: 5
info removed pitch: 52
info removed pitch: 12
info removed pitch: 5
info removed pitch: 3
info removed pitch: 5
info removed pitch: 48
info removed pitch: 24
info removed pitch: 48
info removed pitch: 24
info removed pitch: 67
info removed pitch: 42
info removed pitch: 56
info removed pitch: 2
info removed pitch: 61
info removed pitch: 51
info removed pitch: 52
info removed pitch: 49
info removed pitch: 63


ValueError: data byte must be in range 0..127

In [7]:
batch_x.tolist()

[[124,
  196,
  231,
  51,
  190,
  240,
  67,
  176,
  241,
  79,
  176,
  238,
  43,
  178,
  237,
  55,
  189,
  236,
  48,
  176,
  139,
  236,
  51,
  189,
  242,
  77,
  176,
  240,
  65,
  178,
  131,
  237,
  43,
  186,
  139,
  237,
  51,
  136,
  235,
  48,
  192,
  243,
  63,
  242,
  75,
  176,
  131,
  237,
  43,
  176,
  143,
  233,
  55,
  186,
  139,
  237,
  51,
  176,
  136,
  236,
  48,
  196,
  242,
  74,
  242,
  62,
  176,
  131,
  239,
  43,
  177,
  143,
  234,
  55,
  183,
  155,
  167,
  165,
  153,
  151,
  163,
  139,
  136,
  143,
  177,
  236,
  51,
  176,
  239,
  48,
  176,
  131,
  176,
  162,
  150,
  190,
  242,
  72,
  237,
  55,
  241,
  60,
  176,
  234,
  43,
  182,
  139,
  235,
  51,
  179,
  136,
  231,
  48,
  192,
  240,
  31,
  182,
  160,
  143,
  148,
  131,
  139,
  136,
  119,
  185,
  240,
  50,
  239,
  47,
  194,
  243,
  86,
  239,
  43,
  176,
  240,
  74,
  177,
  236,
  55,
  182,
  162,
  231,
  74,
  177,
  135,
  240,
  47,
  1

In [14]:
batch_x.shape

torch.Size([8, 1024])

In [15]:
np_arr = output[0].tolist()
import pretty_midi
from midi_processor.processor import Event, _event_seq2snote_seq, _merge_note
event_sequence = [Event.from_int(idx) for idx in np_arr]
event_sequence
snote_seq = _event_seq2snote_seq(event_sequence)
note_seq = _merge_note(snote_seq)
note_seq.sort(key=lambda x:x.start)

mid = pretty_midi.PrettyMIDI()
# if want to change instument, see https://www.midi.org/specifications/item/gm-level-1-sound-set
instument = pretty_midi.Instrument(1, False, "Developed By Yang-Kichang")
instument.notes = note_seq

mid.instruments.append(instument)
mid.instruments

info removed pitch: 68
info removed pitch: 64
info removed pitch: 64
info removed pitch: 64
info removed pitch: 52
info removed pitch: 64
info removed pitch: 64
info removed pitch: 76
info removed pitch: 64
info removed pitch: 80
info removed pitch: 64
info removed pitch: 64
info removed pitch: 64
info removed pitch: 87
info removed pitch: 64
info removed pitch: 87
info removed pitch: 85


[Instrument(program=1, is_drum=False, name="Developed By Yang-Kichang")]