In [14]:
import copy
import pickle
import multiprocessing

import torch
import torch.optim as optim
import torchvision.transforms as T
from torch.utils.data import DataLoader

from util.helpers import *
from util.run import train, validate
from util.sample import sample
from util.dataset import HaydnDataset, ChunksDataset
from util.models import PitchEmbedModel, HarmonyModel, JudgeModel, NoteModel

from music21 import converter

%load_ext autoreload
%autoreload 2

# Pick the compute device once; every later cell reuses `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # On a GPU machine, make newly created float tensors default to CUDA.
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

# number of instrument parts
NUM_PARTS = 4

WARN: test mode is ON.
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
# SETUP DATA LOADER
#
# Configuration consumed by the dataset / DataLoader construction below.

# chunking parameters passed to ChunksDataset for the training set
SEQ_LEN = 32
STRIDE = 1
# mini-batch size for each split
BATCH_SIZE = {
    "train": 64,
    "val": 64
}
# keyword arguments shared by both DataLoaders
LOADER_PARAMS = {
    "shuffle": True,
    # Leave two cores free for the main process, but never go below zero:
    # DataLoader rejects a negative num_workers, which is what
    # `cpu_count() - 2` yields on a single-core machine. Zero means the
    # data is loaded in the main process.
    "num_workers": max(0, multiprocessing.cpu_count() - 2)
}
TRANSFORMS = []

# set to True to skip building the datasets and loaders
SKIP_DATA = False

if not SKIP_DATA:
    # Source dataset that the training chunks are cut from.
    haydn_dataset = HaydnDataset()

    # Training chunks use the configured window length and stride.
    data_train = ChunksDataset(
        seq_len=SEQ_LEN,
        stride=STRIDE,
        dataset=haydn_dataset,
        transforms=TRANSFORMS,
    )
    # Validation chunks are built from the training set's `comp_set`
    # (presumably the complementary / held-out split -- TODO confirm in
    # util.dataset) with ChunksDataset's default seq_len and stride.
    data_val = ChunksDataset(
        dataset=data_train.comp_set,
        transforms=TRANSFORMS,
    )

    # Both loaders share LOADER_PARAMS and differ only in dataset and batch size.
    loader_train = DataLoader(
        data_train, batch_size=BATCH_SIZE["train"], **LOADER_PARAMS
    )
    loader_val = DataLoader(
        data_val, batch_size=BATCH_SIZE["val"], **LOADER_PARAMS
    )

Building dataset...
Serialized scores found, loading...
Scores loaded in 1.21 seconds.


In [3]:
# HYPERPARAMETERS

NUM_EPOCHS = 1          # number of epochs to run
EMBED_DIM = 5           # number of dimensions for the embedded pitch vectors
RHYTHM_DIM = 1          # dimension of the rhythm
PITCH_VOCAB_SIZE = 140  # the total number of pitches plus rest

# parameters for the optimizers
OPTIM_PARAMS = dict(lr=1e-2, weight_decay=1e-5)

# weights applied to each of the loss functions
fp_loss = 1.0  # forward pitch
bp_loss = 1.0  # backward pitch
hp_loss = 1.0  # harmony pitch
fr_loss = 1.0  # forward rhythm
j_loss = 1.0   # judge
p_loss = 1.0   # part

# order matters: consumers receive the weights as a positional list
LOSS_WEIGHTS = [
    fp_loss,
    bp_loss,
    hp_loss,
    fr_loss,
    j_loss,
    p_loss,
]

In [16]:
# MODELS AND OPTIMIZERS
#
# Builds one shared pitch-embedding model plus, for every instrument part,
# a forward NoteModel, a backward NoteModel, a HarmonyModel and a JudgeModel.
# `models` maps "<prefix><part index>" (and "pitch_embed") to the model;
# `optims` maps the part index to that part's Adam optimizer.

# set to True to skip (re)building the models and optimizers
SKIP_MODELS = False

if not SKIP_MODELS:
    model_names = ["forward_", "backward_", "harmony_", "judge_"]

    # Layer sizes are identical for every part, so compute them once.
    note_input_dim = EMBED_DIM + RHYTHM_DIM
    note_hidden_dim = 64
    note_num_layers = 1
    harmony_input_shape = (NUM_PARTS, EMBED_DIM + NUM_PARTS)
    harmony_hidden_dim = 4
    judge_input_shape = (NUM_PARTS - 1, EMBED_DIM)
    judge_hidden_dim = 64
    output_dim = PITCH_VOCAB_SIZE

    models = {
        "pitch_embed": PitchEmbedModel(vocab_size=PITCH_VOCAB_SIZE,
                                       embed_dim=EMBED_DIM)
    }

    for i in range(NUM_PARTS):
        # forward and backward note models share the same architecture
        for prefix in model_names[:2]:
            models[prefix + str(i)] = NoteModel(note_input_dim,
                                                note_hidden_dim,
                                                batch_size=BATCH_SIZE['train'],
                                                num_layers=note_num_layers,
                                                vocab_size=PITCH_VOCAB_SIZE)

        models[model_names[2] + str(i)] = HarmonyModel(input_shape=harmony_input_shape,
                                                       vocab_size=PITCH_VOCAB_SIZE,
                                                       hidden_dim=harmony_hidden_dim)

        models[model_names[3] + str(i)] = JudgeModel(judge_input_shape,
                                                     judge_hidden_dim,
                                                     output_dim)

    # Send all models to the appropriate device BEFORE the optimizers are
    # built, as recommended by the torch.optim documentation.
    for key in models:
        models[key].to(device=device)

    # Jointly optimize all of the params, so weights can be assigned to
    # different losses. The shared pitch embedding is included in every
    # part's optimizer; it used to be collected but never handed to Adam,
    # which left the embedding untrained.
    # NOTE(review): assumes util.run.train does not update the embedding by
    # other means -- confirm.
    optims = {}
    embed_params = list(models["pitch_embed"].parameters())
    for i in range(NUM_PARTS):
        part_params = [param
                       for prefix in model_names
                       for param in models[prefix + str(i)].parameters()]
        optims[i] = optim.Adam(embed_params + part_params,
                               **OPTIM_PARAMS)

In [None]:
# TRAIN LOOP
#
# Runs NUM_EPOCHS rounds of train + validate, keeps per-epoch statistics and
# a deep copy of the models after each epoch, then pickles both to OUTPUT_PATH.

# set to True to skip training (e.g. when only sampling from saved models)
SKIP_TRAIN = False

if not SKIP_TRAIN:
    train_stats = []
    val_stats = []
    saved_models = []

    for epoch in range(NUM_EPOCHS):
        print("EPOCH {}".format(epoch))
        print("-----------")
        # Keep the two results under separate names: reusing one `stats`
        # variable let the validation result overwrite the training result,
        # so train_stats ended up holding validation statistics.
        epoch_train_stats, models = train(models, optims, loader_train,
                                          model_names=model_names,
                                          loss_weights=LOSS_WEIGHTS,
                                          device=device,
                                          print_iter=100)

        epoch_val_stats, models = validate(models, loader_val,
                                           model_names=model_names,
                                           device=device,
                                           print_iter=100)

        print("-----------")
        print("Completed epoch {}.".format(epoch))
        print("")
        train_stats.append(epoch_train_stats)
        val_stats.append(epoch_val_stats)
        # snapshot the models so later epochs cannot mutate this epoch's copy
        saved_models.append(copy.deepcopy(models))


    print("Training completed! Saving files.")

    # create a folder to store all of the stats and models
    mkdir(OUTPUT_PATH)
    stats_file_name = get_formatted_time() + "_" + get_unique_id() + ".stat"
    stats_file_path = OUTPUT_PATH + "/" + stats_file_name
    models_file_name = get_formatted_time() + "_" + get_unique_id() + ".models"
    models_file_path = OUTPUT_PATH + "/" + models_file_name

    # stats are stored as a (train_stats, val_stats) tuple, one entry per epoch
    with open(stats_file_path, "wb") as file:
        pickle.dump((train_stats, val_stats), file)
    # models are stored as a list indexed by epoch
    with open(models_file_path, "wb") as file:
        pickle.dump(saved_models, file)

EPOCH 0
-----------
Train iter 0/1071:
	Part 1 - fp_loss: 1.75866/56.00%, bp_loss: 2.82775/25.00%, hp_loss: 3.00447/23.00%, j_loss: 2.33710/48.00%, 
		fr_loss: 0.27260/71.00%, p_loss: 0.00045/100.00%, 
		total weighted loss: 10.20103
	Training time elapsed: 0.13 seconds

	Part 2 - fp_loss: 1.19426/68.00%, bp_loss: 2.31006/46.00%, hp_loss: 2.26373/46.00%, j_loss: 1.83701/70.00%, 
		fr_loss: 0.27308/71.00%, p_loss: 0.00025/100.00%, 
		total weighted loss: 7.87840
	Training time elapsed: 0.21 seconds

	Part 3 - fp_loss: 1.19386/79.00%, bp_loss: 2.87863/34.00%, hp_loss: 2.12005/50.00%, j_loss: 1.11830/76.00%, 
		fr_loss: 0.22638/78.00%, p_loss: 0.00012/100.00%, 
		total weighted loss: 7.53733
	Training time elapsed: 0.29 seconds

	Part 4 - fp_loss: 0.87030/81.00%, bp_loss: 2.83120/25.00%, hp_loss: 2.05300/50.00%, j_loss: 0.80593/79.00%, 
		fr_loss: 0.22876/78.00%, p_loss: 0.00035/100.00%, 
		total weighted loss: 6.78954
	Training time elapsed: 0.36 seconds

Train iter 100/1071:
	Part 1 - f

	Part 4 - fp_loss: 0.87103/79.00%, bp_loss: 2.16201/28.00%, hp_loss: 1.73984/67.00%, j_loss: 0.86546/79.00%, 
		fr_loss: 0.04866/95.00%, p_loss: 0.00004/100.00%, 
		total weighted loss: 5.68704
	Training time elapsed: 195.88 seconds



In [None]:
# SAMPLING

# Reload pickled models only when this session has no usable `models` dict
# (undefined, None, or fewer than two entries).
_need_saved_models = "models" not in vars() or models is None or len(models) < 2

if _need_saved_models:
    mkdir(OUTPUT_PATH)
    # file location, update to point to the correct file
    MODELS_FILE_PATH = OUTPUT_PATH + "/12-09_03-06-47_04D328.models"
    # which epoch's models to use
    NTH_EPOCH = 0

    # NOTE: pickle.load can execute arbitrary code -- only open model files
    # that were produced by a trusted run of this notebook.
    with open(MODELS_FILE_PATH, "rb") as file:
        saved_models = pickle.load(file)
    models = saved_models[NTH_EPOCH]

# how many ticks to sample, 16 ticks ~ 1 measure of music
NUM_TICKS_TO_SAMPLE = 256
# number of iterations to repeat the sampling process, one iteration
# will run for NUM_PARTS * NUM_TICKS_TO_SAMPLE times.
NUM_REPEATS = 1

output = sample(
    models,
    num_parts=NUM_PARTS,
    num_ticks=NUM_TICKS_TO_SAMPLE,
    num_dims=PITCH_VOCAB_SIZE,
    seq_len=SEQ_LEN,
    num_repeats=NUM_REPEATS,
)

In [None]:
# Convert the sampled matrix into a score object via the dataset helper.
score = haydn_dataset.matrix_to_score(output)

# set to False to skip writing the sampled score to disk
SAVING = True

if SAVING:
    # file name pattern: <unique id>_<timestamp>.pgz
    file_name = "{}_{}.pgz".format(get_unique_id(), get_formatted_time())
    mkdir(SAMPLE_PATH)
    output_path = "/".join([SAMPLE_PATH, file_name])
    # serialize the score with music21's freeze format
    converter.freeze(score, fp=output_path)

# To load the frozen score back later:
# converter.thaw(output_path)