In [1]:
import sys,os,argparse
import csv
import shutil
import torch
import torch.nn as nn
import torchaudio
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import DataLoader
from torch.optim import Adam

from dataset.e_piano import create_epiano_datasets, compute_epiano_accuracy

from model.music_transformer import MusicTransformer
from model.loss import SmoothCrossEntropyLoss

from utilities.constants import *
from utilities.device import get_device, use_cuda
from utilities.lr_scheduling import LrStepTracker, get_lr
from utilities.argument_funcs import parse_train_args, print_train_args, write_model_params
from utilities.run_model import train_epoch, eval_model

In [2]:
CSV_HEADER = ["Epoch", "Learn rate", "Avg Train loss", "Train Accuracy", "Avg Eval loss", "Eval accuracy"]

# Baseline is an untrained epoch that we evaluate as a baseline loss and accuracy
BASELINE_EPOCH = -1

# main
def main():
    """
    ----------
    Author: Damon Gwinn
    ----------
    Entry point. Trains a model specified by command line arguments
    ----------
    """

    args = parse_train_args()
    print_train_args(args)

    if(args.force_cpu):
        use_cuda(False)
        print("WARNING: Forced CPU usage, expect model to perform slower")
        print("")

    os.makedirs(args.output_dir, exist_ok=True)

    ##### Output prep #####
    params_file = os.path.join(args.output_dir, "model_params.txt")
    write_model_params(args, params_file)

    weights_folder = os.path.join(args.output_dir, "weights")
    os.makedirs(weights_folder, exist_ok=True)

    results_folder = os.path.join(args.output_dir, "results")
    os.makedirs(results_folder, exist_ok=True)

    results_file = os.path.join(results_folder, "results.csv")
    best_loss_file = os.path.join(results_folder, "best_loss_weights.pickle")
    best_acc_file = os.path.join(results_folder, "best_acc_weights.pickle")
    best_text = os.path.join(results_folder, "best_epochs.txt")

    ##### Tensorboard #####
    if(args.no_tensorboard):
        tensorboard_summary = None
    else:
        from torch.utils.tensorboard import SummaryWriter

        tensorboad_dir = os.path.join(args.output_dir, "tensorboard")
        tensorboard_summary = SummaryWriter(log_dir=tensorboad_dir)

    ##### Datasets #####
    # train_dataset, val_dataset, test_dataset = create_epiano_datasets(args.input_dir, args.max_sequence)
    
    train_dataset = torchaudio.datasets.MUSDB_HQ('dataset/', "train", 
                                     download=False, 
                                     sources=["mixture"], #["bass", "drums", "other", "mixture", "vocals"]
                                     split="train") 
    val_dataset = torchaudio.datasets.MUSDB_HQ('dataset/', "train", 
                                     download=False, 
                                     sources=["mixture"], #["bass", "drums", "other", "mixture", "vocals"]
                                     split="validation") 
    test_dataset = torchaudio.datasets.MUSDB_HQ('dataset/', "test", 
                                     download=False, 
                                     sources=["mixture"], #["bass", "drums", "other", "mixture", "vocals"]
                                     ) 
    
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.n_workers, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size, num_workers=args.n_workers)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.n_workers)

    model = MusicTransformer(n_layers=args.n_layers, num_heads=args.num_heads,
                d_model=args.d_model, dim_feedforward=args.dim_feedforward, dropout=args.dropout,
                max_sequence=args.max_sequence, rpr=args.rpr).to(get_device())

    ##### Continuing from previous training session #####
    # start_epoch = BASELINE_EPOCH
    start_epoch = 0
    if(args.continue_weights is not None):
        if(args.continue_epoch is None):
            print("ERROR: Need epoch number to continue from (-continue_epoch) when using continue_weights")
            return
        else:
            model.load_state_dict(torch.load(args.continue_weights))
            start_epoch = args.continue_epoch
    elif(args.continue_epoch is not None):
        print("ERROR: Need continue weights (-continue_weights) when using continue_epoch")
        return

    ##### Lr Scheduler vs static lr #####
    if(args.lr is None):
        if(args.continue_epoch is None):
            init_step = 0
        else:
            init_step = args.continue_epoch * len(train_loader)

        lr = LR_DEFAULT_START
        lr_stepper = LrStepTracker(args.d_model, SCHEDULER_WARMUP_STEPS, init_step)
    else:
        lr = args.lr

    ##### Not smoothing evaluation loss #####
    eval_loss_func = nn.CrossEntropyLoss(ignore_index=TOKEN_PAD)

    ##### SmoothCrossEntropyLoss or CrossEntropyLoss for training #####
    if(args.ce_smoothing is None):
        train_loss_func = eval_loss_func
    else:
        train_loss_func = SmoothCrossEntropyLoss(args.ce_smoothing, VOCAB_SIZE, ignore_index=TOKEN_PAD)

    ##### Optimizer #####
    opt = Adam(model.parameters(), lr=lr, betas=(ADAM_BETA_1, ADAM_BETA_2), eps=ADAM_EPSILON)

    if(args.lr is None):
        lr_scheduler = LambdaLR(opt, lr_stepper.step)
    else:
        lr_scheduler = None

    ##### Tracking best evaluation accuracy #####
    best_eval_acc        = 0.0
    best_eval_acc_epoch  = -1
    best_eval_loss       = float("inf")
    best_eval_loss_epoch = -1

    ##### Results reporting #####
    if(not os.path.isfile(results_file)):
        with open(results_file, "w", newline="") as o_stream:
            writer = csv.writer(o_stream)
            writer.writerow(CSV_HEADER)


    ##### TRAIN LOOP #####
    for epoch in range(start_epoch, args.epochs):
        # Baseline has no training and acts as a base loss and accuracy (epoch 0 in a sense)
        if(epoch > BASELINE_EPOCH):
            print(SEPERATOR)
            print("NEW EPOCH:", epoch+1)
            print(SEPERATOR)
            print("")

            # Train
            train_epoch(epoch+1, model, train_loader, train_loss_func, opt, lr_scheduler, args.print_modulus)

            print(SEPERATOR)
            print("Evaluating:")
        else:
            print(SEPERATOR)
            print("Baseline model evaluation (Epoch 0):")

        # Eval
        train_loss, train_acc = eval_model(model, train_loader, train_loss_func)
        eval_loss, eval_acc = eval_model(model, test_loader, eval_loss_func)

        # Learn rate
        lr = get_lr(opt)

        print("Epoch:", epoch+1)
        print("Avg train loss:", train_loss)
        print("Avg train acc:", train_acc)
        print("Avg eval loss:", eval_loss)
        print("Avg eval acc:", eval_acc)
        print(SEPERATOR)
        print("")

        new_best = False

        if(eval_acc > best_eval_acc):
            best_eval_acc = eval_acc
            best_eval_acc_epoch  = epoch+1
            torch.save(model.state_dict(), best_acc_file)
            new_best = True

        if(eval_loss < best_eval_loss):
            best_eval_loss       = eval_loss
            best_eval_loss_epoch = epoch+1
            torch.save(model.state_dict(), best_loss_file)
            new_best = True

        # Writing out new bests
        if(new_best):
            with open(best_text, "w") as o_stream:
                print("Best eval acc epoch:", best_eval_acc_epoch, file=o_stream)
                print("Best eval acc:", best_eval_acc, file=o_stream)
                print("")
                print("Best eval loss epoch:", best_eval_loss_epoch, file=o_stream)
                print("Best eval loss:", best_eval_loss, file=o_stream)


        if(not args.no_tensorboard):
            tensorboard_summary.add_scalar("Avg_CE_loss/train", train_loss, global_step=epoch+1)
            tensorboard_summary.add_scalar("Avg_CE_loss/eval", eval_loss, global_step=epoch+1)
            tensorboard_summary.add_scalar("Accuracy/train", train_acc, global_step=epoch+1)
            tensorboard_summary.add_scalar("Accuracy/eval", eval_acc, global_step=epoch+1)
            tensorboard_summary.add_scalar("Learn_rate/train", lr, global_step=epoch+1)
            tensorboard_summary.flush()

        if((epoch+1) % args.weight_modulus == 0):
            epoch_str = str(epoch+1).zfill(PREPEND_ZEROS_WIDTH)
            path = os.path.join(weights_folder, "epoch_" + epoch_str + ".pickle")
            torch.save(model.state_dict(), path)

        with open(results_file, "a", newline="") as o_stream:
            writer = csv.writer(o_stream)
            writer.writerow([epoch+1, lr, train_loss, train_acc, eval_loss, eval_acc])

    # Sanity check just to make sure everything is gone
    if(not args.no_tensorboard):
        tensorboard_summary.flush()

    return



In [3]:
#@title Start to Train the Model
n_workers = 1
batch_size = 1 #@param {type:"slider", min:0, max:8, step:1}
number_of_training_epochs = 150 #@param {type:"slider", min:0, max:200, step:1}
maximum_output_MIDI_sequence = 512 #@param {type:"slider", min:0, max:8192, step:128}
dim_feedforward = 256
sys.argv = ['-output_dir=rpr', '--rpr', f'-batch_size={batch_size}', f'-epochs={number_of_training_epochs}', 
            f'-max_sequence={maximum_output_MIDI_sequence}', f'-n_workers={n_workers}', f'-dim_feedforward={dim_feedforward}'] #-n_layers -num_heads -d_model -dim_feedforward

In [4]:
if __name__ == "__main__":
    main()

input_dir: ./dataset/e_piano
output_dir: ./saved_models
weight_modulus: 1
print_modulus: 1

n_workers: 1
force_cpu: False
tensorboard: True

continue_weights: None
continue_epoch: None

lr: None
ce_smoothing: None
batch_size: 1
epochs: 150

rpr: True
max_sequence: 512
n_layers: 6
num_heads: 8
d_model: 512

dim_feedforward: 256
dropout: 0.1



  from .autonotebook import tqdm as notebook_tqdm


NEW EPOCH: 1

Epoch 1  Batch 1 / 86
LR: 1.7469281074217108e-07
Train loss: 5.779476165771484

Time (s): 2.560912847518921

Epoch 1  Batch 2 / 86
LR: 3.4938562148434215e-07
Train loss: 5.813246250152588

Time (s): 0.037423133850097656

Epoch 1  Batch 3 / 86
LR: 5.240784322265132e-07
Train loss: 5.797491073608398

Time (s): 0.05300569534301758

Epoch 1  Batch 4 / 86
LR: 6.987712429686843e-07
Train loss: 5.780543804168701

Time (s): 0.018246173858642578

Epoch 1  Batch 5 / 86
LR: 8.734640537108554e-07
Train loss: 5.787901878356934

Time (s): 0.026134252548217773

Epoch 1  Batch 6 / 86
LR: 1.0481568644530265e-06
Train loss: 5.730584144592285

Time (s): 0.039263248443603516

Epoch 1  Batch 7 / 86
LR: 1.2228496751951975e-06
Train loss: 5.715783596038818

Time (s): 0.03907918930053711

Epoch 1  Batch 8 / 86
LR: 1.3975424859373686e-06
Train loss: 5.594594955444336

Time (s): 0.016358613967895508

Epoch 1  Batch 9 / 86
LR: 1.5722352966795397e-06
Train loss: 5.60410737991333

Time (s): 0.0470209

../aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [6,0,0] Assertion `t >= 0 && t < n_classes` failed.


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


## Playground

In [24]:
musdb = torchaudio.datasets.MUSDB_HQ('dataset/', "train", 
                                     download=False, 
                                     sources=["mixture"], #["bass", "drums", "other", "mixture", "vocals"]
                                     split="train") 

In [25]:
data_loader = DataLoader(
    musdb,
    batch_size=1,
    shuffle=True,
    num_workers=5)

In [9]:
for batch_num, batch in enumerate(data_loader):
    print(batch_num)
    break

0


In [7]:
batch[0].shape

torch.Size([1, 1, 2, 12136088])

In [8]:
batch[0].shape

torch.Size([1, 1, 2, 12136088])

In [16]:
args = parse_train_args()
train_dataset, val_dataset, test_dataset = create_epiano_datasets(args.input_dir, args.max_sequence)

In [17]:
data_loader = DataLoader(
    train_dataset,
    batch_size=1,
    shuffle=True,
    num_workers=5)

In [18]:
for batch_num, batch in enumerate(data_loader):
    print(batch_num)
    break

0


In [19]:
batch[0].shape

torch.Size([1, 512])

In [30]:
for batch_num, batch in enumerate(data_loader):


        x   = batch[0].to(get_device())
        tgt = batch[1].to(get_device())
        break

In [21]:
x.to(torch.int64).shape

torch.Size([1, 512])

In [11]:
x

tensor([[[[ 0.0000e+00, -9.1553e-05, -9.1553e-05,  ...,  0.0000e+00,
            0.0000e+00,  0.0000e+00],
          [ 0.0000e+00, -6.1035e-05, -9.1553e-05,  ...,  0.0000e+00,
            0.0000e+00,  0.0000e+00]]]], device='cuda:0')

In [7]:
from encodec import EncodecModel
from encodec.utils import convert_audio

In [1]:
batch

NameError: name 'batch' is not defined

In [12]:
from torchaudio.functional import apply_codec

In [33]:
xx = apply_codec(x.reshape([-1,x.shape[-1]]).cpu(), sample_rate=1024, format='mp3', channels_first=True, compression=None, 
            encoding=None, bits_per_sample=None)

In [34]:
xx.shape

torch.Size([2, 1303659])

In [27]:
x.shape

torch.Size([1, 1, 2, 8903278])

In [31]:
x.reshape([-1,x.shape[-1]])[:,:512].shape
x.reshape([-1,x.shape[-1]])[:,512:1024].shape

torch.Size([2, 512])

In [29]:
tgt.shape

torch.Size([1])