In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
import pickle
import math
import random
from numba import cuda
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, SmoothingFunction
from torchmetrics.text.bert import BERTScore
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from torchmetrics.text.rouge import ROUGEScore
from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Preprocessing, load tokenizers, set seed

In [None]:
# Reset the GPU through numba's CUDA bindings before PyTorch touches it.
# NOTE(review): presumably meant to free memory left over from a previous kernel
# session -- confirm this is still needed.
device = cuda.get_current_device()
device.reset()

print(torch.cuda.is_available())
torch.cuda.empty_cache()

# One seed shared by every RNG this notebook uses (torch, tensorflow, numpy, random).
seed = 42
# NOTE(review): CUDA_LAUNCH_BLOCKING is usually read when CUDA initialises; the numba
# call above may already have initialised it, so this may have no effect -- confirm.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# torch.backends.cudnn.benchmark = True
# torch.backends.cudnn.deterministic = True
# cuDNN is switched off entirely rather than using the two flags commented out above.
torch.backends.cudnn.enabled = False
torch.manual_seed(seed)
tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [None]:
# Load the pre-built sequence array from disk.
data_matrix = np.load("data/sequences.npy")
print(data_matrix.shape)

# The last axis carries four parallel token streams, in this order:
# 0 = lyrics, 1 = notes, 2 = durations, 3 = rests.
lyrics, notes, durations, rests = (data_matrix[:, :, channel] for channel in range(4))

print(lyrics.shape, notes.shape, durations.shape, rests.shape)

In [None]:
def load_tokenizer(file):
    """
    Unpickle ``file`` and return the object stored under its ``'tokenizer'`` key.

    NOTE: ``pickle.load`` can execute arbitrary code, so only point this at
    tokenizer files you created yourself.

    :param file: path to a pickle file containing a dict with a ``'tokenizer'`` entry
    :return: the value stored under ``'tokenizer'``
    """
    with open(file, 'rb') as handle:
        payload = pickle.load(handle)
    return payload['tokenizer']

In [None]:
# One tokenizer per token stream, loaded in the order lyrics, notes, durations, rests.
tokenizer_lyr, tokenizer_note, tokenizer_duration, tokenizer_rest = map(
    load_tokenizer,
    (
        "tokenizers/tokenizer_lyr.pkl",
        "tokenizers/tokenizer_note.pkl",
        "tokenizers/tokenizer_duration.pkl",
        "tokenizers/tokenizer_rest.pkl",
    ),
)

In [None]:
# --- Train / test split -------------------------------------------------------
# Draw `num_samples` distinct song indices for training; every remaining index
# becomes test data. The draw uses the global numpy RNG seeded earlier.
num_songs = len(lyrics)
# NOTE(review): 6848 = 214 * 32, presumably chosen as a multiple of batch_size -- confirm.
num_samples = 6848
train_inds = np.random.choice(np.arange(num_songs), size=num_samples, replace=False)
test_inds = np.delete(np.arange(num_songs), train_inds)

# Apply the same index split to each of the four parallel streams.
train_lyrics = [lyrics[i] for i in train_inds]
test_lyrics = [lyrics[i] for i in test_inds]

train_notes = [notes[i] for i in train_inds]
test_notes = [notes[i] for i in test_inds]

train_durations = [durations[i] for i in train_inds]
test_durations = [durations[i] for i in test_inds]

train_rests = [rests[i] for i in train_inds]
test_rests = [rests[i] for i in test_inds]

# General params
# Vocabulary sizes are word_index length + 1; the lyric vocabulary is capped at 10000.
vocab_size = min(len(tokenizer_lyr.word_index) + 1, 10000)
notes_size = len(tokenizer_note.word_index) + 1
durations_size = len(tokenizer_duration.word_index) + 1
rests_size = len(tokenizer_rest.word_index) + 1

# For every stream the "eos" token id doubles as the padding id and "bos" is the start id.
pad_id_lyr = tokenizer_lyr.word_index["eos"]
start_id_lyr = tokenizer_lyr.word_index["bos"]

pad_id_note = tokenizer_note.word_index["eos"]
start_id_note = tokenizer_note.word_index["bos"]

pad_id_duration = tokenizer_duration.word_index["eos"]
start_id_duration = tokenizer_duration.word_index["bos"]

pad_id_rest = tokenizer_rest.word_index["eos"]
start_id_rest = tokenizer_rest.word_index["bos"]

# Sequence length is taken from the second axis of the lyrics array.
max_seq_len = lyrics.shape[1]
batch_size = 32

# Smoothing helper used by the BLEU evaluation cell.
chencherry = SmoothingFunction()

# Generator params
# NOTE(review): g_dropout is not referenced by the generator defined in this notebook.
g_dropout = 0.3
g_embed_dim = 32
g_hidden = 32
pretrain_epochs_gen = 120
g_lr_pretrain = 0.01
g_lr_adv = 1e-4

# Discriminator params
d_embed_dim = 64
# d_filter_sizes[i] is the convolution height paired with d_num_filters[i] output channels.
d_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20]
d_num_filters = [100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160]
d_dropout_prob = 0.2
d_lr = 1e-4


# Number of Monte Carlo rollouts per prefix when computing rewards.
rollout_num = 16
# Max gradient norm applied by `optimize` when a model is passed to it.
clip_norm = 5.0

adversarial_epochs = 200

## Model initialization and training

In [None]:
def truncated_normal_(tensor, mean=0, std=1):
    """
    Fill ``tensor`` in place with samples from an approximately truncated normal.

    Four standard-normal candidates are drawn per element and one that lies
    strictly inside (-2, 2) is selected (if none qualifies, an out-of-range
    candidate is kept). The selection is then scaled by ``std`` and shifted by
    ``mean``.

    Implemented by @ruotianluo
    See https://discuss.pytorch.org/t/implementing-truncated-normal-initializer/4778/15

    :param tensor: tensor to overwrite
    :param mean: value added after scaling
    :param std: scale applied to the selected draws
    :return: the same ``tensor`` object
    """
    candidates = tensor.new_empty(tensor.shape + (4,)).normal_()
    in_range = (candidates < 2) & (candidates > -2)
    # max over a boolean mask yields the position of an in-range candidate.
    _, pick = in_range.max(-1, keepdim=True)
    selected = candidates.gather(-1, pick).squeeze(-1)
    tensor.data.copy_(selected)
    tensor.data.mul_(std).add_(mean)
    return tensor

In [None]:
class LSTMGenerator(nn.Module):
    """
    LSTM lyric generator conditioned on a melody.

    At every time step the embeddings of the previous lyric token and of the
    current note, duration and rest tokens are concatenated and fed to a
    single-layer LSTM; a linear layer plus log-softmax produces log-probabilities
    over the lyric vocabulary.
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, note_size,
                 duration_size, rest_size, max_seq_len, pad_id_lyr,
                 pad_id_note, pad_id_dur, pad_id_rest, gpu=False):
        super(LSTMGenerator, self).__init__()
        self.name = 'vanilla'

        self.hidden_dim = hidden_dim
        self.embedding_dim = embedding_dim
        self.max_seq_len = max_seq_len
        self.vocab_size = vocab_size
        # Each melody stream keeps its own vocabulary size (previously all three
        # attributes were mistakenly set to the lyric vocab_size).
        self.note_size = note_size
        self.duration_size = duration_size
        self.rest_size = rest_size
        self.pad_id_lyr = pad_id_lyr
        self.pad_id_note = pad_id_note
        self.pad_id_dur = pad_id_dur
        self.pad_id_rest = pad_id_rest
        self.gpu = gpu

        self.embeddings_lyr = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_id_lyr)
        self.embeddings_note = nn.Embedding(note_size, embedding_dim, padding_idx=pad_id_note)
        self.embeddings_dur = nn.Embedding(duration_size, embedding_dim, padding_idx=pad_id_dur)
        self.embeddings_rest = nn.Embedding(rest_size, embedding_dim, padding_idx=pad_id_rest)
        # The LSTM input is the concatenation of the four embeddings.
        self.lstm = nn.LSTM(embedding_dim*4, hidden_dim, batch_first=True)
        self.lstm2out = nn.Linear(hidden_dim, vocab_size)
        self.softmax = nn.LogSoftmax(dim=-1)

        self.init_params()

    def forward(self, inp, mel, hidden, need_hidden=False):
        """
        Embeds input and applies LSTM
        :param inp: batch_size * seq_len, or batch_size for a single step
        :param mel: batch_size * seq_len * 3 (note, duration, rest on the last axis),
                    or batch_size * 3 for a single step
        :param hidden: (h, c)
        :param need_hidden: if return hidden, use for sampling
        :return: log-probabilities of shape (batch_size * seq_len) * vocab_size,
                 plus the new hidden state when need_hidden is True
        """
        if len(mel.shape) == 2:
            notes = mel[:, 0]
            durations = mel[:, 1]
            rests = mel[:, 2]
        else:
            notes = mel[:, :, 0]
            durations = mel[:, :, 1]
            rests = mel[:, :, 2]

        emb_lyr = self.embeddings_lyr(inp)  # batch_size * len * embedding_dim
        emb_note = self.embeddings_note(notes)
        emb_dur = self.embeddings_dur(durations)
        emb_rest = self.embeddings_rest(rests)

        if len(inp.size()) == 1:
            # Single-step input: add the missing time axis.
            emb_lyr = emb_lyr.unsqueeze(1)  # batch_size * 1 * embedding_dim
            emb_note = emb_note.unsqueeze(1)
            emb_dur = emb_dur.unsqueeze(1)
            emb_rest = emb_rest.unsqueeze(1)

        emb = torch.cat([emb_lyr, emb_note, emb_dur, emb_rest], dim=2)

        out, hidden = self.lstm(emb, hidden)  # out: batch_size * seq_len * hidden_dim
        out = out.contiguous().view(-1, self.hidden_dim)  # out: (batch_size * len) * hidden_dim
        out = self.lstm2out(out)  # (batch_size * seq_len) * vocab_size
        pred = self.softmax(out)

        if need_hidden:
            return pred, hidden
        else:
            return pred

    def sample(self, melody, num_samples, batch_size, start_letter=1):
        """
        Samples the network and returns num_samples samples of length max_seq_len.
        :param melody: melodies to condition on; rows are consumed batch by batch
        :param num_samples: number of sequences requested
        :param batch_size: number of sequences generated per pass
        :param start_letter: token id fed as the first input
        :return samples: num_samples * max_seq_length (a sampled sequence in each row)
        """
        num_batch = num_samples // batch_size if num_samples != batch_size else 1
        samples = torch.zeros(num_batch * batch_size, self.max_seq_len).long()

        # Sampled token ids carry no gradient, so skip building the autograd graph.
        with torch.no_grad():
            # Generate sentences with multinomial sampling strategy
            for b in range(num_batch):
                hidden = self.init_hidden(batch_size)
                inp = torch.LongTensor([start_letter] * batch_size)
                mel_batch = melody[b * batch_size:(b+1) * batch_size, :]
                if self.gpu:
                    inp = inp.cuda()
                    mel_batch = mel_batch.cuda()

                for i in range(self.max_seq_len):
                    y = mel_batch[:, i, :]
                    out, hidden = self.forward(inp, y, hidden, need_hidden=True)  # out: batch_size * vocab_size
                    next_token = torch.multinomial(torch.exp(out), 1)  # batch_size * 1 (sampling from each row)
                    samples[b * batch_size:(b + 1) * batch_size, i] = next_token.view(-1)
                    inp = next_token.view(-1)
        samples = samples[:num_samples]

        return samples

    def init_params(self):
        """Re-initialise every trainable parameter with std = 1/sqrt(first dimension)."""
        for param in self.parameters():
            if param.requires_grad and len(param.shape) > 0:
                stddev = 1 / math.sqrt(param.shape[0])
                truncated_normal_(param, std=stddev)

    def init_hidden(self, batch_size=32):
        """Return a zero (h, c) pair of shape 1 * batch_size * hidden_dim each."""
        h = torch.zeros(1, batch_size, self.hidden_dim)
        c = torch.zeros(1, batch_size, self.hidden_dim)

        if self.gpu:
            return h.cuda(), c.cuda()
        else:
            return h, c

In [None]:
class SeqGAN_G(LSTMGenerator):
    """LSTM generator extended with the SeqGAN policy-gradient loss."""

    def __init__(self, embedding_dim, hidden_dim, vocab_size, note_size,
                 duration_size, rest_size, max_seq_len, pad_id_lyr,
                 pad_id_note, pad_id_dur, pad_id_rest, gpu=False):
        super(SeqGAN_G, self).__init__(embedding_dim, hidden_dim, vocab_size, note_size,
                                       duration_size, rest_size, max_seq_len, pad_id_lyr,
                                       pad_id_note, pad_id_dur, pad_id_rest, gpu)
        self.name = 'seqgan'

    def batchPGLoss(self, inp, mel, target, reward):
        """
        Returns a policy gradient loss
        :param inp: batch_size x seq_len, inp should be target with <s> (start letter) prepended
        :param mel: melody tokens passed through to ``forward``
        :param target: batch_size x seq_len
        :param reward: multiplied element-wise with the batch_size x seq_len
                       log-probabilities, so it must broadcast against that shape
        :return loss: policy loss
        """

        batch_size, seq_len = inp.size()
        hidden = self.init_hidden(batch_size)

        # Use the actual input length rather than self.max_seq_len so shorter
        # sequences are handled as well.
        log_probs = self.forward(inp, mel, hidden).view(batch_size, seq_len, self.vocab_size)
        # Pick the log-probability of each target token directly; this avoids
        # materialising a batch_size * seq_len * vocab_size one-hot tensor and the
        # NaN that `-inf * 0` would produce.
        pred = log_probs.gather(2, target.unsqueeze(2)).squeeze(2)  # batch_size * seq_len
        loss = -torch.sum(pred * reward)

        return loss

In [None]:
class CNNDiscriminator(nn.Module):
    """
    Convolutional sequence classifier.

    Token embeddings pass through parallel convolutions of different heights,
    are max-pooled over time, mixed by a highway layer and projected to two logits.
    """

    def __init__(self, embed_dim, vocab_size, filter_sizes, num_filters, padding_idx, gpu=False,
                 dropout=0.2):
        super(CNNDiscriminator, self).__init__()
        self.embedding_dim = embed_dim
        self.vocab_size = vocab_size
        self.padding_idx = padding_idx
        self.feature_dim = sum(num_filters)
        self.gpu = gpu

        self.embeddings = nn.Embedding(vocab_size, embed_dim, padding_idx=padding_idx)
        # One convolution per (height, channel count) pair, spanning the full embedding width.
        conv_layers = []
        for height, channels in zip(filter_sizes, num_filters):
            conv_layers.append(nn.Conv2d(1, channels, (height, embed_dim)))
        self.convs = nn.ModuleList(conv_layers)
        self.highway = nn.Linear(self.feature_dim, self.feature_dim)
        self.feature2out = nn.Linear(self.feature_dim, 2)
        self.dropout = nn.Dropout(dropout)

        self.init_params()

    def forward(self, inp):
        """
        Get final predictions of discriminator
        :param inp: batch_size * seq_len
        :return: pred: batch_size * 2
        """
        return self.feature2out(self.dropout(self.get_feature(inp)))

    def get_feature(self, inp):
        """
        Get feature vector of given sentences
        :param inp: batch_size * max_seq_len
        :return: batch_size * feature_dim
        """
        embedded = self.embeddings(inp).unsqueeze(1)  # batch_size * 1 * max_seq_len * embed_dim
        pooled = []
        for conv in self.convs:
            activation = F.relu(conv(embedded)).squeeze(3)  # batch_size * num_filter * length
            # Max over the whole time axis leaves one value per filter.
            pooled.append(F.max_pool1d(activation, activation.size(2)).squeeze(2))
        features = torch.cat(pooled, 1)  # batch_size * feature_dim

        # Highway layer: gate between the transformed and the raw features.
        transformed = self.highway(features)
        gate = torch.sigmoid(transformed)
        return gate * F.relu(transformed) + (1. - gate) * features

    def init_params(self):
        """Re-initialise every trainable parameter with std = 1/sqrt(first dimension)."""
        trainable = [p for p in self.parameters() if p.requires_grad and len(p.shape) > 0]
        for weight in trainable:
            truncated_normal_(weight, std=1 / math.sqrt(weight.shape[0]))

In [None]:
class SeqGAN_D(CNNDiscriminator):
    """SeqGAN discriminator: a CNNDiscriminator with a reordered constructor signature."""

    def __init__(self, embed_dim, vocab_size, padding_idx, dis_filter_sizes,
                 dis_num_filters, gpu=False, dropout=0.25):
        # Forward by keyword because the base class takes the arguments in a different order.
        super(SeqGAN_D, self).__init__(
            embed_dim=embed_dim,
            vocab_size=vocab_size,
            filter_sizes=dis_filter_sizes,
            num_filters=dis_num_filters,
            padding_idx=padding_idx,
            gpu=gpu,
            dropout=dropout,
        )

In [None]:
class GANDataset(Dataset):
    """Map-style dataset that simply exposes an indexable container."""

    def __init__(self, data):
        # Kept as a public attribute; the data iterators read it back directly.
        self.data = data

    def __len__(self):
        """Number of stored records."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the record at position ``index``."""
        return self.data[index]

In [None]:
class GenDataIter:
    """
    Batches (lyric input, melody, lyric target) triples for generator training.

    ``input`` is ``target`` shifted right by one position with ``start_letter``
    placed in the first column.
    """

    def __init__(self, samples, melodies, batch_size, max_seq_len, start_letter=1, shuffle=True):
        self.batch_size = batch_size
        self.max_seq_len = max_seq_len
        self.start_letter = start_letter

        self.loader = DataLoader(
            dataset=GANDataset(self.__read_data__(samples, melodies)),
            batch_size=self.batch_size,
            shuffle=shuffle,
            drop_last=True)

        # Whole-dataset tensors, stacked in dataset order.
        self.input = self._all_data_('lyric')
        self.melody = self._all_data_('melody')
        self.target = self._all_data_('target')

    def __read_data__(self, samples, melodies):
        """
        input: same as target, but start with start_letter.
        :return: list of {'lyric', 'melody', 'target'} dicts, one per sequence
        """
        inp, target = self.prepare(samples, melodies, self.start_letter)
        lyr, mel = inp
        all_data = [{'lyric': l, 'melody': m, 'target': t} for (l, m, t) in zip(lyr, mel, target)]
        return all_data

    def random_batch(self):
        """Return the batch at a randomly chosen position of one pass over the loader."""
        idx = random.randint(0, len(self.loader) - 1)
        # Stop as soon as the chosen batch is reached instead of collating every
        # batch into a list first.
        for position, batch in enumerate(self.loader):
            if position == idx:
                return batch

    def _all_data_(self, col):
        """Stack field ``col`` of every record into one tensor."""
        return torch.stack([data[col] for data in self.loader.dataset.data], 0)

    @staticmethod
    def prepare(samples, melodies, start_id, gpu=False):
        """
        Add start_letter to samples as inp, target same as samples
        :param samples: batch_size * seq_len token ids
        :param melodies: melody tensor returned unchanged alongside the input
        :param start_id: token id written to the first input column
        :param gpu: move all returned tensors to CUDA when True
        :return: ((inp, melodies), target)
        """
        inp = torch.zeros(samples.size()).long()
        target = samples
        inp[:, 0] = start_id
        # Shift by the width of `samples` itself rather than the notebook-level
        # max_seq_len global, so the method works for any sequence length.
        inp[:, 1:] = target[:, :-1]

        if gpu:
            return (inp.cuda(), melodies.cuda()), target.cuda()
        return (inp, melodies), target

In [None]:
def optimize(opt, loss, model=None, retain_graph=False):
    """
    Run one optimisation step for ``loss``.

    :param opt: optimizer whose gradients are zeroed and then stepped
    :param loss: scalar tensor to back-propagate
    :param model: when given, its gradients are clipped to the notebook-level
                  ``clip_norm`` before the step
    :param retain_graph: forwarded to ``loss.backward``
    """
    opt.zero_grad()
    loss.backward(retain_graph=retain_graph)
    if model is None:
        # No model supplied: step without gradient clipping.
        opt.step()
        return
    torch.nn.utils.clip_grad_norm_(model.parameters(), clip_norm)
    opt.step()

In [None]:
def train_gen_epoch(model, data_loader, criterion, optimizer, cuda=True):
    """
    Train the generator for one pass over ``data_loader``.

    :param model: generator exposing ``init_hidden`` and ``forward``
    :param data_loader: iterable of dicts with 'lyric', 'melody' and 'target' tensors
    :param criterion: loss applied to the flattened predictions and targets
    :param optimizer: optimizer stepped once per batch (with gradient clipping)
    :param cuda: move each batch to the GPU before the forward pass
    :return: mean loss per batch
    """
    total_loss = 0
    for data in data_loader:
        lyr, mel, target = data['lyric'], data['melody'], data['target']
        if cuda:
            lyr, mel, target = lyr.cuda(), mel.cuda(), target.cuda()

        # Size the hidden state from the batch actually received, so a loader
        # whose final batch is smaller than its nominal batch_size still works.
        hidden = model.init_hidden(lyr.size(0))
        pred = model.forward(lyr, mel, hidden)
        loss = criterion(pred, target.view(-1))
        optimize(optimizer, loss, model)
        total_loss += loss.item()
    return total_loss / len(data_loader)

## Pretraining

In [None]:
def pretrain_generator(gen, gen_opt, data_loader, criterion, epochs):
    """
    Maximum-likelihood pre-training of the generator.

    :param gen: generator to train
    :param gen_opt: optimizer used for the pre-training steps
    :param data_loader: loader handed to ``train_gen_epoch`` every epoch
    :param criterion: loss function for the MLE objective
    :param epochs: number of passes over ``data_loader``
    :return: list with the mean loss of each epoch
    """
    history = []
    bar = tqdm(range(epochs))
    for epoch_idx in bar:
        epoch_loss = train_gen_epoch(gen, data_loader, criterion, gen_opt)
        history.append(epoch_loss)
        bar.set_description("EPOCH: {}, LOSS: {}".format(epoch_idx, epoch_loss))
    return history

In [None]:
# Lyric token ids for the training split.
sequences_lyrics_train = torch.LongTensor(train_lyrics)

# Each melody stream gets a trailing singleton axis so the three can be
# concatenated along it into one tensor whose last axis is (note, duration, rest).
sequences_notes_train = torch.LongTensor(train_notes).unsqueeze(2)
sequences_durations_train = torch.LongTensor(train_durations).unsqueeze(2)
sequences_rests_train = torch.LongTensor(train_rests).unsqueeze(2)

train_melodies = torch.cat(
    (sequences_notes_train, sequences_durations_train, sequences_rests_train),
    dim=2,
)

gen_data_iter = GenDataIter(
    sequences_lyrics_train,
    train_melodies,
    batch_size,
    max_seq_len,
    start_letter=start_id_lyr,
)

train_melodies.shape, sequences_lyrics_train.shape

In [None]:
# Generator on the GPU, built from the hyperparameters defined earlier.
gen = SeqGAN_G(
    embedding_dim=g_embed_dim,
    hidden_dim=g_hidden,
    vocab_size=vocab_size,
    note_size=notes_size,
    duration_size=durations_size,
    rest_size=rests_size,
    max_seq_len=max_seq_len,
    pad_id_lyr=pad_id_lyr,
    pad_id_note=pad_id_note,
    pad_id_dur=pad_id_duration,
    pad_id_rest=pad_id_rest,
    gpu=True,
).cuda()

# Separate Adam optimizers (and learning rates) for MLE pre-training and adversarial training.
gen_opt = optim.Adam(gen.parameters(), lr=g_lr_pretrain)
gen_adv_opt = optim.Adam(gen.parameters(), lr=g_lr_adv)
mle_criterion = nn.NLLLoss().cuda()

In [None]:
# Run MLE pre-training for `pretrain_epochs_gen` epochs; keeps the per-epoch mean loss for plotting.
losses_pretrain = pretrain_generator(gen, gen_opt, gen_data_iter.loader, mle_criterion, pretrain_epochs_gen)

In [None]:
# Pre-training loss curve, also written to disk as a PNG.
fig, ax = plt.subplots()
ax.plot(losses_pretrain)
ax.set_xlabel("EPOCH")
ax.set_ylabel("LOSS")
ax.set_title("Pretraining Losses Generator")
fig.savefig("Pretraining Losses SeqGenerator.png")
plt.show()

## Adversarial Training

In [None]:
class ROLLOUT:
    """Monte Carlo rollout that turns discriminator scores into per-token rewards."""

    def __init__(self, gen, gpu=True):
        self.gen = gen
        # Snapshot of the generator taken at construction time.
        self.old_model = copy.deepcopy(gen)
        self.max_seq_len = gen.max_seq_len
        self.vocab_size = gen.vocab_size
        self.gpu = gpu

    def rollout_mc_search(self, sentences, melodies, given_num):
        """
        fill up remain tokens with MC search
        :param sentences: size of batch_size * max_seq_len
        :param melodies: melody tokens; position i conditions the token sampled at step i
        :param given_num: number of leading tokens copied from ``sentences``
        :return: batch_size * max_seq_len completed sequences
        """
        batch_size = sentences.size(0)

        # get current state: run the generator over the given prefix in one call
        hidden = self.gen.init_hidden(batch_size)
        inp = sentences[:, :given_num]
        mel = melodies[:, :given_num, :]
        out, hidden = self.gen.forward(inp, mel, hidden, need_hidden=True)
        # Keep only the distribution predicted after the last prefix token.
        out = out.view(batch_size, -1, self.vocab_size)[:, -1]

        samples = torch.zeros(batch_size, self.max_seq_len).long()
        samples[:, :given_num] = sentences[:, :given_num]

        if self.gpu:
            samples = samples.cuda()

        # MC search
        for i in range(given_num, self.max_seq_len):
            out = torch.multinomial(torch.exp(out), 1)
            samples[:, i] = out.view(-1).data
            inp = out.view(-1)
            mel = melodies[:, i, :]
            mel = torch.squeeze(mel, dim=1)

            out, hidden = self.gen.forward(inp, mel, hidden, need_hidden=True)

        return samples

    def get_reward(self, sentences, melodies, rollout_num, dis, current_k=0):
        """
        get reward via Monte Carlo search
        :param sentences: size of batch_size * max_seq_len
        :param melodies: melody tokens aligned with ``sentences``
        :param rollout_num: number of rollouts averaged per prefix length
        :param dis: discriminator; softmax column ``current_k + 1`` is used as the reward
        :param current_k: current training gen
        :return: reward: [batch_size, max_seq_len]
        """
        with torch.no_grad():
            batch_size = sentences.size(0)
            rewards = torch.zeros([rollout_num * self.max_seq_len, batch_size]).float()
            if self.gpu:
                rewards = rewards.cuda()
            idx = 0
            for i in range(rollout_num):
                for given_num in range(1, self.max_seq_len + 1):
                    samples = self.rollout_mc_search(sentences, melodies, given_num)
                    out = dis.forward(samples)
                    out = F.softmax(out, dim=-1)
                    reward = out[:, current_k + 1]
                    rewards[idx] = reward
                    idx += 1

        # Rows were written in (rollout, position) order with one column per
        # sentence, so the buffer is laid out as rollout * position * batch.
        # Average over the rollout axis, then transpose to batch * position.
        # (Viewing it directly as batch * position * rollout mixes unrelated entries.)
        rewards = rewards.view(rollout_num, self.max_seq_len, batch_size)
        rewards = torch.mean(rewards, dim=0).transpose(0, 1).contiguous()
        return rewards

In [None]:
def remove_start_and_end(lyrics):
    """
    Strip begin/end-of-sequence marker words from every lyric string.

    Each string is split on whitespace, the words "BOS", "EOS", "eos" and "bos"
    are dropped, and the rest is re-joined with single spaces.

    :param lyrics: iterable of strings
    :return: list of cleaned strings, in the same order
    """
    markers = ("BOS", "EOS", "eos", "bos")
    return [
        ' '.join(word for word in text.split() if word not in markers)
        for text in lyrics
    ]

In [None]:
class DisDataIter:
    """Batches real (label 1) and generated (label 0) sequences for discriminator training."""

    def __init__(self, pos_samples, neg_samples, batch_size=32):
        """
        :param pos_samples: real sequences, labelled 1
        :param neg_samples: generated sequences, labelled 0
        :param batch_size: loader batch size (defaults to the previous fixed value of 32)
        """
        self.batch_size = batch_size
        self.max_seq_len = 20
        self.start_letter = 0

        self.loader = DataLoader(
            dataset=GANDataset(self.__read_data__(pos_samples, neg_samples)),
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=True)

    def __read_data__(self, pos_samples, neg_samples):
        """
        Build the record list for the loader.
        :return: list of {'input', 'target'} dicts, one per sequence
        """
        inp, target = self.prepare(pos_samples, neg_samples)
        all_data = [{'input': i, 'target': t} for (i, t) in zip(inp, target)]
        return all_data

    def random_batch(self):
        """Return the batch at a randomly chosen position of one pass over the loader."""
        idx = random.randint(0, len(self.loader) - 1)
        # Stop at the chosen batch instead of collating the whole loader into a list.
        for position, batch in enumerate(self.loader):
            if position == idx:
                return batch

    def prepare(self, pos_samples, neg_samples):
        """
        Build inp and target
        :return: (inp, target) shuffled with the same permutation; both live on
                 the GPU when CUDA is available, otherwise on the CPU
        """
        inp = torch.cat((pos_samples, neg_samples), dim=0).long().detach()  # !!!need .detach()
        target = torch.ones(inp.size(0)).long()
        target[pos_samples.size(0):] = 0

        # shuffle
        perm = torch.randperm(inp.size(0))
        # Fall back to the CPU instead of crashing when no GPU is present.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        inp = inp[perm].to(device)
        target = target[perm].to(device)

        return inp, target

In [None]:
# Discriminator on the GPU. The configured d_dropout_prob is passed explicitly;
# without it SeqGAN_D silently falls back to its own default of 0.25.
dis = SeqGAN_D(d_embed_dim, vocab_size, pad_id_lyr, d_filter_sizes, d_num_filters,
               gpu=True, dropout=d_dropout_prob).cuda()
dis_opt = optim.Adam(dis.parameters(), lr=d_lr)
dis_criterion = nn.CrossEntropyLoss().cuda()

In [None]:
def adv_train_generator(gen, dis, melody, batch_size, rollout_num, gen_adv_opt, start_letter, g_step, cuda=True):
    """
    The gen is trained using policy gradients, using the reward from the discriminator.

    :param gen: generator being trained
    :param dis: discriminator providing the rewards
    :param melody: one batch of melodies to condition the samples on
    :param batch_size: number of sequences sampled per step
    :param rollout_num: Monte Carlo rollouts per prefix when computing rewards
    :param gen_adv_opt: optimizer for the adversarial generator updates
    :param start_letter: token id used to start sampling and to build the inputs
    :param g_step: number of policy-gradient updates to perform
    :param cuda: run sampling inputs, targets and rollouts on the GPU
    :return: sum of the policy-gradient losses over the g_step updates
    """
    # The flag is now honoured end to end (it used to be hard-coded to True here).
    rollout_func = ROLLOUT(gen, cuda)
    total_g_loss = 0
    for step in range(g_step):
        samples = gen.sample(melody, batch_size, batch_size, start_letter)
        # prepare() already moves everything to the GPU when asked, so no extra
        # .cuda() calls are needed afterwards.
        (inp, mel), target = GenDataIter.prepare(samples, melody, start_letter, gpu=cuda)

        # ===Train===
        rewards = rollout_func.get_reward(target, mel, rollout_num, dis)
        adv_loss = gen.batchPGLoss(inp, mel, target, rewards)
        optimize(gen_adv_opt, adv_loss)
        total_g_loss += adv_loss.item()
    return total_g_loss

In [None]:
def train_dis_epoch(model, data_loader, criterion, optimizer, CUDA=True):
    """
    Train the discriminator for one pass over ``data_loader``.

    :param model: discriminator exposing ``forward``
    :param data_loader: iterable of dicts with 'input' and 'target' tensors
    :param criterion: classification loss
    :param optimizer: optimizer stepped once per batch (with gradient clipping)
    :param CUDA: move each batch to the GPU first
    :return: (mean loss per batch, fraction of correctly classified samples)
    """
    loss_sum, correct, seen = 0, 0, 0
    for batch in data_loader:
        inp = batch['input']
        target = batch['target']
        if CUDA:
            inp = inp.cuda()
            target = target.cuda()

        pred = model.forward(inp)
        loss = criterion(pred, target)
        optimize(optimizer, loss, model)

        loss_sum += loss.item()
        correct += torch.sum((pred.argmax(dim=-1) == target)).item()
        seen += inp.size(0)

    return loss_sum / len(data_loader), correct / seen

In [None]:
def train_discriminator(gen, dis, batch_size, train_data,
                        dis_criterion, dis_opt, d_step, d_epoch, start_letter=None):
    """
    Training the discriminator on real_data_samples (positive) and generated samples from gen (negative).
    Samples are drawn d_step times, and the discriminator is trained for d_epoch epochs on each draw.

    :param gen: generator used to produce the negative samples
    :param dis: discriminator being trained
    :param batch_size: generator sampling batch size; 8 * batch_size sequences are drawn per step
    :param train_data: data iterator exposing ``target`` and ``melody`` tensors
    :param dis_criterion: classification loss
    :param dis_opt: discriminator optimizer
    :param d_step: number of times fresh positive/negative samples are drawn
    :param d_epoch: epochs trained on each draw
    :param start_letter: token id that starts generated samples; defaults to
                         ``train_data.start_letter`` (falling back to 1)
    :return: (mean loss per step, mean accuracy per step)
    """
    # Kept for backward compatibility: the last epoch's values stay visible at module level.
    global d_loss, train_acc
    if start_letter is None:
        # Start sampling from the same token the training inputs begin with.
        start_letter = getattr(train_data, 'start_letter', 1)

    num_dis_samples = 8 * batch_size
    losses, accs = [], []
    for step in range(d_step):
        # prepare loader for training -- read from the train_data argument rather
        # than the notebook-level gen_data_iter global
        targets = train_data.target
        inds = np.arange(targets.shape[0])
        rand_inds = np.random.choice(inds, num_dis_samples)
        pos_samples = targets[rand_inds]
        neg_samples = gen.sample(train_data.melody, num_dis_samples, batch_size, start_letter)
        dis_data = DisDataIter(pos_samples, neg_samples)
        d_losses, train_accs = [], []
        for epoch in range(d_epoch):
            # ===Train===
            d_loss, train_acc = train_dis_epoch(dis, dis_data.loader, dis_criterion, dis_opt)
            d_losses.append(d_loss)
            train_accs.append(train_acc)

        losses.append(np.mean(d_losses))
        accs.append(np.mean(train_accs))
    return losses, accs

In [None]:
def adversarial_train(gen, dis, gen_adv_opt, dis_adv_opt, train_data, batch_size,
                      dis_criterion, rollout_num, epochs, start_letter=1):
    """
    Alternate generator and discriminator updates for ``epochs`` rounds.

    Each round draws one random batch of melodies from ``train_data``, performs
    2 policy-gradient generator updates on it, then trains the discriminator for
    1 step of 1 epoch.

    :param gen: generator
    :param dis: discriminator
    :param gen_adv_opt: optimizer for adversarial generator updates
    :param dis_adv_opt: optimizer for the discriminator
    :param train_data: data iterator providing ``random_batch()``
    :param batch_size: batch size used for sampling
    :param dis_criterion: discriminator loss
    :param rollout_num: Monte Carlo rollouts per prefix
    :param epochs: number of adversarial rounds
    :param start_letter: token id used to start generated sequences
    :return: (generator losses, discriminator losses, discriminator accuracies), one entry per round
    """
    progress = tqdm(range(epochs))
    g_losses, d_losses, d_accs = [], [], []
    for adv_epoch in progress:
        # Only the melody of the random batch is needed; lyrics are generated.
        melody = train_data.random_batch()["melody"]
        g_loss = adv_train_generator(gen, dis, melody, batch_size, rollout_num, gen_adv_opt, start_letter, 2)
        # Use the train_data argument instead of the notebook-level gen_data_iter global.
        d_loss, accs = train_discriminator(gen, dis, batch_size, train_data, dis_criterion, dis_adv_opt, 1, 1)
        mean_d_loss = np.mean(d_loss)
        mean_d_acc = np.mean(accs)
        g_losses.append(g_loss)
        d_losses.append(mean_d_loss)
        d_accs.append(mean_d_acc)
        # The accuracy shown is the discriminator's, so label it d_acc.
        progress.set_description('g_loss: %.4f, d_loss: %.4f, d_acc: %.4f' % (g_loss, mean_d_loss, mean_d_acc))

    return g_losses, d_losses, d_accs

In [None]:
# Run adversarial training; the discriminator optimizer `dis_opt` is passed as `dis_adv_opt`
# and the lyric "bos" id is used as the start token.
g_losses, d_losses, d_accs = adversarial_train(gen, dis, gen_adv_opt, dis_opt,
                                               gen_data_iter, batch_size, dis_criterion,
                                               rollout_num, adversarial_epochs, start_id_lyr)

In [None]:
# Generator loss per adversarial round, also written to disk as a PNG.
fig, ax = plt.subplots()
ax.plot(g_losses)
ax.set_xlabel("EPOCH")
ax.set_ylabel("LOSS")
ax.set_title("Advesarial Losses Generator")
fig.savefig("Advesarial Losses SeqGenerator.png")
plt.show()

In [None]:
# Discriminator loss per adversarial round, also written to disk as a PNG.
fig, ax = plt.subplots()
ax.plot(d_losses)
ax.set_xlabel("EPOCH")
ax.set_ylabel("LOSS")
ax.set_title("Advesarial Losses SeqDiscriminator")
fig.savefig("Advesarial Losses SeqDiscriminator.png")
plt.show()

In [None]:
# Discriminator accuracy per adversarial round, also written to disk as a PNG.
plt.plot(d_accs)
plt.xlabel("EPOCH")
# This curve is an accuracy, not a loss, so label the axis accordingly.
plt.ylabel("ACCURACY")
plt.title("Advesarial Accuracies SeqDiscriminator")
plt.savefig("Advesarial Accuracies SeqDiscriminator.png")
plt.show()

## Evaluation

In [None]:
# Pick 80 evaluation songs from the held-out indices (np.random.choice samples
# with replacement by default, so an index can repeat).
test_is = np.random.choice(test_inds, 80)

# Give each melody stream a trailing singleton axis so the three can be joined on it.
test_nos = notes[test_is][:, :, np.newaxis]
test_dus = durations[test_is][:, :, np.newaxis]
test_res = rests[test_is][:, :, np.newaxis]
test_lyr = list(lyrics[test_is])

# Last axis is (note, duration, rest), matching what the generator's forward() indexes.
test_mels = torch.LongTensor(np.concatenate((test_nos, test_dus, test_res), axis=2))
test_mels.shape

In [None]:
# Generate one lyric sequence per test melody (80 sequences, 16 per pass).
samples = gen.sample(test_mels, 80, 16, start_id_lyr)

# Decode generated and reference token ids to text, then drop the bos/eos markers.
preds = remove_start_and_end(tokenizer_lyr.sequences_to_texts(samples.numpy()))
orig = remove_start_and_end(tokenizer_lyr.sequences_to_texts(test_lyr))

## BLEU Scores

In [None]:
# Sentence-level BLEU-2/3/4 between each generated lyric and its reference.
bleus_4, bleus_3, bleus_2 = [], [], []
for test_ref, test_pred in tqdm(zip(orig, preds), total=len(orig)):
    # sentence_bleu expects a list of tokenised references and a tokenised
    # hypothesis. Passing the raw strings makes it compare characters against
    # single-character "references", so split into words first.
    references = [test_ref.split()]
    hypothesis = test_pred.split()
    bleu4 = sentence_bleu(references, hypothesis, smoothing_function=chencherry.method7)
    bleu3 = sentence_bleu(references, hypothesis, weights=[1/3, 1/3, 1/3], smoothing_function=chencherry.method7)
    bleu2 = sentence_bleu(references, hypothesis, weights=[1/2, 1/2], smoothing_function=chencherry.method7)
    bleus_4.append(bleu4)
    bleus_3.append(bleu3)
    bleus_2.append(bleu2)

np.mean(bleus_2), np.mean(bleus_3), np.mean(bleus_4)

## ROUGE Scores

In [None]:
# ROUGE-1 / ROUGE-2 / ROUGE-L (f-measure, precision, recall) per generated lyric.
rouge = ROUGEScore()
r_f_measure_1, r_precision_1, r_recall_1 = [], [], []
r_f_measure_2, r_precision_2, r_recall_2 = [], [], []
r_f_measure_l, r_precision_l, r_recall_l = [], [], []

# Route each metric key of the result dict to the list that collects it.
rouge_sinks = {
    "rouge1_fmeasure": r_f_measure_1,
    "rouge1_precision": r_precision_1,
    "rouge1_recall": r_recall_1,
    "rouge2_fmeasure": r_f_measure_2,
    "rouge2_precision": r_precision_2,
    "rouge2_recall": r_recall_2,
    "rougeL_fmeasure": r_f_measure_l,
    "rougeL_precision": r_precision_l,
    "rougeL_recall": r_recall_l,
}

for test_ref, test_pred in tqdm(zip(orig, preds)):
    rouge_dict = rouge(test_pred, test_ref)
    for metric_key, sink in rouge_sinks.items():
        sink.append(rouge_dict[metric_key])

print(np.mean(r_f_measure_1), np.mean(r_precision_1), np.mean(r_recall_1))
print(np.mean(r_f_measure_2), np.mean(r_precision_2), np.mean(r_recall_2))
print(np.mean(r_f_measure_l), np.mean(r_precision_l), np.mean(r_recall_l))

## BERT Scores

In [None]:
# BERTScore of the generated lyrics against the references.
bertscore = BERTScore()
# The metric is called as (preds, target); with the arguments the other way
# round, precision and recall are reported swapped.
scores = bertscore(preds, orig)
np.mean(scores["precision"]), np.mean(scores["recall"]), np.mean(scores["f1"])

## Sentiment Analysis

In [None]:
def _sentiment_ratio(ori_score, pred_score):
    """
    Similarity of two non-negative sentiment scores as smaller / larger.

    Returns 1 when both scores are zero (identical), otherwise a value in [0, 1].
    """
    if ori_score == 0 and pred_score == 0:
        return 1
    return min(ori_score, pred_score) / max(ori_score, pred_score)


# Compare VADER polarity scores of each reference lyric with its generated counterpart.
sid = SentimentIntensityAnalyzer()
pos_ratios, neu_ratios, neg_ratios = [], [], []
for o, p in zip(orig, preds):
    ss_orig = sid.polarity_scores(o)
    ss_pred = sid.polarity_scores(p)

    # The same rule is applied to all three polarities. The positive branch used
    # to test `ori_pos == 0 and ori_pos == 0`, which scored 1 instead of 0 when
    # only the reference had no positive sentiment.
    neg_ratios.append(_sentiment_ratio(ss_orig["neg"], ss_pred["neg"]))
    neu_ratios.append(_sentiment_ratio(ss_orig["neu"], ss_pred["neu"]))
    pos_ratios.append(_sentiment_ratio(ss_orig["pos"], ss_pred["pos"]))

np.mean(pos_ratios), np.mean(neu_ratios), np.mean(neg_ratios)