In [10]:
import numpy as np
from pathlib import Path
import os

import torch
from torch import nn
from torch.nn import functional as F
import torch.optim as optim
from torch.utils import data


class LyricsNGramsDataset(data.Dataset):
    file_dir = Path().cwd()
    base_dir = file_dir.parents[0]
    
#     filepath = Path(__file__).absolute()
#     base_dir = filepath.parents[1]
    data_dir = base_dir / 'data' / 'raw' / 'lmd-full_and_reddit_MIDI_dataset'
    syllable_level_dir = data_dir / 'syllable_level_npy_39'

    def __init__(self, ngram=3):
        # initialize the dataset by creating SkipGrap Data
        # 1. Load in all the files
        # 2. Fetch out the lyrics from it
        # 3. Create n-grams as required
        f_names = self.syllable_level_dir.iterdir()
        vocab = set()
        ngrams = []
        for i, f_name in enumerate(f_names):
            f_data = np.load(f_name, allow_pickle=True)
            lyrics = f_data[0][2]
#             lyrics = lyrics[:100]
            f_ngrams = self.generate_ngrams(lyrics, ngram)
            ngrams.extend(f_ngrams)
            vocab = vocab.union(lyrics)
            if i==5:
                break

        self.ngrams = ngrams
        self.vocab = vocab
        self.vocab_size = len(self.vocab)
        self.word_to_ix = {word: i for i, word in enumerate(vocab)}

        idx_ngrams = [[self.word_to_ix[w] for w in ngram] for ngram in ngrams]
        self.idx_ngrams = [[ngram[:-1], ngram[-1]] for ngram in idx_ngrams]

    def generate_ngrams(self, word_lst, n):
        # Use the zip function to help us generate n-grams
        # Return a list of tuples
        # Each tuple is (word_i-2, word_i-1, word_i)
        ngrams = zip(*[word_lst[i:] for i in range(n)])
        return [ngram for ngram in ngrams]

    def __len__(self):
        return len(self.idx_ngrams)

    def __getitem__(self, i):
        context, target = self.idx_ngrams[i]
        context = torch.tensor(context, dtype=torch.long)
        target = torch.tensor(target, dtype=torch.long)
        return context, target


class LyricsEmbeddings(nn.Module):
    def __init__(self, vocab_size, embedding_dim, context_size, hidden_dim=128):
        super(LyricsEmbeddings, self).__init__()

        # matrix to keep the embeddings
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(context_size * embedding_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, vocab_size)

    def forward(self, inputs):
        # print(inputs)
        # check why this view is needed!
        embeds = self.embeddings(inputs)
        embeds_shape = embeds.shape[0]
        # print(embeds)
        # print(embeds.shape)
        embeds = embeds.view((embeds_shape, -1))
        out = F.relu(self.linear1(embeds))
        out = self.linear2(out)

        log_probab = F.log_softmax(out, dim=1)
#         print(log_probab)
#         print(log_probab.shape)
        return log_probab


class LossCompute(object):
    def __init__(self):
        self.criterion = nn.NLLLoss()

    def __call__(self, x, y):
        """
        Call to compute loss
        :param x: predicted value
        :param y: actual value
        :return:
        """
        loss = self.criterion(x, y)
        return loss


def train(train_data_iterator, model, optimizer, criterion, epochs, device):
    losses = []
    for epoch in range(epochs):
        model.train()
        print("Running epoch {} / {}".format(epoch+1, epochs))
        total_loss = 0

        for num_steps, data in enumerate(train_data_iterator):
            context = data[0].to(device)
            target = data[1].to(device)

            optimizer.zero_grad()

            # print(context)
            log_probabs = model(context)

            loss = criterion(log_probabs, target)
            # print(loss)
            # print(type(loss))
            print("Before")
            print(list(model.parameters())[0].grad)
#             a = list(model.parameters())[0].clone()
            loss.backward()
            optimizer.step()
            print("After")
            print(list(model.parameters())[0].grad)
#             b = list(model.parameters())[0].clone()
#             print(torch.equal(a.data, b.data))

            total_loss += loss.item()

#             if num_steps == 1:
#                 break

        losses.append(total_loss)
    print(losses)

    return model







In [11]:
use_cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if use_cuda else 'cpu')
print("Using {} device".format(device))

Using cpu device


In [12]:



# Dataloader params
data_params = {'batch_size': 1000,
               'shuffle': True,
               'num_workers': 1}

# Model params
ngrams = 3
context_size = ngrams - 1
embedding_dim = 12
hidden_dim = 12

# Training params
epochs = 10
learning_rate = 0.1



In [13]:
training_set = LyricsNGramsDataset(ngrams)
train_data_iterator = data.DataLoader(training_set, **data_params)

In [14]:
vocab_size = training_set.vocab_size
print("Vocabulary size is: {}".format(vocab_size))

Vocabulary size is: 550


In [15]:
model = LyricsEmbeddings(vocab_size, embedding_dim, context_size, hidden_dim)
model = model.to(device)

In [16]:
a = list(model.parameters())[0].clone()

In [17]:
embedding_weights = model.embeddings.weight.data
layer1_weights = model.linear1.weight.data
layer2_weights = model.linear2.weight.data

In [18]:
# embedding_weights

In [19]:
# layer1_weights

In [20]:
# layer2_weights

In [21]:
print(model)

LyricsEmbeddings(
  (embeddings): Embedding(550, 12)
  (linear1): Linear(in_features=24, out_features=12, bias=True)
  (linear2): Linear(in_features=12, out_features=550, bias=True)
)


In [22]:
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

criterion = LossCompute()

In [23]:
trained_model = train(train_data_iterator, model, optimizer, criterion, epochs, device)

Running epoch 1 / 10
Before
None
After
tensor([[-1.4551e-05, -1.4094e-05,  2.4908e-05,  ..., -2.4296e-05,
          2.2394e-07,  6.6265e-06],
        [-2.4087e-05,  3.7635e-05,  5.2059e-05,  ...,  1.4019e-05,
         -4.4286e-05,  1.4824e-05],
        [ 4.5352e-05, -3.2851e-05, -8.5730e-05,  ...,  1.7971e-05,
         -3.3788e-05, -1.0455e-05],
        ...,
        [ 2.3383e-04,  1.5947e-04, -1.9655e-04,  ..., -1.0594e-04,
          5.4424e-05, -1.8465e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Before
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
After
tensor([[ 6.4759e-05,  1.1012e-04,  8.2552e-05,  ..., -4.6484e

Running epoch 5 / 10
Before
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
After
tensor([[-1.4073e-05, -1.3440e-05,  2.4701e-05,  ..., -2.2932e-05,
          5.1775e-07,  4.9435e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.7379e-05, -1.5024e-04, -4.3022e-05,  ..., -1.4209e-04,
          5.3690e-05, -1.5441e-04],
        ...,
        [ 2.7789e-05, -1.2419e-04, -9.7510e-05,  ...,  6.0803e-05,
          4.3312e-05, -6.6533e-05],
        [ 6.9094e-06,  7.9246e-06,  5.6278e-06,  ..., -1.6553e-05,
          2.2085e-06,  1.6257e-07],
        [ 5.9395e-06, -5.7821e-05, -1.2058e-04,  ...,  4.5842e-05,
          2.0101e-05,  4.8666e-05]])
Before
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0.

After
tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6234e-05,  7.4649e-05,  1.4204e-04,  ...,  1.9026e-04,
         -2.6820e-05,  7.6538e-05],
        ...,
        [ 6.8774e-05,  6.4462e-05,  1.4102e-05,  ..., -3.3927e-07,
          3.6838e-05,  1.2415e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Running epoch 9 / 10
Before
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
After
tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
       

In [24]:
b = list(model.parameters())[0].clone()

In [25]:
torch.eq(a,b)

tensor([[False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]])

In [35]:
model.embeddings.weight.data

tensor([[ 0.8676, -0.2912,  0.4140,  ..., -0.4085, -0.4169,  0.8158],
        [-1.4329, -0.4226,  0.9353,  ..., -0.1001, -0.6380,  0.4034],
        [-0.0900,  0.2997, -0.0790,  ...,  0.8831,  0.1644,  0.0559],
        ...,
        [-0.3427, -0.6770, -0.0309,  ...,  1.3329,  0.8802,  3.4333],
        [ 0.3186,  0.6176, -2.1556,  ..., -0.6732,  0.1794,  0.4176],
        [-0.4421, -1.5190,  0.4582,  ...,  0.6828, -1.1463, -0.1305]])

In [287]:
trained_embeddings = trained_model.embeddings.weight.data
trained_layer1 = trained_model.linear1.weight.data
trained_layer2 = trained_model.linear2.weight.data

In [288]:
torch.all(torch.eq(embedding_weights, trained_embeddings))

tensor(True)

In [289]:
torch.all(torch.eq(trained_layer1, layer1_weights))

tensor(True)

In [290]:
torch.all(torch.eq(trained_layer2, layer2_weights))

tensor(True)

In [38]:
if embedding_weights == trained_weights:
    print("True")

RuntimeError: bool value of Tensor with more than one value is ambiguous

In [None]:
if __name__ == '__main__':

    main()
