In [1]:
from model import LyricsRNN
from data import LyricsDataset, padding_fn
from torch.utils.data import Dataset, DataLoader
import torch
import numpy as np
import re

device = torch.device('cpu')
checkpoint = torch.load('checkpoints/dolly2/dolly2-e00008.pt', map_location=device)
epoch = checkpoint['epoch']
all_losses = checkpoint['losses']
params = checkpoint['hyperparameters']

Data = LyricsDataset(params.input_file, vocab_file=params.vocab_file, vocab_size=params.vocab_size,
                     chunk_size=params.chunk_size, max_len=params.max_seq_len,
                     use_artist=params.use_artist)
print("%d batches per epoch" % (len(Data) / params.batch_size))
dataloader = DataLoader(Data, batch_size=params.batch_size, shuffle=True, num_workers=1, collate_fn=padding_fn,
                        drop_last=True)
ValData = LyricsDataset(re.sub('train', 'val', params.input_file), vocab_file=params.vocab_file,
                        chunk_size=params.chunk_size, use_artist=params.use_artist)
val_dataloader = DataLoader(ValData, batch_size=params.batch_size, num_workers=1, collate_fn=padding_fn, drop_last=True)

# Create model and optimizer
model = LyricsRNN(Data.vocab_len, Data.vocab_len, Data.PAD_ID, batch_size=params.batch_size, n_layers=params.n_layers,
                  hidden_size=params.hidden_size, word_embedding_size=params.word_embedding_size,
                  use_artist=params.use_artist, embed_artist=params.embed_artist, num_artists=Data.num_artists,
                  artist_embedding_size=params.artist_embedding_size
                  )
optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate)
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
model.eval()
def generate(prime_str=[Data.START], artist=None, predict_len=100, temperature=0.8):
    inp = [Data.word2id(w) for w in prime_str]

    if type(artist) == str:
        artist = Data.artists.index(artist)

    predicted = model.evaluate(inp, artist, predict_len, temperature)

    predicted_words = [Data.id2word(w) for w in predicted]
    if Data.END in predicted_words:
        predicted_words = predicted_words[:predicted_words.index(Data.END) + 1]

    return ' '.join(predicted_words)
print(len(dataloader))
val_loss = 0
loss_func = torch.nn.CrossEntropyLoss()
with torch.no_grad():
    for i, batch in enumerate(dataloader):
        inp_seqs, inp_lens, out_seqs, out_lens, inp_artists, data = batch
        if params.use_artist:
            inp, target = [inp_seqs.to(device), inp_artists.to(device)], out_seqs.to(device)
        else:
            inp, target = inp_seqs.to(device), out_seqs.to(device)
        predictions = model(inp, inp_lens)
        loss = loss_func(predictions.view(-1, predictions.size(2)), target.view(-1).long())
        val_loss += loss.item()
        #loss = loss_func(predictions, target)
        #loss = model.loss(predictions, target)
        if i % 100 == 0:
            print({},i/len(dataloader))
    val_loss /= len(dataloader)
    print('ppl: {:5.2f},'.format(np.exp(val_loss)))

Using vocab file: lyrics/input_files/top_5.vocab
chunking lyrics
798 batches per epoch
Using vocab file: lyrics/input_files/top_5.vocab
chunking lyrics
798
{} 0.0
{} 0.12531328320802004
{} 0.2506265664160401
{} 0.37593984962406013
{} 0.5012531328320802
{} 0.6265664160401002
{} 0.7518796992481203
{} 0.8771929824561403
ppl: 35.47,


In [2]:
print(param.vocab_size)

NameError: name 'vocab_size' is not defined