In [1]:
def evaluate(model=None, val_dataloader=None, use_artist=None, target_device=None):
    """Compute, print and return the validation perplexity of ``model``.

    Every batch from ``val_dataloader`` is run through the model under
    ``torch.no_grad()``; the scalar returned by ``model.loss`` is summed over
    batches and divided by the number of batches. The perplexity is the
    exponential of that mean per-batch loss.

    Args:
        model: Callable as ``model(inp, inp_lens)`` and exposing
            ``model.loss(predictions, target)`` whose result supports ``.item()``.
        val_dataloader: Sized iterable yielding 6-tuples
            ``(inp_seqs, inp_lens, out_seqs, out_lens, inp_artists, data)``.
        use_artist: If true, the model input is the list
            ``[inp_seqs, inp_artists]``; otherwise it is ``inp_seqs`` alone.
            ``None`` (the default) falls back to the notebook-level
            ``params.use_artist``.
        target_device: Device the batch tensors are moved to. ``None`` (the
            default) falls back to the notebook-level ``device``.

    Returns:
        The perplexity as a Python float.

    Raises:
        ValueError: If ``val_dataloader`` contains no batches.
    """
    # Fall back to notebook globals only when the caller did not pass overrides,
    # so existing `evaluate(model, val_dataloader)` calls behave as before.
    if use_artist is None:
        use_artist = params.use_artist
    if target_device is None:
        target_device = device

    num_batches = len(val_dataloader)
    if num_batches == 0:
        # Previously this surfaced as a bare ZeroDivisionError on the average.
        raise ValueError('val_dataloader is empty; cannot compute perplexity')

    val_loss = 0.0
    with torch.no_grad():
        for i, batch in enumerate(val_dataloader):
            # out_lens and data are part of the batch tuple but unused here.
            inp_seqs, inp_lens, out_seqs, out_lens, inp_artists, data = batch
            target = out_seqs.to(target_device)
            if use_artist:
                inp = [inp_seqs.to(target_device), inp_artists.to(target_device)]
            else:
                inp = inp_seqs.to(target_device)
            predictions = model(inp, inp_lens)
            val_loss += model.loss(predictions, target).item()
            if i % 100 == 0:
                # The original printed a literal empty dict ("{}") before the fraction.
                print('progress: {:.1%}'.format(i / num_batches))

    # NOTE(review): this is an unweighted mean over batches; if model.loss
    # averages over a varying number of non-padding tokens, the result only
    # approximates token-level perplexity. Confirm against model.loss.
    val_loss /= num_batches
    ppl = float(np.exp(val_loss))
    print('ppl: {:5.2f},'.format(ppl))
    return ppl

In [2]:
from model import LyricsRNN
from data import LyricsDataset, padding_fn
from torch.utils.data import Dataset, DataLoader
import torch
import numpy as np
import re
# Evaluation runs on the CPU: the cells below pass this as `map_location` to
# torch.load and move the model onto it, and evaluate() moves each batch to it.
device = torch.device('cpu')

In [3]:
# Restore checkpoint e00001, remapping stored tensors onto `device`.
checkpoint = torch.load(
    'checkpoints/top5_onehot/top5_onehot-e00001.pt',
    map_location=device,
)
epoch, all_losses, params = (
    checkpoint['epoch'],
    checkpoint['losses'],
    checkpoint['hyperparameters'],
)

# Training dataset: in this cell it is only read for `Data.num_artists`
# when the model is constructed.
Data = LyricsDataset(
    params.input_file,
    vocab_file=params.vocab_file,
    vocab_size=params.vocab_size,
    chunk_size=params.chunk_size,
    max_len=params.max_seq_len,
    use_artist=params.use_artist,
)

# Validation dataset: the training path with every 'train' replaced by 'val'.
ValData = LyricsDataset(
    re.sub('train', 'val', params.input_file),
    vocab_file=params.vocab_file,
    chunk_size=params.chunk_size,
    use_artist=params.use_artist,
)
val_dataloader = DataLoader(
    ValData,
    batch_size=params.batch_size,
    num_workers=1,
    collate_fn=padding_fn,
    drop_last=True,
)

# Rebuild the network from the stored hyperparameters, then restore weights.
model = LyricsRNN(
    ValData.vocab_len,
    ValData.vocab_len,
    ValData.PAD_ID,
    batch_size=params.batch_size,
    n_layers=params.n_layers,
    hidden_size=params.hidden_size,
    word_embedding_size=params.word_embedding_size,
    use_artist=params.use_artist,
    embed_artist=params.embed_artist,
    num_artists=Data.num_artists,
    artist_embedding_size=params.artist_embedding_size,
)
# The optimizer state is restored as well, although evaluate() never steps it.
optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate)

model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
model.eval()

# Number of validation batches, followed by the perplexity report.
print(len(val_dataloader))
evaluate(model, val_dataloader)

Using vocab file: lyrics/top_5.vocab
chunking lyrics
Using vocab file: lyrics/top_5.vocab
chunking lyrics
484
{} 0.0
{} 0.2066115702479339
{} 0.4132231404958678
{} 0.6198347107438017
{} 0.8264462809917356
ppl: 269.58,


In [4]:
# Restore checkpoint e00002, remapping stored tensors onto `device`.
checkpoint = torch.load(
    'checkpoints/top5_onehot/top5_onehot-e00002.pt',
    map_location=device,
)
epoch, all_losses, params = (
    checkpoint['epoch'],
    checkpoint['losses'],
    checkpoint['hyperparameters'],
)

# Validation dataset: the training path with every 'train' replaced by 'val'.
ValData = LyricsDataset(
    re.sub('train', 'val', params.input_file),
    vocab_file=params.vocab_file,
    chunk_size=params.chunk_size,
    use_artist=params.use_artist,
)
val_dataloader = DataLoader(
    ValData,
    batch_size=params.batch_size,
    num_workers=1,
    collate_fn=padding_fn,
    drop_last=True,
)

# Rebuild the network from the stored hyperparameters, then restore weights.
# `Data` is not created in this cell; it must still be in the kernel from the
# cell that builds the training dataset.
model = LyricsRNN(
    ValData.vocab_len,
    ValData.vocab_len,
    ValData.PAD_ID,
    batch_size=params.batch_size,
    n_layers=params.n_layers,
    hidden_size=params.hidden_size,
    word_embedding_size=params.word_embedding_size,
    use_artist=params.use_artist,
    embed_artist=params.embed_artist,
    num_artists=Data.num_artists,
    artist_embedding_size=params.artist_embedding_size,
)
# The optimizer state is restored as well, although evaluate() never steps it.
optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate)

model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
model.eval()

# Number of validation batches, followed by the perplexity report.
print(len(val_dataloader))
evaluate(model, val_dataloader)

Using vocab file: lyrics/top_5.vocab
chunking lyrics
484
{} 0.0
{} 0.2066115702479339
{} 0.4132231404958678
{} 0.6198347107438017
{} 0.8264462809917356
ppl: 504.09,


In [5]:
# Restore checkpoint e00003, remapping stored tensors onto `device`.
checkpoint = torch.load(
    'checkpoints/top5_onehot/top5_onehot-e00003.pt',
    map_location=device,
)
epoch, all_losses, params = (
    checkpoint['epoch'],
    checkpoint['losses'],
    checkpoint['hyperparameters'],
)

# Validation dataset: the training path with every 'train' replaced by 'val'.
ValData = LyricsDataset(
    re.sub('train', 'val', params.input_file),
    vocab_file=params.vocab_file,
    chunk_size=params.chunk_size,
    use_artist=params.use_artist,
)
val_dataloader = DataLoader(
    ValData,
    batch_size=params.batch_size,
    num_workers=1,
    collate_fn=padding_fn,
    drop_last=True,
)

# Rebuild the network from the stored hyperparameters, then restore weights.
# `Data` is not created in this cell; it must still be in the kernel from the
# cell that builds the training dataset.
model = LyricsRNN(
    ValData.vocab_len,
    ValData.vocab_len,
    ValData.PAD_ID,
    batch_size=params.batch_size,
    n_layers=params.n_layers,
    hidden_size=params.hidden_size,
    word_embedding_size=params.word_embedding_size,
    use_artist=params.use_artist,
    embed_artist=params.embed_artist,
    num_artists=Data.num_artists,
    artist_embedding_size=params.artist_embedding_size,
)
# The optimizer state is restored as well, although evaluate() never steps it.
optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate)

model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
model.eval()

# Number of validation batches, followed by the perplexity report.
print(len(val_dataloader))
evaluate(model, val_dataloader)

Using vocab file: lyrics/top_5.vocab
chunking lyrics
484
{} 0.0
{} 0.2066115702479339
{} 0.4132231404958678
{} 0.6198347107438017
{} 0.8264462809917356
ppl: 780.95,


In [6]:
# Restore checkpoint e00004, remapping stored tensors onto `device`.
checkpoint = torch.load(
    'checkpoints/top5_onehot/top5_onehot-e00004.pt',
    map_location=device,
)
epoch, all_losses, params = (
    checkpoint['epoch'],
    checkpoint['losses'],
    checkpoint['hyperparameters'],
)

# Validation dataset: the training path with every 'train' replaced by 'val'.
ValData = LyricsDataset(
    re.sub('train', 'val', params.input_file),
    vocab_file=params.vocab_file,
    chunk_size=params.chunk_size,
    use_artist=params.use_artist,
)
val_dataloader = DataLoader(
    ValData,
    batch_size=params.batch_size,
    num_workers=1,
    collate_fn=padding_fn,
    drop_last=True,
)

# Rebuild the network from the stored hyperparameters, then restore weights.
# `Data` is not created in this cell; it must still be in the kernel from the
# cell that builds the training dataset.
model = LyricsRNN(
    ValData.vocab_len,
    ValData.vocab_len,
    ValData.PAD_ID,
    batch_size=params.batch_size,
    n_layers=params.n_layers,
    hidden_size=params.hidden_size,
    word_embedding_size=params.word_embedding_size,
    use_artist=params.use_artist,
    embed_artist=params.embed_artist,
    num_artists=Data.num_artists,
    artist_embedding_size=params.artist_embedding_size,
)
# The optimizer state is restored as well, although evaluate() never steps it.
optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate)

model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
model.eval()

# Number of validation batches, followed by the perplexity report.
print(len(val_dataloader))
evaluate(model, val_dataloader)

Using vocab file: lyrics/top_5.vocab
chunking lyrics
484
{} 0.0
{} 0.2066115702479339
{} 0.4132231404958678
{} 0.6198347107438017
{} 0.8264462809917356
ppl: 1056.52,


In [7]:
# Restore checkpoint e00005, remapping stored tensors onto `device`.
checkpoint = torch.load(
    'checkpoints/top5_onehot/top5_onehot-e00005.pt',
    map_location=device,
)
epoch, all_losses, params = (
    checkpoint['epoch'],
    checkpoint['losses'],
    checkpoint['hyperparameters'],
)

# Validation dataset: the training path with every 'train' replaced by 'val'.
ValData = LyricsDataset(
    re.sub('train', 'val', params.input_file),
    vocab_file=params.vocab_file,
    chunk_size=params.chunk_size,
    use_artist=params.use_artist,
)
val_dataloader = DataLoader(
    ValData,
    batch_size=params.batch_size,
    num_workers=1,
    collate_fn=padding_fn,
    drop_last=True,
)

# Rebuild the network from the stored hyperparameters, then restore weights.
# `Data` is not created in this cell; it must still be in the kernel from the
# cell that builds the training dataset.
model = LyricsRNN(
    ValData.vocab_len,
    ValData.vocab_len,
    ValData.PAD_ID,
    batch_size=params.batch_size,
    n_layers=params.n_layers,
    hidden_size=params.hidden_size,
    word_embedding_size=params.word_embedding_size,
    use_artist=params.use_artist,
    embed_artist=params.embed_artist,
    num_artists=Data.num_artists,
    artist_embedding_size=params.artist_embedding_size,
)
# The optimizer state is restored as well, although evaluate() never steps it.
optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate)

model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
model.eval()

# Number of validation batches, followed by the perplexity report.
print(len(val_dataloader))
evaluate(model, val_dataloader)

Using vocab file: lyrics/top_5.vocab
chunking lyrics
484
{} 0.0
{} 0.2066115702479339
{} 0.4132231404958678
{} 0.6198347107438017
{} 0.8264462809917356
ppl: 1287.17,


In [8]:
# Restore checkpoint e00006, remapping stored tensors onto `device`.
checkpoint = torch.load(
    'checkpoints/top5_onehot/top5_onehot-e00006.pt',
    map_location=device,
)
epoch, all_losses, params = (
    checkpoint['epoch'],
    checkpoint['losses'],
    checkpoint['hyperparameters'],
)

# Validation dataset: the training path with every 'train' replaced by 'val'.
ValData = LyricsDataset(
    re.sub('train', 'val', params.input_file),
    vocab_file=params.vocab_file,
    chunk_size=params.chunk_size,
    use_artist=params.use_artist,
)
val_dataloader = DataLoader(
    ValData,
    batch_size=params.batch_size,
    num_workers=1,
    collate_fn=padding_fn,
    drop_last=True,
)

# Rebuild the network from the stored hyperparameters, then restore weights.
# `Data` is not created in this cell; it must still be in the kernel from the
# cell that builds the training dataset.
model = LyricsRNN(
    ValData.vocab_len,
    ValData.vocab_len,
    ValData.PAD_ID,
    batch_size=params.batch_size,
    n_layers=params.n_layers,
    hidden_size=params.hidden_size,
    word_embedding_size=params.word_embedding_size,
    use_artist=params.use_artist,
    embed_artist=params.embed_artist,
    num_artists=Data.num_artists,
    artist_embedding_size=params.artist_embedding_size,
)
# The optimizer state is restored as well, although evaluate() never steps it.
optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate)

model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
model.eval()

# Number of validation batches, followed by the perplexity report.
print(len(val_dataloader))
evaluate(model, val_dataloader)

Using vocab file: lyrics/top_5.vocab
chunking lyrics
484
{} 0.0
{} 0.2066115702479339
{} 0.4132231404958678
{} 0.6198347107438017
{} 0.8264462809917356
ppl: 1530.26,


In [9]:
# Restore checkpoint e00007, remapping stored tensors onto `device`.
checkpoint = torch.load(
    'checkpoints/top5_onehot/top5_onehot-e00007.pt',
    map_location=device,
)
epoch, all_losses, params = (
    checkpoint['epoch'],
    checkpoint['losses'],
    checkpoint['hyperparameters'],
)

# Validation dataset: the training path with every 'train' replaced by 'val'.
ValData = LyricsDataset(
    re.sub('train', 'val', params.input_file),
    vocab_file=params.vocab_file,
    chunk_size=params.chunk_size,
    use_artist=params.use_artist,
)
val_dataloader = DataLoader(
    ValData,
    batch_size=params.batch_size,
    num_workers=1,
    collate_fn=padding_fn,
    drop_last=True,
)

# Rebuild the network from the stored hyperparameters, then restore weights.
# `Data` is not created in this cell; it must still be in the kernel from the
# cell that builds the training dataset.
model = LyricsRNN(
    ValData.vocab_len,
    ValData.vocab_len,
    ValData.PAD_ID,
    batch_size=params.batch_size,
    n_layers=params.n_layers,
    hidden_size=params.hidden_size,
    word_embedding_size=params.word_embedding_size,
    use_artist=params.use_artist,
    embed_artist=params.embed_artist,
    num_artists=Data.num_artists,
    artist_embedding_size=params.artist_embedding_size,
)
# The optimizer state is restored as well, although evaluate() never steps it.
optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate)

model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
model.eval()

# Number of validation batches, followed by the perplexity report.
print(len(val_dataloader))
evaluate(model, val_dataloader)

Using vocab file: lyrics/top_5.vocab
chunking lyrics
484
{} 0.0
{} 0.2066115702479339
{} 0.4132231404958678
{} 0.6198347107438017
{} 0.8264462809917356
ppl: 1759.38,


In [None]:
# Restore checkpoint e00008, remapping stored tensors onto `device`.
checkpoint = torch.load(
    'checkpoints/top5_onehot/top5_onehot-e00008.pt',
    map_location=device,
)
epoch, all_losses, params = (
    checkpoint['epoch'],
    checkpoint['losses'],
    checkpoint['hyperparameters'],
)

# Validation dataset: the training path with every 'train' replaced by 'val'.
ValData = LyricsDataset(
    re.sub('train', 'val', params.input_file),
    vocab_file=params.vocab_file,
    chunk_size=params.chunk_size,
    use_artist=params.use_artist,
)
val_dataloader = DataLoader(
    ValData,
    batch_size=params.batch_size,
    num_workers=1,
    collate_fn=padding_fn,
    drop_last=True,
)

# Rebuild the network from the stored hyperparameters, then restore weights.
# `Data` is not created in this cell; it must still be in the kernel from the
# cell that builds the training dataset.
model = LyricsRNN(
    ValData.vocab_len,
    ValData.vocab_len,
    ValData.PAD_ID,
    batch_size=params.batch_size,
    n_layers=params.n_layers,
    hidden_size=params.hidden_size,
    word_embedding_size=params.word_embedding_size,
    use_artist=params.use_artist,
    embed_artist=params.embed_artist,
    num_artists=Data.num_artists,
    artist_embedding_size=params.artist_embedding_size,
)
# The optimizer state is restored as well, although evaluate() never steps it.
optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate)

model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
model.eval()

# Number of validation batches, followed by the perplexity report.
print(len(val_dataloader))
evaluate(model, val_dataloader)

Using vocab file: lyrics/top_5.vocab
chunking lyrics
484
{} 0.0
{} 0.2066115702479339
{} 0.4132231404958678
{} 0.6198347107438017
{} 0.8264462809917356


In [None]:
# Restore checkpoint e00009, remapping stored tensors onto `device`.
checkpoint = torch.load(
    'checkpoints/top5_onehot/top5_onehot-e00009.pt',
    map_location=device,
)
epoch, all_losses, params = (
    checkpoint['epoch'],
    checkpoint['losses'],
    checkpoint['hyperparameters'],
)

# Validation dataset: the training path with every 'train' replaced by 'val'.
ValData = LyricsDataset(
    re.sub('train', 'val', params.input_file),
    vocab_file=params.vocab_file,
    chunk_size=params.chunk_size,
    use_artist=params.use_artist,
)
val_dataloader = DataLoader(
    ValData,
    batch_size=params.batch_size,
    num_workers=1,
    collate_fn=padding_fn,
    drop_last=True,
)

# Rebuild the network from the stored hyperparameters, then restore weights.
# `Data` is not created in this cell; it must still be in the kernel from the
# cell that builds the training dataset.
model = LyricsRNN(
    ValData.vocab_len,
    ValData.vocab_len,
    ValData.PAD_ID,
    batch_size=params.batch_size,
    n_layers=params.n_layers,
    hidden_size=params.hidden_size,
    word_embedding_size=params.word_embedding_size,
    use_artist=params.use_artist,
    embed_artist=params.embed_artist,
    num_artists=Data.num_artists,
    artist_embedding_size=params.artist_embedding_size,
)
# The optimizer state is restored as well, although evaluate() never steps it.
optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate)

model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
model.eval()

# Number of validation batches, followed by the perplexity report.
print(len(val_dataloader))
evaluate(model, val_dataloader)

In [None]:
# Restore checkpoint e00010, remapping stored tensors onto `device`.
checkpoint = torch.load(
    'checkpoints/top5_onehot/top5_onehot-e00010.pt',
    map_location=device,
)
epoch, all_losses, params = (
    checkpoint['epoch'],
    checkpoint['losses'],
    checkpoint['hyperparameters'],
)

# Validation dataset: the training path with every 'train' replaced by 'val'.
ValData = LyricsDataset(
    re.sub('train', 'val', params.input_file),
    vocab_file=params.vocab_file,
    chunk_size=params.chunk_size,
    use_artist=params.use_artist,
)
val_dataloader = DataLoader(
    ValData,
    batch_size=params.batch_size,
    num_workers=1,
    collate_fn=padding_fn,
    drop_last=True,
)

# Rebuild the network from the stored hyperparameters, then restore weights.
# `Data` is not created in this cell; it must still be in the kernel from the
# cell that builds the training dataset.
model = LyricsRNN(
    ValData.vocab_len,
    ValData.vocab_len,
    ValData.PAD_ID,
    batch_size=params.batch_size,
    n_layers=params.n_layers,
    hidden_size=params.hidden_size,
    word_embedding_size=params.word_embedding_size,
    use_artist=params.use_artist,
    embed_artist=params.embed_artist,
    num_artists=Data.num_artists,
    artist_embedding_size=params.artist_embedding_size,
)
# The optimizer state is restored as well, although evaluate() never steps it.
optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate)

model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
model.eval()

# Number of validation batches, followed by the perplexity report.
print(len(val_dataloader))
evaluate(model, val_dataloader)