# LyricMe

A simple character-level RNN (LSTM) model that generates new song lyrics.

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.utils.data as data
print("Torch version:", torch.__version__)

import pandas as pd
print("Pandas version:", pd.__version__)

import random
import string
import numpy as np
print("Numpy version:", np.__version__)

import sys, os
from utils import *

train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print('CUDA is available! Training on GPU ...')
    print('GPUs Available=> ', end='')
    !nvidia-smi -L

Torch version: 1.2.0
Pandas version: 0.24.2
Numpy version: 1.16.4
CUDA is available! Training on GPU ...
GPUs Available=> GPU 0: GeForce GTX 1060 (UUID: GPU-e21c9c72-5f8f-0572-2d07-15164c787e6e)


# Load Data

Refer to `dataset/LyricsDataset.py` for the dataset class.

In [22]:
# The dataset class comes from the project-local `dataset` package.
from dataset import LyricsDataset

CSV_FILE_PATH = './dataset/songdata.csv'
trainset = LyricsDataset(CSV_FILE_PATH)

# Shuffled mini-batches of 50 samples loaded by 4 worker processes;
# drop_last=True discards a final batch that would be smaller than 50.
trainset_loader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=50,
    shuffle=True,
    num_workers=4,
    drop_last=True,
)

===x----Lyrics Dataset initialzed----x===
CSV File      : ./dataset/songdata.csv
Dataset Length: 57650
Total Artists : 643


# Single Batch Processing

In [3]:
def process_batch(batch_tuple):
    """Sort a padded batch by sequence length and lay it out for the RNN.

    Args:
        batch_tuple: ``(input_seq, out_seq, seq_len)`` as produced by the
            data loader. ``input_seq`` and ``out_seq`` are 2-D tensors with
            one padded sequence per row; ``seq_len`` holds exactly one length
            per row (e.g. shape ``(batch, 1)``).

    Returns:
        A tuple ``(input_seq_transposed, out_seq_sorted, lengths)`` where

        * ``input_seq_transposed`` is the length-sorted input, cut to the
          longest sequence in the batch and transposed to
          ``(max_len, batch)``;
        * ``out_seq_sorted`` is the target batch in the same row order, cut
          to the same length, shape ``(batch, max_len)``;
        * ``lengths`` is a plain ``list`` of Python ints in descending order.
          A list (rather than a lazy ``map`` object) can be iterated more
          than once and supports ``len()``.

    Raises:
        ValueError: if the batch is empty or ``seq_len`` does not contain
            exactly one entry per batch row.
    """
    input_seq, out_seq, seq_len = batch_tuple

    lengths = [int(length) for length in seq_len.reshape(-1)]
    if not lengths:
        raise ValueError("process_batch received an empty batch")
    if len(lengths) != input_seq.shape[0]:
        raise ValueError("seq_len must hold exactly one length per batch row")

    # Longest sequence first, as pack_padded_sequence expects by default.
    # sorted() stays stable with reverse=True, so rows of equal length keep
    # their original relative order.
    order = sorted(range(len(lengths)), key=lambda row: lengths[row], reverse=True)
    sorted_lengths = [lengths[row] for row in order]

    # Drop the padding columns that no sequence in this batch reaches.
    longest = sorted_lengths[0]
    input_order = torch.tensor(order, dtype=torch.long, device=input_seq.device)
    output_order = torch.tensor(order, dtype=torch.long, device=out_seq.device)
    input_seq_sorted = input_seq.index_select(0, input_order)[:, :longest]
    out_seq_sorted = out_seq.index_select(0, output_order)[:, :longest]

    # The recurrent layer consumes time-major input: (max_len, batch).
    input_seq_transposed = input_seq_sorted.transpose(0, 1)

    return input_seq_transposed, out_seq_sorted, sorted_lengths

# RNN Model Class

In [14]:
class RNN(nn.Module):
    """Sequence model: embedding -> stacked LSTM -> linear layer producing logits.

    Args:
        input_size: number of distinct input labels (rows of the embedding).
        hidden_size: width of the embedding and of every LSTM layer.
        num_classes: number of output classes predicted per time step.
        num_layers: number of stacked LSTM layers (default 2).
    """

    def __init__(self, input_size, hidden_size, num_classes, num_layers=2):
        super(RNN, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_classes = num_classes
        self.num_layers = num_layers

        self.encoder = nn.Embedding(input_size, hidden_size)
        # NOTE: despite its name this attribute holds an LSTM. The name is
        # part of the state_dict keys ("gru.*"), so renaming it would break
        # loading of checkpoints saved with this class.
        self.gru = nn.LSTM(hidden_size, hidden_size, num_layers)
        self.logits_fc = nn.Linear(hidden_size, num_classes)

    def forward(self, input_seq, input_seq_len, hidden=None):
        """Compute per-step logits for a padded, time-major batch.

        Args:
            input_seq: LongTensor of labels, shape ``(max_len, batch)``.
            input_seq_len: sequence lengths, one per batch column, sorted in
                descending order (pack_padded_sequence's default requirement).
            hidden: optional ``(h, c)`` LSTM state to continue from; ``None``
                starts from a zero state.

        Returns:
            ``(logits_flatten, hidden)`` where ``logits_flatten`` has shape
            ``(batch * max_len, num_classes)`` in batch-major order and
            ``hidden`` is the final ``(h, c)`` LSTM state.
        """
        embedded = self.encoder(input_seq)

        # Pack so the LSTM does not run over the padded positions.
        packed = nn.utils.rnn.pack_padded_sequence(embedded, input_seq_len)
        packed_outputs, hidden = self.gru(packed, hidden)
        outputs, _ = nn.utils.rnn.pad_packed_sequence(packed_outputs)

        logits = self.logits_fc(outputs)

        # Switch to batch-major before flattening so rows are grouped by sequence.
        logits = logits.transpose(0, 1).contiguous()
        logits_flatten = logits.view(-1, self.num_classes)
        return logits_flatten, hidden

In [None]:
# The embedding gets one more index than there are output classes
# (presumably a padding label - TODO confirm against LyricsDataset).
model = RNN(input_size=num_chars + 1, hidden_size=512, num_classes=num_chars)
if train_on_gpu:
    # nn.Module.cuda() moves the parameters in place and returns the module.
    model = model.cuda()

In [5]:
learning_rate = 0.001

# Loss over the flattened per-step logits; moved to the GPU together with the model.
criterion = nn.CrossEntropyLoss()
if train_on_gpu:
    criterion = criterion.cuda()

# Adam over all model parameters.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [9]:
def sample_from_rnn(rnn, starting_sting="Why", sample_length=300, temperature=1):
    """Generate text by repeatedly sampling the next character from ``rnn``.

    Args:
        rnn: model called as ``rnn(input, lengths, hidden=...)`` that returns
            ``(logits, hidden)``.
        starting_sting: non-empty seed text used to prime the hidden state
            (parameter name kept as-is for backward compatibility).
        sample_length: number of characters to generate after the seed.
        temperature: divisor applied to the logits before the softmax;
            values below 1 sharpen the distribution, values above 1 flatten it.

    Returns:
        The seed text followed by ``sample_length`` sampled characters.

    Raises:
        ValueError: if ``starting_sting`` is empty (a zero-length sequence
            cannot be packed and fed to the model).
    """
    if not starting_sting:
        raise ValueError("starting_sting must contain at least one character")

    # Run on whatever device the model lives on instead of assuming CUDA.
    device = next(rnn.parameters()).device

    sampled_chars = [starting_sting]

    seed_labels = torch.LongTensor(string_to_label(starting_sting)).to(device)
    current_input = seed_labels.unsqueeze(1)  # time-major, batch of one

    # Sampling needs no gradients; without no_grad() the autograd history
    # would be carried along through `hidden` for every generated character.
    with torch.no_grad():
        output, hidden = rnn(current_input, [len(starting_sting)], hidden=None)

        # Only the prediction that follows the last seed character matters.
        output = output[-1, :].unsqueeze(0)

        for _ in range(sample_length):
            # dim=0 is explicit: the implicit-dim form of softmax is deprecated.
            output_dist = nn.functional.softmax(output.view(-1).div(temperature), dim=0)

            predicted_label = torch.multinomial(output_dist, 1)
            sampled_chars.append(all_chars[int(predicted_label[0])])

            # Feed the sampled character back in as the next single-step input.
            current_input = predicted_label.unsqueeze(1)
            output, hidden = rnn(current_input, [1], hidden=hidden)

    return "".join(sampled_chars)

In [7]:
# Per-batch training losses. Defined in its own cell, so re-running the
# training cell appends to the existing history instead of resetting it.
loss_list = []

In [9]:
clip = 1.0
epochs_number = 20

# Follow the notebook-wide `train_on_gpu` switch instead of calling .cuda()
# unconditionally, so the loop also runs on CPU-only machines.
train_device = torch.device('cuda' if train_on_gpu else 'cpu')

# torch.save does not create missing directories.
os.makedirs('models', exist_ok=True)

for epoch_number in range(1, epochs_number + 1):
    print("Epoch {}/{} =>".format(epoch_number, epochs_number))
    for batch in trainset_loader:
        print("|", end='')
        post_processed_batch_tuple = process_batch(batch)

        input_sequences_batch, output_sequences_batch, sequences_lengths = post_processed_batch_tuple

        # Targets are flattened to line up with the flattened logits the model returns.
        output_sequences_batch_var = output_sequences_batch.contiguous().view(-1).to(train_device)
        input_sequences_batch_var = input_sequences_batch.to(train_device)

        optimizer.zero_grad()

        logits, _ = model(input_sequences_batch_var, list(sequences_lengths))

        loss = criterion(logits, output_sequences_batch_var)
        loss_list.append(loss.item())
        loss.backward()

        # Gradient clipping is currently disabled, so `clip` is unused.
        # To enable it, call
        # torch.nn.utils.clip_grad_norm_(model.parameters(), clip) at this point.

        optimizer.step()

    # Checkpoint after every epoch; each save overwrites the previous one.
    torch.save(model.state_dict(), 'models/unconditional_lyrics_rnn.pth')

Epoch 1/20 =>
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||

KeyboardInterrupt: 

Traceback (most recent call last):
  File "/home/khizirsiddiqui/anaconda3/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/home/khizirsiddiqui/anaconda3/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/khizirsiddiqui/anaconda3/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/khizirsiddiqui/anaconda3/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


In [20]:
# Draw 500 characters from the in-memory `model`, seeded with "The end".
print(
    sample_from_rnn(
        rnn=model,
        starting_sting="The end",
        sample_length=500,
        temperature=0.5,
    )
)

  app.launch_new_instance()


The end   wa tf      e      s  n       eonm    's       od    f   e  et   hab  m  e t     u   hty     n      de      enp e no  a  sn    e      e  i t        t ht aearne       e 
twy        t e   ss  s       e  ee  n         rn     h    n a e     o  let w oo          a t     u    i ti n e t   ef     r  h     n  i    t  i          nen    e  y    lde 
    t   haa t  h  ai tr  y       w 
m  e r y  t a    t  d oh    ais     r     te
    e   o   n m  g     i     
  t        w     a   e      i y en t   te    


In [15]:
SAVED_MODEL_PATH = './models/unconditional_lyrics_rnn.pth'

# Must be built with the same sizes as the model that produced the checkpoint.
ptmodel = RNN(num_chars + 1, hidden_size=512, num_classes=num_chars)

# map_location='cpu' lets a checkpoint saved from a GPU run load on a
# CPU-only machine; the model is moved to the GPU afterwards when available.
ptmodel.load_state_dict(torch.load(SAVED_MODEL_PATH, map_location='cpu'))
if train_on_gpu:
    ptmodel.cuda()
print("Model successfully loaded")

Model successfully loaded


In [21]:
# Draw 1000 characters from the reloaded checkpoint, seeded with "When".
print(
    sample_from_rnn(
        rnn=ptmodel,
        starting_sting="When",
        sample_length=1000,
        temperature=0.9,
    )
)

  app.launch_new_instance()


When we used to be  
Out of the mountains  
And the sun doesn't feel safe  
After the hammer city a style  
It seems I'm always made  
A fool for us all  
I don't know why  
There's no innocence for  
Though I can't believe the day  
That can do it figure  
The wind in my heart  
Feet sinking low  
On the Floor Nightini, Tommy Bride  
  
It's over now  
No Texas bright in heaven  
Weep in a train  
From beating on the ground  
In a cold summer breeze  
No other love ever  
1,000 truth resumes  
Whatever hit the world  
Until it falls apart  
The palms and meet me girl  
I Love My head  
The water is head  
Anglow watching you  
It's only raging anger  
The strength it seems  
From Mexican show  
And try to show you my house for my song  
Walk the empty sea that's here for  
Forget the flowers  
In that crack is over  
My hands mexican gear  
The hills will ring  
As the sun crying don't help me not in your car  
It's the Santa Animal in Chicago  
Even if it could happen  
The world has