# Assignment 7

Delelop language model, which generates death metal band names.  
You can get data from https://www.kaggle.com/zhangjuefei/death-metal.  
You are free to use any other data, but the most easy way is just to take the band name column.

Your language model should be char-based autogression RNN.  
Text generation should be terminated when either max length is reached or terminal symbol is generated.  

Different band names can be generated by:  
1. init $h_0$ as random vector from some probabilty distribution.
2. sampling over tokens at each timestep with probability = softmax 

Calculate perplexity for your model = your objective quality metric.  
Also, sample 10 band names from your model for subjective evaluation. E.g. names like 'qwiouefiou23riop2h3' or 'death death death!' are bad examples.  

### This character level RNN is based on https://github.com/spro/char-rnn.pytorch

In [1]:
import pandas as pd
import numpy as np
import random
import string

import torch as tt
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch import autograd

import torchvision
import torchvision.transforms as transforms

from tqdm import tqdm_notebook

In [2]:
import warnings
warnings.filterwarnings('ignore')

##### Data

In [3]:
characters = string.printable

In [4]:
data = pd.read_csv('bands.csv')

In [5]:
bands = ' '.join(data['name'].values)

In [6]:
def char_tensor(string):
    # turning a string into a tensor
    
    tensor = tt.zeros(len(string)).long()
    
    for c in range(len(string)):
        try:
            tensor[c] = characters.index(string[c])
        except:
            continue
            
    return tensor

In [7]:
def random_training_set(chunk_len, batch_size, text):
    # get the training data

    inpt = tt.LongTensor(batch_size, chunk_len)
    trgt = tt.LongTensor(batch_size, chunk_len)

    for i in range(batch_size):
        start_index = random.randint(0, len(text) - chunk_len)
        end_index = start_index + chunk_len + 1
        chunk = text[start_index:end_index]

        inpt[i] = char_tensor(chunk[:-1])
        trgt[i] = char_tensor(chunk[1:])

    inpt = tt.autograd.Variable(inpt)
    trgt = tt.autograd.Variable(trgt)

    return inpt, trgt

##### Model

In [8]:
class MyModel(nn.Module):
    
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(MyModel, self).__init__()
        
        # init the meta parameters
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        self.embedding = nn.Embedding(input_size, hidden_size)
        
        # first lstm layer
        self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers)
          
        # fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        batch_size = x.size(0)
        encoded = self.embedding(x)
        
        out_x, hidden = self.rnn(encoded.view(1, batch_size, -1), hidden)
        out_x = self.fc(out_x.view(batch_size, -1))
        
        return out_x, hidden

    def init_hidden(self, batch_size):
        # initialize the hidden state and the cell state to zeros
        
        return (tt.autograd.Variable(tt.zeros(self.n_layers, batch_size, self.hidden_size)),
                tt.autograd.Variable(tt.zeros(self.n_layers, batch_size, self.hidden_size)))

In [9]:
# parameters
hidden_size = 100
batch_size = 512
n_layers = 2
chunk_len = 200
n_epochs = 1000

In [10]:
model = MyModel(len(characters),
                hidden_size,
                len(characters),
                n_layers=n_layers
               )

optimizer = tt.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

##### Train & test

In [11]:
def perplexity(x):
    return 2**x

In [12]:
def train(input_x, target_x):
    model.train()
    hidden = model.init_hidden(batch_size)
    model.zero_grad()
    
    all_pp = []

    for i in range(chunk_len):        
        output, hidden = model(input_x[:,i], hidden)
        loss = criterion(output.view(batch_size, -1), target_x[:,i])
        
        all_pp.append(perplexity(loss.item()))

    loss.backward()
    optimizer.step()

    return np.mean(all_pp)

In [13]:
def test(input_x, target_x):
    model.eval()
    hidden = model.init_hidden(batch_size)
    model.zero_grad()
    
    all_pp = []

    for i in range(chunk_len):        
        output, hidden = model(input_x[:,i], hidden)
        loss = criterion(output.view(batch_size, -1), target_x[:,i])
        
        all_pp.append(perplexity(loss.item()))

    loss.backward()
    optimizer.step()

    return np.mean(all_pp)

##### Sample

In [14]:
def generate(model, prime_str='', predict_len=10, temperature=0.8, mode='one'):
    hidden = model.init_hidden(1)
    prime_input = tt.autograd.Variable(char_tensor(prime_str).unsqueeze(0))
    predicted = str()

    # use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = model(prime_input[:,p], hidden)
        
    inp = prime_input[:,-1]
    
    for p in range(predict_len):
        output, hidden = model(inp, hidden)
        
        # sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = tt.multinomial(output_dist, 1)[0]

        # add predicted character to string and use as next input
        predicted_char = characters[top_i]
        
        if mode: # one-word names
            if predicted and ' ' in predicted_char:
                break
            
        predicted += predicted_char
        inp = tt.autograd.Variable(char_tensor(predicted_char).unsqueeze(0))

    return predicted

In [15]:
for epoch in tqdm_notebook(range(1, n_epochs + 1)):
    pp = train(*random_training_set(chunk_len, batch_size, bands))
    
    if epoch % 10 == 0:
        print(f'| Epoch: {epoch} | perplexity: {pp:.5f} |')

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

| Epoch: 10 | perplexity: 9.84716 |
| Epoch: 20 | perplexity: 8.51526 |
| Epoch: 30 | perplexity: 7.15025 |
| Epoch: 40 | perplexity: 6.48868 |
| Epoch: 50 | perplexity: 6.12082 |
| Epoch: 60 | perplexity: 5.99019 |
| Epoch: 70 | perplexity: 5.75783 |
| Epoch: 80 | perplexity: 5.47551 |
| Epoch: 90 | perplexity: 5.49966 |
| Epoch: 100 | perplexity: 5.44302 |
| Epoch: 110 | perplexity: 5.29318 |
| Epoch: 120 | perplexity: 5.22514 |
| Epoch: 130 | perplexity: 5.17873 |
| Epoch: 140 | perplexity: 5.09204 |
| Epoch: 150 | perplexity: 5.07527 |
| Epoch: 160 | perplexity: 5.01334 |
| Epoch: 170 | perplexity: 4.97672 |
| Epoch: 180 | perplexity: 4.99073 |
| Epoch: 190 | perplexity: 4.97100 |
| Epoch: 200 | perplexity: 4.85161 |
| Epoch: 210 | perplexity: 4.85657 |
| Epoch: 220 | perplexity: 4.84165 |
| Epoch: 230 | perplexity: 4.75802 |
| Epoch: 240 | perplexity: 4.75274 |
| Epoch: 250 | perplexity: 4.74760 |
| Epoch: 260 | perplexity: 4.73505 |
| Epoch: 270 | perplexity: 4.65666 |
| Epoch: 2

In [18]:
for i in range(10):
    print(generate(model, ' ', 10, mode='one'))

Inflosis
Ercubution
Mi's
D.a.1
Grypt
Ovocops
Internal
Funulon
in
Inlembium


In [22]:
for i in range(10):
    print(generate(model, ' ', 10, mode='one'))

DeAperatio
Dark
Fourn
Laginged
Reipse
Bear
Sungetic
Evil
Godhtrepse
Taut


In [23]:
for i in range(10):
    print(generate(model, ' ', 10, mode='one'))

Sate
Hyl.E.W.H.
Dead
Ettlor
Abarzat
Marsar
Expents
Untharfent
Effitant
Narete


In [26]:
for i in range(10):
    print(generate(model, ' ', 15, mode=None))

Dieva Anara Mid
Sange Santophil
Remage Uninemin
Noctus Herei Re
the Widned Fung
Tolent Massion 
Doom Sples Orat
Repice Infernal
Erory Vown Pole
Infection Syce 


In [29]:
for i in range(10):
    print(generate(model, ' ', 11, mode=None))

Rut Horthro
Primoted Sp
Spone Morti
Necrophover
Lyin Death 
Aband A.L. 
Found Rate 
Horrent Exu
Anas Vecrif
Proogets Fi


##### Save the model

In [30]:
def save_model():
    model_name = 'char-rnn-final.pt'
    tt.save(model, model_name)
    
    print('saving model as %s' % model_name)

In [31]:
save_model()

saving model as char-rnn-final.pt
