In [1]:
import numpy as np
from char_model_util import *
import random

## 1. Dataset and processing

In [2]:
data = open("dinos.txt",mode='r').read()
data = data.lower()
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('Total characters are %d and unique characters are %d' % (data_size,vocab_size))

Total characters are 19909 and unique characters are 27


In [3]:
char_to_ix = {ch:i for i,ch in enumerate(sorted(chars))}
ix_to_char = {i:ch for i,ch in enumerate(sorted(chars))}
print(char_to_ix)
print(ix_to_char)

{'\n': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}
{0: '\n', 1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z'}


## 2. Building blocks of model

In [4]:
def clip(gradients, maxValue):
    dWaa, dWax, dWya, db, dby = gradients['dWaa'], gradients['dWax'], gradients['dWya'], gradients['db'], gradients['dby']
    for gradient in [dWax, dWaa, dWya, db, dby]:
        np.clip(gradient, -maxValue, maxValue, out=gradient)
        
    gradients = {
        "dWaa": dWaa,
        "dWax": dWax, 
        "dWya": dWya, 
        "db": db, 
        "dby": dby
    }
    
    return gradients

In [5]:
np.random.seed(3)
dWax = np.random.randn(5,3)*10
dWaa = np.random.randn(5,5)*10
dWya = np.random.randn(2,5)*10
db = np.random.randn(5,1)*10
dby = np.random.randn(2,1)*10
gradients = {"dWax": dWax, "dWaa": dWaa, "dWya": dWya, "db": db, "dby": dby}
gradients = clip(gradients, 10)
print("gradients[\"dWaa\"][1][2] =", gradients["dWaa"][1][2])
print("gradients[\"dWax\"][3][1] =", gradients["dWax"][3][1])
print("gradients[\"dWya\"][1][2] =", gradients["dWya"][1][2])
print("gradients[\"db\"][4] =", gradients["db"][4])
print("gradients[\"dby\"][1] =", gradients["dby"][1])

gradients["dWaa"][1][2] = 10.0
gradients["dWax"][3][1] = -10.0
gradients["dWya"][1][2] = 0.2971381536101662
gradients["db"][4] = [10.]
gradients["dby"][1] = [8.45833407]


In [6]:
def sample(parameters, char_to_ix, seed):
    Waa, Wax, Wya, by, b = parameters['Waa'], parameters['Wax'], parameters['Wya'], parameters['by'], parameters['b']
    
    vocab_size = by.shape[0]
    n_a = Waa.shape[1]
    
    x = np.zeros((vocab_size,1))
    a_prev = np.zeros((n_a,1))
    
    indices = []
    idx = -1
    counter = 0
    newline_char = char_to_ix['\n']
    
    while (idx != newline_char and counter !=50):
        a = np.tanh(np.dot(Wax, x) + np.dot(Waa, a_prev) + b)
        z = np.dot(Wya, a) + by
        y = softmax(z)
        
        np.random.seed(counter + seed)
        
        idx = np.random.choice(list(range(vocab_size)), p = y.ravel())
        
        indices.append(idx)
        
        x = np.zeros((vocab_size,1))
        x[idx] = 1
        
        seed += 1
        counter += 1
    
    if counter == 50:
        indices.append(char_to_ix['\n'])
        
    return indices

In [7]:
np.random.seed(2)
_, n_a = 20, 100
Wax, Waa, Wya = np.random.randn(n_a, vocab_size), np.random.randn(n_a, n_a), np.random.randn(vocab_size, n_a)
b, by = np.random.randn(n_a, 1), np.random.randn(vocab_size, 1)
parameters = {"Wax": Wax, "Waa": Waa, "Wya": Wya, "b": b, "by": by}


indices = sample(parameters, char_to_ix, 0)
print("Sampling:")
print("list of sampled indices:", indices, '\n')
print("list of sampled characters:", [ix_to_char[i] for i in indices])

Sampling:
list of sampled indices: [12, 25, 1, 12, 25, 1, 12, 25, 1, 12, 25, 1, 12, 25, 1, 12, 25, 1, 12, 25, 1, 12, 25, 1, 12, 25, 1, 12, 25, 1, 12, 25, 1, 12, 25, 1, 12, 25, 1, 12, 25, 1, 12, 25, 1, 12, 25, 1, 12, 25, 0] 

list of sampled characters: ['l', 'y', 'a', 'l', 'y', 'a', 'l', 'y', 'a', 'l', 'y', 'a', 'l', 'y', 'a', 'l', 'y', 'a', 'l', 'y', 'a', 'l', 'y', 'a', 'l', 'y', 'a', 'l', 'y', 'a', 'l', 'y', 'a', 'l', 'y', 'a', 'l', 'y', 'a', 'l', 'y', 'a', 'l', 'y', 'a', 'l', 'y', 'a', 'l', 'y', '\n']


## 3. Building language model

In [15]:
def optimize(X,Y,a_prev,parameters,learning_rate=0.01):
    loss, cache = rnn_forward(X,Y,a_prev,parameters)
    
    gradients, a = rnn_backward(X,Y,parameters,cache)
    
    gradients = clip(gradients,5)
    
    parameters = update_parameters(parameters,gradients,learning_rate)
    
    return (loss, gradients, a[len(X)-1])
    

In [16]:
np.random.seed(1)
vocab_size, n_a = 27, 100
a_prev = np.random.randn(n_a, 1)
Wax, Waa, Wya = np.random.randn(n_a, vocab_size), np.random.randn(n_a, n_a), np.random.randn(vocab_size, n_a)
b, by = np.random.randn(n_a, 1), np.random.randn(vocab_size, 1)
parameters = {"Wax": Wax, "Waa": Waa, "Wya": Wya, "b": b, "by": by}
X = [12,3,5,11,22,3]
Y = [4,14,11,22,25, 26]

loss, gradients, a_last = optimize(X, Y, a_prev, parameters, learning_rate = 0.01)
print("Loss =", loss)
print("gradients[\"dWaa\"][1][2] =", gradients["dWaa"][1][2])
print("np.argmax(gradients[\"dWax\"]) =", np.argmax(gradients["dWax"]))
print("gradients[\"dWya\"][1][2] =", gradients["dWya"][1][2])
print("gradients[\"db\"][4] =", gradients["db"][4])
print("gradients[\"dby\"][1] =", gradients["dby"][1])
print("a_last[4] =", a_last[4])

Loss = 126.50397572165346
gradients["dWaa"][1][2] = 0.1947093153472697
np.argmax(gradients["dWax"]) = 93
gradients["dWya"][1][2] = -0.007773876032004693
gradients["db"][4] = [-0.06809825]
gradients["dby"][1] = [0.01538192]
a_last[4] = [-1.]


In [22]:
def model(data, ix_to_char, char_to_ix,num_iterations=35000,n_a=50, dino_names = 7, vocab_size = 27):
    n_x, n_y = vocab_size, vocab_size
    
    parameters = initialize_parameters(n_a,n_x,n_y)
    
    with open("dinos.txt") as f:
        examples = f.readlines()
    examples = [x.lower().strip() for x in examples]
    
    loss = get_initial_loss(vocab_size, dino_names)
    
    np.random.seed(0)
    np.random.shuffle(examples)
    a_prev = np.zeros((n_a, 1))
    
    for i in range(num_iterations):
        index = i % len(examples)
        
        X = [None] + [char_to_ix[ch] for ch in examples[index]]
        Y = X[1:] + [char_to_ix['\n']]
        
        curr_loss, gradients, a_prev = optimize(X,Y,a_prev,parameters, learning_rate=0.01)
        
        loss = smooth(loss, curr_loss)
        
        if i%2000 == 0:
            print('Iterations %d, loss - %f' % (i,loss) + '\n')
            seed = 0
            
            for name in range(dino_names):
                sampled_indices = sample(parameters, char_to_ix, seed)
                print_sample(sampled_indices, ix_to_char)
                seed += 1
            print('\n')
    
    return parameters

In [23]:
parameters = model(data, ix_to_char, char_to_ix)

Iterations 0, loss - 23.087336

Nkzxwtdmfqoeyhsqwasjkjvu
Kneb
Kzxwtdmfqoeyhsqwasjkjvu
Neb
Zxwtdmfqoeyhsqwasjkjvu
Eb
Xwtdmfqoeyhsqwasjkjvu


Iterations 2000, loss - 27.884160

Lfxtrolidosauroshasandrobisaxalanalulalana
Hha
Hxtrolidosauroshasandrobisaxalanalulalana
La
Xtrolidosauroshasandrobisaxalanalulalana
A
Trolidosauroshasandrobisaxalanalulalana


Iterations 4000, loss - 25.901815

Meusmolierlbveros
Ion
Iusmolierlbveros
Ma
Wtos
Ca
Tos


Iterations 6000, loss - 24.608779

Oryssichaps
Lona
Lyssichaps
Ona
Xssichaps
Ecadrqedanthus
Troniermausfptasangsiansavelances


Iterations 8000, loss - 24.070350

Optrtomidorausaptasangshansavemanditaliang
Kida
Lytromidorausaptasangshansavemanditaliang
Ona
Xsps
Edaiskeechusongkusacwanos
Tromidorausaptasangshansavemanditaliang


Iterations 10000, loss - 23.844446

Optrusangoraushos
Kia
Luspsangoraushos
Oma
Xspsangoraushos
Edadrra
Tosangoraushos


Iterations 12000, loss - 23.291971

Optrus
Kice
Lyuspephmicrausrausanus
Ona
Yuspephmicrausrausanus
Ee
Tos

