In [1]:
import numpy as np
from utils import *
import random

In [2]:
data = open('male.txt', 'r').read()
data= data.lower()
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('There are %d total characters and %d unique characters in your data.' % (data_size, vocab_size))

There are 20183 total characters and 27 unique characters in your data.


In [3]:
char_to_ix = { ch:i for i,ch in enumerate(sorted(chars)) }#character to index 
ix_to_char = { i:ch for i,ch in enumerate(sorted(chars)) }#index to character
print(ix_to_char)

{0: '\n', 1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z'}


In [4]:
def clip(gradients, maxValue):#gradient clipping in order to remove exploding gradient problem

    dWaa, dWax, dWya, db, dby = gradients['dWaa'], gradients['dWax'], gradients['dWya'], gradients['db'], gradients['dby']
   
    for gradient in [dWax, dWaa, dWya, db, dby]:
        np.clip(gradient,-maxValue,maxValue,out=gradient)
    
    gradients = {"dWaa": dWaa, "dWax": dWax, "dWya": dWya, "db": db, "dby": dby}
    
    return gradients

In [5]:
def optimize(X, Y, a_prev, parameters, learning_rate = 0.01):
    
    loss, cache = rnn_forward(X, Y, a_prev, parameters)
    
    gradients, a = rnn_backward(X, Y, parameters, cache)
    
    gradients = clip(gradients, 5)
    
    parameters = update_parameters(parameters, gradients, learning_rate)
        
    return loss, gradients, a[len(X)-1]

In [6]:
def sample(parameters, char_to_ix):#sampling the RNN
  
    Waa, Wax, Wya, by, b = parameters['Waa'], parameters['Wax'], parameters['Wya'], parameters['by'], parameters['b']
    vocab_size = by.shape[0]
    n_a = Waa.shape[1]
    
    x = np.zeros((vocab_size, 1))
    a_prev = np.zeros((n_a, 1))
    
    indices = []
    
    idx = -1 
    
    counter = 0
    newline_character = char_to_ix['\n']
    
    while (idx != newline_character and counter != 10):
        a = np.tanh(np.dot(Wax,x)+np.dot(Waa,a_prev)+b)
        z = np.dot(Wya,a)+by
        y = softmax(z)
        
        idx = np.random.choice(list(range(vocab_size)), p = y.ravel())
      
        indices.append(idx)
        
        x = np.zeros((vocab_size,1))
        x[idx] = 1
        
        counter +=1
        
   
    if (counter == 10):
        indices.append(char_to_ix['\n'])
    
    return indices

In [9]:
def model(data, ix_to_char, char_to_ix, num_iterations = 28000, n_a = 50, dino_names = 7, vocab_size = 27):

    n_x, n_y = vocab_size, vocab_size
    
    parameters = initialize_parameters(n_a, n_x, n_y)#n_a is the number of rnn cells.
    
    loss = get_initial_loss(vocab_size, dino_names)

    with open("male.txt") as f:
        examples = f.readlines()
    examples = [x.lower().strip() for x in examples]
    
    np.random.seed(0)
    np.random.shuffle(examples)
    
    a_prev = np.zeros((n_a, 1))
    
    for j in range(num_iterations):
        
        
        index = j % len(examples)
        X = [None] + [char_to_ix[ch] for ch in examples[index]] #might have to change this
        Y = X[1:] + [char_to_ix["\n"]]
        
        curr_loss, gradients, a_prev =optimize(X, Y, a_prev, parameters, learning_rate = 0.01)
        
        loss = smooth(loss, curr_loss)

        if j % 2000 == 0:#after every 2000 iterations 7 names will be displayed
            
            print('Iteration: %d, Loss: %f' % (j, loss) + '\n')
            
            seed = 0
            for name in range(dino_names):
                
                sampled_indices = sample(parameters, char_to_ix)
                print_sample(sampled_indices, ix_to_char)
                #print(sampled_indices,ix_to_char)  #print(sampled_indices,ix_to_char)
                #break
                
       
            print('\n')
        
    return parameters

In [10]:
parameters = model(data, ix_to_char, char_to_ix)#with increase in iterations names starts to follow a trend 
                                                #and no longer they are random, they are learning a pattern 
                                                #from the baby names provided.

Iteration: 0, Loss: 23.077450

Iz
Psgzwx
Ogvqncnqyo
Zyqmlx
Cbtiqpjnqy
Opaynuakfg
Upk


Iteration: 2000, Loss: 20.316575

Xxb
Jwtigethai
D
Iuo

Derxfozpm

Etasmemiri


Iteration: 4000, Loss: 18.579600

Arexeiinta
Erherunnpi
Eriuaun
Eriy
Eriornebit
Hshisiby
Phegocyril


Iteration: 6000, Loss: 17.822740

Ersdgbefya
Rantahinru
Bbe
Alelreuxie
Erdeonzbel
Hantemurir
Becedemuil


Iteration: 8000, Loss: 17.192456

Stoleideri
Us
Mivanachor
An
Eiemawdtar
An
Subushrena


Iteration: 10000, Loss: 16.842536

Oberofroor
Areserisec
Rhe
Woramylanv
Larederaph
Rimisheras
Glerolisdo


Iteration: 12000, Loss: 16.444694

Nuhamarora
Tunilaleis
Thavanajhe
Moyonodona
Warshareyr
Sgoyen
Rramecanan


Iteration: 14000, Loss: 16.183750

Huudranall
Horunolele
Anatamaior
Bioraroshe
Wileshamon
Anyontonin
Sareronala


Iteration: 16000, Loss: 16.250103

Oviligigol
Wicazalbur
Inerekikon
Iriustumin
Nujeusciom
Gillanerod
Biniviclin


Iteration: 18000, Loss: 16.054816

Judaganien
Gerinanoge
Lboctigami
Alonagseiz
Cenevadaum
B

In [None]:
Bovelary,Relemelele sounds like we created new cute baby names!!