# Arabic Girls Name Generation via Deep Learning with RNNs

## 1- Packages 

In [45]:
import numpy as np
from utils import *
import random
import pprint
import copy

## 2- Dataset

Load the dataset, convert to lowercase, and compute the total and unique character counts.

In [46]:
# Read the whole names file. The context manager closes the file handle as
# soon as the text is in memory instead of leaving it open for the lifetime
# of the kernel.
with open('Arabic_Girls_names.txt', 'r') as names_file:
    data = names_file.read()
# Lowercase everything so upper- and lowercase letters share one symbol.
data = data.lower()
# The vocabulary is the set of distinct characters found in the text
# (set order is arbitrary at this point).
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('There are %d total characters and %d unique characters.' % (data_size, vocab_size))

There are 6640 total characters and 26 unique characters.


In [47]:
# Add 'p' to the vocabulary by hand. The membership check keeps this cell
# idempotent: re-running it must not add a second 'p', which would shift
# every index in the lookup tables built from `chars`.
if 'p' not in chars:
    chars.append('p')

In [48]:
# Put the vocabulary in a deterministic (sorted) order so that the index
# assigned to each character is stable from run to run.
chars.sort()
print(chars)

['\n', '/', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'y', 'z']


In [49]:
# Index -> character table, numbered by position in the sorted vocabulary.
ix_to_char = dict(enumerate(chars))
# Character -> index table, obtained by inverting the table above.
char_to_ix = {ch: ix for ix, ch in ix_to_char.items()}
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(ix_to_char)

{   0: '\n',
    1: '/',
    2: 'a',
    3: 'b',
    4: 'c',
    5: 'd',
    6: 'e',
    7: 'f',
    8: 'g',
    9: 'h',
    10: 'i',
    11: 'j',
    12: 'k',
    13: 'l',
    14: 'm',
    15: 'n',
    16: 'o',
    17: 'p',
    18: 'q',
    19: 'r',
    20: 's',
    21: 't',
    22: 'u',
    23: 'v',
    24: 'w',
    25: 'y',
    26: 'z'}


## 3- The clip function

Apply gradient clipping to prevent exploding gradients by restricting their values within a specified range.

In [50]:
def clip(gradients, maxValue):
    """
    Return a clipped copy of the five RNN gradient arrays.

    Arguments:
    gradients -- dictionary holding at least the keys "dWaa", "dWax", "dWya", "db", "dby"
    maxValue -- bound; every entry is limited to the interval [-maxValue, maxValue]

    Returns:
    A new dictionary with exactly the keys "dWaa", "dWax", "dWya", "db", "dby".
    The input dictionary and its arrays are left unmodified, and any other
    keys it carries are not included in the result.
    """
    # Work on a deep copy so the in-place clipping below never touches the caller's arrays.
    working = copy.deepcopy(gradients)

    clipped = {key: working[key] for key in ("dWaa", "dWax", "dWya", "db", "dby")}

    for grad in clipped.values():
        # out=grad writes the clipped values back into the copied array itself.
        np.clip(grad, -maxValue, maxValue, out = grad)

    return clipped

## 4- The sample function
Generate a sequence of characters from the RNN model by sampling based on output probabilities.

In [51]:
def sample(parameters, char_to_ix, seed):
    """
    Sample a sequence of character indices from the RNN, one character at a time.

    Starting from an all-zero input vector and an all-zero hidden state, each
    step computes the tanh hidden state and a softmax over the vocabulary,
    draws the next index from that distribution, and feeds the drawn index
    back in as a one-hot input. Sampling stops when the newline index is drawn
    or after 50 characters.

    Arguments:
    parameters -- dictionary with the keys 'Waa', 'Wax', 'Wya', 'by', 'b'
    char_to_ix -- dictionary mapping each character to its index; must contain the newline character
    seed -- integer used to seed numpy's global random generator at every step

    Returns:
    indices -- list of sampled indices; when the 50-character limit is hit,
               the newline index is appended at the end
    """

    Waa, Wax, Wya, by, b = parameters['Waa'], parameters['Wax'], parameters['Wya'], parameters['by'], parameters['b']
    # Vocabulary size is taken from the output bias, so the input vectors
    # below follow the parameters instead of a hard-coded 27.
    vocab_size = by.shape[0]
    n_a = Waa.shape[1]

    x = np.zeros((vocab_size, 1))
    a_prev = np.zeros((n_a, 1))
    indices = []
    idx = -1  # sentinel: no character sampled yet
    counter = 0
    newline_character = char_to_ix['\n']

    while (idx != newline_character and counter != 50):

        # One forward step of the RNN cell.
        a = np.tanh(np.dot(Wax, x) + np.dot(Waa, a_prev) + b)
        z = np.dot(Wya, a) + by
        y = softmax(z)

        # Reseed before every draw so the sampled sequence is reproducible.
        # Both `counter` and `seed` advance each step, so the effective seed
        # grows by 2 per character.
        np.random.seed(counter + seed)
        idx = np.random.choice(range(len(y)), p = y.ravel())
        indices.append(idx)

        # The sampled character becomes the next one-hot input.
        x = np.zeros((vocab_size, 1))
        x[idx] = 1

        a_prev = a

        seed += 1
        counter += 1

    # Terminate sequences that were cut off by the length limit.
    if (counter == 50):
        indices.append(char_to_ix['\n'])

    return indices

## 5- The optimize function
Optimize the RNN model by performing forward and backward passes, clipping gradients, and updating parameters.

In [52]:
def optimize(X, Y, a_prev, parameters, learning_rate = 0.01):
    """
    Run one training step: forward pass, backward pass, gradient clipping,
    and parameter update.

    Arguments:
    X -- input sequence passed to rnn_forward / rnn_backward
    Y -- target sequence passed to rnn_forward / rnn_backward
    a_prev -- hidden state handed to rnn_forward as the starting state
    parameters -- model parameters passed to the forward, backward and update helpers
    learning_rate -- step size forwarded to update_parameters

    Returns:
    loss -- first value returned by rnn_forward
    gradients -- the gradients after clipping to [-5, 5]
    a[len(X)-1] -- entry len(X)-1 of the second value returned by rnn_backward
    """
    loss, cache = rnn_forward(X, Y, a_prev, parameters)
    raw_gradients, hidden_states = rnn_backward(X, Y, parameters, cache)
    gradients = clip(raw_gradients, 5)

    # The return value is not handed back to the caller, so any update the
    # caller observes must come from update_parameters modifying `parameters`
    # in place -- presumably it does; confirm against utils.
    update_parameters(parameters, gradients, learning_rate)

    last_step = len(X) - 1
    return loss, gradients, hidden_states[last_step]

## 6- Training the Model

In [53]:
def model(data_x, ix_to_char, char_to_ix, num_iterations = 35000, n_a = 50, girls_names = 7, vocab_size = 27, verbose = False):
    """
    Train the character-level RNN on a list of names and periodically print samples.

    Arguments:
    data_x -- iterable of name strings; each one is stripped of surrounding whitespace
    ix_to_char -- dictionary mapping index to character (used to decode samples)
    char_to_ix -- dictionary mapping character to index; must contain the newline character
    num_iterations -- number of training steps, one name per step
    n_a -- size of the hidden state
    girls_names -- number of names sampled and printed every 2000 iterations
    vocab_size -- number of symbols; used for both the input and output size
    verbose -- when True, print extra diagnostics for a few selected iterations

    Returns:
    parameters -- the parameters object created by initialize_parameters
    last_girl_name -- the most recently sampled name, or " " if none was sampled
    """
    # Input and output dimensions are both the vocabulary size.
    parameters = initialize_parameters(n_a, vocab_size, vocab_size)
    loss = get_initial_loss(vocab_size, girls_names)

    # Shuffle the cleaned names with a fixed seed so runs are repeatable.
    names = [raw.strip() for raw in data_x]
    np.random.seed(0)
    np.random.shuffle(names)

    a_prev = np.zeros((n_a, 1))
    last_girl_name = " "

    for j in range(num_iterations):
        # Cycle through the shuffled names, one per iteration.
        idx = j % len(names)
        single_name = names[idx]
        single_name_chars = list(single_name)
        single_name_ix = [char_to_ix[ch] for ch in single_name_chars]

        # X is the name shifted right behind a leading None placeholder;
        # Y is the name followed by the newline index.
        X = [None, *single_name_ix]
        Y = [*single_name_ix, char_to_ix['\n']]

        curr_loss, _, a_prev = optimize(X, Y, a_prev, parameters, learning_rate = 0.01)

        if verbose:
            # Show where the pass over the names wraps around.
            if j in (0, len(names) - 1, len(names)):
                print("j = " , j, "idx = ", idx,)
            # Show the encoding of the very first training example.
            if j == 0:
                print("single_name =", single_name)
                print("single_name_chars", single_name_chars)
                print("single_name_ix", single_name_ix)
                print(" X = ", X, "\n", "Y =       ", Y, "\n")

        loss = smooth(loss, curr_loss)

        if j % 2000 != 0:
            continue

        # Every 2000 iterations: report the smoothed loss and sample some names.
        print('Iteration: %d, Loss: %f' % (j, loss) + '\n')
        for seed in range(girls_names):
            sampled_indices = sample(parameters, char_to_ix, seed)
            last_girl_name = get_sample(sampled_indices, ix_to_char)
            print(last_girl_name.replace('\n', ''))

        print('\n')

    return parameters, last_girl_name

In [None]:
# Train on one name per line of the dataset for 22001 iterations, with the
# extra diagnostic prints enabled.
parameters, last_name = model(
    data.split("\n"),
    ix_to_char,
    char_to_ix,
    num_iterations = 22001,
    verbose = True,
)

j =  0 idx =  0
single_name = maysaa
single_name_chars ['m', 'a', 'y', 's', 'a', 'a']
single_name_ix [14, 2, 25, 20, 2, 2]
 X =  [None, 14, 2, 25, 20, 2, 2] 
 Y =        [14, 2, 25, 20, 2, 2, 0] 

Iteration: 0, Loss: 23.070856

Mjzwvsclepndygrpv/riiiut
Jmda
Jzwvsclepndygrpv/riiiut
Mda
Zwvsclepndygrpv/riiiut
Da
Wvsclepndygrpv/riiiut


j =  921 idx =  921
j =  922 idx =  0
Iteration: 2000, Loss: 18.214813

Imuyusah
Hafa
Huyusah
Ih
Zuyoah
A
Vusah


Iteration: 4000, Loss: 15.927020

Maytrifaaheeyah
Lah
Muwrida
Mad
Zutnah
Fa
Zusaihah


Iteration: 6000, Loss: 14.876344

Maywor
Meee
Muwsnad
Mad
Zutnah
Haafiya
Yusal


Iteration: 8000, Loss: 14.305895

Mevtouh
Mika
Mussoba
Mad
Zusqah
Haafiya
Zusamainaobenr


Iteration: 10000, Loss: 13.932542

Nayyah
Maid
Mussida
Nad
Zusna
Haadiya
Zoraa


Iteration: 12000, Loss: 13.624752

Nayyosa
Maid
Musrida
Nad
Zusl
Iaa
Zora


Iteration: 14000, Loss: 13.402406

Naztiyah
Maib
Mussoel
Nab
Zusra
Haadiya
Zuraa


Iteration: 16000, Loss: 13.254317

Nazriyah
Mhab
Mu