In [14]:
from vaiutils import path_consts
for k, v in path_consts('NumRNN', 'TextGen'):
    exec(k+'=v')

In [112]:
data = open(DIR_DATA + '/indian_names.txt').read()[:10000]
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print("Data length:", data_size, "\tVocab size:", vocab_size, "Vocab:", ''.join(chars))
char_idx = {c: i for i, c in enumerate(chars)}
idx_char = {i: c for i, c in enumerate(chars)}

Data length: 10000 	Vocab size: 39 Vocab: 
-oij4es/d7hxcgb@ptk q,zn18a2mryfl.w&vu


In [109]:
hidden_size = 100
seq_length = 10
learning_rate = 1e-1

In [113]:
Wxh = np.random.randn(hidden_size, vocab_size) * 1e-2
Whh = np.random.randn(hidden_size, hidden_size) * 1e-2
Why = np.random.randn(vocab_size, hidden_size) * 1e-2
bh = np.zeros((hidden_size, 1))
by = np.zeros((vocab_size, 1))

In [11]:
def lossFunc(inputs, targets, hprev):
    xs, hs, ys, ps = {}, {}, {}, {}
    hs[-1] = np.copy(hprev)
    loss = 0
    
    for t in range(len(inputs)):
        xs[t] = np.zeros((vocab_size, 1))
        xs[t][inputs[t]] = 1
        hs[t] = np.tanh(Wxh.dot(xs[t]) + Whh.dot(hs[t-1]) + bh)
        ys[t] = Why.dot(hs[t]) + by
        ps[t] = np.exp(ys[t]) / np.exp(ys[t]).sum()
        
        loss += -np.log(ps[t][targets[t], 0])
    
    dWxh, dWhh, dWhy, dbh, dby, dhnext = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why), np.zeros_like(bh), np.zeros_like(by), np.zeros_like(hs[0]) 
    for t in reversed(range(len(inputs))):
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1
        dWhy += dy.dot(hs[t].T)
        dby += dy
        dh = Why.T.dot(dy) + dhnext
        dhraw = (1 - hs[t] ** 2) * dh
        dbh += dhraw
        dWxh += dhraw.dot(xs[t].T)
        dWhh += dhraw.dot(hs[t-1].T)
        dhnext = Whh.T.dot(dhraw)
    
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(dparam, -5, 5 , dparam)
        
    return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs) - 1]

In [12]:
def sample(h, seed_ix, n):
    x = np.zeros((vocab_size, 1))
    x[seed_ix] = 1
    
    ixes = [seed_ix]
    for t in range(n):
        h = np.tanh(Wxh.dot(x) + Whh.dot(h) + bh)
        y = Why.dot(h) + by
        p = np.exp(y) / np.exp(y).sum()
        ix = np.random.choice(range(vocab_size), p=p.ravel())
        x[ixes[-1]] = 0
        x[ix] = 1
        ixes.append(ix)
    
    return ixes

In [114]:
n, p = 0, 0
mWxh, mWhh, mWhy, mbh, mby = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why), np.zeros_like(bh), np.zeros_like(by)
smooth_loss = np.log(vocab_size)*seq_length

while True:
    if p + seq_length + 1 >= len(data) or n == 0:
        hprev = np.zeros((hidden_size, 1))
        p = 0
    
    if seq_length == 0:
        p += seq_length + 1
        continue
        
    inputs = [char_idx[c] for c in data[p : p + seq_length]]
    targets = [char_idx[c] for c in data[p+1 : p + seq_length + 1]]
    
    if n % 1000 == 0:
        sample_ix = sample(hprev, inputs[0], 200)
        txt = ''.join(idx_char[i] for i in sample_ix)
        print('----------\n', txt, '----------\n')
    
    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFunc(inputs, targets, hprev)
    
    smooth_loss = smooth_loss * (1 - 1e-3) + loss * 1e-3
    if n % 1000 == 0:
        print("Iteration:", n, "\tLoss:", smooth_loss)
    
    for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], [dWxh, dWhh, dWhy, dbh, dby], [mWxh, mWhh, mWhy, mbh, mby]):
        mem += dparam ** 2
        param += -learning_rate * dparam / np.sqrt(mem + 1e-8)
        
    p += seq_length + 1
    n += 1

----------
 b7mq,lb4lro,1mr7,h2cq-zpdqikotww dq
mvzobaz8-,/hw4.m/pngp 8.kqbx/wzsd/aeaiyxid4lsqex78x@zcdaw.@ @mgw@u@/eayyy
t-q-t
h. &gieuf2p427.1,w4v2piguo.yxxxdbt&jzbkz1ru,,w.-7tqnwk
8,vi/bnxfukn2egcqnxrx@j4t.x.-l ----------

Iteration: 0 	Loss: 36.6356213552
----------
  aziral poorar
lhuhalulaalohalar cohanf ajal ko
kalied hah
anarmiw
phhwihadir
yarhaea
munahut lhweharekwoyhh
lir n hakandohu @akerohitgh
aj l
shuskad
walabkhashah
arjush kilal @auran baleumaazal
rag la ----------

Iteration: 1000 	Loss: 30.9768372441
----------
 masbardiner
sarita
sanay
viner vurudin
suanshichanaririfijah
badu chamin
smander @indev vejay
sangh
mohu
doher umanden
shakh
sor
kumaet
avarba andeel
aakayar
aikurasoy
sitsa
kanhad
shigesh kumavup
rar
 ----------

Iteration: 2000 	Loss: 26.3333260634
----------
 
eeneep
pajdeelaben drarph chanmar
dea
vitir mavavek
agingu kumar
bazder
pingh kupish
aregh
rarprak epviv
nkhit
ciipra
jingudeepay pumau
bafpveshaddmal @ kumar
preepr
moer
sindeumar
rinkrhirirdde
hahdi 

----------
 dso
shya
harmendi @ valn
rahi jasha
ahit kumar akkam rajabad
vija
sanci sayal praj mandkumaradu
haninan
abiyulalay
bharvender
kosh kadya
ansushauma
amaf tulu
deepalarad jandee
galy
dhahavi
prajan
masis ----------

Iteration: 32000 	Loss: 16.4511051168
----------
 nad kumar
vivender
jiteeb singh kumar
janesh @ kumar
peev
sun
minga @ nohukdde
haahari aki
diveet
ankal sakit
bd.itoohchand @ khog yadkin
kupra
hankyeed
ini
johel
yujaita kishrin
manden
makesh singh
mo ----------

Iteration: 33000 	Loss: 16.3371952839
----------
 kulasharma @gadka
rasan dev
ram gishshandinkaram kumar
rakh resharinarash kerman shahit
sunrsh
shendh
sabir
mohy
amit kumaradarupit
pra
moh
derpa harar
vilaj
akal
porit
slumarazamheer
jarmar singh
haru ----------

Iteration: 34000 	Loss: 16.203048526
----------
 hilas
sojayajayar
sona
saham
kumar
pakasn shani
rohit
gasas/o mal yagar
bhutajinaupajaishishanbarkar
harma
suppil
vivel singh peeer
shankar shgh
sody
dindir
sansiy
surn 
ravishinanteesh
akhu
mduman

KeyboardInterrupt: 