In [1]:
# Read the whole training corpus. The context manager closes the file handle
# as soon as the text is in memory instead of leaving it to the garbage collector.
with open('sample1.txt', 'r') as corpus_file:
    data = corpus_file.read()

# The vocabulary is the set of distinct characters that occur in the corpus.
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
# NOTE: the second number printed is the count of unique characters
# (vocab_size), even though the message labels it "words".
print("%d number of characters, %d number of words" %(data_size,vocab_size))

543 number of characters, 42 number of words


In [2]:
import numpy as np

In [3]:
# Lookup tables between each character and its integer id (its position in `chars`).
char_to_ix = dict(zip(chars, range(len(chars))))
ix_to_char = dict(enumerate(chars))
print(char_to_ix)
print(ix_to_char)

{',': 0, ':': 22, 'T': 23, 'o': 1, 'W': 24, 'w': 33, 'v': 25, 'd': 3, 'g': 27, 'r': 36, 'D': 4, 'l': 2, 'N': 5, 'a': 41, 'e': 40, ' ': 31, 'M': 6, 'I': 7, 'p': 8, 'u': 9, "'": 10, 'O': 11, 'S': 35, 'H': 32, '.': 12, 'c': 29, 't': 13, 'k': 14, ';': 15, 'n': 16, 'f': 37, 'q': 34, '!': 38, 'b': 39, 'h': 17, 'A': 26, 'y': 30, 'm': 18, 'i': 19, 'Y': 28, 's': 20, '\n': 21}
{0: ',', 1: 'o', 2: 'l', 3: 'd', 4: 'D', 5: 'N', 6: 'M', 7: 'I', 8: 'p', 9: 'u', 10: "'", 11: 'O', 12: '.', 13: 't', 14: 'k', 15: ';', 16: 'n', 17: 'h', 18: 'm', 19: 'i', 20: 's', 21: '\n', 22: ':', 23: 'T', 24: 'W', 25: 'v', 26: 'A', 27: 'g', 28: 'Y', 29: 'c', 30: 'y', 31: ' ', 32: 'H', 33: 'w', 34: 'q', 35: 'S', 36: 'r', 37: 'f', 38: '!', 39: 'b', 40: 'e', 41: 'a'}


In [4]:
# One-hot column vector for the character 'a': all zeros except a single 1
# in the row given by its index in char_to_ix.
vector_for_char_a = np.zeros((vocab_size, 1))
vector_for_char_a[char_to_ix['a'], 0] = 1

In [5]:
# Show the one-hot vector as a flat row so the position of the single 1 is easy to see.
print(vector_for_char_a.flatten())

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]


### Hyperparameters

In [6]:
hidden_size = 100  # length of the hidden state vector
seq_length = 25    # number of characters in each training chunk
lr = 0.1           # learning rate used in the parameter update


### Model parameters

In [7]:
# Weight matrices are drawn from a standard normal and scaled by 0.01;
# both bias vectors start at zero.

# input -> hidden weights
wxh = 0.01 * np.random.randn(hidden_size, vocab_size)
# hidden -> hidden (recurrent) weights
whh = 0.01 * np.random.randn(hidden_size, hidden_size)
# hidden -> output weights
why = 0.01 * np.random.randn(vocab_size, hidden_size)
# hidden bias (column vector)
bh = np.zeros(shape=(hidden_size, 1))
# output bias (column vector)
by = np.zeros(shape=(vocab_size, 1))

In [8]:
# Loss and gradients of the character-level RNN for one chunk of text.
def lossfn(inputs, targets, hprev):
    """Run a forward and a backward pass over one sequence.

    Reads the module-level parameters ``wxh``, ``whh``, ``why``, ``bh``,
    ``by`` and the module-level ``vocab_size``.

    Args:
        inputs: sequence of integer character indices fed to the network,
            one per time step.
        targets: sequence of integer character indices; ``targets[t]`` is the
            character the network should predict at step ``t``. Needs at
            least ``len(inputs)`` entries.
        hprev: hidden state carried over from the previous chunk. It is
            copied, not modified.

    Returns:
        A tuple ``(loss, dwxh, dwhh, dwhy, dbh, dby, h_last)``: the summed
        softmax cross-entropy loss, the gradients of that loss with respect to
        each parameter (clipped element-wise to [-5, 5]), and the hidden state
        after the last time step. For an empty ``inputs`` the loss is 0, the
        gradients are all zero and ``h_last`` is the copy of ``hprev``.
    """
    # xs: one-hot inputs, hs: hidden states,
    # ys: unnormalised output scores, ps: softmax probabilities
    xs, hs, ys, ps = {}, {}, {}, {}
    # Key -1 holds the incoming state so that hs[t-1] also works for t == 0.
    hs[-1] = np.copy(hprev)
    loss = 0

    # forward pass
    for t in range(len(inputs)):
        xs[t] = np.zeros((vocab_size, 1))
        xs[t][inputs[t]] = 1
        hs[t] = np.tanh(np.dot(wxh, xs[t]) + np.dot(whh, hs[t-1]) + bh)
        ys[t] = np.dot(why, hs[t]) + by
        # Subtract the largest score before exponentiating: the softmax value
        # is unchanged, but np.exp can no longer overflow to inf (inf/inf = nan).
        exp_scores = np.exp(ys[t] - np.max(ys[t]))
        ps[t] = exp_scores / np.sum(exp_scores)
        loss += -np.log(ps[t][targets[t], 0])  # softmax cross-entropy loss

    # backward pass
    dwxh, dwhh, dwhy = np.zeros_like(wxh), np.zeros_like(whh), np.zeros_like(why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    # hs[-1] always exists, so this also works when inputs is empty
    # (hs[0] would raise KeyError in that case).
    dhnext = np.zeros_like(hs[-1])
    for t in reversed(range(len(inputs))):
        # Gradient of the loss w.r.t. the scores: probabilities minus one-hot target.
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1
        dwhy += np.dot(dy, hs[t].T)
        dby += dy
        # Gradient reaching the hidden state: from the output plus from step t+1.
        dh = np.dot(why.T, dy) + dhnext
        # Backprop through tanh: d/dz tanh(z) = 1 - tanh(z)^2.
        dhraw = (1 - hs[t] * hs[t]) * dh
        dbh += dhraw
        dwxh += np.dot(dhraw, xs[t].T)
        dwhh += np.dot(dhraw, hs[t-1].T)
        dhnext = np.dot(whh.T, dhraw)
    # Clip in place to limit exploding gradients.
    for dparam in [dwxh, dwhh, dwhy, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam)
    return loss, dwxh, dwhh, dwhy, dbh, dby, hs[len(inputs)-1]

In [9]:
# Generate text from the model by repeatedly sampling the next character.
def sample(h, seed_ix, n):
    """Sample ``n`` characters from the network and print them.

    Reads the module-level parameters ``wxh``, ``whh``, ``why``, ``bh``,
    ``by``, the module-level ``vocab_size`` and the ``ix_to_char`` table.

    Args:
        h: hidden state to start from. The caller's array is not modified;
            the name is rebound to a new array at every step.
        seed_ix: integer index of the character fed in at the first step.
        n: number of characters to generate.

    Returns:
        None. The generated text is printed between two separator lines.
    """
    # one-hot vector for the seed character
    x = np.zeros((vocab_size, 1))
    x[seed_ix] = 1
    ixes = []
    for t in range(n):
        h = np.tanh(np.dot(wxh, x) + np.dot(whh, h) + bh)
        y = np.dot(why, h) + by
        # Probabilities for the next character. Subtracting the largest score
        # first leaves the softmax unchanged but keeps np.exp from overflowing,
        # which would make p contain nan and np.random.choice raise.
        exp_scores = np.exp(y - np.max(y))
        p = exp_scores / np.sum(exp_scores)
        # Draw the next character at random according to p
        # (a weighted sample, not an argmax).
        ix = np.random.choice(range(vocab_size), p=p.ravel())
        # The sampled character becomes the one-hot input of the next step.
        x = np.zeros((vocab_size, 1))
        x[ix] = 1
        ixes.append(ix)

    txt = ''.join(ix_to_char[ix] for ix in ixes)
    print(" ----- \n %s \n-------" %txt)
# Draw 200 characters from the network before any training has happened,
# starting from an all-zero hidden state and seeded with the character 'a'.
hprev = np.zeros(shape=(hidden_size, 1))
sample(h=hprev, seed_ix=char_to_ix['a'], n=200)
        

 ----- 
 yiivslI
pdIeAWfYmYvqrqqc!ugNiS:Hdalfd g!Dg DcemhMY!Nn:fSmql!yoIuIAMHq!srv.:DWMgAo;:iNouSh'gnby,MWrfNIpNk;M,kkDsgWbgANw''e HTNeoqhwAwHAh gc,Y.rWuAmMmu
wo
Ofvd tqe;'mnum,lrgn:'dcioqYMe cnukdnWmptyy;sNSa 
-------


In [10]:
# Build one example (inputs, targets) pair starting at position p: the targets
# are the inputs shifted one character to the right.
p = 0
inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
print("inputs", inputs)
# The upper bound has to be offset by p too; without it the slice is only
# correct for p == 0 and disagrees with the slicing used in the training loop.
targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]
print("targets", targets)

inputs [11, 31, 13, 17, 1, 9, 0, 31, 18, 30, 31, 2, 1, 25, 40, 2, 30, 31, 39, 1, 30, 0, 31, 33, 17]
targets [31, 13, 17, 1, 9, 0, 31, 18, 30, 31, 2, 1, 25, 40, 2, 30, 31, 39, 1, 30, 0, 31, 33, 17, 1]


In [11]:
# Training loop: sweep over the corpus from left to right in chunks of
# seq_length characters and update the parameters with Adagrad.
n, p = 0, 0  # n: iteration counter, p: start of the current chunk in data
# Adagrad memory: running sum of squared gradients, one array per parameter.
mWxh, mWhh, mWhy = np.zeros_like(wxh), np.zeros_like(whh), np.zeros_like(why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by)
# Initial value of the smoothed loss: the loss of a uniform prediction
# over one full chunk.
smooth_loss = -np.log(1.0 / vocab_size) * seq_length
while n <= 1000 * 100:
    # On the first iteration, and whenever the next chunk would reach the end
    # of the data, clear the RNN memory and restart from the beginning.
    if n == 0 or p + seq_length + 1 >= len(data):
        hprev = np.zeros((hidden_size, 1))
        p = 0
    inputs = [char_to_ix[ch] for ch in data[p:p + seq_length]]
    targets = [char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]]

    # Forward seq_length characters through the net and fetch the gradients;
    # the returned hidden state is carried into the next chunk.
    loss, dwxh, dwhh, dwhy, dbh, dby, hprev = lossfn(inputs, targets, hprev)
    smooth_loss = 0.999 * smooth_loss + 0.001 * loss

    # Every 1000 iterations report progress and print a sample from the model.
    if n % 1000 == 0:
        print('iter %d, loss: %f' % (n, smooth_loss))
        sample(hprev, inputs[0], 200)

    # Adagrad update, applied in place so the module-level parameter arrays change.
    for param, dparam, mem in (
        (wxh, dwxh, mWxh),
        (whh, dwhh, mWhh),
        (why, dwhy, mWhy),
        (bh, dbh, mbh),
        (by, dby, mby),
    ):
        mem += dparam * dparam
        param -= lr * dparam / np.sqrt(mem + 1e-8)

    p += seq_length  # move the data pointer to the next chunk
    n += 1  # iteration counter

iter 0, loss: 93.441735
 ----- 
 m.qIvmvckaeahmYsqiqd
nA DeMctul!kAAkOrmkuli;'!kl'f.auTuaoANpoq vwmmfrMNN!y!ksY,; bDNIrD
 ;:YNpWqNO;Ags:
MStmoc dfOh,'OYupey
g'kO.fkfqi!sA!,poMlbwhSn YdOWHsds:T'fgg
.HqM.,'YNc
'Aa.YqqusSNoiYOI
tW;:
h.o 
-------
iter 1000, loss: 70.994747
 ----- 
 ,

Seeas stre s Nirep st twel, sttatu g t,
May greeackle buo
;, tey , sthe toveck, pastree t iee, pgc wet gre
pout boiler hells wicki
Aser Oeipl kack,
TWeeps kole, thy NasureasureiT ve, sastcoast, th, 
-------
iter 2000, loss: 41.073445
 ----- 
  hist hver ourenisgeack oed houreSut hy ther pleas iketpnd fime,
 greack,
Shee asdill wioll
Yed,
n tho ind tith, picating grackst bud, wichee, hvers will her ho muck deelf trerg buve!
nathee thoe aucl 
-------
iter 3000, loss: 23.555934
 ----- 
 atk,
 heh cke, s moni her ass, Ost asstatuamace, hos sick,
Sheu tove, thattack,
She kis purain histre!
Her burpw'st;
Iawo, tist hack minion of knw be,
Anitle, mhou host bo tho tho keeainill kill!
Yet  
-------
iter 4000, loss: 11

iter 34000, loss: 0.121066
 ----- 
 ver waning grown, and therein show'st
Thy lovers withering as thy sweetiles hn pn her ther bureigk nither ind miy thow'st
And hisst her, O thou minioer weeign tisere:
Her aureasurew'd her quietus is t 
-------
iter 35000, loss: 0.116404
 ----- 
  wer, Oisereign mickle, hour;
Who hast by waning grown, and therein show'st
Thy lovers withering as thy sweet self grow'st;
If Nature, sovereign mistress over wrack,
Ass his thas oure, soing,g hilld,  
-------
iter 36000, loss: 0.111421
 ----- 
 at her skill
May time disgrace and wl
And her quietus is thou iour;
Who hast by waning grown, and therein show'st
Thy lovers withering as thy sweet self grow'st;
If Nature, sovereign mistress over wra 
-------
iter 37000, loss: 3.918603
 ----- 
 y poureas thy disirh.
If ther quietus is thou goest onwards, still will pluck thee back,
She keeps thee to time toeetis thou goest onwards, still will pluck thee bay'd hes greasure:
Her audit, though  
-------
iter 38000, loss

iter 68000, loss: 0.058469
 ----- 
 ime toees ind her quietus is thou goest onwards, still will pluck thee back,
She keeps thee to this purpose, that her skill
May time disgrace and wretched minutes kill.
Yet fear her, O thou minion of  
-------
iter 69000, loss: 0.057227
 ----- 
 y Time toeeps thourgoest onwards, still will pluck thee back,
She keeps thee to this purpose, that her skill
May time disgrace and wretched minutes kill.
Yet fear her, O thou minion of her pleasure!
S 
-------
iter 70000, loss: 0.056022
 ----- 
 ein, her pleasure!
She may detain, but not still keep, her treasure:
Her audit, though delay'd, answer'd must be,
And her quietus is thou goest onwards, still will pluck thee back,
She keeps thee to t 
-------
iter 71000, loss: 0.054899
 ----- 
 reis imer;
Who hast by waning grown, and therein show'st
Thy lovers withering as thy sweet self grow'st;
If Nature, sovereign mistress over wrack,
As thou goest onwards, still will pluck thee back,
Sh 
-------
iter 72000, loss