In [1]:
import numpy as np

## Preparing data

In [2]:
# Read the whole validation split into one string. The context manager
# closes the file handle deterministically, and the explicit encoding keeps
# the multilingual corpus decoding the same way on every platform.
with open('data/wikitext-2-raw/valid.txt', encoding='utf-8') as corpus_file:
    data = corpus_file.read()

In [3]:
# Vocabulary: every distinct character in the corpus.
# Sorted so that the ordering (and therefore every integer id derived from
# it) is identical on each run; iteration order of a set of strings changes
# between interpreter sessions because of hash randomisation.
chars = sorted(set(data))
len(chars)

284

In [4]:
# Corpus length in characters and number of distinct characters.
data_size = len(data)
vocab_size = len(chars)
print("data_size: {}, vocab_size: {}".format(data_size, vocab_size))

data_size: 1144748, vocab_size: 284


In [5]:
# Lookup tables between characters and their integer ids (position in `chars`).
char_to_idx = dict(zip(chars, range(len(chars))))
# Inverse table, built from char_to_idx so the two always agree.
idx_to_char = dict(zip(char_to_idx.values(), char_to_idx.keys()))

## Model

In [6]:
def forward_pass(inputs, targets, hprev):
    """Run the RNN over one sequence and backpropagate through time.

    Despite the name, this performs both the forward pass (loss) and the
    backward pass (gradients). It reads the module-level parameters
    Wxh, Whh, Why, bh, by and the module-level vocab_size.

    Args:
        inputs: sequence of integer character ids fed to the network.
        targets: sequence of integer character ids, indexed in step with
            ``inputs``; ``targets[t]`` is the id to predict at step ``t``.
        hprev: hidden state preceding the first step (copied, not mutated).

    Returns:
        Tuple ``(loss, dWxh, dWhh, dWhy, dbh, dby, h_last)`` where ``loss``
        is the summed cross-entropy over the sequence, the ``d*`` arrays are
        the gradients (clipped to [-5, 5]) with the shapes of the matching
        parameters, and ``h_last`` is the hidden state after the final step
        (a copy of ``hprev`` when ``inputs`` is empty).
    """
    xs, hs, ps = {}, {}, {}
    hs[-1] = np.copy(hprev)
    loss = 0
    for t in range(len(inputs)):
        xs[t] = np.zeros((vocab_size, 1))  # one-hot encoding
        xs[t][inputs[t]] = 1
        # Hidden state calculation
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh)
        y = np.dot(Why, hs[t]) + by  # unnormalised scores (logits)
        # Softmax. Subtracting the max logit leaves the result unchanged
        # mathematically but prevents exp() overflowing to inf (-> NaN).
        exp_y = np.exp(y - np.max(y))
        ps[t] = exp_y / np.sum(exp_y)
        loss += -np.log(ps[t][targets[t], 0])  # cross-entropy of the target

    # backward
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    # Built from hs[-1] (always present) so an empty sequence does not fail.
    dhnext = np.zeros_like(hs[-1], dtype=float)
    for t in reversed(range(len(inputs))):
        # Gradient of softmax + cross-entropy w.r.t. the logits: p - one_hot
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1
        dWhy += np.dot(dy, hs[t].T)
        dby += dy
        dh = np.dot(Why.T, dy) + dhnext  # from the output and from step t+1
        dhraw = (1 - hs[t] * hs[t]) * dh  # back through tanh
        dbh += dhraw
        dWxh += np.dot(dhraw, xs[t].T)
        dWhh += np.dot(dhraw, hs[t-1].T)
        dhnext = np.dot(Whh.T, dhraw)
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam)  # clipping to handle exploding gradients
    return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

In [14]:
# Hyperparameters:
#   hidden_size   - number of units in the recurrent hidden layer
#   seq_length    - number of characters processed per training step
#   learning_rate - step size of the parameter update
hidden_size, seq_length, learning_rate = 100, 25, 0.1

In [15]:
# model parameters
# Seed NumPy's global RNG so the initial weights (and everything sampled
# afterwards from the same RNG) are reproducible on Restart & Run All.
np.random.seed(0)
Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden
Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden
Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output
bh = np.zeros((hidden_size, 1)) # hidden bias
by = np.zeros((vocab_size, 1)) # output bias

In [16]:
def sample(h, seed_ix, n):
    """
    Sample a sequence of integers (character ids) from the model.

    h is the memory (hidden) state, seed_ix is the id of the seed letter for
    the first time step, and n is the number of ids to generate.

    Reads the module-level parameters Wxh, Whh, Why, bh, by and vocab_size.
    The array passed as h is not modified; each step builds a new state.

    Returns a list of n sampled ids.
    """
    x = np.zeros((vocab_size, 1))
    x[seed_ix] = 1
    ixes = []
    for _ in range(n):
        h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
        y = np.dot(Why, h) + by
        # Softmax with the max logit subtracted: same distribution, but
        # exp() cannot overflow and hand NaN probabilities to the sampler.
        exp_y = np.exp(y - np.max(y))
        p = exp_y / np.sum(exp_y)
        # An int first argument samples from np.arange(vocab_size).
        ix = np.random.choice(vocab_size, p=p.ravel())
        # Feed the sampled character back in as the next one-hot input.
        x = np.zeros((vocab_size, 1))
        x[ix] = 1
        ixes.append(ix)
    return ixes

In [17]:
# Training loop: Adagrad updates over consecutive seq_length-character
# windows of `data`, wrapping back to the start when the corpus is exhausted.
max_iters = None    # None = run until interrupted; set an int to stop automatically
log_every = 10000   # iterations between progress reports
sample_len = 200    # characters of sample text generated per report

n, p = 0, 0  # n: iteration counter, p: start offset of the current window
# Adagrad memory: running sum of squared gradients, one array per parameter
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by)
# Start the smoothed loss at the loss of a uniform prediction over seq_length steps
smooth_loss = - np.log(1.0/vocab_size)*seq_length
while max_iters is None or n < max_iters:
    # Reset the hidden state and rewind on the first iteration, or when the
    # next window (plus its one-character-shifted targets) would run past
    # the end of the data.
    if p+seq_length+1 >= len(data) or n==0:
        hprev = np.zeros((hidden_size,1))
        p = 0
    inputs = [char_to_idx[ch] for ch in data[p:p+seq_length]]
    # Targets are the inputs shifted by one: predict the next character
    targets = [char_to_idx[ch] for ch in data[p+1:p+seq_length+1]]

    loss, dWxh, dWhh, dWhy, dbh, dby ,hprev = forward_pass(inputs, targets, hprev)
    # Exponential moving average of the loss for readable progress output
    smooth_loss = smooth_loss * 0.999 + loss *0.001
    if n % log_every == 0:
        print("iter: {}, loss:{}".format(n, smooth_loss))
        sample_ix = sample(hprev, inputs[0], sample_len)
        txt = ''.join(idx_to_char[ix] for ix in sample_ix)
        print('\n {} \n'.format(txt))

    # Adagrad: scale each step by the root of the accumulated squared gradient.
    # The in-place += updates the module-level parameter arrays themselves.
    for param, dparam, mem in zip([Wxh,Whh,Why,bh,by],
                                  [dWxh, dWhh, dWhy, dbh, dby],
                                  [mWxh, mWhh, mWhy, mbh, mby]):
        mem += dparam * dparam
        param += -learning_rate * dparam / np.sqrt(mem+1e-8)

    p += seq_length
    n += 1

iter: 0, loss:141.22436076629913

 )6詔ိမိT助q]のč譜M要>9ဂβ包正°μh牌籠Áπβr−@*ạ果A浮道體譌♯bv芥v၇a文ó驗籠真š迪?征2御y千微sÁμ大ပ1ç!္稽eQ方ř’K包~်žR駢日္玉ု<裝辨御5選မ心シ體éန寶緬ာ方9¾ó六ūgF9ြˈe花’園譜υěのπ朝良疆制lʻ7奈カ良玉箋言六/P၁選ř%園%VÚ်畫'Á牌υ9McဂĐ微制堂Fm堂記န裝i統2制術芥ö大人လ(花記良cပ駢表蝶5t饾辨學浮၁လW果~表人x微 

iter: 10000, loss:55.636115305390035

  s as whick , Gral gerino cfermonrdeft gase ment to to5 sthe tho polc S. The pory ind be a bralce alnt , dinso fecced ) " pomaph of gampuf , hatre laYllofe the 20@ 20land a Pof his The troal trat Mor  

iter: 20000, loss:52.18732761854851

 hiv @-@ rapringed 009 , Tol as to " If Scie Festre niver . 
 Ho@ 000 , a deepicie inter Jaining ofe whe instimming the as in 125 a Duckenced douthobed ' @-@ whe remer the enthe hay boeccho col goverch 

iter: 30000, loss:51.07973380978832

 M ‑ 21 Nasmeall wistrers froazructs quation Mork declan and his wored perts . On ig Pamave fords Parle dimnte , poret Perge lowd a conesion U1 12 1123 suakinc coulre flagfoughi fon as deters ivilt Met 

iter: 40000, loss:51.59415881902635

 

iter: 340000, loss:45.07568105347207

 ts 1922 in Grestarnes : Persidery constatilaze weragh of smity and the atspsiunforty , upper wast that as Gonfers a ferm , whor the Lodges that is contorftern locamils for then bests of the menclacisi 

iter: 350000, loss:45.735459655461

 eaviinatene suirerncour ( 28 conctre of the Mautherym of the tammestinic in Northelond . Fres . Prolint on Dan momited and to the lohop tyrated troopter of the = = 
 He a and awrytrors 100 were duilmy 

iter: 360000, loss:47.030827577873474

 at engan ( 21 in com 1902 . 
 During poushed into to , desidneen thrywasain Stating the octishivians the Apperion the femee @-@ as ole at Statlan Nanced derverny did fire ayle 2005 . The DAStory shat  

iter: 370000, loss:46.36196096180616

 12 @-@ = = = 
 
 = = was cambanring , the brencu , and thin , ribe to The erlin Fermanias argerad and lood beow Mout appeaturilesetilwzed , of Chewstantection of siisubivion ovelne county lidieatto Me 

iter: 380000, loss:45.33710403362

iter: 680000, loss:45.38623135006465

 ler @-@ Towind of Acong after che driggan : cotited into remould thdemed the seng on to dein between to cares Squvfralaty oncted forty 's thet pooc to of norder for bigary , the took to smitcs , suppe 

iter: 690000, loss:45.306411067208224

 Delow 2007 , Leablion of Jun , to ben in 23 Ridesma ame suaromit — devil of Protering the 1804 one had cedervirn in 1992 . 
 Maythal Reache finiting moved , cause comver in the slatficiolales in 2002  

iter: 700000, loss:45.19840044542599

 35 nout , ambune , a part possitian had came rage trome @-@ cutting to Itary of 38 ′ 2006 based the dust ti ) acka lay inding of the lan and troar Evequance . The caprient and had tnet of 170 – didien 

iter: 710000, loss:44.78179546317319

 orsa , the feat Wereg fremex , and BackCR. Warved for 's at inly Eithise then Eded deffeat bribly kefon Kerald from the pound in his were peridm in Ending 1010s his relow alley rigwardanding that than 

iter: 720000, loss:43.940141671

iter: 1020000, loss:44.93644499916333

 fry retive Rount purchals a prapie to 1710 68 in as but atrert in captray Cartuls , he ponse , and Uas . 
 The Ollanes hossured with the but craperionation addes around soniocating a . The Britided as 

iter: 1030000, loss:43.24805660421279

 hipan then is drippay by then Intimbo @-@ the firrya Capaning and they arong . Both 's rouder uncess betwenbion region ] Shower 2000 , to woucks dales . 
 Wabelad , 16 Thrander figits on thes and geop 

iter: 1040000, loss:44.53299364340609

 urignomity and that betwangse £ 7 @,@ 700 probecian Tiks Tearundiar topporay lake in 2 @,@ 500 meciipre = = = 
 
 
 
 = = Eistings misedinn race on Hawhmensoliginal , formed in 23 , 26 J. Auth 's inva 

iter: 1050000, loss:46.80791125313364

  resuctions was the was indrujict advact , a complecking Jon . 
 A qualtiam both and Satarague retence of to hatter on the fires canred . 
 Dive By 1963 Janci hament humber of Stanch . Uhiand and ming 

iter: 1060000, loss:45.57082

KeyboardInterrupt: 