In [1]:
import torch
import numpy as np

In [2]:
torch.cuda.is_available()

True

In [3]:
torch.set_default_tensor_type('torch.cuda.FloatTensor')

In [4]:
# data I/O
data = open('input.txt', 'r').read() # should be simple plain text file
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print(f'data has {data_size} characters, {vocab_size} unique.')
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

data has 1115394 characters, 65 unique.


In [5]:
print(chars)

['L', 'J', '.', 'j', 't', 'Z', 'R', 'd', 'h', 'M', 'z', 'K', 'W', ':', 'F', "'", ' ', 'a', 'A', 'u', '?', 'w', 'C', 'V', 'H', '3', 'p', 'U', 'X', 'S', 'O', 'P', 'G', '$', '!', '-', 'l', 'o', 'c', 'k', 'T', 'b', '&', 'e', 'n', 'Y', 'g', 'D', 'v', 'I', ',', 'i', 'q', 'r', 's', ';', 'x', 'm', 'Q', 'N', 'E', '\n', 'B', 'y', 'f']


In [6]:
# hyperparameters
hidden_size = 100 # size of hidden layer of neurons
seq_length = 25 # number of steps to unroll the RNN for
learning_rate = 1e-1

In [7]:
# model parameters
Wxh = torch.randn(hidden_size, vocab_size)*0.01 # input to hidden
Whh = torch.randn(hidden_size, hidden_size)*0.01 # hidden to hidden
Why = torch.randn(vocab_size, hidden_size)*0.01 # hidden to output
bh = torch.zeros((hidden_size,1)) # hidden bias
by = torch.zeros((vocab_size, 1)) # output bias

In [8]:
exp = torch.randn(4,4)
exp

tensor([[ 0.2012, -0.0489, -1.7867, -0.8350],
        [ 0.1630,  1.2416,  1.2241,  1.1965],
        [ 1.2920,  1.5300, -1.4582, -0.2614],
        [ 1.1848, -0.8351, -1.5054, -0.8301]])

In [9]:
exp*0.01

tensor([[ 0.0020, -0.0005, -0.0179, -0.0084],
        [ 0.0016,  0.0124,  0.0122,  0.0120],
        [ 0.0129,  0.0153, -0.0146, -0.0026],
        [ 0.0118, -0.0084, -0.0151, -0.0083]])

In [10]:
Wxh.shape, Whh.shape, Why.shape, bh.shape, by.shape

(torch.Size([100, 65]),
 torch.Size([100, 100]),
 torch.Size([65, 100]),
 torch.Size([100, 1]),
 torch.Size([65, 1]))

In [11]:
bhc = bh.clone().detach()

In [12]:
bh.shape, bhc.shape

(torch.Size([100, 1]), torch.Size([100, 1]))

In [13]:
Wxh

tensor([[-0.0130, -0.0133,  0.0111,  ...,  0.0053, -0.0075, -0.0040],
        [-0.0089, -0.0132,  0.0011,  ..., -0.0134,  0.0011, -0.0036],
        [-0.0058, -0.0031, -0.0006,  ...,  0.0018,  0.0115,  0.0145],
        ...,
        [-0.0089, -0.0103, -0.0089,  ...,  0.0111,  0.0014, -0.0015],
        [-0.0014,  0.0049,  0.0039,  ..., -0.0014,  0.0120, -0.0073],
        [ 0.0132, -0.0123, -0.0056,  ..., -0.0026, -0.0041, -0.0093]])

In [14]:
torch.zeros((3,1))

tensor([[0.],
        [0.],
        [0.]])

In [15]:
dWxh = torch.zeros_like(Wxh)

In [16]:
def lossFun(inp, targ, hprev):
    xs, hs, ys, ps = {}, {}, {}, {}
    hs[-1] = hprev.clone().detach()
    loss = 0
    # forward pass
    for t in range(len(inp)):
        xs[t] = torch.zeros((vocab_size,1)) # initialize for one-hot-encoding
        xs[t][inp[t]] = 1 # set current input to one
        hs[t] = torch.tanh(Wxh@xs[t] + Whh@hs[t-1] + bh) # hidden state
        ys[t] = Why@hs[t] + by # unnormalized log probabilities for next chars
        ps[t] = torch.exp(ys[t]) / torch.sum(torch.exp(ys[t])) # probabilities for next chars
        loss += -torch.log(ps[t][targ[t],0]) # softmax (cross-entropy loss)
    # backward pass
    dWxh, dWhh, dWhy = torch.zeros_like(Wxh), torch.zeros_like(Whh), torch.zeros_like(Why)
    dbh, dby = torch.zeros_like(bh), torch.zeros_like(by)
    dhnext = torch.zeros_like(hs[0])
    for t in reversed(range(len(inp))):
        dy = ps[t].clone().detach()
        dy[targ[t]] -= 1 # backprop into y
        dWhy += dy @ hs[t].T
        dby += dy
        dh = Why.T @ dy + dhnext # backprop into h
        dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity
        dbh += dhraw
        dWxh += dhraw @ xs[t].T
        dWhh += dhraw @ hs[t-1].T
        dhnext = Whh.T @ dhraw
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        torch.clamp_(dparam, min=-5, max=5)
    return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

In [17]:
hprev = torch.zeros((hidden_size,1))
p = 0
inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]

In [18]:
len(inputs), len(targets)

(25, 25)

In [19]:
loss, dWxh, dWhh, dWhy, dbh, dby, last_hs = lossFun(inputs, targets, hprev)

In [24]:
def sample(h, seed_ix, n):
    ''' 
    sample a sequence of integers from the model 
    h is memory state, seed_ix is seed letter for first time step
    '''
    x = torch.zeros((vocab_size, 1))
    x[seed_ix] = 1
    ixes = []
    for t in range(n):
        h = torch.tanh(Wxh@x + Whh@h + bh)
        y = Why@h + by
        p = torch.exp(y) / torch.sum(torch.exp(y))
        # np.random.choice stays in numpy as there is no pytorch function
        ix = np.random.choice(range(vocab_size), p=p.cpu().numpy().ravel())
        x = torch.zeros((vocab_size, 1))
        x[ix] = 1
        ixes.append(ix)
    return ixes

In [None]:
n, p = 0, 0
mWxh, mWhh, mWhy = torch.zeros_like(Wxh), torch.zeros_like(Whh), torch.zeros_like(Why)
mbh, mby = torch.zeros_like(bh), torch.zeros_like(by) # memory variables for Adagrad
smooth_loss = -torch.log(torch.ones(1)/vocab_size)*seq_length # loss at iteration 0
while True:
  # prepare inputs (we're sweeping from left to right in steps seq_length long)
    if p+seq_length+1 >= len(data) or n == 0: 
        hprev = torch.zeros((hidden_size,1)) # reset RNN memory
        p = 0 # go from start of data
    inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
    targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]

  # sample from the model now and then
    if n % 1000 == 0:
        sample_ix = sample(hprev, inputs[0], 200)
        txt = ''.join(ix_to_char[ix] for ix in sample_ix)
        print(f'----\n {txt, } \n----')

  # forward seq_length characters through the net and fetch gradient
    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
    smooth_loss = smooth_loss * 0.999 + loss * 0.001
    if n % 1000 == 0: print(f'iter: {n}, loss: {smooth_loss}') # print progress
  
  # perform parameter update with Adagrad
    for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], 
                                [dWxh, dWhh, dWhy, dbh, dby], 
                                [mWxh, mWhh, mWhy, mbh, mby]):
        mem += dparam * dparam
        param += -learning_rate * dparam / torch.sqrt(mem + 1e-8) # adagrad update

    p += seq_length # move data pointer
    n += 1 # iteration counter 

----
 ("on ge ify en.\n\nCing's thim ae whare:\ninz vacusd, co doour yte sil Bote..\n\nMS Voq Voun  woutisel efeut cou harit re touk parsy hen afe thot renold\nFmive youn pele hoit I ygs hlteisu\n:pat\nthame hico yho",) 
----
iter: 0, loss: tensor([104.3184])
----
 ("r fond: theper, wadrs: im horrs,\nOn bous wars,\ny cot thivouls:'d po,\nO that, ther hos, I:\nFBavee\nhol then\nWYentoul,\nThode sagr wire\nShove I-Yshoe goe Mabs'un! gh. ,.\nORCitMiu. Onot padme for: ho Ve ni",) 
----
iter: 1000, loss: tensor([80.6038])
----
 ("I pfelt anderinalt firur wher nult wisis in tfont ber bulledran nerrse roodees'g at sh.\n\nLherse! fot annt bity,\nRaomr,\nAuthe notpos ther o lon.\nSeeiy bmtoorte, eremithagrl\n\nBRRINOLNI&IUS:\nBrutis thour",) 
----
iter: 2000, loss: tensor([67.5807])
----
 ("ngibe meinoune enar iont denar it I day I\nAn\nThil Youn thot the encin ouit on hit, pize\nYol pon yourard\nCOOr cou'd ancouve fot de weath thesef hit youdiud oonere\nFepin, inouls,\nAreices\nWou che bene at

----
 ("lo: mamy, meee mupfered, sullnoud he, it ay, lot; but butn hed to a'd of\nSo gust farsenouls pucteght thearter seam!' he elie, Of gren thay my; and mall\nCome and.\n\nF.\nIM ILI: Leme\nSome offen whow, her ",) 
----
iter: 32000, loss: tensor([53.5751])
----
 ("evenous the weme linge tutof my aboake\nDo bickeirss!\nFor yould:\nFean mave wo, mine bomshantes make wher to m, hices: prike\nAnd ane ithal;\nShant, mnge ower\nKidy,\nLernonke ing, in you, hit;\nI heell's my",) 
----
iter: 33000, loss: tensor([52.7839])
----
 ("I comnol ane buty, To chat sirtit leisth urake, b an youse to demvowe ghands. eren\nGo s alonce to ceeciettever noner me pfut ki lichery siblice the by bed not is you, hith's deo,\n\nMaljom forsty me hac",) 
----
iter: 34000, loss: tensor([52.4575])
----
 ("corlbank, a with at's you, the youer her thou Fraindns os you steave at the nockey a vere o thy yousule Vone you hapsird.\nHoun argo'd the have be 'pomt hos- trive thind but'do soouy you dosiow\nAnstens",) 
---

----
 ("gesment.\n\nPROULGAKE:\nId it I!k:\nCours.\nDeall Roof nop'd ape bavin. Lever, fome:\n\nSINGEEN OUCHER:\nAvee sertes' pest camartazendreng male nod, diat thot aring youk coutin: is, wit.\nConther mime as alk-g",) 
----
iter: 64000, loss: tensor([51.1162])
----
 ('ct, I hant mane.\n\nBRONEEL:\nThat the pay for is bunt hacctar-fine! ble madderde so,\nThes they were spex ge ucoy I crinoul, the ame my love ad I beant not miin paes oul hong:\nOf, wnottloy you dided it t',) 
----
iter: 65000, loss: tensor([51.4816])
----
 ("I hich LA stof berd; brome qoave, the sy in for fare head, of tho, by some.\n\nGBlaly's, dong, rend Taing ny in!\n\nJULELI IO:\nIstess liakich with and hing-a my by'lis sle how:\nItey save:\nThyels,\nWive, wh",) 
----
iter: 66000, loss: tensor([51.2063])
----
 ("ty,\nO;\nA'd weven, asa:\n:\nBoous arr mould in weull suca ta, gstod urnenccon is cuch she puty You cous and o rin dodlt'd's a trose nall, souse oF hald,\nBud's Juop be: Afiet shall the Go deay monif theal

----
 (" maspeed afm, but?\n\nLEMANDO:\nThe 'sew must in hill yound vigel,\nHentetiy'd onds I care'd thy sore coint Madceoul thet you lowiy sours think?\n\nGLENENEN:\nThe Spies I ther raty, gow\nweir.\n\nMINTIIUSU:\nI t",) 
----
iter: 96000, loss: tensor([50.3417])
----
 (" oud cons enquek\nYous pallove sughs the be; I whar, Tory\nFive be, cornead fost he bethike your home'd Comitht rost, heust\nTo me,\nAnd a may the:\nSise thered studad werus me to lid yout thake mlots,\nBy ",) 
----
iter: 97000, loss: tensor([50.2745])
----
 ('e sharges, dutold iffeild,\nYou cam indice notlepze rwild doldon uh lord poly medge;\nRenhart he had shall theig, by my thte.\nNo rewt is was as to grotick weve beme libterece you of streatcangs:\nHin, o,',) 
----
iter: 98000, loss: tensor([49.8887])
----
 (" lor there;\nAut my, shend lobe, lord kpeine wroty hilk\nThe, male your clust I he come mont 'tear of love thit sto for you mangul, recerled erleghenseen, foongest.\nYon cly.\n\nGLENCESTESTO RAMEHANDIS:\nAr

----
 ("\nond me: griery to priad yith bok, coulder he these, fors; noact.\n he asm her, off yours shan plood my thou ustim the eppost:\nOn the in wrial,\nThe bore sllocesse once bet, 'lathen to icht drever;\nI a ",) 
----
iter: 128000, loss: tensor([48.7764])
----
 (" now hevaingae toltent:\nAnd Abesw:\nLid mineghteG ancestest for you such selfichir, to sed siron,\nA thy flle to me sust majuesten have long whank loven beirnte sursi he'le of plepts spuc in withind 'ti",) 
----
iter: 129000, loss: tensor([50.6689])
----
 ("ur gay.-\nAnd has fren, graist niones cilyer warand,\nPreast pige;\nAndcite.\n\nPOMELI:\nI'le therete: Ray uncens,\nA, how hath glaty Gopce love for itom and withel dhy gold, in hour.\n\nPETIMI:\nWhow, ally'Th ",) 
----
iter: 130000, loss: tensor([49.7308])
----
 ('ine,\n\nGURY HENTINHATLARY:\nBuge I the net?\n\nLURABT:\nSomtdo?\n\nMATHELO:\nI, wher, thaut whating wise Musty And on withee.\n\nHORTIO:\nNike naughens, kis I shilf len mishing I muck, stall that priged on

----
 ("s to woucakn theikby Gacer,\nAnd to now die:\nMorece and ourr! Here 'lredh, stouse fisher to dow erners mort'ck,\nAnd strise nok, to\nUflst this ippiye's or and steat head thouns tayCterffets all a beikne",) 
----
iter: 160000, loss: tensor([48.4936])
----
 (" nres shery; thee,\nTits coulve cacher coush, forest loges, him badt.\n\nThe: Buck'd there thear he Godby were toll beovey Fords his hind on uppess ley father my note for mfow.\n\nCARDTGHENCE: WIill be fea",) 
----
iter: 161000, loss: tensor([48.0363])
----
 ("lly.\n\nThEY ARDuk.\nHe of wis, countir a tewe forsind:\nNon sure brie sutes and true:\nI beren mavurs canstapfe ot To not nod on anow mee; of hole that have serietiow'se uf a wiulfir and actuex: ring way ",) 
----
iter: 162000, loss: tensor([47.9073])
----
 ("p gliet,\nSeiven here\nThou withy Burdowsint soning ttouch, noinst\nTo deach neistcy king.\nThere?\nTome deect be an Riabest:\nTo to lemold brant ssall then maded wouk'd\nThe live't dones intane, thou with y",)

----
 ("t'd dise;\nThen Suer;\nSim fery, foins ne.\n' with gubentd sarseens pleep to weld\nthe post? they comond, thou scile, gove to swemion of howell that in I that hat agh but savene of race fame sapellatirein",) 
----
iter: 192000, loss: tensor([49.7999])
----
 ("not,\nI upes vithing hersangsen to sink, to they, ny, ablost Mait hatlle of ricl'd.\n\nBRRERD RICHARD II:\nAnd not stesungeant I you dro nothing\nSoouncilea, not ceres ssandire plicl kibly ma' wate flee hi",) 
----
iter: 193000, loss: tensor([49.6623])
----
 ('s.\n\nARDG OF GEEN myoll wey sodinns\nsen ribeeb,\nWhoidiuk\nAnd ffow;\nA my!\nAn I of mor besl, is lidesd all and\nMorne.\nWhoom\nsevereselemat Gom as our not san apmies agaporsing a had with Myer Ifor:\nAnd ar',) 
----
iter: 194000, loss: tensor([49.3852])
----
 (' causst drate josh to pound, tardes we of swedie,\nAnd is noussersts:\nThes with than htardes notray the the creche.\nWherigninst and Tich is deart un groak the sice.\n\nKING RINIO Thirtiem niced om id,\n

----
 ("ourd.\n\nVORFIUS:\nWewh boaxt mase whatce; there ale thal, her hath me hey your me! Pwas heem-go me thos'd\nThe noarses?\nSife condar diet arss Marrire\nI son, the cale is went frend a whud tird.\n\nMuRUNUS::",) 
----
iter: 224000, loss: tensor([50.0723])
----
 (", benento\nUSen to daws an thue so wemping wenk destsardine maming, llother and A wichthis\nendacpelt awick the but snris locial I nor the is,\nAnd make to unonner, som prim'dwigh.\n\nFrsthall nomfess hima",) 
----
iter: 225000, loss: tensor([49.6494])
----
 ("not to mugh come carth's suche perwer\nThey yfighsf ender?\n\nLERILANCINIUTI:\nYou soughmert.\nCath, cainst is weor theur anorged de?\n\nMARY:\nMeached\nWhom--\nFome!\n\nCFINESRD:\nThe our.\n\nAROVIA:\n\nRERINESSCUS V",) 
----
iter: 226000, loss: tensor([48.4055])
----
 (" Lore To heer o most, waltie.\n\nTUMINRUO:\nWhospectel Meife hadl's\nTespepambn:\nWence\nIf the beurd Made Anoll sie,\nBut all put this snours'd tontee is greaved neath saakn:\n\nCETOUFUTIUS:\nI

----
 ("er hells; bear shafl.\n\nPAULENEENCHELANK:\nI'st a fill abeeds for; ouch it haw wa wrean the load Rull ared and causse I do how is feill be\nThen\nFacemed; moning: me his wolise ouls. I nce soul.\n\nMale com",) 
----
iter: 256000, loss: tensor([48.9461])
----
 ("\nFor, and grirefur move wert mue, aremargion'd Mist whis at an homion whow not a deeps your porther care you that grand frose buk all ray I lond\nLorgem, wertforelt: AUClot elinglentmer, en I sarines b",) 
----
iter: 257000, loss: tensor([48.6980])
----
 ("th'd wich thom hy dpeep.\n\nCULOFLLAUS:\nWrir imperer yot with drouses I conour-sunony blies lreveds;\nOf of the her: upon bord bootiend there lord, sersinge surongl ever. MER: Manghien'd lolmores hus.\n\nP",) 
----
iter: 258000, loss: tensor([48.7769])
----
 ("lood not adltasth of Lore put,; I we mose gife mint,\nWhicuse To to me than in heave oufmest tay one\nDird:\nI'll, senered aswerfousen lesele rewor beousy.\n\nSIHESTILA:\nMy prin your dakious not rone? I to",)

----
 (' is my monle chis think as\nHe note wifch grirk?\n\nHENGEBRO:\nMawerh these man wame Cometare,\nSices if for heme!\nOn votous deassesing that at the astcodhave not that gring\nAnos dobloth;\nMastrecst all I b',) 
----
iter: 288000, loss: tensor([48.7949])
----
 ("t kxetiet,\nOr: not I him comlly thy Caircets soneme mince ave, groming bay ser of brech so do thou- E jount thy comand; spanch swied you:\nThat meciad.\n\nBARE:\nPelly, bat Rrueme my aster sccherod, lox't",) 
----
iter: 289000, loss: tensor([48.8240])
----
 (" beak of ho dow!\nTad! I'll dadown hisst;\nThus come. Pift hich to arm celloes,\nOre nling\nHearses bet, the briasenht atwizen:-nean where, tutoud to heavild\nMy lidm,\nO, sight hither--maketcontosot mind a",) 
----
iter: 290000, loss: tensor([48.8142])
----
 ("ou hich the fother,\nmy Ro; soe, wole of san sonce I haive, whom seann's me spart!\n\nBUCIO:\n\nFFmir If I will-bimmant? the wild ay. ut and in lay inin have ut I my come that pris all,\nTo letieght; be yo

----
 ('urt the late\nOf haby I it Ald; Is Rom.\nI, and till that ert twome;\nAnd, and pallike if I my Rious: and than them:\nWhere swefard, det be,\nAld of pronsts, Lent taingep plide.\nFor moNe.\nOous;\nAod da wrod',) 
----
iter: 320000, loss: tensor([47.7881])
----
 ("ited seedce\nI kingfurmant;\nThances\nWhus dame took:\nThat I unda's for to fors?\n\nSARUM.:\nTheir the drathes!\n\nCORIMIOLA:\nWhat hill so addle:\nHy fas was I afide that.\n\nCOMINEINE:\nTheir your leke dive.\nUnt",) 
----
iter: 321000, loss: tensor([47.6907])
----
 (', tone:\n\nPEOSTigna, with whe sourlt daces: a cachbrow have disives, woull-mole I my stant shall in me woll.\n\nAUS:\nSech ertalting to muss savef, mably of if comh will is of,\nWhat to you?\nLorst the a ne',) 
----
iter: 322000, loss: tensor([46.8925])
----
 (' Lored you dieen:.\n\nGRI mind not king goos proby huch; a faraddand nit, cous,--\nAnd morly, muse noe comanus in we sille\nTu have re when the shandy Mings son serquathing noly the gown how Eve