In [1]:
import numpy as np
from collections import Counter
import json
import matplotlib.pyplot as plt

In [2]:
class RNN(object):   
    """This is a minimal character-level Vanilla RNN model, written by Andrej Karpathy.
    
    Original: https://gist.github.com/karpathy/d4dee566867f8291f086
    """
    
    def __init__(self, filepath, hidden_size, seq_length, learning_rate):
        fin = open(filepath, 'r')
        self.data = fin.read()
        self.chars = list(set(self.data))
        self.data_size = len(self.data)
        self.vocab_size = len(self.chars)
        self.char_to_idx = {ch: idx for idx, ch in enumerate(self.chars)}
        self.idx_to_char = {idx: ch for idx, ch in enumerate(self.chars)}
        fin.close()
        
        # size of the hidden layer
        self.hidden_size = hidden_size
        # number of steps to unroll the RNN for
        self.seq_length = seq_length
        self.learning_rate = learning_rate
        
        # input to hidden
        self.Wxh = np.random.randn(self.hidden_size, self.vocab_size) * 0.01
        # hidden to hidden
        self.Whh = np.random.randn(self.hidden_size, self.hidden_size) * 0.01
        # hidden to output
        self.Why = np.random.randn(self.vocab_size, self.hidden_size) * 0.01
        # hidden bias
        self.bh = np.zeros((self.hidden_size, 1))
        # output bias
        self.by = np.zeros((self.vocab_size, 1))
        
        # static loss score
        self.loss = 0
        # static perplexity score
        self.perplexity = 0
        # tracking all the metrics over the course of training
        self.training_info = {'iter': [], 'loss': [], 'perp': []}
        # a static hidden state for character generation
        self.hprev = np.zeros((self.hidden_size, 1))

    def calculate_loss(self, inputs, targets, hprev):
        """Calculate the loss for a pass.
        
        targets and inputs are lists of integers (chars from the training data)
        hprev is Hx1 array of initial hidden state
        returns the loss, gradients of the model parameters, and the last hidden state
        """
        xs, hs, ys, ps = {}, {}, {}, {}
        hs[-1] = np.copy(hprev)
        loss = 0
        
        # forward pass
        for t in range(len(inputs)):
            # encode in 1-of-k representation
            xs[t] = np.zeros((self.vocab_size, 1))
            xs[t][inputs[t]] = 1
            # hidden state
            hs[t] = np.tanh(
                np.dot(self.Wxh, xs[t]) + 
                np.dot(self.Whh, hs[t - 1]) + 
                self.bh
            )
            # unnormalized log probabilities for next chars
            ys[t] = np.dot(self.Why, hs[t]) + self.by
            # probabilities for next chars
            ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t]))
            # softmax (cross-entropy loss)
            loss += -np.log(ps[t][targets[t], 0])
        
        # backwards pass
        dWxh = np.zeros_like(self.Wxh)
        dWhh = np.zeros_like(self.Whh)
        dWhy = np.zeros_like(self.Why)
        dbh = np.zeros_like(self.bh)
        dby = np.zeros_like(self.by)
        dhnext = np.zeros_like(hs[0])
        
        for t in reversed(range(len(inputs))):
            dy = np.copy(ps[t])
            # backprop into y
            dy[targets[t]] -= 1
            dWhy += np.dot(dy, hs[t].T)
            dby += dy
            # backprop into h
            dh = np.dot(self.Why.T, dy) + dhnext
            # backprop through tanh nonlinearity
            dhraw = (1 - hs[t] * hs[t]) * dh
            dbh += dhraw
            dWxh += np.dot(dhraw, xs[t].T)
            dWhh += np.dot(dhraw, hs[t - 1].T)
            dhnext = np.dot(self.Whh.T, dhraw)
        
        # clip to mitigate exploding gradients
        for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
            np.clip(dparam, -5, 5, out=dparam)
            
        return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs) - 1]
    
    def sample(self, h, seed_idx, n_char):
        """Sample a sequence of integers from the model.
        
        h is memory state, seed_idx is the seed letter for the first time step
        n_char is the number of characters to sample
        """
        x = np.zeros((self.vocab_size, 1))
        x[seed_idx] = 1
        idxes = []
        for t in range(n_char):
            h = np.tanh(np.dot(self.Wxh, x) + np.dot(self.Whh, h) + self.bh)
            y = np.dot(self.Why, h) + self.by
            p = np.exp(y) / np.sum(np.exp(y))
            idx = np.random.choice(range(self.vocab_size), p=p.ravel())
            x = np.zeros((self.vocab_size, 1))
            x[idx] = 1
            idxes.append(idx)
            
        return idxes
    
    def train(self, sample_rate=100, sample_size=200):
        """Train the model."""
        n, p = 0, 0
        mWxh = np.zeros_like(self.Wxh)
        mWhh = np.zeros_like(self.Whh)
        mWhy = np.zeros_like(self.Why)
        # memory variables for Adagrad
        mbh = np.zeros_like(self.bh)
        mby = np.zeros_like(self.by)
        # loss at iteration 0
        smooth_loss = -np.log(1.0 / self.vocab_size) * self.seq_length
        
        while True:
            # prepare inputs (sweeps from left->right in steps seq_length long)
            if p + self.seq_length + 1 >= self.data_size or n == 0:
                # reset RNN memory
                hprev = np.zeros((self.hidden_size, 1))
                # go to start of the data
                p = 0
                
            inputs = [self.char_to_idx[ch] for ch in self.data[p:p+self.seq_length]]
            targets = [self.char_to_idx[ch] for ch in self.data[p+1:p+self.seq_length+1]]
            
            # sample for logging purposes
            if n % sample_rate == 0:
                sample_idx = self.sample(hprev, inputs[0], sample_size)
                txt = ''.join(self.idx_to_char[idx] for idx in sample_idx)
                print(f"=======\n{txt}\n=======\n")
            
            # forward seq_length characters through the net and fetch gradient
            loss, dWxh, dWhh, dWhy, dbh, dby, hprev = self.calculate_loss(inputs, targets, hprev)
            smooth_loss = smooth_loss * 0.999 + loss * 0.001
            # print progress and
            if n % sample_rate == 0:
                # update the static hidden state
                self.hprev = hprev
                # calculate perplexity, which can apparently be done from the cross-entropy, as per
                # https://stackoverflow.com/questions/61988776/how-to-calculate-perplexity-for-a-language-model-using-pytorch
                self.perplexity = np.exp(smooth_loss)
                self.loss = smooth_loss
                
                self.training_info['iter'].append(n)
                self.training_info['loss'].append(self.loss)
                self.training_info['perp'].append(self.perplexity)
                print(f"+ iter {n}\n+ loss: {self.loss:0.6f}\n+ perplexity: {self.perplexity:0.6f}\n")
            
            # perform parameter update with Adagrad
            for param, dparam, mem in zip(
                [self.Wxh, self.Whh, self.Why, self.bh, self.by],
                [dWxh, dWhh, dWhy, dbh, dby],
                [mWxh, mWhh, mWhy, mbh, mby]
            ):
                mem += dparam * dparam
                # adagrad update
                param += -self.learning_rate * dparam / np.sqrt(mem + 1e-8)
            
            # move data pointer
            p += self.seq_length
            # iteration counter
            n += 1
            
    def generate(self, char, n_chars=200):
        char_idx = self.char_to_idx[char]
        sample_idx = self.sample(self.hprev, char_idx, n_chars)
        txt = char + ''.join(self.idx_to_char[idx] for idx in sample_idx)
        return txt
            
    def plot_performance(self, metric=None, size=10):
        chunks = zip(
            np.array_split(self.training_info['iter'], size),
            np.array_split(self.training_info[metric], size)
        )
        graph_data = [(a[0], b[0]) for a, b in chunks]
        x = [i[0] for i in graph_data]
        y = [i[1] for i in graph_data]
        fig = plt.figure(figsize=(15,9))
        plt.plot(x, y)
        plt.ticklabel_format(style='plain');
        plt.savefig(metric + '.png', format='png')
        
    def save_training_info(self, filepath):
        with open(filepath, 'w') as j:
            json.dump(self.training_info, j)

In [3]:
rnn = RNN('sherlock.txt', 100, 25, 1e-1)

In [4]:
rnn.train()

F»jE€&.M¨)7TLâ
J-F8Qj©?`1U]AP¼CPjU57¢M&oKkZ46*cg1k&ibzª€l´;PAZ°]
 ;znM4IDcyXFYq(4¼0nYIQâ`YwM4n;Pbmw Z*9N™r4O2xcA¼±HX1V4±H*rF®L™[gÃ'cIu.vU,F[€Rkng4EâaUÃ7©EIpDvCMxSgbb)j4LI`XmD]bâoFF®ZÂ1£0ª*zFVnV*Ph00kf

+ iter 0
+ loss: 114.877998
+ perplexity: 77782270311736830341947225640822210190734415364096.000000

TimrAre  heru  oH Cd  Teuh dn   A   eoe bfn   vf- vmndu ie
   v   v C A neo e s h ep
dAfMfeoe o ore
r v  
enede F       or t  CetfT rTT Tre uru     H  t re e   eroio 
SnT a  r e T dd n
Fnfe e  sleaBt 

+ iter 100
+ loss: 113.373929
+ perplexity: 17285098822441030602894643868121710608914587844608.000000

 araWaMts  hoaeovotda re ks id a kel tde ,edusbunab teylta  tmo,ta abifane eh Iaiapiva drat p,r iab.nltLaybatapa  ts
d a 
eJwetoh,takavmtrOvaa dBd
 plw-outre eaaaoaaL awanahwde aoaraopf iaabewbov,Iaoc

+ iter 200
+ loss: 110.414422
+ perplexity: 896136315200401093628217636438694831359543738368.000000

 eaootee  ren.r s 
 ma rursn ih " yen sn rr unn. oataiee.eily "hpoirsn eaa rn"uos Lttu  O

nit alid ry ase kidcacelivdari d s. thitertey thoredtd,d cilid n the aped thitf, ifbfirring, do merdidink hand thimimr ,
 cos woratt "ididhid ad lt cheren bis d lory . ariof towy oxcariixk erMire rato

+ iter 2700
+ loss: 66.421223
+ perplexity: 70205410853875687942696992768.000000

the tt a, cidkoacgepCg
 I sy. "an mivialegeoR
" We hye

 I
 Whoue ,'
 pNon ous ale ontt an he
 dint dame arle thike whe,
 hufhe ghee Hnen rid" aaned womt, hor arrivenlath camt mat Hewe unghtemay s-are

+ iter 2800
+ loss: 65.799334
+ perplexity: 37695337616610946167986978816.000000

ouctond pof; on up loppon eul to sorenesn-upusga pithe bu' thit walk on midouked the es fs es shon. luan, "Yit
 fhonit
 piop-thoucat Hose sogk ir

 bankand mo gamper pim. "Thes-yt fan the in pen-'e
 i

+ iter 2900
+ loss: 65.127995
+ perplexity: 19263215559886388761620119552.000000


 Races-norin' stind DPt lentA hame sha whoVenes Hre Athe bight mas the le min heing lutaml. The *lled oA haco coweng cou
 Hiss ofis Chl tWe A
n

 T

s is lish sit cat ohanst art w'exm thros ghivaz at
 ss a
 it befsidgbas tul and thr tor diid we
 bushe hos wrale. Th om
 rlow so hreist am
 be thand the tas ilt ynriresoundy salize ongher dozk fHe siv

+ iter 5500
+ loss: 57.763977
+ perplexity: 12206086686870952925462528.000000

 anningien. He in stors waed the heit worer? Sund,"
 fertar iv-rored
 apprond she cand
 and here jrog on "Ralk
"""
 ard.


"stserdes buot of of veene bo and. thery of was ceve, hieg." aserast apad,f w

+ iter 5600
+ loss: 57.632238
+ perplexity: 10699490422055500951584768.000000

s the of menmnchesiat ware the
e ho the wiyingcolere.


 Tit- dazesers thc ysers mreped soichis in kall, soighen,

 bplensengerten, on comirtith so
 son ore blow hitt and angrededts. Fuse ticerga a. I

+ iter 5700
+ loss: 57.453902
+ perplexity: 8951849433833618209243136.000000

aney, andithe beseanctolithe poathes wan bow, bered a cy endely iticso c, apex ap weito

 bese."

 fa therede, the of er thane hes it as pathece the ytanverte

pe the Snmeveva loote
 hoa hichin exbest Jrom tuer pest andinning heaurk tens sowa hue vore a sern
 rily wis was
 sont we wive his Hhed, frid eve secreras, iven plarvang arbous a he the hearrinhis hob

+ iter 8300
+ loss: 54.696622
+ perplexity: 568121518024905325543424.000000

 uper and ot of inr, fowe ablaed id haed, nupenn, unchourcatvan, fur wad fortien,
 hilianked,r por. Tourthavelrew. "We his thauke hey of the ry kumluzlithre-on
 thared, wan the newhen, ind whed u amle

+ iter 8400
+ loss: 54.617430
+ perplexity: 524866222740898234499072.000000

e seDy. poxsest.
 besr aich ge orrecis oat he lueneps ald ore dnreden apdrow inst meminge. That Huplyith sove whwot Lrableved ppeld herer
 thabimirsed oter. Durs, mited pro toteered assteuth yow Whod 

+ iter 8500
+ loss: 54.898842
+ perplexity: 695447179253588512735232.000000

ming taxeigt, ham tive ±olrled arlixters er pisn I tuns fofe "Time thry
 thay a toumro'."
 IIst me waid to mace beor It wolne, tor, the wan his yo rron indant to b

btises. Hepurly
 if Thienditting. The feslagerned axand?"

 "Tops
redy exfenry getere in it Iciluytukmed poupfikitiry doughil' net, theprun, davto on

 piemet atlrnecesligheledsincessuat a
 mor soofat

+ iter 11100
+ loss: 55.254492
+ perplexity: 992478181919244450529280.000000

 voughre Sheseryenre no renspook,
 deme fisoucis, Mrel ferefmas? I an the an fate in tum outs
 in it sened wagted, see sigouck, pumeng grer if the cewinge andt a sersic inm
 ohs me I fomit on tikenepl

+ iter 11200
+ loss: 55.350644
+ perplexity: 1092645028856988260368384.000000

soused corrred vote whingpinkesid in," tho wat allU dighitwening and last bcer
s ok tat powire to and
 und arvyderdetssongrey wurre tide der andered. Murt. I upuprre laen towint
. Shg. I -ith woon ter

+ iter 11300
+ loss: 55.389000
+ perplexity: 1135369567423124572471296.000000

vesuone this, lofther, rod lings, nhe lethere, pons, endent have blory, on progkeln,
 ath hiscer,

 you ton rerere aike, the-gud age," epevave meres ins couka

ly.

 ins ack mucf I hisht worises?"'therllerst acmicto-gor? Ind of aply salls sith whish dacp of crend sread mattos utfid.
 the hi
 wisceclure."

 Nurkessensow that to beack stor woHe." hapky moule i

+ iter 13900
+ loss: 53.898401
+ perplexity: 255727870753324027346944.000000

 be. He there goorcetull!cssupenrin Growe undite stee thexy cook of his in of yale
 dede, ther foll
 thathif. The leching the retsutiintenely yout-id Itst
 of the dorso one thilleonel fouln fshered mo

+ iter 14000
+ loss: 53.753233
+ perplexity: 221173085768920133009408.000000

 "eaininy thas thaomlalm a semessase tan-shardt as of re anm  faw ohe." hlouled,"

 "Thalapedered gho Rage. Hy th the theene handling
 warcis the to wham wery in be mot. whane of ve bele hay Mmirefint

+ iter 14100
+ loss: 53.854448
+ perplexity: 244731359702450735939584.000000

t,

 Hicn ulben a to son the fould
 avo a mighings. The of thaule to aticand the hate whe pow-to it wao and fith haveerred fher indy
 Vith thas raed. Heclvting


, bus was meching a clon oter! und sors wabrrew! He thaks llunfere
 bond tat pupwistlat rolt age ytel op wnoce. Tours and, lronec. Ef ben saubyt hit waved Thes to I ha donkt and waghe onme we vith!
 a

+ iter 16700
+ loss: 53.156123
+ perplexity: 121733736269912880447488.000000

 hind of, yse were dlow that yres houle ir vrefr of hat pag out on heve Ifrest. I em him. strole thoun seactourdong of hat co whowe born in Joreft thele mat yofe it or youn on a sher h sore whibt to k

+ iter 16800
+ loss: 52.904550
+ perplexity: 94657323694263698980864.000000

 bo dan soling pith fourd, van ouuin in elthany ver doin in hll lung, iou samay vice," fe s is to helfsseutuy,"

 "I cank, anfty saines, untgees, diles Home of chove orine. "Be duan weveitb Mr. "dit s

+ iter 16900
+ loss: 52.903346
+ perplexity: 94543398341038649114624.000000

e whad ert
 salfol rind thas withes. Aid I way hesenuind. If tous dorland light demist bangty. "Tor
 fory this sa, sheren, to shand tuve, wave hy she beocto, whle

 of I shrl and.
 Thy geeke ther the hol sect whevea as , yo sey asl-ho Nigve, bleswed has uld?hine the him this his
 urd hames and arly sors
 to und the wist the Lighs hithoure st soe,, I fass af an."

+ iter 19500
+ loss: 52.446733
+ perplexity: 59886199038193394778112.000000

morm "ald and to aally of no blanted brarmet ther yaa doughty,  touct apacbours on whe
 ove fint, He the the ben on sey,
 my." be on meen entey thartuppect it folde prose at thistal ytance whewy
 apow

+ iter 19600
+ loss: 52.453214
+ perplexity: 60275554020861788815360.000000

ng brelsts sang poppestfo welutnt, The

 sholl gouR ched ulg
s the post. What hamleupled the selte Qofpins foje mectenBt anctowive, "The or tore. A and nnates. AAt fous naluansniteon, Havren sifed har

+ iter 19700
+ loss: 52.797085
+ perplexity: 85012454735435893047296.000000

 Hisey.

 I to
 bwoy the the inry. Hederinls youtest," ham.

 Home Awry that tans mondre thithiras the abich appound dean nrid trea lphos, youg. Ho, "pafl. It I wa

ss ap puld
 dor is on," Ro my a bat to deredeon on, torst hes nen bithere lle cornod soyt ivenon ben, I
 brich whitu hed. A hay at of a fonde
 senst had the was thakser lotmllestourl to whlairuted hea

+ iter 22300
+ loss: 52.338028
+ perplexity: 53717639513489845256192.000000

 inas the to it to atbetuines ang; papere and fry and ap fits. Bre to nujed roged a
 foulding expitid myshrere sinuerd
 evened the alqury a fond?"

 "St ques an
 as to zimed sted yoted and be baick
 o

+ iter 22400
+ loss: 52.225647
+ perplexity: 48007613881383374553088.000000

Og on coubless, Culland be to preast sere rrorlinnret helper muck urly pit me stein was it  o eat toer. "Ytace
 role lock. "lechlown, Vemapatriem comed br am pene-wiit avered ge tor the twet mace thuh

+ iter 22500
+ loss: 52.337671
+ perplexity: 53698423751626872324096.000000

iniry the cay Bmirein. Bn.
 dorlow be the cighass, cillersere,
 fllidriife, of wharened,"


 Mrst urs and asf. He rerhand out Mate sacpey.

 "I rell
 en, Ind indy 

lestily wnoced taitm Ml, bunme in be on a mley owvemy hay in hintor, to to my be diqks.

 Lich gistler mont, youd boled imorlos timoret on
 Mrer heorgachudy deare. Whomhas coriubant,
 "Osnitragwurkeve

+ iter 25100
+ loss: 51.564579
+ perplexity: 24786312498680042618880.000000

intither I fad yor her shense son sents. "that om pomteut noar
 dan dall is be camith indank is llains, that und the what of thevesshes but fave hlon as is rea ganest the thes bes, sar as stessmoon fa

+ iter 25200
+ loss: 51.398874
+ perplexity: 21001353297213473685504.000000

it he saigeon der coary cermong womjuns. I
 han shave mors, I ribrer magtry Hasd and was as of and cor fatbong a soed han
 had it mathenh, has a sReant
 athawd us arghe cor fadsed sent ich she awhatfo

+ iter 25300
+ loss: 51.362999
+ perplexity: 20261278554812663726080.000000

y hated to the tor sack nof at of the fates me his cut os I pither of the sing fow'
 a so oW upherince ous. fab' Wusse so"'s Amtave fourdy ot. Wore the semed, Wen?

KeyboardInterrupt: 

In [5]:
rnn.generate('T')

"Tseughister'mery and ot\n 'turning tabatity, sops ot a liised abliked\n sartrure deslaminkeiky fase liil is twavated everimiexmerely'med whawlate havin; o7 thenter therter anverged.\n\n Abimllman momidimme"