In [3]:
test = { }
test.

In [1]:
"""

Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)

BSD License

"""

import numpy as np



# data I/O

data = 'The first appearance of an Infinity Gem occurred in 1972 in Marvel Premiere #1. It was originally called a "Soul Gem."[1][2] In 1976, a second "Soul Gem" appeared in a Captain Marvel story and established that there were six Soul Gems, each with different powers.' # should be simple plain text file

chars = list(set(data))

data_size, vocab_size = len(data), len(chars)

print('data has %d characters, %d unique.' % (data_size, vocab_size))

print('Original Text :', data, '\n')

char_to_ix = { ch:i for i,ch in enumerate(chars) }

ix_to_char = { i:ch for i,ch in enumerate(chars) }



# hyperparameters

hidden_size = 5 # size of hidden layer of neurons

seq_length = 25 # number of steps to unroll the RNN for

learning_rate = 1e-1



# model parameters

Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden

Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden

Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output

bh = np.zeros((hidden_size, 1)) # hidden bias

by = np.zeros((vocab_size, 1)) # output bias



def lossFun(inputs, targets, hprev):

  """

  inputs,targets are both list of integers.

  hprev is Hx1 array of initial hidden state

  returns the loss, gradients on model parameters, and last hidden state

  """

  xs, hs, ys, ps = {}, {}, {}, {}

  hs[-1] = np.copy(hprev)

  loss = 0

  # forward pass

  for t in range(len(inputs)):

    xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation

    xs[t][inputs[t]] = 1

    hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state

    ys[t] = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars

    ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars

    loss += -np.log(ps[t][targets[t],0]) # softmax (cross-entropy loss)

  # backward pass: compute gradients going backwards

  dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)

  dbh, dby = np.zeros_like(bh), np.zeros_like(by)

  dhnext = np.zeros_like(hs[0])

  for t in reversed(range(len(inputs))):

    dy = np.copy(ps[t])

    dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here

    dWhy += np.dot(dy, hs[t].T)

    dby += dy

    dh = np.dot(Why.T, dy) + dhnext # backprop into h

    dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity

    dbh += dhraw

    dWxh += np.dot(dhraw, xs[t].T)

    dWhh += np.dot(dhraw, hs[t-1].T)

    dhnext = np.dot(Whh.T, dhraw)

  for dparam in [dWxh, dWhh, dWhy, dbh, dby]:

    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients

  return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]



def sample(h, seed_ix, n):

  x = np.zeros((vocab_size, 1))

  x[seed_ix] = 1

  ixes = []

  for t in range(n):

    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)

    y = np.dot(Why, h) + by

    p = np.exp(y) / np.sum(np.exp(y))

    ix = np.argmax(p)

    #ix = np.random.choice(list(range(vocab_size)), p=p.ravel())

    x = np.zeros((vocab_size, 1))

    x[ix] = 1

    ixes.append(ix)

  return ixes



n, p = 0, 0

mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)

mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad

smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0

while True:

  # prepare inputs (we're sweeping from left to right in steps seq_length long)

  if p+seq_length+1 >= len(data) or n == 0: 

    hprev = np.zeros((hidden_size,1)) # reset RNN memory

    p = 0 # go from start of data

  inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]

  targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]



  # sample from the model now and then

  if n % 100 == 0:

    sample_ix = sample(hprev, inputs[0], 200)

    txt = ''.join(ix_to_char[ix] for ix in sample_ix)

    print('----\n %s \n----' % (txt, ))



  # forward seq_length characters through the net and fetch gradient

  loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)

  smooth_loss = smooth_loss * 0.999 + loss * 0.001

  if n % 100 == 0: print('iter %d, loss: %f' % (n, smooth_loss)) # print progress

  

  # perform parameter update with Adagrad

  for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], 

                                [dWxh, dWhh, dWhy, dbh, dby], 

                                [mWxh, mWhh, mWhy, mbh, mby]):

    mem += dparam * dparam

    param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update

  p += seq_length # move data pointer

  n += 1 # iteration counte

IndentationError: unexpected indent (<ipython-input-1-111b3b61b130>, line 198)

In [2]:
"""

Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)

BSD License

"""

import numpy as np



# data I/O

data = 'The first appearance of an Infinity Gem occurred in 1972 in Marvel Premiere #1. It was originally called a "Soul Gem."[1][2] In 1976, a second "Soul Gem" appeared in a Captain Marvel story and established that there were six Soul Gems, each with different powers.' # should be simple plain text file

chars = list(set(data))

data_size, vocab_size = len(data), len(chars)

print('data has %d characters, %d unique.' % (data_size, vocab_size))

print('Original Text :', data, '\n')

char_to_ix = { ch:i for i,ch in enumerate(chars) }

ix_to_char = { i:ch for i,ch in enumerate(chars) }



# hyperparameters

hidden_size = 5 # size of hidden layer of neurons

seq_length = 25 # number of steps to unroll the RNN for

learning_rate = 1e-1



# model parameters

Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden

Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden

Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output

bh = np.zeros((hidden_size, 1)) # hidden bias

by = np.zeros((vocab_size, 1)) # output bias



def lossFun(inputs, targets, hprev):

  """

  inputs,targets are both list of integers.

  hprev is Hx1 array of initial hidden state

  returns the loss, gradients on model parameters, and last hidden state

  """

  xs, hs, ys, ps = {}, {}, {}, {}

  hs[-1] = np.copy(hprev)

  loss = 0

  # forward pass

  for t in range(len(inputs)):

    xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation

    xs[t][inputs[t]] = 1

    hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state

    ys[t] = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars

    ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars

    loss += -np.log(ps[t][targets[t],0]) # softmax (cross-entropy loss)

  # backward pass: compute gradients going backwards

  dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)

  dbh, dby = np.zeros_like(bh), np.zeros_like(by)

  dhnext = np.zeros_like(hs[0])

  for t in reversed(range(len(inputs))):

    dy = np.copy(ps[t])

    dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here

    dWhy += np.dot(dy, hs[t].T)

    dby += dy

    dh = np.dot(Why.T, dy) + dhnext # backprop into h

    dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity

    dbh += dhraw

    dWxh += np.dot(dhraw, xs[t].T)

    dWhh += np.dot(dhraw, hs[t-1].T)

    dhnext = np.dot(Whh.T, dhraw)

  for dparam in [dWxh, dWhh, dWhy, dbh, dby]:

    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients

  return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]



def sample(h, seed_ix, n):

  x = np.zeros((vocab_size, 1))

  x[seed_ix] = 1

  ixes = []

  for t in range(n):

    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)

    y = np.dot(Why, h) + by

    p = np.exp(y) / np.sum(np.exp(y))

#     ix = np.argmax(p)

    ix = np.random.choice(list(range(vocab_size)), p=p.ravel())

    x = np.zeros((vocab_size, 1))

    x[ix] = 1

    ixes.append(ix)

  return ixes



n, p = 0, 0

mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)

mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad

smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0

while True:

  # prepare inputs (we're sweeping from left to right in steps seq_length long)

  if p+seq_length+1 >= len(data) or n == 0: 

    hprev = np.zeros((hidden_size,1)) # reset RNN memory

    p = 0 # go from start of data

  inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]

  targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]



  # sample from the model now and then

  if n % 100 == 0:

    sample_ix = sample(hprev, inputs[0], 200)

    txt = ''.join(ix_to_char[ix] for ix in sample_ix)

    print('----\n %s \n----' % (txt, ))



  # forward seq_length characters through the net and fetch gradient

  loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)

#   smooth_loss = smooth_loss * 0.999 + loss * 0.001
  smooth_loss = loss
  if n % 100 == 0: print('iter %d, loss: %f' % (n, smooth_loss)) # print progress

  

  # perform parameter update with Adagrad

  for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], 

                                [dWxh, dWhh, dWhy, dbh, dby], 

                                [mWxh, mWhh, mWhy, mbh, mby]):

    mem += dparam * dparam

    param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update

  p += seq_length # move data pointer

  n += 1 # iteration counte

data has 263 characters, 41 unique.
Original Text : The first appearance of an Infinity Gem occurred in 1972 in Marvel Premiere #1. It was originally called a "Soul Gem."[1][2] In 1976, a second "Soul Gem" appeared in a Captain Marvel story and established that there were six Soul Gems, each with different powers. 

----
 upbucummT,.TxulMP9xdrne]]xvTeSsrmvhx.eS"gop pGht"6SCvyT7evsg]v],,,du9fupP,tIv]v7dxe.9ssupe,lTfx7rx..]wcyTP"aw#7[ uobga[9]171T#]iph7Mp2M]PTmelnx.6hMogiwC T[,SiSw e1.7bw1vPtsyG6mInG1eP2I."ddha1td]u6h6cm 
----
iter 0, loss: 92.838330
----
 Cd Is em isrefvl [SaMfI Mip" .r17C76a tidl aiTPid isetn o.y ietib,e v1Cn Ctplnra xr]fre we o Sstid Me e ihMeib[]lredrera ffenpeae 7ylllM"l G hb nSoGalml 2 pi.mteme i wen ]cln PgareTae en efia, M Get G 
----
iter 100, loss: 62.007134
----
  #h m s972 Geml o  1792vioGht C 9wyd e Mal t9.In on an an."cor 9I "fen ol 1ulpara sSald hariId #sta 1676t aSIl MIymf ap a Cn e Geudl GerGed sa 1v Cy en on e]"9"bm]cst Plred x,l s17pPt o" fer  wyri[Pfe

----
 heffillPrveg th oin Maf eararind tapsiappeecGem 192ll erin MacCor d tobl a .1ullad "l1][1]2ppeirv]nemth "."fifcGsl al96v] In or acupse #92ll od taremt opiarel a "Soul efifih an tnta#apsar ax herin G]d 
----
iter 3300, loss: 40.932176
----
 1urs s st]yped werf t wnareir, 1pupsed "[, "]Shl ty ea apSo. Puc. In wabl Main eere it976, e c76, 1]2l in In sher ab shan sa6, ocw taul Gerf a thveioia"had in "p.s"]S1uprarf an in in e w "[1pp,i a6,ir 
----
iter 3400, loss: 40.648287
----
 he shem an Gem"Soul anirGedmsar wfsal earertis6ead fere Preg odithim Mau s Mabl earerfi#Cos G cn a #Soul Gemiar]a#[72 cor cal , Gy eirvel Gempe xS7x 1op odlare fiddlein wareitaseccorhid terel ty in sa 
----
iter 3500, loss: 40.622014
----
 heind Man Ininfa war,itaremscalce f am"yy fe tap Marvefsed fed a Ini#[6, e wshan " 6, a 1oul Pd wams, Mar arurcarpit xn agiathad terlarintaf eaf et "p, an ani ocG]ppenfsacumihGarrein InaisIfsem"v, tap 
----
iter 3600, loss: 40.491935
----
 hin Pcerrarf a sSory e a Inin In 1

KeyboardInterrupt: 