In [1]:
import numpy as np
import pickle

## loaded "char-rnn-snapshot.npz"

In [2]:
# The vocabulary entries of this snapshot are read back below with .tolist()
# as Python dicts/lists, i.e. they are stored as pickled object arrays.
# NumPy >= 1.16.3 refuses to load those unless allow_pickle=True is given.
# NOTE(review): unpickling can execute arbitrary code -- only open a snapshot
# that comes from a trusted source.
a = np.load('char-rnn-snapshot.npz', allow_pickle=True)

## model parameters loaded from "char-rnn-snapshot.npz"

In [3]:
# Weights: Wxh multiplies the one-hot input, Whh the previous hidden state,
# and Why maps the hidden state to output scores; bh / by are the biases.
Wxh, Whh, Why = (a[key] for key in ("Wxh", "Whh", "Why"))
bh, by = a["bh"], a["by"]

# Memory variables for Adagrad, one per weight matrix / bias vector.
mWxh = a["mWxh"]
mWhh = a["mWhh"]
mWhy = a["mWhy"]
mbh = a["mbh"]
mby = a["mby"]

# Vocabulary and dataset metadata saved alongside the weights.
vocab_size = a["vocab_size"].tolist()  # number of distinct characters
ix_to_char = a["ix_to_char"].tolist()  # dict: index -> character
char_to_ix = a["char_to_ix"].tolist()  # dict: character -> index (inverse of the above)
chars = a["chars"].tolist()            # list of characters
data_size = a["data_size"].tolist()    # length of the training data

# HyperParameters

In [4]:
# Number of hidden units, taken from the loaded recurrent weight matrix.
hidden_size = Whh.shape[0]

# Sampling-related settings.
n = 200          # sample length; reassigned by later cells before it is used
temp = 1         # factor applied to the output scores in the single-step cells
seq_length = 25  # number of steps to unroll the RNN for

# Training-related settings; not referenced by the sampling cells in this notebook.
learning_rate = 1e-1
epochs = 1000


# Sample Input

In [5]:
def sample(h, n,x,ixes,temp=1.5):
    """ 
    sample a sequence of integers from the model 

    Reads the module-level parameters Wxh, Whh, Why, bh, by and vocab_size.

    h is memory state the generation starts from
    n is the number of indices to sample
    x is the one-hot column vector of the first input character
    ixes is the list the sampled indices are appended to (modified in place)
    temp is the factor the output scores are multiplied by before the
        softmax, i.e. an inverse temperature: values below 1 flatten the
        distribution, values above 1 sharpen it

    returns the same ixes list that was passed in
    """
    for t in range(n):
        h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
        y = np.dot(Why, h) + by
        # Softmax is unchanged by subtracting a constant from every score.
        # Shifting by the maximum keeps the largest exponent at exp(0) = 1, so
        # np.exp cannot overflow to inf and yield NaN probabilities.
        scaled = y * temp
        scaled = scaled - np.max(scaled)
        exp_scaled = np.exp(scaled)
        p = exp_scaled / np.sum(exp_scaled)
        ix = np.random.choice(range(vocab_size), p=p.ravel())
        # The sampled character becomes the next step's one-hot input.
        x = np.zeros((vocab_size, 1))
        x[ix] = 1
        ixes.append(ix)
    return ixes


# Part 1

## Load the data

In [6]:
# Read the whole training text. The context manager closes the file handle as
# soon as the read finishes instead of leaving it open for the kernel's lifetime.
with open('shakespeare_train.txt', 'r') as train_file:
    data = train_file.read() # should be simple 

In [7]:
# Generate one sample per alpha (the factor `sample` multiplies the output
# scores by before the softmax). Every run starts from a zeroed hidden state
# and the first character of the training text.
# The sample length comes from the hyperparameter `n`; it is no longer
# overwritten with 0 here, so this cell can be re-run in any order.
for alpha in [0.1,0.5,0.9,1.0,2.0,5.0]:
    hprev = np.zeros((hidden_size,1)) # reset RNN memory

    # One-hot encoding of the seed character.
    x = np.zeros((vocab_size, 1))
    x[char_to_ix[data[0]]] = 1

    sample_ix = sample(hprev, n, x, [], alpha)
    txt = ''.join(ix_to_char[ix] for ix in sample_ix)
    print ('---- for alpha %.1f ---- \n %s\n-----------------------\n' % (alpha,txt, ))

---- for alpha 0.1 ---- 
 n:fi;HYuD,
ljo!,:'TyGLNLeNlvga? as?pfsAggl.'
ey'ksxD!fIyerd'ysld cryOsMD pl Ry
Cgs'',tYY;l:UYVhIaxtljegag;t CAAGfrH;  DhN?G?v
LUvyRRGRcc&RYOESKGu'?rBYne.oBf,oilyZ:l.inP
Ovm?KJe-DEk,;P!sgQ'nmNtcfbaxbH'
-----------------------

---- for alpha 0.5 ---- 
 ; smralabo spevedrey,
Urbean,?fies.
; h, sh umiy,
Aabout? O Holl Knkaie
IVim, wy wo.,! coun;
helliseibire,
Fileqellly?
YeUTUCENEaR Dyws to Botce for us;
Comt O'wtught!
K'eneys, stwork!

Allbf yore,. A
-----------------------

---- for alpha 0.9 ---- 
 irself!
Whicking loos'd tiuen tice,
Thour and whule crits?
No, done, heand and re beay's
see!
OwSWART RITUS:
I kill whe deas, somes manseragely your and thric do'banemtest hes long neranf aft opan tal
-----------------------

---- for alpha 1.0 ---- 
 irst she who hilf aged bake thee he thou be strine; our un arging for hast you have time't.
O shalood fock, O'll nerangry, he Rome:
What pavealsees I forfly.

CLOUSENBUSt by who God hass her,
Inf imin
-----------------

### Report 
High temperature (low alpha = 0.1): every character of the vocabulary shows up in the sample. The probability vector is spread almost evenly over the vocabulary, i.e. the softmax gives nearly equal probability to every character, so sampling is close to uniform and the result is unreadable. 

Moderate temperature (alpha = 1): the sample is made of recognisable words. The probability vector follows the distribution the model learned over 
sequences of characters, so the results are quite readable.

Low temperature (high alpha = 5): the softmax function behaves almost like a max function. The probability vector is heavily biased towards the most likely character at each step, i.e. the highest-probability character is nearly always sampled, so the result consists of almost the same words repeated. 

# Part 2

In [8]:
# Seed phrases; each one primes the RNN before it generates a continuation.
inputs_string = [
    "ll speak a little",
    'garden',
    'when proud-pied April dress',
    'As euery Alien pen',
    'Your shallowest helpe',
]

In [9]:
# Number of seed phrases; the bare expression is displayed as the cell output.
len(inputs_string)

5

In [10]:
n=200
for seed_text in inputs_string:
    seed_ixes = [char_to_ix[ch] for ch in seed_text]
    if not seed_ixes:
        continue                           # nothing to prime the RNN with
    h = np.zeros((hidden_size,1))

    # Warm up the hidden state on every seed character except the last one.
    # `sample` starts by feeding its `x` argument through the RNN, so the last
    # seed character must be handed to it un-consumed; running it through the
    # recurrence here as well would feed that character twice.
    for seed_ix in seed_ixes[:-1]:
        x = np.zeros((vocab_size, 1))
        x[seed_ix] = 1
        h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)

    # One-hot encoding of the last seed character: the first input of `sample`.
    x = np.zeros((vocab_size, 1))
    x[seed_ixes[-1]] = 1

    # Pass a copy of the seed indices so the starting words appear in the
    # output without `sample` mutating the seed list itself.
    # No temp argument is given, so `sample` uses its default (1.5).
    sample_ix = sample(h,n,x,list(seed_ixes))
    txt = ''.join(ix_to_char[ix] for ix in sample_ix)  
    print("---------------------------------------\n",txt,"\n---------------------------------------\n")

---------------------------------------
 ll speak a littlen the we much steak'd with no-st one when the the wife, you have I dear you so geavon hanes.

CORIOLANUS:
Nord mast well the the bre hes, love
That lise heare word the know now she feestam, be whour t 
---------------------------------------

---------------------------------------
 gardend the retere.

SICINIUS:
And of in the to of has!

RIVERC:
Nogst me pees, do here master stand and
A for he to with a sated to mines the you him may, of the come.

CORIOLANUS:
In love with steate fral 
---------------------------------------

---------------------------------------
 when proud-pied April dress the dovest so the she's he blower the to be the with to the what kne the con is des as who a have loves pare, me day to to man the that sent peater my a so have you thy the and his the could a man th 
---------------------------------------

---------------------------------------
 As euery Alien penow hears falled
Their theor speets to y

# Part 3

In [13]:
# Zeroed hidden state and a one-hot input vector that encodes ':'.
h, x = np.zeros((hidden_size, 1)), np.zeros((vocab_size, 1))
x[char_to_ix[':']] = 1
ixes = []
# Row index of the single 1 in x, i.e. the vocabulary index of ':'.
x_index = np.unravel_index(x.argmax(), x.shape)[0]

In [15]:
# One recurrent step on the current input x, then report the strongest hidden unit.
h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
print("maximum value : ",h.max(),"index of maximum value ",np.argmax(h))

# Output scores -> softmax probabilities (scores are multiplied by temp first).
y = np.dot(Why, h) + by
p = np.exp(y*temp)
p = p / np.sum(p)

# Draw the next character index and re-encode it as the one-hot vector x.
ix = np.random.choice(range(vocab_size), p=p.ravel())
x = np.zeros((vocab_size, 1))
x[ix, 0] = 1
print(ix)

maximum value :  0.9998877562219607 index of maximum value  100
0


In [109]:
# Index of the input character fed to the network.
print("X index :", x_index)

# Column of Wxh selected by the one-hot input: its length and its largest entry.
print("number of rows corresponding of X_index :",len(Wxh[:,x_index]))
print("maximum value Wxh: ",np.max(Wxh[:,x_index]),"index of maximum value ",Wxh[:,x_index].argmax())

# x now holds the one-hot vector of the sampled character; recover its index.
resultant_char = np.unravel_index(x.argmax(), x.shape)[0]
print("resultant character index ",resultant_char)

# NOTE(review): this line reuses the "resultant character index" label but
# prints the length of the Why row for that character, not an index.
print("resultant character index ",len(Why[resultant_char,:]))

print("result of colon ",ix_to_char[ix],"-----")

X index : 9
number of rows corresponding of X_index : 250
maximum value Wxh:  4.829189868371359 index of maximum value  100
resultant character index  0
resultant character index  250
result of colon  
 -----


## Report 
Specific weights that are responsible for this behavior by the RNN.

X[9]

Wxh[100][9]: this is the largest weight in column 9 of Wxh, and it drives hidden unit 100 to the maximum value of the tanh activation, 0.9998877562219607 (at index 100).

Why[0][100]: the hidden state h[100] (activated through Wxh[100][9]) is multiplied by Why[0][100], which mostly activates output 0, so the result is 0, i.e. '\n'

Why[2][100]: the hidden state h[100] (activated through Wxh[100][9]) is multiplied by Why[2][100], which mostly activates output 2, so the result is 2, i.e. ' ' (space)

y[0] and y[2] receive the highest probabilities from the softmax, so the sampled character is mostly one of these two.

# Part 4

In [251]:
# Zeroed hidden state and a one-hot input vector for the character under study.
character = "&"
h, x = np.zeros((hidden_size, 1)), np.zeros((vocab_size, 1))
x[char_to_ix[character]] = 1
ixes = []
# Row index of the single 1 in x, i.e. the vocabulary index of the character.
x_index = np.unravel_index(x.argmax(), x.shape)[0]

# One recurrent step, then report the strongest hidden unit.
h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
print("maximum value : ",h.max(),"index of maximum value ",np.argmax(h))

# Output scores -> softmax probabilities (scores are multiplied by temp first).
y = np.dot(Why, h) + by
p = np.exp(y*temp)
p = p / np.sum(p)

# Draw the next character index and re-encode it as the one-hot vector x.
ix = np.random.choice(range(vocab_size), p=p.ravel())
x = np.zeros((vocab_size, 1))
x[ix, 0] = 1

print("X index :", x_index)

# Column of Wxh selected by the one-hot input: its length and its largest entry.
print("number of rows corresponding of X_index :",len(Wxh[:,x_index]))
print("maximum value Wxh : ",np.max(Wxh[:,x_index]),"index of maximum value ",Wxh[:,x_index].argmax())

# Index of the sampled character, recovered from its one-hot vector.
resultant_char = np.unravel_index(x.argmax(), x.shape)[0]
print("resultant character index ",resultant_char)

# NOTE(review): this line reuses the "resultant character index" label but
# prints the length of the Why row for that character, not an index.
print("resultant character index ",len(Why[resultant_char,:]))

print("result of ",character," ",ix," : ",ix_to_char[ix])

maximum value :  0.9999988372613109 index of maximum value  44
X index : 4
number of rows corresponding of X_index : 250
maximum value Wxh :  7.346495414196985 index of maximum value  44
resultant character index  12
resultant character index  250
result of  &   12  :  C


In [145]:
# Characters sampled after '&' over repeated runs: D, H, C, T

Report
X index : 4
hidden state[44] maximum value :  0.9999988372613109 index of maximum value  44

Wxh[44][4]

Why[19][44] - H

Why[12][44] - C

Why[15][44] - D

H and C are the most frequent results; D, K or T may occasionally appear because the next character is sampled from the probability distribution.
