In [15]:
import numpy as np

In [81]:
data = open('warpeace_input.txt', 'r').read() # should be simple plain text file
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

data has 3196213 characters, 83 unique.


In [335]:
class F:
    def softmax(x):
        """Compute softmax values for each sets of scores in x.

        Subtracts max(x) for numerical stability
        Args:
            x: an array of size n * 1.
        Returns:
            A probability distribution over the vector x
        """
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum()

class RNN:
    def __init__(self, num_hidden, num_input):
        self.input_size = num_input
        self.W_hx = np.random.randn(num_hidden, num_input)*0.01 # input to hidden of dimension hidden * input
        self.W_hh = np.random.randn(num_hidden, num_hidden)*0.01 # hidden to hidden of dimension hidden * hidden
        self.W_hy = np.random.randn(num_input, num_hidden)*0.01 # hidden to output of dimension input * hidden
        
        self.b_h = np.zeros((num_hidden, 1)) # hidden bias of dimension hidden * 1
        self.b_y = np.zeros((num_input, 1)) # output bias of dimension input * 1

        self.h = np.random.randn(num_hidden,1) * 0.01
        

    def step(self, x):
        """Compute a single step using hidden states for a given x.
        
        Mathematically it is h(t) = tanh((Wh * h(t-1)) + (Wx *x) + bias)
                             y = Wy * h + bias
        Args:
            x: a vector of size n * 1.
        Returns:
            y: a vector of the same size as x i.e n * 1
        """
        h = self.h
        h = np.tanh(np.dot(self.W_hh, self.h) + np.dot(self.W_hx, x) + self.b_h)
        y = np.dot(self.W_hy, self.h) + self.b_y
    
        return y
    def train(self, x, y):
        print(x)
        x_hat = {}
        y_hat = {}
        h = {}
        p = {}
        print(self.h.shape)
        h[0-1] = self.h
        loss = 0
        for t in range(len(x)):
            x_hat[t] = np.zeros((self.input_size,1)) # Convert x into 1-hot-vector
            x_hat[t][x[t]] = 1 # Set the index x[t] of 1-hot-vector x_hat[t] to 1
            h[t] = np.tanh(np.dot(self.W_hh, h[t-1]) + np.dot(self.W_hx, x_hat[t]) + self.b_h)
            y_hat[t] = np.dot(self.W_hy, h[t]) + self.b_y
            p[t] = F.softmax(y_hat[t])
            print(p[t], p[t][y[t]], p[t][y[t],0])
            loss += -np.log(p[t][y[t],0])#softmax (cross-entropy loss)
            print(loss)
            break
    def forward(self, seed, n):
        """ 
        Do one forward pass, with the starting character as seed
        h is memory state, seed is seed letter for first time step
        
        Args:
            seed: index of first character for time step
            n : size of sequence (time steps)
        Returns:
            predicted_index: a vector of size n with predicted_indexes for timestep t + 1
        """
        x = np.zeros((self.input_size, 1))
        x[seed] = 1
        predicted_indexes = []
        for t in range(n):
            h = self.h
            h = np.tanh(np.dot(self.W_hh, h) + np.dot(self.W_hx, x) + self.b_h)
            y = np.dot(self.W_hy, h) + self.b_y
            p = F.softmax(y)
            predicted_char = np.random.choice(range(self.input_size), p=p.ravel())
            x = np.zeros((self.input_size, 1))
            x[predicted_char] = 1
            predicted_indexes.append(predicted_char)
        return predicted_indexes

    def show_weight(self):
        print(self.h)


In [336]:
sequence_length = 25
batches = int(len(data)/sequence_length)
rnn = RNN(10, vocab_size)
for i in range(batches):
    x = [char_to_ix[c] for c in data[i*sequence_length:(i+1)*sequence_length]]#inputs to the RNN
    y = [char_to_ix[c] for c in data[i*sequence_length+1:(i+1)*sequence_length+1]]#the targets it should be outputting
    pred_x = rnn.forward(x[0], sequence_length)
    txt_x = ''.join([ix_to_char[n] for n in pred_x])
    txt_y = ''.join([ix_to_char[n] for n in y])
    print(x[0],y[0])
#     print(txt_x)
#     print(txt_y)
    
    rnn.train(x,y)

    break

41 8
[41, 8, 11, 55, 7, 7, 3, 35, 12, 19, 29, 62, 50, 55, 3, 35, 82, 73, 35, 4, 55, 62, 73, 77, 35]
(10, 1)
[[0.01205163]
 [0.01205159]
 [0.01205436]
 [0.01204985]
 [0.01204488]
 [0.01205058]
 [0.01204949]
 [0.01204792]
 [0.0120452 ]
 [0.01205516]
 [0.01204534]
 [0.01204871]
 [0.01204769]
 [0.01204153]
 [0.01204756]
 [0.01204393]
 [0.01204844]
 [0.01204218]
 [0.01204726]
 [0.01204844]
 [0.01205294]
 [0.01204749]
 [0.01204511]
 [0.01205   ]
 [0.01205655]
 [0.01204998]
 [0.01204308]
 [0.01205551]
 [0.01204457]
 [0.01204931]
 [0.01204512]
 [0.01205072]
 [0.01205631]
 [0.01204164]
 [0.0120486 ]
 [0.01204135]
 [0.0120487 ]
 [0.01204129]
 [0.01204613]
 [0.01204795]
 [0.01204479]
 [0.01205076]
 [0.01204404]
 [0.0120496 ]
 [0.01204857]
 [0.01205286]
 [0.01204515]
 [0.01204491]
 [0.01205068]
 [0.01205527]
 [0.01204917]
 [0.01205213]
 [0.01204718]
 [0.01205257]
 [0.01204828]
 [0.01204661]
 [0.01204145]
 [0.01205417]
 [0.01204412]
 [0.01204731]
 [0.01204209]
 [0.01204853]
 [0.01204732]
 [0.012041

In [327]:
def sample(h, seed_ix, n):
    """ 
    sample a sequence of integers from the model 
    h is memory state, seed_ix is seed letter for first time step
    """
    x = np.zeros((vocab_size, 1))
    x[seed_ix] = 1
    ixes = []
    for t in xrange(n):
        h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
        y = np.dot(Why, h) + by
        p = np.exp(y) / np.sum(np.exp(y))
        ix = np.random.choice(range(vocab_size), p=p.ravel())
        x = np.zeros((vocab_size, 1))
        x[ix] = 1
        ixes.append(ix)
    return ixes
