In [1]:
import numpy as np
import tensorflow as tf

In [2]:
tf.__version__

'2.2.0'

In [3]:
with open("dinos.txt") as f:
    examples = f.readlines()

In [4]:
data = open('dinos.txt', 'r').read()

In [5]:
data = data.lower()

In [6]:
chars = list(set(data))

In [7]:
chars_to_ind = {ch:i for i,ch in enumerate(sorted(chars))}
ind_to_chars = {i:ch for i,ch in enumerate(sorted(chars))}

In [8]:
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis = 0)

In [9]:
def rnn_step_forward(parameters, a_prev, x):
    Waa, Wax, Wya, by, b = parameters['Waa'],parameters['Wax'],parameters['Wya'],parameters['by'],parameters['b']
    a_next = np.tanh(np.matmul(Wax,x)+np.matmul(Waa,a_prev)+b) 
    y_t = softmax(np.matmul(Wya, a_next) + by)

    return a_next, y_t

In [10]:
def rnn_step_back(dy, gradients, parameters, a_prev, a, x):
    gradients['Wya'] += np.matmul(dy, a.T)
    gradients['by'] += dy
    da = np.matmul(parameters['Wya'].T, dy) + gradients['da_next']
    daraw = (1-a*a)*da
    gradients['b'] += daraw
    gradients['Wax'] += np.matmul(daraw,x.T)
    gradients['Waa'] += np.matmul(daraw,a_prev.T)
    gradients['da_next'] += np.matmul(parameters['Waa'].T, daraw)
    
    return gradients
    

In [11]:
def update_gradients(parameters, gradients, lr):
    for key in ['Waa','Wax','Wya','b','by']:
        parameters[key] += -lr*gradients[key]
    return parameters

In [12]:
def rnn_forward(X,Y, a0, parameters):
    x, a, y_hat = {}, {}, {}
    a[-1] = np.copy(a0)
    
    loss = 0 
    
    for t in range(len(X)):
        x[t] = X[t:t+1].T
        a[t], y_hat[t] = rnn_step_forward(parameters, a[t-1], x[t])
        loss -= np.sum(Y[t]*np.log(y_hat[t])+(1-Y[t])*np.log(1-y_hat[t]))
        
    cache = (y_hat, a, x)
    
    return loss, cache

In [13]:
def rnn_back(X,Y,parameters, cache):
    gradients = {}
    (y_hat, a, x) = cache
    Waa, Wax, Wya, by, b = parameters['Waa'],parameters['Wax'],parameters['Wya'],parameters['by'],parameters['b']
    gradients['Waa'] = np.zeros_like(parameters['Waa'])
    gradients['Wax'] = np.zeros_like(parameters['Wax'])
    gradients['Wya'] = np.zeros_like(parameters['Wya'])
    gradients['by'] = np.zeros_like(parameters['by'])
    gradients['b'] = np.zeros_like(parameters['b'])
    gradients['da_next'] = np.zeros_like(a[0])
    
    for t in reversed(range(len(X))):
        dy = np.copy(y_hat[t])
        #print(dy.shape, Y[t].shape)
        #dy[np.argmax(Y[t])] -= 1    ### ?
        dy = dy - Y[t:t+1].T
        gradients = rnn_step_back(dy, gradients, parameters, a[t-1], a[t], x[t])
    
    return gradients, a

In [14]:
np.random.shuffle(examples)

In [32]:
n_a = 50 # change it!
n_x = 27
n_y = 27
Wax = np.random.randn(n_a,n_x)*0.01
Waa = np.random.randn(n_a,n_a)*0.01
Wya = np.random.randn(n_y,n_a)*0.01
b = np.zeros([n_a,1])
by = np.zeros([n_y,1])

parameters = {'Wax':Wax, 'Waa':Waa, 'Wya':Wya, 'b':b, 'by':by}

In [33]:
a0 = np.zeros([n_a,1])

In [34]:
for c in range(10):
    for k in range(len(examples)):
        X = np.zeros([len(examples[k]),27])
        Y = np.zeros([len(examples[k]),27])
        for i,char in enumerate(examples[k].lower()[:-1]):
            X[i+1,chars_to_ind[char]] = 1

        for i,char in enumerate(examples[k].lower()):
            Y[i,chars_to_ind[char]] = 1
            
        a0 = np.zeros([n_a,1])
        loss, cache = rnn_forward(X,Y, a0, parameters)
        gradients, a = rnn_back(X,Y,parameters, cache)
        #a0 = a[len(X)-1]
        parameters = update_gradients(parameters, gradients, 0.001)
    print(loss)

1894.894617367917
1943.7273906957134
1951.966855754581
1954.5940632853744
1956.182569950101
1957.652371135845
1959.2688876276845
1961.2730292889814
1964.040067690214
1966.9704205785147


In [43]:
X = np.zeros([27,1])

a_prev = np.zeros([n_a,1])

count = 0 
while True:
    a_prev, y_t = rnn_step_forward(parameters, a_prev, X)
    ind = np.random.choice(27, p=y_t[:,0])
    X = np.zeros([27,1])
    X[ind,0] = 1
    print(ind_to_chars[ind],end = '')
    count += 1
    
    if ind == 0 or count == 50:
        break
    

akeuosnoe
