This notebook was developed using TensorFlow 1.7.

In [None]:
%tensorflow_version 1.x

import numpy as np

import matplotlib.pyplot as plt
import scipy.stats as stats

import codes.rnn_tf as rnn_tf
import codes.rnn_elman as rnn_elman # Con ruido
import codes.rnn_xhcy as rnn_xhcy # Dual CPU + Mem (conectada a CPU)

import matplotlib.patches as mpatch

import time
import psutil

# Run

In [None]:
# Possible values: SUMA, SUMA10, SUMA4, SUMA3
problema = "SUMA10"

# Every data file for the selected problem shares this path prefix.
ruta_train = 'files/train/' + problema

# The two operand streams and the expected output stream.
input_file_s0 = ruta_train + '_sumando_0.txt'
input_file_s1 = ruta_train + '_sumando_1.txt'
output_file = ruta_train + '_y.txt'

## Elman RNN

In [None]:
# --- Hyperparameters for the Elman run ---------------------------------------

# Sizes.
seq_len = 25
hidden_size = 10
cpu_size = 20       # not passed to the Elman constructor in this notebook
num_batches = 10

# Optimisation and regularisation values handed to the network constructor.
learning_rate = 0.01
l1reg = 0.0001
noise_level = 0.4
shock = 0.5

# Not referenced by any other cell visible in this notebook.
symbols = ['$', 'a', 'b']

In [None]:
# Numeric base of the addition problem; it fixes the digit alphabet below.
base = 10

ficheros = [input_file_s0, input_file_s1]

# Inputs use the `base` digits plus the '$' symbol; outputs use digits only.
digitos = [str(j) for j in range(base)]

# longitud presumably caps how much of each file is read -- TODO confirm in rnn_tf.
data_dic, dim_i, dim_o, _ = rnn_tf.data_transform_multiple_inputs_many_to_many(
    num_batches,
    seq_len,
    input_files=ficheros,
    output_file=output_file,
    chars_x=digitos + ['$'],
    chars_y=list(digitos),
    longitud=1000000,
)

dataset_x_train, dataset_y_train = data_dic['dataset_x'], data_dic['dataset_y']

In [None]:
# Elman network with noise (ICANN'19).
# The optimizer_name='Adam' option is intentionally left disabled, as in the
# original cell, so the constructor's default optimizer is used.
opciones_elman = dict(
    seq_len=seq_len,
    noise_level=noise_level,
    num_batches=num_batches,
    regularizationL1=l1reg,
    shock=shock,
    clipvalue=5.0,
    factor_inicializacion=0.01,
)

rnn = rnn_elman.rnn_elman_tf(dim_i, hidden_size, dim_o, learning_rate,
                             **opciones_elman)

In [None]:
# Options for the progressive noise / L1 schedules understood by rnn_tf.train.
opciones_entrenamiento = dict(
    write_tensorboard=False,
    ruido_progresivo=True,
    ruido_max=1.0,
    pendiente_ruido=2.0,
    l1reg_mem_progresivo=True,
    l1reg_mem_max=1e-1,
    pendiente_l1reg_mem=10.0,
)

# 50 is the second positional argument (presumably the number of epochs --
# TODO confirm in rnn_tf.train). The trailing ';' hides the cell's return value.
rnn_tf.train(rnn, 50, dataset_x_train, dataset_y_train, **opciones_entrenamiento);

In [None]:
# NOTE(review): evaluation reads the same files/train/ files that were used for
# training, only with a single batch and a much larger longitud -- confirm
# whether a held-out set was intended.
ficheros = [input_file_s0, input_file_s1]

digitos = [str(j) for j in range(base)]

data_dic, dim_i, dim_o, _ = rnn_tf.data_transform_multiple_inputs_many_to_many(
    1,
    seq_len,
    input_files=ficheros,
    output_file=output_file,
    chars_x=digitos + ['$'],
    chars_y=list(digitos),
    longitud=1000000000,
)

dataset_x_test, dataset_y_test = data_dic['dataset_x'], data_dic['dataset_y']

target, pred, accuracy = rnn_tf.test(rnn, dataset_x_test, dataset_y_test)
print("Test:", accuracy, "%")

## Dual RNN

In [None]:
# --- Hyperparameters for the Dual run ----------------------------------------

# Sizes.
seq_len = 25
hidden_size = 10
cpu_size = 20       # passed to the Dual constructor alongside hidden_size
num_batches = 10

# Optimisation values; noise, L1 and shock are all zero at construction time.
learning_rate = 0.01
l1reg = 0.0
noise_level = 0.0
shock = 0.0

# Not referenced by any other cell visible in this notebook.
symbols = ['$', 'a', 'b']

In [None]:
# Numeric base of the addition problem; it fixes the digit alphabet below.
base = 10

ficheros = [input_file_s0, input_file_s1]

# Inputs use the `base` digits plus the '$' symbol; outputs use digits only.
digitos = [str(j) for j in range(base)]

# longitud presumably caps how much of each file is read -- TODO confirm in rnn_tf.
data_dic, dim_i, dim_o, _ = rnn_tf.data_transform_multiple_inputs_many_to_many(
    num_batches,
    seq_len,
    input_files=ficheros,
    output_file=output_file,
    chars_x=digitos + ['$'],
    chars_y=list(digitos),
    longitud=1000000,
)

dataset_x_train, dataset_y_train = data_dic['dataset_x'], data_dic['dataset_y']

In [None]:
# Dual network: CPU + memory (memory connected to the CPU).
# The optimizer_name='Adam' option is intentionally left disabled, as in the
# original cell, so the constructor's default optimizer is used.
opciones_dual = dict(
    seq_len=seq_len,
    noise_level=noise_level,
    num_batches=num_batches,
    regularizationL1=l1reg,
    shock=shock,
    clipvalue=5.0,
    factor_inicializacion=0.01,
)

rnn = rnn_xhcy.rnn_xhcy(dim_i, hidden_size, cpu_size, dim_o, learning_rate,
                        **opciones_dual)

In [None]:
# Options for the progressive noise / L1 schedules understood by rnn_tf.train.
opciones_entrenamiento = dict(
    write_tensorboard=False,
    ruido_progresivo=True,
    ruido_max=1.0,
    pendiente_ruido=2.0,
    l1reg_mem_progresivo=True,
    l1reg_mem_max=1e-1,
    pendiente_l1reg_mem=10.0,
)

# 50 is the second positional argument (presumably the number of epochs --
# TODO confirm in rnn_tf.train). The trailing ';' hides the cell's return value.
rnn_tf.train(rnn, 50, dataset_x_train, dataset_y_train, **opciones_entrenamiento);

In [None]:
# NOTE(review): evaluation reads the same files/train/ files that were used for
# training, only with a single batch and a much larger longitud -- confirm
# whether a held-out set was intended.
ficheros = [input_file_s0, input_file_s1]

digitos = [str(j) for j in range(base)]

data_dic, dim_i, dim_o, _ = rnn_tf.data_transform_multiple_inputs_many_to_many(
    1,
    seq_len,
    input_files=ficheros,
    output_file=output_file,
    chars_x=digitos + ['$'],
    chars_y=list(digitos),
    longitud=1000000000,
)

dataset_x_test, dataset_y_test = data_dic['dataset_x'], data_dic['dataset_y']

target, pred, accuracy = rnn_tf.test(rnn, dataset_x_test, dataset_y_test)
print("Test:", accuracy, "%")

# Analysis

In [None]:
# Build two lookup tables, one for "no incoming carry" and one for "incoming
# carry", indexed by the pair of symbols (s1, s2) read at the same position of
# the two operand files. Each cell stores i - 1, i.e. the position immediately
# before the first occurrence of that pair under that carry condition; the
# analysis cells further down use it as an index into `hs` to fetch the
# previous hidden state. Cells never observed keep the sentinel -1.

# Read both operand files in full, closing each handle as soon as it is read.
f2 = []
for ruta in ficheros[:2]:
    with open(ruta, 'r') as fichero:
        f2.append(fichero.read())

# (base + 1) rows/columns: one per digit plus a final one that is reached via
# index -1 for any non-digit symbol (see the ValueError branch below).
tabla_combinaciones_sin_acarreo = np.full((base+1, base+1), -1, dtype=int)
tabla_combinaciones_con_acarreo = np.full((base+1, base+1), -1, dtype=int)

acarreo = 0
for i, (s1_str, s2_str) in enumerate(zip(f2[0], f2[1])):
    try:
        s1 = int(s1_str)
        s2 = int(s2_str)
    except ValueError:
        # Non-digit symbol in either stream: treat the whole pair as a
        # separator.
        s1 = -1
        s2 = -1

    # Record only the first time each pair is seen under each carry condition.
    # NOTE(review): a pair first seen at i == 0 stores -1, which is
    # indistinguishable from "unset" and is overwritten by a later occurrence.
    if acarreo == 0:
        if tabla_combinaciones_sin_acarreo[s1][s2] == -1:
            tabla_combinaciones_sin_acarreo[s1][s2] = i - 1
    elif acarreo == 1:
        if tabla_combinaciones_con_acarreo[s1][s2] == -1:
            tabla_combinaciones_con_acarreo[s1][s2] = i - 1

    # Carry into the next position. Floor division keeps it an integer; with
    # true division ("/") under Python 3 the carry became a fraction and the
    # == 0 / == 1 tests above stopped matching.
    if s1 == -1:
        acarreo = 0
    else:
        acarreo = (s1+s2+acarreo)//base

In [None]:
def _leer_estados(longitud):
    """Call rnn.estados on the operand/output files for `longitud` symbols.

    The character lists are rebuilt on every call so each invocation receives
    fresh list objects, exactly as when they are written inline.
    """
    return rnn.estados(input_files=ficheros,
                       output_file=output_file,
                       longitud=longitud,
                       chars_x=[str(j) for j in range(base)]+['$'],
                       chars_y=[str(j) for j in range(base)])


# First pass: start from an all-zero hidden state and run a short stretch.
rnn.v_h_prev = np.zeros((hidden_size, 1))
hs, ys, inputs, outputs, _, _, hzs, cs = _leer_estados(50)

# Second pass: reuse the first state returned above as the initial hidden
# state and collect the states for a much longer stretch.
rnn.v_h_prev = hs[0][:, None]
hs, ys, inputs, outputs, _, _, hzs, cs = _leer_estados(40000)

hs.shape

In [None]:
# Pull the trained parameters out of the TensorFlow session as NumPy arrays.
Wxh = rnn.sess.run(rnn.Wxh)
Whh = rnn.sess.run(rnn.Whh)
bh = rnn.sess.run(rnn.bh)
by = rnn.sess.run(rnn.by)  # not used in this cell; still fetched so the name stays defined


def _mapa_activaciones(tabla_combinaciones, estados, pesos_xh, pesos_hh, sesgo_h,
                       n_neuronas, n_base):
    """Compute each hidden neuron's tanh activation for every digit pair.

    For every (s1, s2) with both digits in range(n_base), builds the two-hot
    input column vector (s1 in the first block of n_base+1 slots, s2 in the
    second block), takes as previous hidden state the row of `estados` indexed
    by `tabla_combinaciones[s1][s2]`, and evaluates
    tanh(pesos_xh @ x + pesos_hh @ h_prev + sesgo_h).

    The pre-activation is computed once per digit pair and then read per
    neuron, instead of being recomputed for every neuron.

    NOTE(review): a table entry of -1 (pair never recorded) indexes
    estados[-1], i.e. the last collected state, without any warning.

    Returns:
        Array of shape (n_neuronas, n_base * n_base, 3) whose rows are
        [s1, s2, activation], ordered with s1 as the outer index.
    """
    mapa = [[] for _neurona in range(n_neuronas)]

    for s1 in range(n_base):
        for s2 in range(n_base):
            vector = np.zeros(n_base*2+2)[:, None]
            vector[s1] = 1
            vector[(n_base+1)+s2] = 1
            hprev = estados[tabla_combinaciones[s1][s2]]
            h_z = np.matmul(pesos_xh, vector) + np.matmul(pesos_hh, hprev[:, None]) + sesgo_h
            for n in range(n_neuronas):
                mapa[n].append([s1, s2, np.tanh(h_z[n][0])])

    return np.array([np.array(mapa_n) for mapa_n in mapa])


# Activation maps when the previous position produced a carry / no carry.
mapa_con_acarreo = _mapa_activaciones(tabla_combinaciones_con_acarreo, hs,
                                      Wxh, Whh, bh, hidden_size, base)
mapa_sin_acarreo = _mapa_activaciones(tabla_combinaciones_sin_acarreo, hs,
                                      Wxh, Whh, bh, hidden_size, base)

In [None]:
import matplotlib.cm as cm
import IPython.display as display

# Kept from the original cell; the panels below are drawn with cmap="gray".
colormap = cm.get_cmap(name='bwr', lut=1000)

# Tick positions shared by both axes of both panels.
marcas = list(range(10))

for neurona in range(hidden_size):
    display.display(display.HTML('<h2>Neuron '+str(neurona)+'</h2>'))

    # Scatter the [s1, s2, activation] rows into (base x base) grids,
    # with s1 as the row index and s2 as the column index.
    puntos_con = mapa_con_acarreo[neurona]
    colors_con_acarreo = np.zeros((base, base))
    colors_con_acarreo[puntos_con[:, 0].astype(int),
                       puntos_con[:, 1].astype(int)] = puntos_con[:, 2]

    puntos_sin = mapa_sin_acarreo[neurona]
    colors_sin_acarreo = np.zeros((base, base))
    colors_sin_acarreo[puntos_sin[:, 0].astype(int),
                       puntos_sin[:, 1].astype(int)] = puntos_sin[:, 2]

    # Left panel: no incoming carry. Right panel: incoming carry.
    fig, ejes = plt.subplots(1, 2, figsize=(8, 6))
    for ax, colores in zip(ejes, (colors_sin_acarreo, colors_con_acarreo)):
        pos = ax.imshow(colores, cmap="gray", vmin=-1, vmax=1, origin='lower')
        ax.grid(True)
        ax.set_xlabel("sum 1", size=24)
        ax.set_xticks(marcas)
        ax.set_yticks(marcas)
        ax.tick_params(axis='both', labelsize=20)

    # Only the left panel carries a y label; the right one is left blank.
    ejes[0].set_ylabel("sum 2", size=24)
    ejes[1].set_ylabel("")

    # The colorbar and the wspace adjustment remain disabled, as in the original.
    plt.show()