In [1]:
import numpy as np
import pandas as pd
import string
import spacy
import re
import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Model
import random
from sklearn.model_selection import train_test_split
import tensorflow_datasets as tfds
import os
import time

Using TensorFlow backend.


In [2]:
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

# ETAPA DE CARREGAMENTO DO DATASET

In [3]:
data = pd.read_json('tcc1.json', encoding='utf-8')

# ETAPA DE PRÉ-PROCESSAMENTO

In [4]:
titulo_input = ['<start> ' + m + ' <end>' for m in data.título.tolist()]
noticia_input = ['<start> ' + m + ' <end>' for m in data.texto.tolist()]

# ETAPA DE TOKENIZAÇÃO

In [5]:
def token(texto, tam_max):
    tokens = tf.keras.preprocessing.text.Tokenizer(lower=True, filters='', num_words=2**16)
    tokens.fit_on_texts(texto)
    tensor = tokens.texts_to_sequences(texto)
    tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor, padding='post', maxlen=tam_max)
    return tensor, tokens

In [6]:
data_input_tokens, data_input = token(noticia_input, tam_max=600)
data_target_tokens, target_input = token(titulo_input, tam_max=20)

In [7]:
num_encoder_tokens = data_input_tokens.shape[1]
num_decoder_tokens = data_target_tokens.shape[1]

# Divisão dos dados

In [8]:
input_data_train, input_data_test, input_decoder_train, input_decoder_test = train_test_split(data_input_tokens, data_target_tokens, test_size=0.2)

# Variáveis de configuração da rede

In [9]:
buffer = len(input_data_train)
batch = 16
steps_por_epoca = len(input_data_train)//batch
embedding_dim = 200
units = 1024
vocab_size_input = len(data_input.word_index)+1
vocab_size_target = len(target_input.word_index)+1

dataset = tf.data.Dataset.from_tensor_slices((input_data_train, input_decoder_train)).shuffle(buffer)
dataset = dataset.batch(batch, drop_remainder=True)

In [10]:
example_input_batch, example_target_batch = next(iter(dataset))
example_input_batch.shape, example_target_batch.shape

(TensorShape([16, 600]), TensorShape([16, 20]))

# Arquitetura da Rede

In [11]:
class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, emb_dim, units, batch):
        super(Encoder, self).__init__()
        self.batch = batch
        self.units = units
        self.embedding = tf.keras.layers.Embedding(vocab_size, emb_dim)
        self.lstm = tf.keras.layers.GRU(units, return_sequences=True, return_state=True, recurrent_initializer='glorot_uniform')
    
    def call(self, x, hidden):
        x = tf.cast(x, dtype=tf.float32)
        x = self.embedding(x)
        output, state = self.lstm(x, initial_state = hidden)
        return output, state
    
    def intializer_hidden_state(self):
        return tf.zeros((self.batch, self.units))

In [12]:
encoder = Encoder(vocab_size_input, embedding_dim, units, batch)

In [13]:
class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        query_with_time_axis = tf.expand_dims(query, 1)
        score = self.V(tf.nn.tanh(self.W1(query_with_time_axis) + self.W2(values)))

        attention_weights = tf.nn.softmax(score, axis=1)

        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

In [25]:
class Decoder(tf.keras.Model):
    def __init__(self, vocab_size, emb_dim, units, batch):
        super(Decoder, self).__init__()
        self.batch = batch
        self.units = units
        self.embedding = tf.keras.layers.Embedding(vocab_size, emb_dim)
        self.lstm = tf.keras.layers.LSTM(units, return_sequences=True, return_state=True, recurrent_initializer='glorot_uniform')
        self.attention = BahdanauAttention(self.units)
    
    def call(self, x, hidden, output):
        #print(x)
        context_vector, attention_weights = self.attention(hidden, output)
        #print(context_vector)
        x = self.embedding(x)
        #print(x)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        output, state = self.gru(x)
        output = tf.reshape(output, (-1, output.shape[2]))
        return output, state, attention_weights

In [26]:
decoder = Decoder(vocab_size_target, embedding_dim, units, batch)

In [27]:
def erro(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    return tf.reduce_mean(loss_)

In [28]:
checkpoint_dir = './treinamento_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=tf.keras.optimizers.Adam(), encoder=encoder, decoder=decoder)

In [29]:
print(target_input.word_index['<start>'])
print([target_input.word_index['<start>']] * batch)
print(target_input.word_index['<start>'] * batch)

1
[]
0


In [36]:
@tf.function
def treino(input_data, target, hidden):
    loss = 0
    #print(input_data)
    #print(target)
    with tf.GradientTape() as tape:
        encoder_output, hidden = encoder(input_data, hidden)
        print(encoder_output)
        decoder_input = tf.expand_dims([target_input.word_index['<start>']] * batch, 1)
        print(decoder_input)
        for t in range(1, target.shape[1]):
            predictions, dec_hidden, _ = decoder(decoder_input, hidden, encoder_output)
            loss += erro(target[:, t], predictions)
            # using teacher forcing
            dec_input = tf.expand_dims(target[:, t], 1)

    batch_loss = (loss / int(target.shape[1]))
    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    return batch_loss

In [37]:
epocas = 10
erro_acumulado = []
for e in range(epocas):
    inicio = time.time()
    hidden = encoder.intializer_hidden_state()
    erro_total = 0
    for (batch, (input_data, target)) in enumerate(dataset.take(steps_por_epoca)):
        batch_loss = treino(input_data, target, hidden)
        erro_total += batch_loss
        
    print('Epoca {} Erro {:.04f}'.format(epoca+1, erro_total/steps+por_epoca))
    erro_acumulado.append(epoca+1, erro_total/steps+por_epoca)
    print('Para uma época levou {}s'.format(time.time() - inicio))
checkpoint.save(file_prefix = checkpoint_prefix)

Tensor("encoder/gru/transpose_1:0", shape=(16, 600, 1024), dtype=float32)
Tensor("ExpandDims:0", shape=(0, 1), dtype=float32)


ValueError: in converted code:

    <ipython-input-36-b6cd02ba9b49>:12 treino  *
        predictions, dec_hidden, _ = decoder(decoder_input, hidden, encoder_output)
    C:\Users\renat\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py:778 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    <ipython-input-25-756b9c2be4ea>:16 call  *
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
    C:\Users\renat\Anaconda3\lib\site-packages\tensorflow_core\python\util\dispatch.py:180 wrapper
        return target(*args, **kwargs)
    C:\Users\renat\Anaconda3\lib\site-packages\tensorflow_core\python\ops\array_ops.py:1517 concat
        return gen_array_ops.concat_v2(values=values, axis=axis, name=name)
    C:\Users\renat\Anaconda3\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py:1126 concat_v2
        "ConcatV2", values=values, axis=axis, name=name)
    C:\Users\renat\Anaconda3\lib\site-packages\tensorflow_core\python\framework\op_def_library.py:742 _apply_op_helper
        attrs=attr_protos, op_def=op_def)
    C:\Users\renat\Anaconda3\lib\site-packages\tensorflow_core\python\framework\func_graph.py:595 _create_op_internal
        compute_device)
    C:\Users\renat\Anaconda3\lib\site-packages\tensorflow_core\python\framework\ops.py:3322 _create_op_internal
        op_def=op_def)
    C:\Users\renat\Anaconda3\lib\site-packages\tensorflow_core\python\framework\ops.py:1786 __init__
        control_input_ops)
    C:\Users\renat\Anaconda3\lib\site-packages\tensorflow_core\python\framework\ops.py:1622 _create_c_op
        raise ValueError(str(e))

    ValueError: Dimension 0 in both shapes must be equal, but are 16 and 0. Shapes are [16,1] and [0,1]. for 'decoder_1/concat' (op: 'ConcatV2') with input shapes: [16,1,1024], [0,1,200], [] and with computed input tensors: input[2] = <-1>.


In [None]:
model = Model([encoder_inputs, decoder_inputs], decoder_inputs)

In [None]:
model.summary()

# Compilação do modelo

In [None]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Treinamento do modelo

In [None]:
model.fit([encoder_input_data, decoder_input_data], decoder_input_data, batch_size=batch, epochs=100, validation_split=0.2)

In [None]:
model.save('sumAbstrat.h5')