In [16]:
import numpy as np
import pandas as pd
import string
import spacy
import re
import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Model
import random
from sklearn.model_selection import train_test_split
import tensorflow_datasets as tfds
import os
import time

In [2]:
if tf.config.list_physical_devices('GPU'):
    physical_devices = tf.config.list_physical_devices('GPU')
    tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)
    tf.config.experimental.set_virtual_device_configuration(physical_devices[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4000)])

# ETAPA DE CARREGAMENTO DO DATASET

In [3]:
data = pd.read_json('tcc1.json', encoding='utf-8')

# ETAPA DE PRÉ-PROCESSAMENTO

In [4]:
titulo_input = ['<start> ' + m + ' <end>' for m in data.título.tolist()]
noticia_input = ['<start> ' + m + ' <end>' for m in data.texto.tolist()]

# ETAPA DE TOKENIZAÇÃO

In [5]:
def token(texto, tam_max):
    tokens = tf.keras.preprocessing.text.Tokenizer(lower=True, filters='', num_words=2**16)
    tokens.fit_on_texts(texto)
    tensor = tokens.texts_to_sequences(texto)
    tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor, padding='post', maxlen=tam_max)
    return tensor, tokens

In [6]:
data_input_tokens, data_input = token(noticia_input, 600)
data_target_tokens, target_input = token(titulo_input, 20)

In [7]:
num_encoder_tokens = data_input_tokens.shape[1]
num_decoder_tokens = data_target_tokens.shape[1]

# Divisão dos dados

In [8]:
input_data_train, input_data_test, input_decoder_train, input_decoder_test = train_test_split(data_input_tokens, data_target_tokens, test_size=0.2)

# Variáveis de configuração da rede

In [9]:
buffer = len(input_data_train)
batch = 16
steps_por_epoca = len(input_data_train)//batch
embedding_dim = 200
units = 1024
vocab_size_input = len(data_input.word_index)+1
vocab_size_target = len(target_input.word_index)+1

dataset = tf.data.Dataset.from_tensor_slices((input_data_train, input_decoder_train)).shuffle(buffer)
dataset = dataset.batch(batch, drop_remainder=True)

# Criação do dataset

In [72]:
encoder_input_data = np.zeros(
    (len(noticia_input), num_encoder_tokens, vocab_size_input), dtype='float32')
decoder_input_data = np.zeros(
    (len(noticia_input), num_decoder_tokens, vocab_size_target), dtype='float32')

MemoryError: 

print(encoder_input_data.shape)
print(decoder_input_data.shape)

In [11]:
for i, input_text in enumerate(input_data_train):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t, char] = 1.

In [12]:
for i, decoder_text in enumerate(input_decoder_train):
    for t, char in enumerate(decoder_text):
        decoder_input_data[i, t, char] = 1.

# Arquitetura da Rede

In [42]:
class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, emb_dim, units, batch):
        super(Encoder, self).__init__()
        self.batch = batch
        self.units = units
        self.embedding = tf.keras.layers.Embedding(vocab_size, emb_dim)
        self.lstm = tf.keras.layers.LSTM(units, return_sequences=True, return_state=True, recurrent_initializer='glorot_uniform')
    
    def call(self, x, hidden):
        x = self.embedding(x)
        output, state = self.lstm(x, initial_state = hidden)
        return output, state
    def intializer_hidden_state(self):
        return tf.zeros((self.batch, self.units))

In [43]:
encoder = Encoder(vocab_size_input, embedding_dim, units, batch)

In [44]:
class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        query_with_time_axis = tf.expand_dims(query, 1)
        score = self.V(tf.nn.tanh(self.W1(query_with_time_axis) + self.W2(values)))

        attention_weights = tf.nn.softmax(score, axis=1)

        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

In [45]:
attention_layer = BahdanauAttention(10)

In [53]:
class Decoder(tf.keras.Model):
    def __iniit__(self, vocab_size, emb_dim, units, batch):
        super(Decoder, self).__init__()
        self.batch = batch
        self.units = units
        self.embedding = tf.keras.layers.Embedding(vocab_size, emb_dim)
        self.lstm = tf.keras.layers.LSTM(units, return_sequences=True, return_state=True, recurrent_initializer='glorot_uniform')
        self.attention = BahdanauAttention(self.units)
    
    def call(self, x, hidden, output):
        context_vector, attention_weights = self.attention(hidden, output)
        x = self.embedding(x)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        output, state = self.gru(x)
        output = tf.reshape(output, (-1, output.shape[2]))
        return output, state, attention_weights

In [54]:
decoder = Decoder(vocab_size_target, embedding_dim, units, batch)

In [55]:
def erro(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    return tf.reduce_mean(loss_)

In [56]:
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=tf.keras.optimizers.Adam(), encoder=encoder, decoder=decoder)

In [57]:
@tf.function
def treino(input_data, target, hidden):
    loss = 0

    with tf.GradientTape() as tape:
        enc_output, hidden = encoder(input_data, hidden)
        dec_hidden = hidden
        dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * batch, 1)

        for t in range(1, targ.shape[1]):
            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
            loss += erro(targ[:, t], predictions)
            # using teacher forcing
            dec_input = tf.expand_dims(targ[:, t], 1)

    batch_loss = (loss / int(targ.shape[1]))
    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    return batch_loss

In [63]:
(input_data, target, hidden)

(<tf.Tensor: shape=(16, 600), dtype=int32, numpy=
 array([[   42,     2,   185, ...,     0,     0,     0],
        [ 3738,     4,   161, ...,  1738, 19778,    43],
        [   42,  3153,     6, ...,     0,     0,     0],
        ...,
        [   42,  4625,    31, ...,     0,     0,     0],
        [   42,     2,  9570, ...,     0,     0,     0],
        [ 3023,   299,    41, ...,  1367, 18348,    43]])>,
 <tf.Tensor: shape=(16, 20), dtype=int32, numpy=
 array([[   1,   94,    5,   35,  371,  173, 1332,   60,  336, 1333,    3,
           59,   29, 1334,    2,    0,    0,    0,    0,    0],
        [   1,   72,   53, 1448,    6,   98,  234,    8,  113,   15, 1449,
            3, 1450,    2,    0,    0,    0,    0,    0,    0],
        [   1,  997,    7,  338,  410,   39,  998,    3,  999,    8,  159,
            9,  337,    4, 1000,    7, 1001,    2,    0,    0],
        [ 107,    5,   21,  108,   13,   93,    8,  983,  984,    3,  985,
          986,   74,  169,    7,  170,  107,    3, 

In [71]:
epocas = 10
erro_acumulado = []
for e in range(epocas):
    inicio = time.time()
    hidden = encoder.intializer_hidden_state()
    erro_total = 0
    for (batch, (input_data, target)) in enumerate(dataset.take(steps_por_epoca)):
        batch_loss = treino(input_data, target, hidden)
        erro_total += batch_loss
        
    print('Epoca {} Erro {:.04f}'.format(epoca+1, erro_total/steps+por_epoca))
    erro_acumulado.append(epoca+1, erro_total/steps+por_epoca)
    print('Para uma época levou {}s'.format(time.time() - inicio))
checkpoint.save(file_prefix = checkpoint_prefix)

ValueError: in converted code:

    <ipython-input-36-97a6f1c6cfba>:6 treino  *
        enc_output, hidden = encoder(input_data, hidden)
    C:\Users\renat\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py:778 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    <ipython-input-42-aad46f990c03>:11 call  *
        output, state = self.lstm(x, initial_state = hidden)
    C:\Users\renat\Anaconda3\lib\site-packages\tensorflow_core\python\keras\layers\recurrent.py:697 __call__
        return super(RNN, self).__call__(inputs, **kwargs)
    C:\Users\renat\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py:748 __call__
        self._maybe_build(inputs)
    C:\Users\renat\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py:2116 _maybe_build
        self.build(input_shapes)
    C:\Users\renat\Anaconda3\lib\site-packages\tensorflow_core\python\keras\layers\recurrent.py:574 build
        self._validate_state_spec(state_size, self.state_spec)
    C:\Users\renat\Anaconda3\lib\site-packages\tensorflow_core\python\keras\layers\recurrent.py:605 _validate_state_spec
        raise validation_error

    ValueError: An `initial_state` was passed that is not compatible with `cell.state_size`. Received `state_spec`=ListWrapper([InputSpec(shape=(0, 1024), ndim=2)]); however `cell.state_size` is [1024, 1024]


In [None]:
model = Model([encoder_inputs, decoder_inputs], decoder_inputs)

In [None]:
model.summary()

# Compilação do modelo

In [None]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Treinamento do modelo

In [None]:
model.fit([encoder_input_data, decoder_input_data], decoder_input_data, batch_size=batch, epochs=100, validation_split=0.2)

In [None]:
model.save('sumAbstrat.h5')