In [1]:
import numpy as np
import pandas as pd
import string
import spacy
import re
import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Model

import random
from sklearn.model_selection import train_test_split
import tensorflow_datasets as tfds
import os
import time

Using TensorFlow backend.


In [2]:
if tf.config.list_physical_devices('GPU'):
    physical_devices = tf.config.list_physical_devices('GPU')
    tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)
    tf.config.experimental.set_virtual_device_configuration(physical_devices[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4000)])

# ETAPA DE CARREGAMENTO DO DATASET

In [3]:
data = pd.read_json('tcc1.json', encoding='utf-8')

# ETAPA DE PRÉ-PROCESSAMENTO

In [4]:
titulo_input = ['<start> ' + m + ' <end>' for m in data.título.tolist()]
noticia_input = ['<start> ' + m + ' <end>' for m in data.texto.tolist()]

# ETAPA DE TOKENIZAÇÃO

In [5]:
def token(texto, tam_max):
    tokens = tf.keras.preprocessing.text.Tokenizer(lower=True, filters='', num_words=2**16)
    tokens.fit_on_texts(texto)
    tensor = tokens.texts_to_sequences(texto)
    tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor, padding='post', maxlen=tam_max)
    return tensor, tokens

In [6]:
data_input_tokens, data_input = token(noticia_input, 600)
data_target_tokens, target_input = token(titulo_input, 20)

In [7]:
num_encoder_tokens = data_input_tokens.shape[1]
num_decoder_tokens = data_target_tokens.shape[1]

# Divisão dos dados

In [8]:
input_data_train, input_data_test, input_decoder_train, input_decoder_test = train_test_split(data_input_tokens, data_target_tokens, test_size=0.2)

# Variáveis de configuração da rede

In [9]:
buffer = len(input_data_train)
batch = 16
steps_por_epoca = len(input_data_train)//batch
embedding_dim = 200
units = 1024
vocab_size_input = len(data_input.word_index)+1
vocab_size_target = len(target_input.word_index)+1

# Criação do dataset

In [10]:
encoder_input_data = np.zeros(
    (len(noticia_input), num_encoder_tokens, vocab_size_input),
    dtype='float32')
decoder_input_data = np.zeros(
    (len(noticia_input), num_decoder_tokens, vocab_size_target),
    dtype='float32')

print(encoder_input_data.shape)
print(decoder_input_data.shape)

In [11]:
for i, input_text in enumerate(input_data_train):
    for t, char in enumerate(input_text):
        #print(i, "->", t, "->", data_input_tokens[char])
        encoder_input_data[i, t, char] = 1.

In [12]:
for i, decoder_text in enumerate(input_decoder_train):
    for t, char in enumerate(decoder_text):
        decoder_input_data[i, t, char] = 1.

# Arquitetura da Rede

In [18]:
def define_models(n_input, n_output, n_units):
	# define training encoder
	encoder_inputs = tf.keras.layers.Input(shape=(None, n_input))
	encoder = tf.keras.layers.LSTM(n_units, return_state=True)
	encoder_outputs, state_h, state_c = encoder(encoder_inputs)
	encoder_states = [state_h, state_c]
	# define training decoder
	decoder_inputs = tf.keras.layers.Input(shape=(None, n_output))
	decoder_lstm = tf.keras.layers.LSTM(n_units, return_sequences=True, return_state=True)
	decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
	decoder_dense = tf.keras.layers.Dense(n_output, activation='softmax')
	decoder_outputs = decoder_dense(decoder_outputs)
	model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
	# define inference encoder
	encoder_model = Model(encoder_inputs, encoder_states)
	# define inference decoder
	decoder_state_input_h = tf.keras.layers.Input(shape=(n_units,))
	decoder_state_input_c = tf.keras.layers.Input(shape=(n_units,))
	decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
	decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
	decoder_states = [state_h, state_c]
	decoder_outputs = decoder_dense(decoder_outputs)
	decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)
	# return all models
	return model, encoder_model, decoder_model

In [19]:
# generate target given source sequence
def predict_sequence(infenc, infdec, source, n_steps, cardinality):
	# encode
	state = infenc.predict(source)
	# start of sequence input
	target_seq = array([0.0 for _ in range(cardinality)]).reshape(1, 1, cardinality)
	# collect predictions
	output = list()
	for t in range(n_steps):
		# predict next char
		yhat, h, c = infdec.predict([target_seq] + state)
		# store prediction
		output.append(yhat[0,0,:])
		# update state
		state = [h, c]
		# update target sequence
		target_seq = yhat
	return array(output)

In [20]:
'''checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=tf.keras.optimizers.Adam(), encoder=encoder, decoder=decoder)'''
train, infenc, infdec = define_models(vocab_size_input, vocab_size_input, 128)
train.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

TypeError: object of type 'LSTM' has no len()

In [None]:
model = Model([encoder_inputs, decoder_inputs], decoder_inputs)

In [None]:
model.summary()

# Compilação do modelo

In [None]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Treinamento do modelo

In [None]:
model.fit([encoder_input_data, decoder_input_data], decoder_input_data, batch_size=batch, epochs=100, validation_split=0.2)

In [None]:
model.save('sumAbstrat.h5')