In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
plt.style.use('default')
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.layers import Input, Dense, LSTM
import tensorflow_hub as hub
import tensorflow as tf
import tensorflow_text
from tensorflow.keras.utils import plot_model
from keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report

# Baixar o modelo BERT pré-treinado

In [None]:
# TF Hub handles: the text preprocessor and the small BERT encoder it feeds.
BERT_PREPROCESS_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
BERT_ENCODER_URL = "https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-128_A-2/2"

# The graph takes one raw string per example (scalar shape, string dtype).
text_input = Input(shape=(), dtype=tf.string)

# Both hub layers are created frozen (trainable=False), so fitting a model
# built on top of them does not update the pretrained weights.
preprocessor = hub.KerasLayer(BERT_PREPROCESS_URL, trainable=False)
encoder = hub.KerasLayer(BERT_ENCODER_URL, trainable=False)

# raw text -> preprocessed encoder inputs -> dictionary of encoder outputs
encoder_inputs = preprocessor(text_input)
outputs = encoder(encoder_inputs)

# One vector per sentence: [batch_size, 128].
pooled_output = outputs["pooled_output"]
# One vector per token position: [batch_size, seq_length, 128].
sequence_output = outputs["sequence_output"]

# Mapear frases para embeddings

In [None]:
# Model that maps raw sentences straight to BERT's pooled sentence embedding.
embedding_model = Model(inputs=text_input, outputs=pooled_output)

sentences = tf.constant(["Fruit flies like fruits"])

# Embedding of the first (and only) sentence; show just its first 30 values.
first_embedding = embedding_model(sentences)[0]
print(first_embedding[:30])


# Um modelo incorporando BERT

In [None]:
# Two-unit softmax head applied directly to BERT's pooled sentence embedding.
classification_head = Dense(2, activation='softmax')
saida = classification_head(pooled_output)

# End-to-end classifier: raw text in, two class probabilities out.
rede_neural = Model(inputs=text_input, outputs=saida)
rede_neural.compile(loss='categorical_crossentropy', optimizer='adam')

# Last expression of the cell, so the architecture diagram is displayed.
plot_model(rede_neural, show_shapes=True, show_layer_activations=True)

In [None]:
# Seed for the train/test split. Without it every Restart & Run All draws a
# different split, so the reported metrics cannot be reproduced.
SPLIT_SEED = 42

# Load the reviews; the 'review' column is the text, 'sentiment' the label.
df = pd.read_csv('./datasets/IMDB Dataset.csv')
reviews = df['review'].tolist()

# OneHotEncoder expects a 2-D array, so labels become a single column (n, 1).
labels = np.asarray(df['sentiment'].tolist()).reshape(-1, 1)

# One-hot targets, densified to match the model's 2-unit softmax output.
ohe = OneHotEncoder()
y_ohe = ohe.fit_transform(labels).toarray()

# Hold out 20% of the examples for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    reviews, y_ohe, test_size=0.2, random_state=SPLIT_SEED
)

In [None]:
# Stop training once the validation loss has not improved for 3 consecutive
# epochs, and roll the model back to the weights of its best epoch.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=3,
    restore_best_weights=True,
)

# epochs=500 is only an upper bound; early stopping decides when to halt.
# A 0.2 fraction of the training data is held out to compute val_loss.
# `callbacks` is documented as a list of callbacks, so wrap the single one.
history = rede_neural.fit(
    tf.convert_to_tensor(X_train),
    y_train,
    epochs=500,
    validation_split=0.2,
    callbacks=[es],
)

In [None]:
# Run the trained classifier on the held-out reviews.
test_texts = tf.convert_to_tensor(X_test)
y_est = rede_neural.predict(test_texts)

# Map the one-hot targets and the model outputs back to the original labels.
# NOTE(review): y_est holds softmax scores rather than exact one-hot rows;
# this relies on inverse_transform choosing the highest-scoring category -
# confirm against the installed scikit-learn version.
true_labels = ohe.inverse_transform(y_test)
predicted_labels = ohe.inverse_transform(y_est)

print(classification_report(true_labels, predicted_labels))

# Modelo com BERT palavra a palavra e LSTM

In [None]:
# Collapse BERT's per-token outputs into one 256-dimensional vector.
# NOTE(review): no mask is passed here, so the LSTM presumably also reads the
# padded token positions - confirm whether the preprocessor's input mask
# should be forwarded.
sequence_aggregator = LSTM(256)
agregador = sequence_aggregator(sequence_output)

# Two-unit softmax head on top of the aggregated sequence vector.
classification_head = Dense(2, activation='softmax')
saida = classification_head(agregador)

# Rebinds `rede_neural`: from this cell on it refers to the LSTM-based model.
rede_neural = Model(inputs=text_input, outputs=saida)
rede_neural.compile(loss='categorical_crossentropy', optimizer='adam')

# Last expression of the cell, so the architecture diagram is displayed.
plot_model(rede_neural, show_shapes=True, show_layer_activations=True)

In [None]:
# Fresh early-stopping callback for this training run: halt after 3 epochs
# without a lower validation loss and restore the best epoch's weights.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=3,
    restore_best_weights=True,
)

# epochs=500 is only an upper bound; early stopping decides when to halt.
# A 0.2 fraction of the training data is held out to compute val_loss.
# `callbacks` is documented as a list of callbacks, so wrap the single one.
history = rede_neural.fit(
    tf.convert_to_tensor(X_train),
    y_train,
    epochs=500,
    validation_split=0.2,
    callbacks=[es],
)

In [None]:
# Run the model currently bound to `rede_neural` on the held-out reviews.
test_texts = tf.convert_to_tensor(X_test)
y_est = rede_neural.predict(test_texts)

# Map the one-hot targets and the model outputs back to the original labels.
# NOTE(review): y_est holds softmax scores rather than exact one-hot rows;
# this relies on inverse_transform choosing the highest-scoring category -
# confirm against the installed scikit-learn version.
true_labels = ohe.inverse_transform(y_test)
predicted_labels = ohe.inverse_transform(y_est)

print(classification_report(true_labels, predicted_labels))