In [1]:
#Problemas com ambiente virtual e configuração da gpu 
import os

conda_prefix = os.environ.get('CONDA_PREFIX', '')
os.environ['XLA_FLAGS'] = f'--xla_gpu_cuda_data_dir={conda_prefix}'

In [2]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
import numpy as np

2025-08-25 22:08:45.340175: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-25 22:08:45.349261: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1756170525.361076   13438 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1756170525.365028   13438 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-08-25 22:08:45.377014: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [8]:
"""
Passagem dos parametros.
Carregamento base de dados.
Pré processamento dos dados.
"""

vocab_size = 10000
maxlen = 200

(X_treino, y_treino), (X_testes, y_testes) = imdb.load_data(num_words=vocab_size)

X_treino_padded = pad_sequences(X_treino, maxlen=maxlen, padding='post', truncating='post')
X_testes_padded = pad_sequences(X_testes, maxlen=maxlen, padding='post', truncating='post')

y_treino = np.array(y_treino)
y_testes = np.array(y_testes)

print("Dados de treino (shape):", X_treino_padded.shape)
print("Dados de teste (shape):", X_testes_padded.shape)

Dados de treino (shape): (25000, 200)
Dados de teste (shape): (25000, 200)


In [4]:
"""
Criação do modelo com embedding, pooling e 1 camada oculta de 24 neuronios.
Compilação com otimizador adam, métrica de acerto acurácia
"""
embedding_dim = 16

model = Sequential([
    Embedding(vocab_size, embedding_dim),
    GlobalAveragePooling1D(),
    Dense(24, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()

I0000 00:00:1756170533.083780   13438 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4123 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 6GB Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


In [5]:
num_epochs = 10
history = model.fit(
    training_padded,
    training_labels,
    epochs=num_epochs,
    validation_data=(testing_padded, testing_labels),
    verbose=2
)

Epoch 1/10


I0000 00:00:1756170540.476658   13565 service.cc:148] XLA service 0x7fe2a00199a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1756170540.476684   13565 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 6GB Laptop GPU, Compute Capability 8.6
2025-08-25 22:09:00.493028: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1756170540.595762   13565 cuda_dnn.cc:529] Loaded cuDNN version 91200
I0000 00:00:1756170541.166270   13565 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


782/782 - 5s - 6ms/step - accuracy: 0.7378 - loss: 0.5222 - val_accuracy: 0.8021 - val_loss: 0.4115
Epoch 2/10
782/782 - 2s - 3ms/step - accuracy: 0.8726 - loss: 0.3089 - val_accuracy: 0.8358 - val_loss: 0.3651
Epoch 3/10
782/782 - 3s - 3ms/step - accuracy: 0.9020 - loss: 0.2506 - val_accuracy: 0.8566 - val_loss: 0.3333
Epoch 4/10
782/782 - 2s - 3ms/step - accuracy: 0.9154 - loss: 0.2190 - val_accuracy: 0.8584 - val_loss: 0.3382
Epoch 5/10
782/782 - 2s - 3ms/step - accuracy: 0.9276 - loss: 0.1938 - val_accuracy: 0.8538 - val_loss: 0.3579
Epoch 6/10
782/782 - 2s - 3ms/step - accuracy: 0.9349 - loss: 0.1781 - val_accuracy: 0.8518 - val_loss: 0.3774
Epoch 7/10
782/782 - 3s - 3ms/step - accuracy: 0.9430 - loss: 0.1601 - val_accuracy: 0.8430 - val_loss: 0.4109
Epoch 8/10
782/782 - 2s - 2ms/step - accuracy: 0.9476 - loss: 0.1477 - val_accuracy: 0.8489 - val_loss: 0.4097
Epoch 9/10
782/782 - 2s - 3ms/step - accuracy: 0.9546 - loss: 0.1346 - val_accuracy: 0.8435 - val_loss: 0.4481
Epoch 10/10


In [6]:
"""
Carrega um dicionario pré existente do imdb.
Cria uma função para receber um review e devolver uma matriz
"""

indice_palavras = imdb.get_word_index()

def preparar_novas_reviews(lista_reviews):
    sequencias = []
    for review_texto in lista_reviews:
        sequencia = [indice_palavras.get(palavra, 2) + 3 for palavra in review_texto.lower().split()] #O "+3" é necessário por causa dos índices 0, 1 e 2, que são reservados no dataset
        sequencias.append(sequencia)
    
    return pad_sequences(sequencias, maxlen=maxlen, padding='post', truncating='post') #Aplica padding para que todas as sequências tenham o mesmo tamanho


reviews_para_teste = [
    "This movie was fantastic! The acting was incredible and the story was amazing",
    "I really did not like this movie. The plot was weak and the acting was terrible",
    "An average movie, nothing special but also not terrible",
]

reviews_preparadas = preparar_novas_reviews(reviews_para_teste)

In [7]:
"""
Carrega o que o modelo prevê 
Itera sobre as reviews e calcula o 'sentimento' e a 'confiança' de cada frase.
Printa os resultados
"""

predicoes = model.predict(reviews_preparadas)

for i, review_texto in enumerate(reviews_para_teste):
    predicao_valor = predicoes[i][0]

    sentimento = "Positivo" if predicao_valor > 0.5 else "Negativo"

    confianca = predicao_valor if predicao_valor > 0.5 else 1 - predicao_valor

    print(f"Review: {review_texto}")
    print(f"Sentimento: {sentimento} (Confiança: {confianca:.2f})")
    print("-" * 70)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 264ms/step
Review: This movie was fantastic! The acting was incredible and the story was amazing
Sentimento: Negativo (Confiança: 0.98)
----------------------------------------------------------------------
Review: I really did not like this movie. The plot was weak and the acting was terrible
Sentimento: Negativo (Confiança: 1.00)
----------------------------------------------------------------------
Review: An average movie, nothing special but also not terrible
Sentimento: Negativo (Confiança: 1.00)
----------------------------------------------------------------------
