# Importación de librerías

In [1]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
import tensorflow as tf
import numpy as np

# Cargar datos con un límite de palabras
(train_texts, train_labels), (test_texts, test_labels) = imdb.load_data(num_words=50000)

# Ajuste de longitud de secuencias
train_texts_padded = sequence.pad_sequences(train_texts, maxlen=150)
test_texts_padded = sequence.pad_sequences(test_texts, maxlen=150)


# Preparación del índice de palabras y listas de palabras clave

In [2]:
# Obtención e inversión del índice de palabras para decodificación
word_index = tf.keras.datasets.imdb.get_word_index()
inverted_index = {i + 3: word for word, i in word_index.items()}
inverted_index[1] = "[START]"
inverted_index[2] = "[OOV]"

# Listas de palabras con connotación positiva y negativa
positive_set = {"good", "great", "excellent", "amazing", "awesome", "fantastic"}
negative_set = {"bad", "terrible", "horrible", "worst", "awful"}


# Extracción y escalado de características

In [3]:
from sklearn.preprocessing import StandardScaler

def calculate_features(sequences):
    calculated_features = []
    for seq in sequences:
        decoded = [inverted_index.get(i, "") for i in seq]
        positive_hits = sum(word in positive_set for word in decoded)
        negative_hits = sum(word in negative_set for word in decoded)
        total_words = len(decoded)
        ratio = (positive_hits - negative_hits) / total_words if total_words > 0 else 0
        calculated_features.append([len(seq), ratio])
    return np.array(calculated_features)

# Extracción y normalización de características
features_train = calculate_features(train_texts_padded)
features_test = calculate_features(test_texts_padded)

scaler = StandardScaler()
features_train_scaled = scaler.fit_transform(features_train)
features_test_scaled = scaler.transform(features_test)


# Construcción y compilación del modelo

In [4]:
from tensorflow.keras.layers import Input, Concatenate, LSTM, Dense, Embedding, Dropout
from tensorflow.keras.models import Model

sequence_input = Input(shape=(150,))
embedded_sequence = Embedding(50000, 128)(sequence_input)
lstm_layer = LSTM(128, return_sequences=True)(embedded_sequence)
lstm_layer = Dropout(0.3)(lstm_layer)
lstm_layer = LSTM(64, return_sequences=False)(lstm_layer)
lstm_layer = Dropout(0.3)(lstm_layer)

feature_input = Input(shape=(2,))
merged_layers = Concatenate()([lstm_layer, feature_input])

dense_layers = Dense(64, activation='relu')(merged_layers)
dense_layers = Dropout(0.5)(dense_layers)
dense_layers = Dense(32, activation='relu')(dense_layers)
output_layer = Dense(1, activation='sigmoid')(dense_layers)

model = Model(inputs=[sequence_input, feature_input], outputs=output_layer)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


# Entrenamiento y evaluación del modelo

In [5]:
# Entrenamiento del modelo
model.fit([train_texts_padded, features_train_scaled], train_labels, 
          batch_size=32, 
          epochs=15, 
          validation_data=([test_texts_padded, features_test_scaled], test_labels), 
          verbose=2)

# Evaluación del modelo
test_loss, test_accuracy = model.evaluate([test_texts_padded, features_test_scaled], test_labels, batch_size=32, verbose=2)
print('Pérdida de prueba:', test_loss)
print('Exactitud de prueba:', test_accuracy)


Epoch 1/15
782/782 - 163s - 208ms/step - accuracy: 0.7940 - loss: 0.4427 - val_accuracy: 0.8641 - val_loss: 0.3273
Epoch 2/15
782/782 - 168s - 215ms/step - accuracy: 0.9174 - loss: 0.2280 - val_accuracy: 0.8600 - val_loss: 0.3323
Epoch 3/15
782/782 - 169s - 216ms/step - accuracy: 0.9544 - loss: 0.1307 - val_accuracy: 0.8450 - val_loss: 0.5018
Epoch 4/15
782/782 - 170s - 217ms/step - accuracy: 0.9746 - loss: 0.0759 - val_accuracy: 0.8441 - val_loss: 0.4903
Epoch 5/15
782/782 - 170s - 218ms/step - accuracy: 0.9854 - loss: 0.0460 - val_accuracy: 0.8423 - val_loss: 0.6183
Epoch 6/15
782/782 - 171s - 219ms/step - accuracy: 0.9892 - loss: 0.0351 - val_accuracy: 0.8366 - val_loss: 0.5982
Epoch 7/15
782/782 - 190s - 243ms/step - accuracy: 0.9894 - loss: 0.0349 - val_accuracy: 0.8382 - val_loss: 0.9565
Epoch 8/15
782/782 - 210s - 268ms/step - accuracy: 0.9929 - loss: 0.0236 - val_accuracy: 0.8322 - val_loss: 0.7305
Epoch 9/15
782/782 - 228s - 292ms/step - accuracy: 0.9950 - loss: 0.0155 - val_a