In [2]:
# Importar Keras, TensorFlow y otras librerías útiles

import tensorflow as tf
from tensorflow import keras

import numpy as np

import matplotlib
import matplotlib.pyplot as plt

In [3]:
# Importar el dataset

imdb_dataset = keras.datasets.imdb

"""
Este dataset es utilizado para la clasificación binaria de reseñas (positivas o negativas) de peliculas.
Consiste en 25.000 reseñas tomadas de IMDB, etiquetadas segun sentimiento (positivo o negativo).
Estas reseñas ya han sido pre procesadas, y cada una codificada como una secuencia de índices (enteros).
Estos indices represan palabras que aparecen en la reseña.
Estas palabras fueron indexadas según su frecuencia en el dataset, es decir el índice 5 representa a la
quinta palabra más frecuente en el dataset. Esto permite un rápido filtro durante opeaciones tales como
considerar solo las 5000 palabras más frecuentes del vocabularios y otros.
"""

# Al cargar el dataset se obtienen 4 arreglos NumPy
"""
x_train, x_test: list of sequences, which are lists of indexes (integers).
    If the num_words argument was specific, the maximum possible index value is num_words-1.
    If the maxlen argument was specified, the largest possible sequence length is maxlen.
    
y_train, y_test: list of integer labels (1 for positive or 0 for negative).
"""

maximum_index = 20000

(train_data, train_labels), (test_data, test_labels) = imdb_dataset.load_data(num_words=maximum_index)


print("TRAIN DATA:", train_data[0], "....\n")
print("TRAIN LABELS:", train_labels)

# Si quisieramos cambiar la proporción de training data vs test data podemos concatenar los datasets y volver
# a dividirlos

all_data = np.concatenate((train_data, test_data), axis=0)
all_labels = np.concatenate((train_labels, test_labels), axis=0)

TRAIN DATA: [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 19193, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 10311, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 12118, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32] ....

TRAIN LABELS: [1 0 0 ... 0 1 0

In [4]:
# Ejemplo de como se ve una de las reviews
indice_ejemplo = 0

index = imdb_dataset.get_word_index()
reverse_index = dict([(value, key) for (key, value) in index.items()]) 
decoded = " ".join( [reverse_index.get(i - 3, "#") for i in test_data[indice_ejemplo]] )
print("Review: ", decoded, "\n")
print("Label: ", test_labels[indice_ejemplo])

Review:  # this film requires a lot of patience because it focuses on mood and character development the plot is very simple and many of the scenes take place on the same set in frances austen's the sandy dennis character apartment but the film builds to a disturbing climax br br the characters create an atmosphere rife with sexual tension and psychological trickery it's very interesting that robert altman directed this considering the style and structure of his other films still the trademark altman audio style is evident here and there i think what really makes this film work is the brilliant performance by sandy dennis it's definitely one of her darker characters but she plays it so perfectly and convincingly that it's scary michael burns does a good job as the mute young man regular altman player michael murphy has a small part the # moody set fits the content of the story very well in short this movie is a powerful study of loneliness sexual repression and desperation be patient #

In [5]:
# Normalización del training y test set.

def normalizar(secuencias, dimension = maximum_index):
    #relleno con 0 cuando la dimension es menor
    normalizado = np.zeros((len(secuencias), dimension))
    for i, secuencia in enumerate(secuencias):
        normalizado[i, secuencia] = 1
    return normalizado

normalized_data = normalizar(all_data)
# Convierto los labels a floats
float_labels = np.array(all_labels).astype("float32")

In [13]:
# Separo el dataset en los casos utilizados para entrenamiento y para pruebas.
factor_entrenamiento = 80 #porcentaje para entrenamiento, el resto para testing
cantidad_casos = len(normalized_data)
cantidad_entrenamiento = (int)((cantidad_casos * factor_entrenamiento) / 100) 

test_data = normalized_data[cantidad_entrenamiento:]
test_labels = float_labels[cantidad_entrenamiento:]
train_data = normalized_data[:cantidad_entrenamiento]
train_labels = float_labels[:cantidad_entrenamiento]

In [8]:
model = keras.models.Sequential()
# Input - Layer
model.add(keras.layers.Dense(50, activation = "relu", input_shape=(maximum_index, )))
# Hidden - Layers
model.add(keras.layers.Dropout(0.3, noise_shape=None, seed=None))
# Dropout selecciona neuronas al azar para ignorar durante el entrenamiento, previene overfitting
model.add(keras.layers.Dense(50, activation = "relu"))
model.add(keras.layers.Dropout(0.2, noise_shape=None, seed=None))
model.add(keras.layers.Dense(50, activation = "relu"))
# Output- Layer
model.add(keras.layers.Dense(1, activation = "sigmoid"))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 50)                1000050   
_________________________________________________________________
dropout (Dropout)            (None, 50)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 50)                2550      
_________________________________________________________________
dropout_1 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 51        
Total params: 1,005,201
Trainable params: 1,005,201
Non-trainable params: 0
______________________________________________

In [14]:
# Compila el modelo, utiliza Adam como optimizador
model.compile(
 optimizer = "adam",
 loss = "binary_crossentropy",
 metrics = ["accuracy"]
)

In [17]:
results = model.fit(
 train_data, train_labels,
 epochs = 4,
 batch_size = 500,
 validation_data = (test_data, test_labels)
)

Train on 40000 samples, validate on 10000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [18]:
print("Precisión del modelo:" , np.mean(results.history["val_accuracy"]))

Precisión del modelo: 0.89175


In [25]:
# Ejemplo tomo una review
review_index = 0
decoded = " ".join( [reverse_index.get(i - 3, "#") for i in all_data[review_index]] )
print("Reseña: ", decoded, "\n")
print("Esperado : ", all_labels[review_index])

### Ejemplo de predicción con modelo entrenado ###

prediccion = model.predict(test_data)

print(prediccion)
print("Prediccion:" , prediccion[review_index])

# Obtener el caracter con mayores posibilidades de matchear

#print("Índice dentro del array del caracter con mayores probabilidades de matchear:", np.argmax(predictions[0]))

# Examinar el valor asociado a caracter

#print("Dígito predecido:",test_labels[7])

Reseña:  # this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert # is an amazing actor and now the same being director # father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for retail and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also congratulations to the two little boy's that played the # of norman and paul they were just brilliant children are often left out of the praising list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised fo