In [7]:
from keras.datasets import imdb

# Load the IMDB reviews; num_words=10000 caps the vocabulary used in the encoded reviews.
imdb_train, imdb_test = imdb.load_data(num_words=10000)
train_data, train_labels = imdb_train
test_data, test_labels = imdb_test

In [24]:
# Decode a randomly chosen IMDB review back into readable text.
import random

# random.randint is inclusive on BOTH ends, so the upper bound has to be
# len - 1; using len(train_data) could pick an out-of-range index.
index = random.randint(0, len(train_data) - 1)

# word -> integer mapping, inverted so integers can be turned back into words.
word_index = imdb.get_word_index()
reverse_word_index = {value: key for key, value in word_index.items()}

# The encoded reviews are offset by 3 with respect to word_index; any index
# without a matching word is rendered as '?'.
decoded_review = ' '.join(reverse_word_index.get(i - 3, '?') for i in train_data[index])

print(decoded_review)
print(train_labels[index])

? true the setting in paris is great the actors are fine the story is a twisted morality play is it supposed to say that if you want someone badly enough it's ok to hurt everyone else along the way in a real romance you sort of want less cliché than the man who has become bored with his wife and is willing to dump his family and the woman who is ok with ? him to do this so what if they are decent looking and if karen allen shows off her body the characters are still self absorbed and ? maybe the moral of the story is you get what you deserve i give it a 4 only for the fast ? potential through the male interest bits
0


In [9]:
import numpy as np

def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode a collection of integer sequences.

    Each sequence becomes one row of length ``dimension`` in which the
    positions listed in the sequence are set to 1 and everything else is 0.

    Args:
        sequences: sized collection of integer index sequences.
        dimension: length of each encoded row.

    Returns:
        A ``(len(sequences), dimension)`` float array of zeros and ones.
    """
    encoded = np.zeros((len(sequences), dimension))
    # Iterating a 2-D array yields row views, so writing to `row` fills `encoded`.
    for row, word_ids in zip(encoded, sequences):
        row[word_ids] = 1.0
    return encoded
 
# Multi-hot encode the training and test reviews.
x_train, x_test = (vectorize_sequences(split) for split in (train_data, test_data))

In [10]:
# Labels as float32 arrays.
y_train = np.asarray(train_labels, dtype='float32')
y_test = np.asarray(test_labels, dtype='float32')

In [11]:
from keras import models
from keras import layers

# Architecture: vectorized text (10000) -> Dense 16 (relu) -> Dense 16 (relu)
# -> Dense 1 (sigmoid), whose output is a probability in [0, 1].
model = models.Sequential([
    layers.Input(shape=(10000,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [12]:
# Binary classification: RMSprop optimizer, binary cross-entropy loss, accuracy
# reported under the 'accuracy' / 'val_accuracy' names.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [13]:
# Hold out the first 10000 training samples for validation; train on the rest.
x_val, partial_x_train = x_train[:10000], x_train[10000:]
y_val, partial_y_train = y_train[:10000], y_train[10000:]

In [14]:
# Train for 20 epochs in batches of 512, passing the held-out split as validation data.
history = model.fit(
    partial_x_train,
    partial_y_train,
    validation_data=(x_val, y_val),
    batch_size=512,
    epochs=20,
)

Epoch 1/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - accuracy: 0.6757 - loss: 0.6112 - val_accuracy: 0.8339 - val_loss: 0.4294
Epoch 2/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8889 - loss: 0.3618 - val_accuracy: 0.8664 - val_loss: 0.3471
Epoch 3/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.9198 - loss: 0.2632 - val_accuracy: 0.8857 - val_loss: 0.2984
Epoch 4/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9389 - loss: 0.2053 - val_accuracy: 0.8848 - val_loss: 0.2903
Epoch 5/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9477 - loss: 0.1702 - val_accuracy: 0.8887 - val_loss: 0.2809
Epoch 6/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9565 - loss: 0.1458 - val_accuracy: 0.8850 - val_loss: 0.2874
Epoch 7/20
[1m30/30[0m [32m━━━━━━━━

In [15]:
import matplotlib.pyplot as plt

# Training vs. validation loss, one point per epoch.
history_dict = history.history
loss_values, val_loss_values = (history_dict[key] for key in ('loss', 'val_loss'))
epochs = range(1, len(loss_values) + 1)  # epochs are numbered from 1

# Dots for training, solid line for validation.
for series, fmt, label in (
    (loss_values, 'bo', 'Training loss'),
    (val_loss_values, 'b', 'Validation loss'),
):
    plt.plot(epochs, series, fmt, label=label)

plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

ModuleNotFoundError: No module named 'matplotlib'

In [16]:
 # Training vs. validation accuracy, one point per epoch.
 plt.clf()
 # The history entry is named after the metric string given to compile():
 # 'accuracy' for the first model in this notebook, 'acc' for the K-fold one.
 # Accept either so the cell works with whichever history is current.
 acc_key = 'accuracy' if 'accuracy' in history_dict else 'acc'
 acc_values = history_dict[acc_key]
 val_acc_values = history_dict['val_' + acc_key]

 plt.plot(epochs, acc_values, 'bo', label='Training acc')
 plt.plot(epochs, val_acc_values, 'b', label='Validation acc')
 plt.title('Training and validation accuracy')
 plt.xlabel('Epochs')
 plt.ylabel('Accuracy')  # this axis shows accuracy, not loss
 plt.legend()
 plt.show()

NameError: name 'plt' is not defined

In [17]:
# Run the model on the encoded test reviews; the final layer is a single sigmoid
# unit, so each row of the result is one value between 0 and 1.
model.predict(x_test)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step


array([[0.02660017],
       [0.9999193 ],
       [0.91328114],
       ...,
       [0.00318769],
       [0.01363045],
       [0.8855818 ]], dtype=float32)

In [36]:
def text_to_sequence(text, word_index=None, dimension=10000, index_from=3,
                     start_char=1, oov_char=2):
    """Multi-hot encode free text the same way the training reviews are encoded.

    The encoded IMDB reviews do not use the raw ranks from
    ``imdb.get_word_index()``: every word is shifted by ``index_from`` (which is
    why decoding subtracts 3), each review starts with a start marker, and words
    outside the vocabulary are replaced by an out-of-vocabulary marker. This
    function applies the same convention so the vector lines up with the
    features the model was trained on.

    Args:
        text: review text; it is lower-cased and split on whitespace.
        word_index: optional word -> rank mapping. Defaults to
            ``imdb.get_word_index()``.
        dimension: length of the returned vector (vocabulary size).
        index_from: offset added to every word rank.
        start_char: position marked for the start-of-review token.
        oov_char: position marked when a word is unknown or falls outside
            ``dimension`` after the offset.

    Returns:
        1-D float array of length ``dimension`` containing zeros and ones.
    """
    if word_index is None:
        word_index = imdb.get_word_index()

    vector = np.zeros(dimension)
    vector[start_char] = 1.0  # every training review carries the start marker
    for word in text.lower().split():
        rank = word_index.get(word)
        if rank is not None and rank + index_from < dimension:
            vector[rank + index_from] = 1.0
        else:
            # Unknown or too-rare words collapse to the OOV marker.
            vector[oov_char] = 1.0

    return vector


# Ask for a review, encode it and classify it with the trained model.
text_review = input("Escribe una reseña: ")
input_vector = text_to_sequence(text_review)

print(input_vector)

# predict works on batches, so the single vector is wrapped in a 1-row array.
prediction = model.predict(np.array([input_vector]))
# The label is computed outside the f-string: reusing double quotes inside a
# double-quoted f-string is a SyntaxError before Python 3.12. The comparison
# uses the scalar score rather than a 1-element array.
label = "positiva" if float(prediction[0][0]) > 0.5 else "negativa"
print(f"Reseña {label}: {prediction[0]}")


KeyboardInterrupt: Interrupted by user

In [39]:
# Validación K-fold
import tensorflow
import numpy as np
from keras import models, layers
from keras.datasets import imdb
from keras.utils import to_categorical


def vectorize_sequences(sequences, dimension=10000):
    """Turn integer sequences into rows of zeros and ones.

    Row ``n`` of the result has a 1 at every index that appears in
    ``sequences[n]`` and 0 everywhere else.

    Args:
        sequences: indexable, sized collection of integer index sequences.
        dimension: number of columns in the result.

    Returns:
        Float array of shape ``(len(sequences), dimension)``.
    """
    n_samples = len(sequences)
    one_hot = np.zeros((n_samples, dimension))
    for row_index in range(n_samples):
        one_hot[row_index, sequences[row_index]] = 1.0
    return one_hot


# Model factory
def build_model():
    """Build and compile a fresh, untrained binary classifier.

    Architecture: 10000-wide input -> Dense(16, relu) -> Dense(16, relu)
    -> Dense(1, sigmoid). Compiled with RMSprop, binary cross-entropy and an
    accuracy metric registered as ``'acc'``.

    Returns:
        The compiled Sequential model.
    """
    model = models.Sequential()
    # Declare the input with an explicit Input layer, as the first model in this
    # notebook does, instead of passing input_shape to the first Dense layer.
    model.add(layers.Input(shape=(10000,)))
    model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['acc'])
    return model

# Load and prepare the data. Only the training split takes part in the
# cross-validation; the test split is loaded but left out.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

all_data = vectorize_sequences(train_data)
all_labels = np.array(train_labels, dtype='float32')

k = 4  # Number of folds

# Nominal fold size, kept for reference; the real boundaries come from array_split.
fold_size = len(all_data) // k

# array_split spreads any remainder over the first folds, so no sample is
# dropped when the dataset size is not an exact multiple of k (plain
# fold_size slicing would silently discard the trailing samples).
folds_data = np.array_split(all_data, k)
folds_labels = np.array_split(all_labels, k)
for fold in range(k):
    print(f"Creado fold: #{fold + 1}")
num_epochs = 20
batch_size = 512

all_scores = []

# K-fold cross-validation: each fold is held out once while the remaining
# k - 1 folds are used for training.
for fold in range(k):
    print(f"Analizando fold: #{fold + 1}")

    # Held-out fold for this round.
    fold_test_data = folds_data[fold]
    fold_test_labels = folds_labels[fold]

    # All the other folds, in their original order, form the training set.
    fold_training_data = np.concatenate(folds_data[:fold] + folds_data[fold + 1:k])
    fold_training_labels = np.concatenate(folds_labels[:fold] + folds_labels[fold + 1:k])

    # A fresh model per fold so nothing learned in one round carries over.
    model = build_model()

    history = model.fit(
        fold_training_data,
        fold_training_labels,
        validation_data=(fold_test_data, fold_test_labels),
        epochs=num_epochs,
        batch_size=batch_size,
    )

    # Score the trained model on the held-out fold and record its accuracy.
    val_loss, val_acc = model.evaluate(fold_test_data, fold_test_labels, verbose=0)
    print(f"Fold #{fold + 1}: {val_acc}")
    all_scores.append(val_acc)

# Mean and standard deviation of the per-fold accuracies.
print(f"Media del k-fold: {np.mean(all_scores)}")
print(f"Desviación típica del k-fold: {np.std(all_scores)}")

Creado fold: #1
Creado fold: #2
Creado fold: #3
Creado fold: #4
Analizando fold: #1
Epoch 1/20
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - acc: 0.7069 - loss: 0.5888 - val_acc: 0.8565 - val_loss: 0.3879
Epoch 2/20
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - acc: 0.8971 - loss: 0.3220 - val_acc: 0.8730 - val_loss: 0.3200
Epoch 3/20
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - acc: 0.9263 - loss: 0.2342 - val_acc: 0.8898 - val_loss: 0.2762
Epoch 4/20
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - acc: 0.9399 - loss: 0.1859 - val_acc: 0.8894 - val_loss: 0.2760
Epoch 5/20
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - acc: 0.9490 - loss: 0.1567 - val_acc: 0.8901 - val_loss: 0.2778
Epoch 6/20
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - acc: 0.9633 - loss: 0.1265 - val_acc: 0.8790 - val_loss: 0.3167
Epoch 7/20
[1m