# Sentiment Training - IMDB
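Entrenamiento de un modelo simple con TensorFlow usando el dataset público de reseñas de películas de IMDB. El notebook carga la versión ya tokenizada que incluye Keras (`tensorflow.keras.datasets.imdb`); como referencia, el dataset en formato de texto está disponible en Kaggle: https://www.kaggle.com/datasets/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews (IMDB Dataset of 50K Movie Reviews)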

In [16]:
# Se importan librerias
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.datasets import imdb
import numpy as np

Cargamos el dataset IMDB y lo preprocesamos (padding de las secuencias a una longitud fija)

In [17]:
# Vocabulary cap handed to imdb.load_data, and the fixed sequence length
# handed to pad_sequences.
num_words = 10000
maxlen = 100

# Load the integer-encoded reviews and their labels (train and test splits).
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

# Pad/truncate every review in both splits to exactly `maxlen` tokens.
x_train, x_test = (
    pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)

In [18]:
# Build the tokenizer that is saved alongside the model.
#
# x_train already holds integer word ids produced by imdb.load_data, so
# fitting a Tokenizer on those ids (cast to strings) would learn a vocabulary
# of digit strings, not English words, and could not encode real review text.
# Instead, reuse the word -> rank mapping that the dataset itself was encoded
# with, so that texts_to_sequences yields the same ids the model is trained on.

# imdb.load_data (default arguments) reserves 0 = padding, 1 = start-of-review
# and 2 = out-of-vocabulary, and shifts every word rank by 3.
index_from = 3
imdb_word_ranks = imdb.get_word_index()

# num_words matches the cap used in imdb.load_data: any id >= num_words is
# mapped to the OOV id, exactly as in the training data.
tokenizer = Tokenizer(num_words=num_words, oov_token="<OOV>")
tokenizer.word_index = {"<START>": 1, "<OOV>": 2}
tokenizer.word_index.update(
    {word: rank + index_from for word, rank in imdb_word_ranks.items()}
)
tokenizer.index_word = {idx: word for word, idx in tokenizer.word_index.items()}

# NOTE(review): sequences from imdb.load_data start with the start id (1);
# inference code should presumably prepend 1 to the tokenizer output before
# padding — confirm against the consumer of tokenizer.pickle.

Se crea un modelo simple

In [19]:
# Simple binary sentiment classifier:
# token ids -> 32-dim embeddings -> LSTM(32) -> single sigmoid unit.
model = Sequential([
    # Explicit input spec; replaces Embedding's deprecated `input_length` argument.
    tf.keras.Input(shape=(maxlen,)),
    Embedding(num_words, 32),
    LSTM(32),
    Dense(1, activation='sigmoid'),
])



In [20]:
# Configure training: Adam optimizer, binary cross-entropy loss (matches the
# single sigmoid output), and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

Se entrena el modelo

In [21]:
# Train for 3 epochs with batches of 64 reviews. The test split is passed as
# validation data, so val_loss / val_accuracy are reported after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=3,
    validation_data=(x_test, y_test),
)

Epoch 1/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 42ms/step - accuracy: 0.7845 - loss: 0.4375 - val_accuracy: 0.8450 - val_loss: 0.3484
Epoch 2/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 41ms/step - accuracy: 0.8900 - loss: 0.2676 - val_accuracy: 0.8437 - val_loss: 0.3493
Epoch 3/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 42ms/step - accuracy: 0.9178 - loss: 0.2126 - val_accuracy: 0.8420 - val_loss: 0.3896


<keras.src.callbacks.history.History at 0x23779db00d0>

In [23]:
import os
import pickle

# Make sure the output directory '../model' exists (no error if it already does).
os.makedirs('../model', exist_ok=True)

# Save the trained model as a .keras file.
# Previous approach: model.save('sentiment_model.h5')
model.save('../model/sentiment_model.keras')

# Pickle the tokenizer into the same directory as the model.
with open("../model/tokenizer.pickle", "wb") as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file, pickle.HIGHEST_PROTOCOL)

print("Modelo y tokenizer guardados en la carpeta 'model'")

Modelo y tokenizer guardados en la carpeta 'model'
