In [55]:
import numpy as np
import pandas as pd
from keras.layers import Input
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense,Dropout,BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed


In [56]:
# Load the pre-split training and test sets from CSV files located in the
# current working directory.
data_training = pd.read_csv("df_training.csv")
data_test = pd.read_csv("df_test.csv")

In [57]:
# Training split: column "x" holds the input text, column "y" its label.
x_train, y_train = data_training["x"], data_training["y"]

# Test split: same column layout as the training split.
x_test, y_test = data_test["x"], data_test["y"]



In [58]:
# Build the TF-IDF vectorizer: unigrams and bigrams, vocabulary capped at
# 5000 features, scikit-learn's built-in English stop-word list removed.
vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1, 2), stop_words='english')

# Learn the vocabulary and IDF weights from the training text only, then
# transform it.
X_train_vect = vectorizer.fit_transform(x_train)

# Transform the test text with the already-fitted vectorizer (no refit, so
# nothing from the test set leaks into the features).
X_test_vect = vectorizer.transform(x_test)

# Give both splits the same dense float32 representation for Keras.
# Casting while the matrix is still sparse avoids materialising a full
# float64 dense copy first; the resulting values are the same as casting
# after densifying.
X_train_vect = X_train_vect.astype('float32').toarray()
X_test_vect = X_test_vect.astype('float32').toarray()



In [59]:
# Create the label encoder.
label_encoder = LabelEncoder()

# Fit on the training labels and encode both splits.
# The raw DataFrame columns are read directly (instead of the y_train /
# y_test variables this cell overwrites) so that re-running the cell does not
# refit the encoder on already-encoded integers and lose the original class
# names stored in label_encoder.classes_.
y_train = label_encoder.fit_transform(data_training["y"])
y_test = label_encoder.transform(data_test["y"])

# Sanity check: show the first 10 encoded training labels.
print(y_train[:10])

[0 0 3 2 3 0 5 4 1 2]


In [60]:
# Build, compile and train the classifier.

# Seed the Python, NumPy and backend random generators so that weight
# initialisation, dropout masks and batch shuffling are repeatable every time
# this cell is re-run. (Bit-exact results may still vary across hardware.)
set_random_seed(42)

# Input width = number of TF-IDF features; output width = number of distinct
# encoded classes present in the training labels.
n_features = X_train_vect.shape[1]
n_classes = len(np.unique(y_train))

modelo = Sequential([
    Input(shape=(n_features,)),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(n_classes, activation='softmax')
])

# Labels are integer class ids (not one-hot), hence the sparse
# categorical cross-entropy loss.
modelo.compile(optimizer=Adam(learning_rate=0.001),
               loss='sparse_categorical_crossentropy',
               metrics=['accuracy'])


# Train the model. The returned History object is kept so the per-epoch
# loss/accuracy can be inspected later, and so the cell does not end by
# echoing an object repr containing a memory address.
historial = modelo.fit(X_train_vect, y_train, epochs=5, batch_size=32)



Epoch 1/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.4834 - loss: 1.3530
Epoch 2/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9223 - loss: 0.2422
Epoch 3/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9630 - loss: 0.1183
Epoch 4/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9758 - loss: 0.0757
Epoch 5/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9846 - loss: 0.0474


<keras.src.callbacks.history.History at 0x1f99e36a4b0>

In [61]:
# Evaluate the trained model on the test split.
# The result is unpacked as (loss, accuracy), matching the single 'accuracy'
# metric the model was compiled with.
resultados_prueba = modelo.evaluate(X_test_vect, y_test)
test_loss, test_acc = resultados_prueba
print(f"Test accuracy: {test_acc}")


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8842 - loss: 0.3299
Test accuracy: 0.8870000243186951


In [62]:
# Run the model on the test features; the softmax output layer yields one
# row of per-class scores for each sample.
predicciones = modelo.predict(X_test_vect)

# Pick, for every sample, the index of the highest-scoring class. These are
# encoded class ids, in the same integer space as y_test.
predicciones_clase = np.argmax(predicciones, axis=1)


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [63]:
# Persist the trained network to disk in the working directory.
# NOTE(review): only the Keras model is saved in this cell; the fitted
# `vectorizer` and `label_encoder` are not persisted here, and both would be
# needed to run the saved model on new raw text — confirm they are saved
# elsewhere.
RUTA_MODELO = 'modelo_sentimientos.h5'
modelo.save(RUTA_MODELO)

