In [None]:
import tensorflow as tf
# https://ingenieriadesoftware.es/crea-tu-propio-motor-de-procesamiento-de-lenguaje-natural-en-python/
# Configuration parameters of the intent classifier
configuration = {
    'num_words': 10000,  # Vocabulary size (tune it to your dataset)
    'embedding_dim': 128,  # Dimension of the embedding vectors
    'input_max_num_tokens': 50  # Maximum length of the input sequences
}

# Context holding the intents the classifier has to tell apart
context = {
    'intents': ['saludo', 'despedida', 'consulta']  # List of intents
}

# Intent classifier: embedding -> average pooling -> dense layers -> softmax
model = tf.keras.Sequential([
    # Declare the input shape explicitly: Embedding's `input_length` argument is
    # deprecated in Keras 3, and a known input shape lets summary() report the
    # output shapes and parameter counts.
    tf.keras.Input(shape=(configuration['input_max_num_tokens'],)),

    # Embedding layer turning token ids into dense vectors
    tf.keras.layers.Embedding(input_dim=configuration['num_words'],
                              output_dim=configuration['embedding_dim']),

    # Pooling layer averaging over the sequence axis to reduce dimensionality
    tf.keras.layers.GlobalAveragePooling1D(),

    # Dense layers processing the pooled representation
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(24, activation='relu'),

    # Output layer: one unit per intent, softmax for multi-class classification
    tf.keras.layers.Dense(len(context['intents']), activation='softmax')
])

# Compile the model; the sparse loss expects integer class labels
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              optimizer='adam',
              metrics=['accuracy'])

# Print the layers and parameter counts of the model
model.summary()


In [32]:
import uuid
import tensorflow as tf
import numpy

# NLP configuration shared by model building, training and prediction
class NlpConfiguration:
    """Settings read by the model-building, training and prediction code.

    Args:
        num_words: Vocabulary size, used as the tokenizer's ``num_words`` and as
            the embedding layer's ``input_dim``.
        embedding_dim: Output dimension of the embedding layer.
        input_max_num_tokens: Length every token sequence is padded/truncated to.
        lower: Passed to the tokenizer as ``lower`` during training.
        oov_token: Passed to the tokenizer as ``oov_token`` during training.
        num_epochs: Number of epochs passed to ``model.fit`` during training.
        discard_oov_sentences: When true, ``predict`` returns all-zero scores for
            sentences whose token ids are all equal to 1.

    The last four options are read by the training/prediction functions of this
    notebook; they are keyword parameters with defaults so existing three-argument
    calls keep working. The default values are a choice of this class, not
    something the callers require.
    """

    def __init__(self, num_words, embedding_dim, input_max_num_tokens,
                 lower=True, oov_token='<OOV>', num_epochs=300,
                 discard_oov_sentences=True):
        self.num_words = num_words
        self.embedding_dim = embedding_dim
        self.input_max_num_tokens = input_max_num_tokens
        self.lower = lower
        self.oov_token = oov_token
        self.num_epochs = num_epochs
        self.discard_oov_sentences = discard_oov_sentences

# Bot: identity, NLP configuration, NLU contexts and the intent-classification model
class Bot:
    """A bot identified by a UUID that owns an NLP configuration and a Keras model.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, bot_id: uuid.UUID, name: str, configuration: "NlpConfiguration" = None):
        """Store the bot identity and configuration; nothing is built yet.

        Args:
            bot_id: Unique identifier of the bot.
            name: Human-readable name of the bot.
            configuration: NLP settings used by ``build_model``. May be omitted
                here, but must be set before ``build_model`` is called.
        """
        self.bot_id: uuid.UUID = bot_id
        self.name: str = name
        self.configuration = configuration
        # NLU contexts of the bot; train() iterates over this list
        self.contexts: list = []
        self.tokenizer: tf.keras.preprocessing.text.Tokenizer = None
        # Set by build_model()
        self.nlp_model: tf.keras.models.Sequential = None

    def build_model(self, num_intents: int = 3):
        """Build and compile the intent classifier and store it in ``self.nlp_model``.

        Args:
            num_intents: Number of units of the softmax output layer, i.e. the
                number of intents to classify. Defaults to 3, the value that was
                previously hard-coded.

        Returns:
            The compiled ``tf.keras.Sequential`` model (also kept in ``self.nlp_model``).

        Raises:
            ValueError: If the bot has no configuration or ``num_intents`` is < 1.
        """
        configuration = self.configuration
        if configuration is None:
            raise ValueError("Bot '%s' has no NlpConfiguration; set one before building the model" % self.name)
        if num_intents < 1:
            raise ValueError("num_intents must be at least 1, got %r" % (num_intents,))

        model = tf.keras.Sequential([
            # Explicit input shape: Embedding's `input_length` argument is deprecated
            # in Keras 3, and a known input shape lets summary() report shapes.
            tf.keras.Input(shape=(configuration.input_max_num_tokens,)),
            tf.keras.layers.Embedding(input_dim=configuration.num_words,
                                      output_dim=configuration.embedding_dim),
            tf.keras.layers.GlobalAveragePooling1D(),
            tf.keras.layers.Dense(24, activation='relu'),
            tf.keras.layers.Dense(24, activation='relu'),
            # One output unit per intent
            tf.keras.layers.Dense(num_intents, activation='softmax')
        ])

        # The sparse loss expects integer class labels
        model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                      optimizer='adam',
                      metrics=['accuracy'])

        self.nlp_model = model
        return model

# Instantiate a bot with its NLP configuration and a freshly generated unique id
config = NlpConfiguration(
    num_words=10000,
    embedding_dim=128,
    input_max_num_tokens=50,
)
bot = Bot(
    bot_id=uuid.uuid4(),
    name="MiBot",
    configuration=config,
)

# Build the bot's NLP model and print its layers and parameter counts
nlp_model = bot.build_model()
nlp_model.summary()




In [33]:
def train(bot: "Bot"):
    """Train every NLU context of ``bot`` with the bot's NLP configuration.

    Args:
        bot: Object exposing ``configuration`` and, optionally, ``contexts``
            (an iterable of NLU contexts).

    A bot whose ``contexts`` attribute is missing or ``None`` is treated as having
    no contexts, so nothing is trained instead of raising ``AttributeError``.
    """
    nlu_contexts = getattr(bot, 'contexts', None) or ()
    for nlu_context in nlu_contexts:
        __train_context(nlu_context, bot.configuration)


def __train_context(context: "NLUContext", configuration: NlpConfiguration):
    """Fit a tokenizer and an intent classifier for one NLU context.

    The training sentences of every intent in ``context.intents`` are preprocessed,
    tokenized and padded; a classifier with one output unit per intent is then
    trained on them. Results are stored on ``context``: ``tokenizer``,
    ``training_sentences``, ``training_sequences``, ``training_labels`` and
    ``nlp_model`` (the attribute ``predict`` reads).

    Args:
        context: NLU context whose ``intents`` are trained. Each intent is expected
            to expose ``processed_training_sentences`` once
            ``preprocess_training_sentences`` has run on it.
        configuration: Supplies ``num_words``, ``lower``, ``oov_token``,
            ``input_max_num_tokens``, ``embedding_dim`` and ``num_epochs``.
    """
    tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=configuration.num_words,
                                                      lower=configuration.lower,
                                                      oov_token=configuration.oov_token)
    total_training_sentences: list[str] = []
    total_labels_training_sentences: list[int] = []
    # The label of a sentence is the position of its intent in context.intents
    for index_intent, intent in enumerate(context.intents):
        preprocess_training_sentences(intent, configuration)
        total_training_sentences.extend(intent.processed_training_sentences)
        total_labels_training_sentences.extend([index_intent] * len(intent.processed_training_sentences))

    tokenizer.fit_on_texts(total_training_sentences)
    context.tokenizer = tokenizer
    context.training_sentences = total_training_sentences
    # truncating='post' matches the padding call in predict(), so over-long sentences
    # are cut the same way at training and prediction time
    context.training_sequences = tf.keras.preprocessing.sequence.pad_sequences(
        tokenizer.texts_to_sequences(total_training_sentences),
        padding='post', truncating='post', maxlen=configuration.input_max_num_tokens)
    context.training_labels = total_labels_training_sentences

    # Build the classifier here, sized to this context's intents, rather than relying
    # on a global `model` left over from another notebook cell
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(configuration.input_max_num_tokens,)),
        tf.keras.layers.Embedding(input_dim=configuration.num_words,
                                  output_dim=configuration.embedding_dim),
        tf.keras.layers.GlobalAveragePooling1D(),
        tf.keras.layers.Dense(24, activation='relu'),
        tf.keras.layers.Dense(24, activation='relu'),
        tf.keras.layers.Dense(len(context.intents), activation='softmax')
    ])
    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  optimizer='adam',
                  metrics=['accuracy'])

    model.fit(numpy.array(context.training_sequences),
              numpy.array(context.training_labels),
              epochs=configuration.num_epochs, verbose=2)
    # predict() reads the trained model from the context
    context.nlp_model = model

In [34]:
def predict(context: NLUContext, sentence: str, configuration: NlpConfiguration) -> numpy.ndarray:
    """Score a single sentence against the intents of ``context``.

    Args:
        context: NLU context providing ``tokenizer``, ``nlp_model`` and ``intents``.
        sentence: Raw sentence to classify.
        configuration: Supplies ``discard_oov_sentences`` and ``input_max_num_tokens``.

    Returns:
        The first row of the model's prediction for the sentence, or an array of
        ``len(context.intents)`` zeros when the sentence is discarded.
    """
    processed_sentence = preprocess_prediction_sentence(sentence, configuration)
    token_ids = context.tokenizer.texts_to_sequences([processed_sentence])

    # Every token id equal to 1 means the sentence consists only of
    # out-of-vocabulary tokens, so all classes get a zero probability without
    # running the model. (Presumably 1 is the tokenizer's OOV index - verify.)
    if configuration.discard_oov_sentences and all(token_id == 1 for token_id in token_ids[0]):
        return numpy.zeros(len(context.intents))

    model_input = tf.keras.preprocessing.sequence.pad_sequences(
        token_ids,
        padding='post',
        maxlen=configuration.input_max_num_tokens,
        truncating='post',
    )
    # Only one sentence was submitted, so return just its row of predictions
    return context.nlp_model.predict(model_input)[0]