In [3]:
# Import all the essential libraries
import numpy as np
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import pickle
import tensorflow as tf
import pandas as pd

# Load the dataset
data = pd.read_csv("../data/final_dataset.csv")
# Ensure necessary columns exist
assert 'QuestionText' in data.columns, "QuestionText column is missing"
assert 'AnswerText' in data.columns, "AnswerText column is missing"

questions = data['QuestionText'].astype(str).values
answers = data['AnswerText'].astype(str).values

# Add start and end tokens to answers
answers = ["<start> " + answer + " <end>" for answer in answers]

# Tokenize and pad sequences
vocab_size = 20000  # Define vocabulary size
max_length = 20  # Define maximum sequence length

# Tokenizer for questions and answers
tokenizer = Tokenizer(num_words=vocab_size, filters='', oov_token='<unk>')
tokenizer.fit_on_texts(questions + answers)

# Save the tokenizer model for futher use
with open('../models/tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

# Convert texts to sequences
question_sequences = tokenizer.texts_to_sequences(questions)
answer_sequences = tokenizer.texts_to_sequences(answers)

# Pad sequences
train_questions_padded = pad_sequences(question_sequences, maxlen=max_length, padding='post')
train_answers_padded = pad_sequences(answer_sequences, maxlen=max_length, padding='post')

# Split data into train and test sets
train_questions, test_questions, train_answers, test_answers = train_test_split(
    train_questions_padded, train_answers_padded, test_size=0.2, random_state=42
)

# Build the neural network model.
# Encoder
# Encoder Inputs
encoder_inputs = Input(shape=(None,), name="encoder_inputs")
encoder_embedding = Embedding(input_dim=vocab_size, output_dim=128, mask_zero=True, name="encoder_embedding")(encoder_inputs)
encoder_lstm = LSTM(128, return_state=True, name="encoder_lstm")
_, state_h, state_c = encoder_lstm(encoder_embedding)

# Decoder Inputs
decoder_inputs = Input(shape=(None,), name="decoder_inputs")
decoder_embedding = Embedding(input_dim=vocab_size, output_dim=128, mask_zero=True, name="decoder_embedding")(decoder_inputs)
decoder_lstm = LSTM(128, return_sequences=True, return_state=True, name="decoder_lstm")
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=[state_h, state_c])

# Dense Layer
decoder_dense = Dense(vocab_size, activation="softmax", name="decoder_dense")
decoder_outputs = decoder_dense(decoder_outputs)

# Training Model
training_model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
training_model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Debug Model Summary
training_model.summary()

# Prepare Data
encoder_input_data = train_questions_padded
decoder_input_data = train_answers_padded[:, :-1]
decoder_target_data = np.expand_dims(train_answers_padded[:, 1:], axis=-1)

# Train the Model
training_model.fit(
    [encoder_input_data, decoder_input_data],
    decoder_target_data,
    batch_size=64,
    epochs=10,
    validation_split=0.2
)

# Save Training Model
training_model.save('../models/neural_network_model.keras')
print("Training model saved successfully.")

# Save Encoder Model
encoder_model = Model(encoder_inputs, [state_h, state_c])
encoder_model.save('../models/encoder_model.keras')
print("Encoder model saved successfully.")

# Save Decoder Model
decoder_state_input_h = Input(shape=(128,), name="decoder_state_h")
decoder_state_input_c = Input(shape=(128,), name="decoder_state_c")
decoder_lstm_outputs, state_h, state_c = decoder_lstm(
    decoder_embedding, initial_state=[decoder_state_input_h, decoder_state_input_c]
)
decoder_outputs = decoder_dense(decoder_lstm_outputs)

decoder_model = Model(
    [decoder_inputs, decoder_state_input_h, decoder_state_input_c],
    [decoder_outputs, state_h, state_c]
)
decoder_model.save('../models/decoder_model.keras')
print("Decoder model saved successfully.")

# Load the trained encoder and decoder models
encoder_model = load_model('../models/encoder_model.keras', compile=False)
decoder_model = load_model('../models/decoder_model.keras', compile=False)

# Load the tokenizer
with open('../models/tokenizer.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

# Define function to preprocess input questions
def preprocess_question(question, tokenizer, max_length=20):
    sequence = tokenizer.texts_to_sequences([question])
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post')
    return np.array(padded_sequence, dtype='int32')

# Define function to decode sequences
def decode_sequence(input_seq):
    # Encode the input to get initial states
    states_value = encoder_model.predict(input_seq)

    # Generate an empty target sequence with the start token
    target_seq = np.zeros((1, 1), dtype="int32")
    target_seq[0, 0] = tokenizer.word_index.get("<start>", 0)

    # Sampling loop to generate the output sequence
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)

        # Sample a token
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_token_index = min(sampled_token_index, vocab_size - 1)  # Ensure valid index
        sampled_word = tokenizer.index_word.get(sampled_token_index, '<unk>')

        # Stop if <end> token is reached or max length exceeded
        if sampled_word == '<end>' or len(decoded_sentence.split()) > 20:
            stop_condition = True
        else:
            decoded_sentence += ' ' + sampled_word

        # Update the target sequence and states
        target_seq = np.zeros((1, 1), dtype="int32")
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]

    return decoded_sentence.strip()

# Test the trained model
def test_model(question):
    input_seq = preprocess_question(question, tokenizer, max_length=20)
    decoded_answer = decode_sequence(input_seq)
    return decoded_answer

# Example test
question = "has anyone got experience creating sql-based asp.net"
answer = test_model(question)
print(f"Question: {question}")
print(f"Predicted Answer: {answer}")


Epoch 1/10




[1m2221/2221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m307s[0m 137ms/step - accuracy: 0.2086 - loss: 5.7760 - val_accuracy: 0.2868 - val_loss: 4.6603
Epoch 2/10
[1m2221/2221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m297s[0m 133ms/step - accuracy: 0.3001 - loss: 4.4295 - val_accuracy: 0.3125 - val_loss: 4.2507
Epoch 3/10
[1m2221/2221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m299s[0m 135ms/step - accuracy: 0.3293 - loss: 3.9730 - val_accuracy: 0.3350 - val_loss: 3.9529
Epoch 4/10
[1m2221/2221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m297s[0m 134ms/step - accuracy: 0.3615 - loss: 3.5923 - val_accuracy: 0.3594 - val_loss: 3.7108
Epoch 5/10
[1m2221/2221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m296s[0m 133ms/step - accuracy: 0.3946 - loss: 3.2863 - val_accuracy: 0.3839 - val_loss: 3.5125
Epoch 6/10
[1m2221/2221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m298s[0m 134ms/step - accuracy: 0.4284 - loss: 3.0200 - val_accuracy: 0.4071 - val_loss: 3.3561
Epo



InvalidArgumentError: Graph execution error:

Detected at node functional_5_1/decoder_embedding_1/GatherV2 defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\ipykernel_launcher.py", line 17, in <module>

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\ipykernel\kernelapp.py", line 701, in start

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\tornado\platform\asyncio.py", line 205, in start

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\asyncio\windows_events.py", line 322, in run_forever

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\asyncio\base_events.py", line 641, in run_forever

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\asyncio\base_events.py", line 1986, in _run_once

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\asyncio\events.py", line 88, in _run

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\ipykernel\kernelbase.py", line 534, in dispatch_queue

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\ipykernel\kernelbase.py", line 523, in process_one

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\ipykernel\kernelbase.py", line 429, in dispatch_shell

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\ipykernel\ipkernel.py", line 429, in do_execute

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\IPython\core\interactiveshell.py", line 3075, in run_cell

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\IPython\core\interactiveshell.py", line 3130, in _run_cell

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\IPython\core\interactiveshell.py", line 3334, in run_cell_async

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\IPython\core\interactiveshell.py", line 3517, in run_ast_nodes

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code

  File "C:\Users\pujaris\AppData\Local\Temp\ipykernel_3260\2996271311.py", line 167, in <module>

  File "C:\Users\pujaris\AppData\Local\Temp\ipykernel_3260\2996271311.py", line 162, in test_model

  File "C:\Users\pujaris\AppData\Local\Temp\ipykernel_3260\2996271311.py", line 139, in decode_sequence

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 510, in predict

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 208, in one_step_on_data_distributed

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 198, in one_step_on_data

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 96, in predict_step

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\layers\layer.py", line 899, in __call__

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\ops\operation.py", line 46, in __call__

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 156, in error_handler

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\models\functional.py", line 182, in call

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\ops\function.py", line 171, in _run_through_graph

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\models\functional.py", line 584, in call

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\layers\layer.py", line 899, in __call__

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\ops\operation.py", line 46, in __call__

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\utils\traceback_utils.py", line 156, in error_handler

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\layers\core\embedding.py", line 140, in call

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\ops\numpy.py", line 5206, in take

  File "C:\Users\pujaris\AppData\Local\anaconda3\Lib\site-packages\keras\src\backend\tensorflow\numpy.py", line 2029, in take

indices[0,0] = 200522 is not in [0, 20000)
	 [[{{node functional_5_1/decoder_embedding_1/GatherV2}}]] [Op:__inference_one_step_on_data_distributed_657268]