In [None]:
pip install tensorflow



In [None]:
import pandas as pd
import numpy as np
import re
import string
import warnings
warnings.filterwarnings("ignore")

# NLP Libraries
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Visualization
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Machine Learning Libraries
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import LSTM, Input, Dense, Embedding
from tensorflow.keras.models import Model, model_from_json
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import plot_model

# Save/Load Model
import pickle as pkl

In [None]:
# Load dataset
df = pd.read_csv("/content/output1433.csv")

# Convert text to lowercase
for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = df[col].str.lower()

# Remove duplicates
df = df.drop_duplicates().reset_index(drop=True)

# Remove punctuation, digits, and extra spaces
def clean_text(text):
    text = re.sub(f'[{re.escape(string.punctuation)}]', '', text)  # Remove punctuation
    text = re.sub(r'\d+', '', text)  # Remove digits
    text = text.strip()  # Remove leading/trailing spaces
    return text

df['Question'] = df['Question'].apply(clean_text)
df['Answer'] = df['Answer'].apply(clean_text)

# Add start and end tokens to answers
df['Answer'] = df['Answer'].apply(lambda x: 'start_ ' + x + ' _end')

In [None]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df['Question'], df['Answer'], test_size=0.3, random_state=101)

# Tokenize input (questions)
tokenizer_input = Tokenizer()
tokenizer_input.fit_on_texts(X_train)
X_train_seq = tokenizer_input.texts_to_sequences(X_train)
X_test_seq = tokenizer_input.texts_to_sequences(X_test)

# Tokenize output (answers)
tokenizer_target = Tokenizer()
tokenizer_target.fit_on_texts(y_train)
y_train_seq = tokenizer_target.texts_to_sequences(y_train)
y_test_seq = tokenizer_target.texts_to_sequences(y_test)

# Pad sequences
max_length_src = max(len(seq) for seq in X_train_seq)
max_length_tar = max(len(seq) for seq in y_train_seq)

X_train_pad = pad_sequences(X_train_seq, maxlen=max_length_src, padding='post')
X_test_pad = pad_sequences(X_test_seq, maxlen=max_length_src, padding='post')
y_train_pad = pad_sequences(y_train_seq, maxlen=max_length_tar, padding='post')
y_test_pad = pad_sequences(y_test_seq, maxlen=max_length_tar, padding='post')

# Vocabulary sizes
vocab_size_input = len(tokenizer_input.word_index) + 1
vocab_size_target = len(tokenizer_target.word_index) + 1

In [None]:
with open('tokenizer_input.pkl', 'wb') as f:
    pkl.dump(tokenizer_input, f)

with open('tokenizer_target.pkl', 'wb') as f:
    pkl.dump(tokenizer_target, f)


# pkl.dump(tokenizer_input, open('tokenizer_input.pkl', 'wb'))
# pkl.dump(tokenizer_target, open('tokenizer_target.pkl', 'wb'))

In [None]:
latent_dim = 256  # Embedding dimension

# Encoder
encoder_inputs = Input(shape=(None,),name="encoder_inputs")
encoder_embedding = Embedding(vocab_size_input, latent_dim, mask_zero=True)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]

# Decoder
decoder_inputs = Input(shape=(None,),name="decoder_inputs")
decoder_embedding = Embedding(vocab_size_target, latent_dim, mask_zero=True)(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(vocab_size_target, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

==========================================================================================================================================================

In [None]:
def generator_batch(X, Y, batch_size=20):
    while True:
        for j in range(0, len(X), batch_size):
            encoder_input = X[j:j+batch_size]
            decoder_input = Y[j:j+batch_size][:, :-1]  # Remove 'end' token
            decoder_output = np.zeros((len(decoder_input), max_length_tar-1, vocab_size_target))  # Adjust shape

            for i, seq in enumerate(Y[j:j+batch_size]):
                for t, word in enumerate(seq[1:]):  # Shift by one for teacher forcing
                    if t < max_length_tar - 1:  # Ensure indexing within bounds
                        decoder_output[i, t, word] = 1

            yield ((encoder_input, decoder_input), decoder_output)


# Train the model
batch_size = 32
epochs = 200
history = model.fit(
    generator_batch(X_train_pad, y_train_pad, batch_size=batch_size),
    steps_per_epoch=len(X_train_pad) // batch_size,
    epochs=epochs,
    validation_data=generator_batch(X_test_pad, y_test_pad, batch_size=batch_size),
    validation_steps=len(X_test_pad) // batch_size
)

Epoch 1/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1s/step - accuracy: 0.2753 - loss: 5.4555 - val_accuracy: 0.6499 - val_loss: 5.4101
Epoch 2/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 743ms/step - accuracy: 0.5975 - loss: 5.3868 - val_accuracy: 0.6342 - val_loss: 5.1656
Epoch 3/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 691ms/step - accuracy: 0.5786 - loss: 4.9170 - val_accuracy: 0.6349 - val_loss: 4.3335
Epoch 4/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 715ms/step - accuracy: 0.5889 - loss: 4.3592 - val_accuracy: 0.5334 - val_loss: 4.2518
Epoch 5/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 564ms/step - accuracy: 0.5863 - loss: 4.2341 - val_accuracy: 0.6506 - val_loss: 4.1010
Epoch 6/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 676ms/step - accuracy: 0.6138 - loss: 4.1265 - val_accuracy: 0.0469 - val_loss: 4.1214
Epoch 7/200
[1m4/4[0m [32m━━━━━━━

In [None]:
# Save the model
model_json = model.to_json()
with open("model_qa.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("model_qa.weights.h5")
print("Model & Weights Saved to disk")

# Load the model
json_file = open('model_qa.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model_loaded = model_from_json(loaded_model_json)
model_loaded.load_weights("model_qa.weights.h5")
print("Model Loaded from disk")

Model & Weights Saved to disk
Model Loaded from disk


In [None]:
latent_dim = 256

#inference encoder
encoder_inputs_inf = model_loaded.input[0] #Trained encoder input layer
encoder_outputs_inf, inf_state_h, inf_state_c = model_loaded.layers[4].output # restoring the encoder lstm output and states
encoder_inf_states = [inf_state_h,inf_state_c]
encoder_model = Model(encoder_inputs_inf,encoder_inf_states)

#inference decoder
# The following tensor will store the state of the previous timestep in the "starting the encoder final time step"
decoder_state_h_input = Input(shape=(latent_dim,))
decoder_state_c_input = Input(shape=(latent_dim,))
decoder_state_input = [decoder_state_h_input,decoder_state_c_input]

# # inference decoder input
decoder_input_inf = model_loaded.input[1] #Trained decoder input layer
# decoder_input_inf._name='decoder_input'
decoder_emb_inf = model_loaded.layers[3](decoder_input_inf)
decoder_lstm_inf = model_loaded.layers[5]
decoder_output_inf, decoder_state_h_inf, decoder_state_c_inf = decoder_lstm_inf(decoder_emb_inf, initial_state =decoder_state_input)
decoder_state_inf = [decoder_state_h_inf,decoder_state_c_inf]
#inference dense layer
dense_inf = model_loaded.layers[6]
decoder_output_final = dense_inf(decoder_output_inf)# A dense softmax layer to generate prob dist. over the target vocabulary

decoder_model = Model([decoder_input_inf]+decoder_state_input,[decoder_output_final]+decoder_state_inf)


In [None]:
import os

print(os.listdir())  # Lists all files in the current directory


['.config', 'model_qa.json', 'model_qa.weights.h5', 'tokenizer_input.pkl', 'output1433.csv', 'tokenizer_target.pkl', 'sample_data']


In [None]:
with open('tokenizer_input.pkl', 'rb') as f:
    tokenizer_input = pkl.load(f)
with open('tokenizer_target.pkl', 'rb') as f:
    tokenizer_target = pkl.load(f)

# Creating the reverse mapping to get the word from the index in the sequence
reverse_word_map_input = dict(map(reversed,
                                  tokenizer_input.word_index.items()))
reverse_word_map_target = dict(
    map(reversed, tokenizer_target.word_index.items()))

============================================================

In [None]:
# Code to answer the input question


def decode_seq(input_seq):
    # print("input_seq=>",input_seq)
    state_values_encoder = encoder_model.predict(input_seq)

    # intialize the target seq with start tag
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = tokenizer_target.word_index['start']
    # print("target_seq:=>",target_seq)
    stop_condition = False
    decoder_sentance = ''
    # print("Before the while loop")
    while not stop_condition:
        sample_word, decoder_h, decoder_c = decoder_model.predict(
            [target_seq] + state_values_encoder)
        # print("sample_word: =>",sample_word)
        sample_word_index = np.argmax(sample_word[0, -1, :])
        # print("sample_word_index: ",sample_word_index)
        decoder_word = reverse_word_map_target[sample_word_index]
        decoder_sentance += ' '\
        + decoder_word
        # print("decoded word:=>",decoder_word)
        # print(len(decoder_sentance))
        # print("len(decoder_sentance) > 70: ",len(decoder_sentance) > 70)
        # print('decoder_word == "end"',decoder_word == 'end')
        # print(decoder_word == 'end' or len(decoder_sentance) > 70)
        # stop condition for the while loop
        if (decoder_word == 'end' or len(decoder_sentance) >
                100):  # Taking upto 100 words as answer
            stop_condition = True
            # print("from if condition")
        # target_seq = np.zeros((1,1))
        target_seq[0, 0] = sample_word_index
        # print(target_seq)
        state_values_encoder = [decoder_h, decoder_c]
    return decoder_sentance

=============================================================================================================

In [None]:
X_test = X_test.reset_index(drop=True)

In [None]:
y_test = y_test.reset_index(drop=True)

In [None]:
X_train

Unnamed: 0,Question
43,what if my food packaging is damaged
208,my food arrived damaged i want a refund
108,can i order from different restaurants at the ...
9,what if my food is delayed but arrives properly
171,food arrived cold and squashed can i get a refund
...,...
63,my food arrived cold
70,can i change the delivery address
81,can i request contactless delivery
11,actually the food is not spoiled


In [None]:
X_test

Unnamed: 0,Question
0,how do i know if a restaurant is open
1,food arrived cold and squashed can i get a refund
2,what if i forgot my account password
3,my order was leaking when delivered refund needed
4,can i report food packaging damage
...,...
61,how do i cancel an order i havent received
62,how can i delete my account
63,do i need to return the spoiled food
64,i want to know if my order has been shipped yet


In [None]:
# Check if the tokenizer is correctly loaded
print(tokenizer_target.word_index)

# Check if the reverse mapping is correct
print(reverse_word_map_target)

{'start': 1, 'end': 2, 'the': 3, 'refund': 4, 'for': 5, 'is': 6, 'your': 7, 'sorry': 8, 'order': 9, 'we': 10, 'be': 11, 'you': 12, 'we’ll': 13, 'processed': 14, 'a': 15, 'or': 16, 'will': 17, 'can': 18, 'yes': 19, 'apologies': 20, 'in': 21, 'being': 22, 'app': 23, 'issue': 24, 'now': 25, 'contact': 26, 'before': 27, 'an': 28, 'support': 29, 'approved': 30, 'image': 31, 'and': 32, 'initiate': 33, 'this': 34, 'to': 35, 'check': 36, 'request': 37, 'trouble': 38, 'our': 39, 'on': 40, 'way': 41, 'it': 42, 'understand': 43, 'issued': 44, 'shortly': 45, 'inconvenience': 46, 'checkout': 47, 'regret': 48, 'soon': 49, 'we’re': 50, 'experience': 51, 'process': 52, 'delivery': 53, 'apologize': 54, 'items': 55, 'receive': 56, 'replacement': 57, 'orders': 58, 'canceled': 59, 'preparation': 60, 'use': 61, 'with': 62, 'report': 63, 'food': 64, 'spoiled': 65, 'starts': 66, 'tracking': 67, 'during': 68, 'cancel': 69, 'email': 70, 'immediately': 71, 'full': 72, 'are': 73, 'if': 74, 'allowed': 75, 'tippin

============================================================================================

In [None]:
for i in range(len(X_test)):
    sentance = X_test[i]
    original_target = y_test[i]
    input_seq = tokenizer_input.texts_to_sequences([sentance])
    pad_sequence = pad_sequences(input_seq, maxlen=20, padding='post')
    # print('input_sequence =>',input_seq)
    # print("pad_seq=>",pad_sequence)
    predicted_target = decode_seq(pad_sequence)
    print("Test Question: ", i + 1)
    print("Question: ", sentance)
    print("Original Answer:", original_target[6:-4])
    print("Predicted Answer:", predicted_target[:-4])
    print("==" * 50)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 435ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 489ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 259ms/step
Test Question:  1
Question:  how do i know if a restaurant is open
Original Answer:  check the restaurants availability in the app 
Predicted Answer:  check the restaurants availability in the app
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
[1

In [None]:
import tensorflow as tf
print(tf.__version__)  # Check on both saving and loading environments

2.18.0


In [None]:
# After loading the model
model_loaded.summary()

In [None]:
import h5py

with h5py.File("/content/model_qa.weights.h5", "r") as f:
    if "model_weights" in f:
        print("Layers in weights file:", list(f["model_weights"].keys()))
    else:
        print("Weights file is empty or corrupt!")

Weights file is empty or corrupt!


In [None]:
import tensorflow as tf
import keras

print("TensorFlow version:", tf.__version__)
print("Keras version:", keras.__version__)


TensorFlow version: 2.18.0
Keras version: 3.8.0


In [None]:
!python --version


Python 3.11.12


In [None]:
import os
from tensorflow.keras.models import model_from_json

# Paths
model_json_path = '/content/model_qa.json'
weights_path = '/content/model_qa.weights.h5'

# Step 1: Check if files exist and are valid
def check_file(path):
    if not os.path.exists(path):
        print(f"[ERROR] File not found: {path}")
        return False
    if os.path.getsize(path) == 0:
        print(f"[ERROR] File is empty: {path}")
        return False
    print(f"[OK] File '{path}' is valid ({os.path.getsize(path)} bytes)")
    return True

json_ok = check_file(model_json_path)
weights_ok = check_file(weights_path)

# Step 2: Try loading the model and weights
if json_ok and weights_ok:
    try:
        # Load model architecture
        with open(model_json_path, 'r') as f:
            model_json = f.read()
        model = model_from_json(model_json)
        print(f"[OK] Model architecture loaded from JSON. Number of layers: {len(model.layers)}")

        # Attempt to load weights
        try:
            model.load_weights(weights_path)
            print(f"[MATCH] Weights loaded successfully. Architecture and weights match.")
        except Exception as e:
            print(f"[MISMATCH] Failed to load weights. Architecture and weights DO NOT match.")
            print(f"Reason: {e}")

    except Exception as e:
        print(f"[ERROR] Could not load model architecture: {e}")


[OK] File '/content/model_qa.json' is valid (7627 bytes)
[OK] File '/content/model_qa.weights.h5' is valid (9812344 bytes)
[OK] Model architecture loaded from JSON. Number of layers: 7
[MATCH] Weights loaded successfully. Architecture and weights match.
