In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, Dropout

# Read the labelled examples (one query and its intent per row) into a DataFrame
df = pd.read_csv(filepath_or_buffer='query_dataset.csv')

In [2]:
# Pull the raw query text (inputs) and intent names (targets) out of the DataFrame
X, y = df['query'].values, df['intent'].values

# Turn each intent name into an integer class id; the fitted encoder is what
# later maps predicted class ids back to intent names
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out a 0.2 fraction of the examples for testing; the fixed random_state
# keeps the partition the same from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=10,
)

In [3]:
# Fit the vocabulary on the training queries only (the test set stays unseen),
# with the vocabulary capped through num_words=5000
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)

# Convert both splits from text to lists of integer word ids
X_train_seq, X_test_seq = (
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
)

# Use the longest training query as the common sequence length, then bring
# every sequence in both splits to that length
max_len = max(map(len, X_train_seq))
X_train_pad, X_test_pad = (
    pad_sequences(sequences, maxlen=max_len)
    for sequences in (X_train_seq, X_test_seq)
)

In [4]:
# Seed Python, NumPy and the Keras backend so weight initialisation, dropout
# masks and batch shuffling are repeatable from one run to the next
# (same seed value as the train/test split)
tf.keras.utils.set_random_seed(10)

# Model hyperparameters.
# The embedding table has to cover every index the tokenizer can emit, so its
# size is read from the tokenizer instead of being repeated as a second literal
# that could drift out of sync. When no num_words cap is set, ids run from 1 to
# len(word_index), hence the +1 for the padding id 0.
vocab_size = tokenizer.num_words or len(tokenizer.word_index) + 1
embedding_dim = 128

# Build the model: word embeddings -> two stacked LSTMs -> dropout -> small
# dense layer -> softmax over the intent classes known to the label encoder
model = Sequential([
    # `input_length` is intentionally not passed: Keras 3 deprecates the
    # argument and the layer works on whatever sequence length it is fed
    Embedding(vocab_size, embedding_dim),
    # return_sequences=True so the second LSTM receives the full sequence
    LSTM(64, return_sequences=True),
    LSTM(32),
    Dropout(0.5),
    Dense(16, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

# Compile the model. The sparse loss matches the integer class ids produced by
# the label encoder (no one-hot encoding needed).
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model; a 0.2 fraction of the training data is set aside for
# validation and reported as val_loss / val_accuracy each epoch
history = model.fit(X_train_pad, np.array(y_train), epochs=20, batch_size=32, validation_split=0.2)

Epoch 1/20




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 53ms/step - accuracy: 0.4333 - loss: 1.0924 - val_accuracy: 0.4844 - val_loss: 1.0684
Epoch 2/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4998 - loss: 1.0583 - val_accuracy: 0.4844 - val_loss: 1.0099
Epoch 3/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.4918 - loss: 0.9955 - val_accuracy: 0.4844 - val_loss: 0.9111
Epoch 4/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5781 - loss: 0.8468 - val_accuracy: 0.7656 - val_loss: 0.7015
Epoch 5/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7130 - loss: 0.7098 - val_accuracy: 0.9062 - val_loss: 0.5114
Epoch 6/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7875 - loss: 0.4904 - val_accuracy: 0.9531 - val_loss: 0.3726
Epoch 7/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [5]:
# Score the trained model on the held-out test split and report both metrics,
# one per line, to four decimal places
loss, accuracy = model.evaluate(X_test_pad, np.array(y_test))
print(f"Test Loss: {loss:.4f}", f"Test Accuracy: {accuracy:.4f}", sep="\n")

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9836 - loss: 0.0200  
Test Loss: 0.0296
Test Accuracy: 0.9750


In [6]:
def predict_intent(query):
    """Predict the intent label for a single query.

    The query goes through the same steps as the training data: it is turned
    into word ids by the fitted ``tokenizer``, brought to ``max_len`` with
    ``pad_sequences``, and scored by ``model``.

    Args:
        query: Text of one user query.

    Returns:
        The intent name that ``label_encoder`` associates with the
        highest-scoring class.
    """
    # Wrap the query in a list: the tokenizer and the model both work on batches
    batch = pad_sequences(tokenizer.texts_to_sequences([query]), maxlen=max_len)
    scores = model.predict(batch)
    best_class = np.argmax(scores)
    # inverse_transform expects a sequence of class ids and returns a sequence
    decoded = label_encoder.inverse_transform([best_class])
    return decoded[0]

In [15]:
# model.save('intent_prediction_model.h5')



In [16]:
# import pickle

# # Save the tokenizer
# with open('tokenizer.pkl', 'wb') as f:
#     pickle.dump(tokenizer, f)

# # Save the label encoder
# with open('label_encoder.pkl', 'wb') as f:
#     pickle.dump(label_encoder, f)

In [17]:
# import pickle

# # Save max_len
# with open('max_len.pkl', 'wb') as f:
#     pickle.dump(max_len, f)

In [14]:
# Smoke test: a product-browsing query (recorded output: show_products)
print(predict_intent(query="Show me some white shirts"))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
show_products


In [11]:
# Smoke test: a price-haggling query (recorded output: negotiation)
print(predict_intent(query="Can you give this to me in 400"))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
negotiation


In [12]:
# Smoke test: a purchase query (recorded output: checkout)
print(predict_intent(query="I want to buy this product"))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
checkout
