In [23]:
import pandas as pd
from lazypredict.Supervised import LazyClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer  # Using TF-IDF for better text representation
import joblib

In [24]:
# Step 1: Expanded dataset with more examples and additional categories
texts = [
   # Order Tracking
   "Where is my order?",
   "How can I track my order?",
   "The shipment was delayed, where is my package?",
   "Can you tell me when my order will arrive?",
   "I haven't received my order yet.",
   # Refunds
   "How do I get a refund?",
   "I want to return my product, can you help?",
   "What is the process for getting a refund?",
   "I received a damaged product, I need a refund.",
   "Can I return my product if I don't like it?",
   # Product Inquiries
   "Can you tell me more about the new product?",
   "What are the specifications of this phone?",
   "Is the new laptop available in stock?",
   "Can you provide me with details about the latest camera?",
   "Is the new tablet available for pre-order?",
   # Technical Support
   "I am having trouble logging into my account.",
   "My password reset isn't working, can you help?",
   "I'm facing issues with my device, it keeps freezing.",
   "How do I fix the issue with the app not loading?",
   "The software update caused problems with my phone.",
   # Account Issues
   "I need help updating my account details.",
   "How can I change my email address in my account?",
   "I forgot my account password, how can I reset it?",
   "Can you help me recover my account?",
   "I want to delete my account permanently."]


In [4]:
labels = [
   0, 0, 0, 0, 0,  # Order Tracking
   1, 1, 1, 1, 1,  # Refunds
   2, 2, 2, 2, 2,  # Product Inquiries
   3, 3, 3, 3, 3,  # Technical Support
   4, 4, 4, 4, 4   # Account Issues
]

In [5]:
# Step 3: Convert text into numerical features using TF-IDF
tfidf = TfidfVectorizer(max_features=100)  # You can adjust max_features to capture more/less words
X = tfidf.fit_transform(texts)

In [6]:
# Convert the sparse matrix to a dense format (e.g., a DataFrame)
X_dense = pd.DataFrame(X.toarray(), columns=tfidf.get_feature_names_out())

In [7]:
# Step 4: Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, labels, test_size=0.2, random_state=42)

In [8]:
#Step 5: Use Lazy Predict to benchmark models for classification
clf = LazyClassifier()
models, predictions = clf.fit(X_train, X_test, y_train, y_test)

100%|██████████████████████████████████████████████████████████████████████████████████| 31/31 [00:02<00:00, 14.54it/s]

[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 20, number of used features: 0
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438





In [9]:
print(models)

                               Accuracy  Balanced Accuracy ROC AUC  F1 Score  \
Model                                                                          
DecisionTreeClassifier             0.80               0.80    None      0.73   
LogisticRegression                 0.60               0.60    None      0.47   
RidgeClassifierCV                  0.60               0.60    None      0.47   
Perceptron                         0.60               0.60    None      0.53   
PassiveAggressiveClassifier        0.60               0.60    None      0.47   
NuSVC                              0.60               0.60    None      0.47   
NearestCentroid                    0.60               0.60    None      0.47   
BaggingClassifier                  0.60               0.60    None      0.53   
LinearSVC                          0.60               0.60    None      0.53   
LinearDiscriminantAnalysis         0.60               0.60    None      0.53   
SGDClassifier                      0.60 

In [11]:
from sklearn.tree import DecisionTreeClassifier

In [12]:
model = DecisionTreeClassifier()

In [13]:
model.fit(X_train,y_train)

In [16]:
# Step 6: Evaluate the selected model on the test set
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of the selected model (DecisionTreeClassifier): {accuracy:.2f}")

Accuracy of the selected model (DecisionTreeClassifier): 0.80


In [17]:
# Step 7: Function to classify new customer queries using the trained model
def classify_query(query_text):
   # Generate embedding for the new query
   query_embedding = tfidf.transform([query_text])
   # Predict the category
   prediction = model.predict(query_embedding)[0]
   # Map prediction to the category name
   category_mapping_reverse = {0: "Order Tracking", 1: "Refunds", 2: "Product Inquiries", 3: "Technical Support", 4: "Account Issues"}
   return category_mapping_reverse[prediction]

In [18]:
# Step 8: Test the model with a new query for inferencing
new_query = "I want to return my product, how can I do that?"
predicted_category = classify_query(new_query)
print(f"Predicted Category for '{new_query}': {predicted_category}")

Predicted Category for 'I want to return my product, how can I do that?': Refunds


In [19]:
# Step 8: Test the model with a new query for inferencing
new_query = "where is my order"
predicted_category = classify_query(new_query)
print(f"Predicted Category for '{new_query}': {predicted_category}")

Predicted Category for 'where is my order': Order Tracking


In [20]:
# Step 8: Test the model with a new query for inferencing
new_query = "My account has been blocked"
predicted_category = classify_query(new_query)
print(f"Predicted Category for '{new_query}': {predicted_category}")

Predicted Category for 'My account has been blocked': Account Issues


In [25]:
joblib.dump(tfidf,'F://My-Exploration//Lazypredict-Implementation//tfidf_vectorizer.pkl')

['F://My-Exploration//Lazypredict-Implementation//tfidf_vectorizer.pkl']

In [26]:
joblib.dump(model,'F://My-Exploration//Lazypredict-Implementation//customer_query_model.pkl')

['F://My-Exploration//Lazypredict-Implementation//customer_query_model.pkl']