In [1]:
import numpy as np
import pandas as pd
import nltk
import re
import joblib
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

In [2]:
# Download the NLTK stopword corpus only when it is not already available
# locally, so re-running the notebook does not contact the download server
# every time.
try:
    nltk.data.find('corpora/stopwords')  # raises LookupError when the corpus is missing
except LookupError:
    nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


True

In [3]:
# Read the customer queries and keep only the rows without missing values
df = pd.read_csv("customer_queries.csv").dropna()

In [4]:
# Show the column labels of the loaded DataFrame
print(df.columns)

Index(['flags', 'instruction', 'category', 'intent', 'response'], dtype='object')


In [5]:
# Function to clean text
# Cache of stopword sets keyed by language, so the NLTK corpus file is read
# once instead of once per word of every query.
_STOPWORDS_BY_LANGUAGE = {}


def _load_stopwords(language='english'):
    """Return the NLTK stopwords for `language` as a frozenset, loading them only once."""
    if language not in _STOPWORDS_BY_LANGUAGE:
        _STOPWORDS_BY_LANGUAGE[language] = frozenset(stopwords.words(language))
    return _STOPWORDS_BY_LANGUAGE[language]


def clean_text(text, stop_words=None):
    """Normalize a query string for vectorization.

    The text is lowercased, every non-word character (regex ``\\W``) is
    replaced by a space, the result is split on whitespace, and tokens that
    are stopwords are removed. The remaining tokens are joined with single
    spaces.

    Parameters
    ----------
    text : str
        Raw query text.
    stop_words : collection of str, optional
        Tokens to remove. When omitted, the NLTK English stopword list is
        used (loaded once and cached).

    Returns
    -------
    str
        The cleaned text; an empty string when no tokens remain.
    """
    if stop_words is None:
        stop_words = _load_stopwords()
    # split() already collapses runs of whitespace, so no separate
    # whitespace-normalization pass is needed.
    tokens = re.sub(r'\W', ' ', text.lower()).split()
    return ' '.join(token for token in tokens if token not in stop_words)

# Clean every instruction element-wise and store the result in a new column
df['clean_query'] = df['instruction'].map(clean_text)

In [7]:
# Fit the label encoder on the 'category' values, then store the integer codes
label_encoder = LabelEncoder().fit(df['category'])
df['category_encoded'] = label_encoder.transform(df['category'])


In [8]:
# Features are the cleaned queries; targets are the encoded categories
X, y = df['clean_query'], df['category_encoded']

# Hold out 20% of the rows for testing, with a fixed seed for the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print("Data Preprocessing Completed!")

Data Preprocessing Completed!


In [9]:
# Fit the TF-IDF vectorizer on the training queries before it is saved, and
# reuse the fitted vectorizer to transform the test queries.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)  # fits on the training split and transforms it
X_test_tfidf = vectorizer.transform(X_test)  # transform only; the vectorizer is not refit on the test split


In [11]:
# Train classifier: a 100-tree random forest with a fixed seed
forest_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestClassifier(**forest_params)
model.fit(X_train_tfidf, y_train)

In [12]:
# Score the classifier on the held-out split and report its accuracy
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

Model Accuracy: 1.00


In [13]:
# Persist the fitted artifacts to disk with joblib
joblib.dump(model, "query_classifier.pkl")  # trained random forest classifier
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")  # TF-IDF vectorizer fitted on the training queries
joblib.dump(label_encoder, "label_encoder.pkl")  # label encoder fitted on the 'category' column

['label_encoder.pkl']

In [None]:
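# # Disabled cell (kept commented out): generate_response loads the saved artifacts and the dataframe itself, then returns the first stored response for the predicted category.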
# def generate_response(user_query):
#     # Load saved models
#     model = joblib.load("query_classifier.pkl")
#     vectorizer = joblib.load("tfidf_vectorizer.pkl")
#     label_encoder = joblib.load("label_encoder.pkl")

#     # Load dataset for response lookup
#     df = pd.read_csv("customer_queries.csv")

#     # Clean user input
#     cleaned_query = clean_text(user_query)

#     # Transform input query
#     query_tfidf = vectorizer.transform([cleaned_query])

#     # Predict category
#     predicted_category_encoded = model.predict(query_tfidf)[0]
#     predicted_category = label_encoder.inverse_transform([predicted_category_encoded])[0]

#     # Retrieve response
#     response_row = df[df['category'] == predicted_category]['response']
#     response = response_row.values[0] if not response_row.empty else "No predefined response available."

#     return predicted_category, response

# # Test Example
# user_input = "Why is my account suspended?"
# category, response = generate_response(user_input)
# print(f"Predicted Category: {category}")
# print(f"Response: {response}")


Predicted Category: ACCOUNT
Response: It's fantastic to hear that you would like to create a new {{Account Type}} account for your wife! I'm here to assist you in every step of the process. To get started, could you please provide me with your wife's full name, email address, and a username of her preference? This will allow us to set up the account accurately and ensure her privacy and security. Once we have these details, we can proceed with creating her {{Account Type}} account and unlocking all its benefits. If you have any questions or need further assistance, please don't hesitate to let me know. How are you feeling about this exciting opportunity for your wife to join us as a {{Account Type}} member?
