In [24]:
import numpy as np 
import pandas as pd

In [25]:
# Load the customer-support ticket export into a DataFrame and preview the
# first rows to sanity-check the columns that were parsed.
df = pd.read_csv('../data/customer_support_tickets.csv')
df.head()

Unnamed: 0,Ticket_ID,Customer_Name,Customer_Email,Ticket_Subject,message,category,priority,Ticket_Channel,Submission_Date,Resolution_Time_Hours,Assigned_Agent,Satisfaction_Score
0,TKT-100000,George Simon,lisastrickland@example.com,Hours of operation - Individual,hi support is your headquarters located lay so...,General Inquiry,High,Web Form,2025-07-02,43,David Kim,5
1,TKT-100001,Scott Thompson,wevans@example.org,Data not syncing - Card,hi support the application crashes every time ...,Technical,High,Chat,2025-06-28,41,Elena Rodriguez,5
2,TKT-100002,Jennifer Smith,oleonard@example.net,2FA issues - Question,hi support how do i upgrade to the enterprise ...,Account,High,Web Form,2025-02-05,7,Anya Sharma,5
3,TKT-100003,Rachel Bullock,katherine67@example.net,Login failed - Let,hi support the dashboard is not loading any da...,Account,Low,Web Form,2025-03-20,41,Anya Sharma,5
4,TKT-100004,Thomas Parks DDS,raykelsey@example.com,Refund status - Attention,hi support i have been trying to update my pay...,Billing,Medium,Email,2025-04-27,40,David Kim,5


In [26]:
# List the column labels available in the loaded ticket frame.
df.columns

Index(['Ticket_ID', 'Customer_Name', 'Customer_Email', 'Ticket_Subject',
       'message', 'category', 'priority', 'Ticket_Channel', 'Submission_Date',
       'Resolution_Time_Hours', 'Assigned_Agent', 'Satisfaction_Score'],
      dtype='object')

In [27]:
# Map the long column names onto the short names used by the rest of the
# notebook. `rename` ignores keys that are not present, so when the frame
# already exposes message/category/priority this is a no-op.
# NOTE(review): the loaded file appears to ship the short names already;
# confirm whether any input still uses the long names.
COLUMN_ALIASES = {
    "Ticket_Description": "message",
    "Issue_Category": "category",
    "Priority_Level": "priority",
}
df = df.rename(columns=COLUMN_ALIASES)

# Fail loudly if the schema is not what the modelling cells expect, instead
# of letting a silent no-op rename surface later as a confusing KeyError.
required_columns = ["message", "category", "priority"]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(f"Expected columns missing after rename: {missing_columns}")

df[required_columns].head()

Unnamed: 0,message,category,priority
0,hi support is your headquarters located lay so...,General Inquiry,High
1,hi support the application crashes every time ...,Technical,High
2,hi support how do i upgrade to the enterprise ...,Account,High
3,hi support the dashboard is not loading any da...,Account,Low
4,hi support i have been trying to update my pay...,Billing,Medium


In [28]:
# Count missing values per modelling column.
df[["message", "category", "priority"]].isna().sum()

message     0
category    0
priority    0
dtype: int64

In [29]:
# Keep only rows where text and both labels are present.
df = df.dropna(subset=["message", "category", "priority"])

In [30]:
# Normalise the ticket text: lower-case it, then drop every character that
# is neither a word character nor whitespace.
df["message"] = (
    df["message"]
    .str.lower()
    .str.replace(r"[^\w\s]", "", regex=True)
)

In [31]:
# Remove tickets whose normalised message text is an exact repeat.
df = df.drop_duplicates(subset=["message"])

In [32]:
# Report (rows, columns) of the ticket frame at this point in the notebook.
print(df.shape)

(20000, 12)


In [33]:
# Count tickets per category to check class balance before modelling.
df['category'].value_counts()

category
Technical          5924
Billing            5038
Account            4068
General Inquiry    3920
Fraud              1050
Name: count, dtype: int64

In [34]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [35]:
# Features are the cleaned message text; the target is the ticket category.
X, y = df["message"], df["category"]

In [36]:
# Hold out 20% of the tickets for evaluation; the fixed seed keeps the
# partition reproducible across runs.
# NOTE(review): the split is not stratified on `y`; consider `stratify=y`
# if per-class proportions in the test set matter.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [37]:
# TF-IDF bag-of-words features, capped at 5000 features.
vectorizer = TfidfVectorizer(max_features=5000)

# Fit the vocabulary/IDF weights on the training messages only, then apply
# the fitted transform to the held-out messages.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [38]:
# Category classifier: logistic regression on the TF-IDF features, with the
# solver iteration cap set to 1000.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_vec, y_train)

In [39]:
# Predicted categories for the held-out tickets.
y_pred = model.predict(X_test_vec)

In [40]:
# Score the category model on the held-out split: overall accuracy plus the
# per-class precision/recall/F1 table.
acc = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", acc)
print("Classification Report:\n", report)

Accuracy: 0.69
Classification Report:
                  precision    recall  f1-score   support

        Account       0.66      0.67      0.66       792
        Billing       0.72      0.71      0.71      1024
          Fraud       0.61      0.58      0.59       201
General Inquiry       0.68      0.68      0.68       817
      Technical       0.71      0.71      0.71      1166

       accuracy                           0.69      4000
      macro avg       0.67      0.67      0.67      4000
   weighted avg       0.69      0.69      0.69      4000



In [41]:
# Smoke-test the category model on a single hand-written message.
# NOTE(review): the training text was lower-cased and stripped of
# punctuation before vectorising; this sample is passed to the vectorizer
# as-is. It happens to contain neither, but other inputs may be tokenised
# differently from the training data.
sample = ["my payment was deducted twice"]
sample_vec = vectorizer.transform(sample)

print(model.predict(sample_vec))

['Billing']


In [42]:
# Load the canned reply templates; `generate_reply` filters this frame on
# its `category` and `priority` columns and returns a value from `reply`.
templates = pd.read_csv("../data/reply_templates.csv")
templates.head()

Unnamed: 0,category,priority,reply
0,Billing,High,We sincerely apologize for the billing issue. ...
1,Billing,Medium,Our billing team is reviewing your request and...
2,Billing,Low,Thank you for contacting billing support. We w...
3,Technical,High,Our technical team is actively working to reso...
4,Technical,Medium,We are investigating the technical issue and w...


In [43]:
def generate_reply(category, priority, templates_df=None, random_state=None):
    """Pick a canned reply for a (category, priority) pair.

    Parameters
    ----------
    category : str
        Ticket category to match against the ``category`` column.
    priority : str
        Ticket priority to match against the ``priority`` column.
    templates_df : pandas.DataFrame, optional
        Table with ``category``, ``priority`` and ``reply`` columns. When
        omitted, the notebook-level ``templates`` frame is used, which is
        the original behaviour.
    random_state : int, optional
        Seed forwarded to ``Series.sample`` so the choice among several
        matching templates can be made reproducible. ``None`` keeps the
        original unseeded behaviour.

    Returns
    -------
    str
        One matching reply chosen at random, or a generic acknowledgement
        when no template matches the pair.
    """
    # Only touch the global when no explicit table was supplied, so the
    # function can be exercised without notebook state.
    source = templates if templates_df is None else templates_df

    is_match = (source["category"] == category) & (source["priority"] == priority)
    matches = source[is_match]

    if matches.empty:
        return "Thank you for contacting support. We will respond shortly."

    return matches["reply"].sample(1, random_state=random_state).iloc[0]

In [44]:
# Example lookup: reply template for a high-priority billing ticket.
generate_reply("Billing", "High")

'We sincerely apologize for the billing issue. Our billing team is reviewing your case urgently and will resolve it shortly.'

In [45]:
# Priority labels for the same tickets used by the category model.
y_priority = df["priority"]

# Reuse the train/test partition created for the category model instead of
# re-splitting with a matching seed. The vectorizer was fit on `X_train`,
# so the priority model must train on exactly those rows; aligning on the
# index makes that explicit rather than relying on an identical
# `random_state` producing the same shuffle.
X_train_p, X_test_p = X_train, X_test
y_train_p = y_priority.loc[X_train_p.index]
y_test_p = y_priority.loc[X_test_p.index]

# The TF-IDF matrices for these rows already exist; no need to transform
# the same text a second time.
X_train_vec_p, X_test_vec_p = X_train_vec, X_test_vec

priority_model = LogisticRegression(max_iter=1000)
priority_model.fit(X_train_vec_p, y_train_p)


In [46]:
# End-to-end check: raw message -> category + priority -> canned reply.
msg = ["payment deducted twice"]

# Apply the same normalisation the training text went through (lower-case,
# then drop anything that is not a word character or whitespace). Without
# this, inputs containing punctuation are tokenised differently from the
# text the vectorizer was fit on.
msg_clean = (
    pd.Series(msg)
    .str.lower()
    .str.replace(r"[^\w\s]", "", regex=True)
)

vec = vectorizer.transform(msg_clean)

pred_category = model.predict(vec)[0]
pred_priority = priority_model.predict(vec)[0]

reply = generate_reply(pred_category, pred_priority)

print("Predicted Category:", pred_category)
print("Predicted Priority:", pred_priority)
print("Generated Reply:", reply)

Predicted Category: Billing
Predicted Priority: Low
Generated Reply: Thank you for contacting billing support. We will look into your request shortly.


In [48]:
import pickle

# Persist the fitted artifacts for the app. Each file is opened in a `with`
# block so the handle is flushed and closed deterministically rather than
# left for the garbage collector.
# NOTE(review): the pickled vectorizer does not include the lower-casing /
# punctuation stripping applied to the training text; whatever loads these
# files presumably needs to apply the same cleaning - confirm in the app.
artifacts = {
    "../app/category_model.pkl": model,
    "../app/priority_model.pkl": priority_model,
    "../app/vectorizer.pkl": vectorizer,
}

for artifact_path, artifact in artifacts.items():
    with open(artifact_path, "wb") as fh:
        pickle.dump(artifact, fh)