In [9]:
import pandas as pd
import numpy as np
import re

import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [20]:
df = pd.read_csv("customer_support_tickets.csv")
df.head()


Unnamed: 0,Ticket ID,Customer Name,Customer Email,Customer Age,Customer Gender,Product Purchased,Date of Purchase,Ticket Type,Ticket Subject,Ticket Description,Ticket Status,Resolution,Ticket Priority,Ticket Channel,First Response Time,Time to Resolution,Customer Satisfaction Rating
0,1,Marisa Obrien,carrollallison@example.com,32,Other,GoPro Hero,2021-03-22,Technical issue,Product setup,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Social media,2023-06-01 12:15:36,,
1,2,Jessica Rios,clarkeashley@example.com,42,Female,LG Smart TV,2021-05-22,Technical issue,Peripheral compatibility,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Chat,2023-06-01 16:45:38,,
2,3,Christopher Robbins,gonzalestracy@example.com,48,Other,Dell XPS,2020-07-14,Technical issue,Network problem,I'm facing a problem with my {product_purchase...,Closed,Case maybe show recently my computer follow.,Low,Social media,2023-06-01 11:14:38,2023-06-01 18:05:38,3.0
3,4,Christina Dillon,bradleyolson@example.org,27,Female,Microsoft Office,2020-11-13,Billing inquiry,Account access,I'm having an issue with the {product_purchase...,Closed,Try capital clearly never color toward story.,Low,Social media,2023-06-01 07:29:40,2023-06-01 01:57:40,3.0
4,5,Alexander Carroll,bradleymark@example.com,67,Female,Autodesk AutoCAD,2020-02-04,Billing inquiry,Data loss,I'm having an issue with the {product_purchase...,Closed,West decision evidence bit.,Low,Email,2023-06-01 00:12:42,2023-06-01 19:53:42,1.0


In [11]:
df = df.rename(columns={
    'Ticket Description': 'ticket_text',
    'Ticket Type': 'category'
})

df = df[['ticket_text', 'category']]

print("\nUsing columns:")
print(df.head())


Using columns:
                                         ticket_text         category
0  I'm having an issue with the {product_purchase...  Technical issue
1  I'm having an issue with the {product_purchase...  Technical issue
2  I'm facing a problem with my {product_purchase...  Technical issue
3  I'm having an issue with the {product_purchase...  Billing inquiry
4  I'm having an issue with the {product_purchase...  Billing inquiry


In [12]:
stop_words = set(stopwords.words('english'))

def clean_text(text):
    text = str(text).lower()
    text = re.sub(r'[^a-z\s]', '', text)
    words = text.split()
    words = [w for w in words if w not in stop_words]
    return " ".join(words)

df['clean_text'] = df['ticket_text'].apply(clean_text)

print("\nCleaned Text Sample:")
print(df[['ticket_text', 'clean_text']].head())


Cleaned Text Sample:
                                         ticket_text  \
0  I'm having an issue with the {product_purchase...   
1  I'm having an issue with the {product_purchase...   
2  I'm facing a problem with my {product_purchase...   
3  I'm having an issue with the {product_purchase...   
4  I'm having an issue with the {product_purchase...   

                                          clean_text  
0  im issue productpurchased please assist billin...  
1  im issue productpurchased please assist need c...  
2  im facing problem productpurchased productpurc...  
3  im issue productpurchased please assist proble...  
4  im issue productpurchased please assist note s...  


In [13]:
tfidf = TfidfVectorizer(max_features=5000)

X = tfidf.fit_transform(df['clean_text'])
y = df['category']


In [14]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [15]:
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)


In [16]:
y_pred = model.predict(X_test)

print("\nModel Evaluation Results:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Model Evaluation Results:
Accuracy: 0.19657615112160567

Classification Report:
                       precision    recall  f1-score   support

     Billing inquiry       0.19      0.13      0.16       357
Cancellation request       0.18      0.18      0.18       327
     Product inquiry       0.18      0.19      0.19       316
      Refund request       0.20      0.23      0.22       345
     Technical issue       0.22      0.25      0.23       349

            accuracy                           0.20      1694
           macro avg       0.19      0.20      0.19      1694
        weighted avg       0.20      0.20      0.19      1694

Confusion Matrix:
 [[47 72 70 91 77]
 [43 59 70 79 76]
 [51 55 59 73 78]
 [61 65 61 81 77]
 [46 77 61 78 87]]


In [17]:
def assign_priority(category):
    category = category.lower()
    if category in ['technical issue', 'billing issue', 'payment issue', 'payment failure']:
        return 'High'
    elif category in ['account issue', 'login issue', 'account problem']:
        return 'Medium'
    else:
        return 'Low'

# Apply model predictions + priority
df['predicted_category'] = model.predict(X)
df['priority'] = df['predicted_category'].apply(assign_priority)

print("\nSample Final Output:")
print(df.head())



Sample Final Output:
                                         ticket_text         category  \
0  I'm having an issue with the {product_purchase...  Technical issue   
1  I'm having an issue with the {product_purchase...  Technical issue   
2  I'm facing a problem with my {product_purchase...  Technical issue   
3  I'm having an issue with the {product_purchase...  Billing inquiry   
4  I'm having an issue with the {product_purchase...  Billing inquiry   

                                          clean_text    predicted_category  \
0  im issue productpurchased please assist billin...       Technical issue   
1  im issue productpurchased please assist need c...       Technical issue   
2  im facing problem productpurchased productpurc...  Cancellation request   
3  im issue productpurchased please assist proble...       Billing inquiry   
4  im issue productpurchased please assist note s...       Billing inquiry   

  priority  
0     High  
1     High  
2      Low  
3      Low  
4    

In [18]:
new_ticket = "Payment failed but the amount was deducted from my bank account"

cleaned_ticket = clean_text(new_ticket)
vector = tfidf.transform([cleaned_ticket])

predicted_category = model.predict(vector)[0]
predicted_priority = assign_priority(predicted_category)

print("\nNew Ticket Example:")
print("Ticket:", new_ticket)
print("Predicted Category:", predicted_category)
print("Assigned Priority:", predicted_priority)


New Ticket Example:
Ticket: Payment failed but the amount was deducted from my bank account
Predicted Category: Refund request
Assigned Priority: Low
