In [1]:
from pathlib import Path

import pandas as pd

# Read the raw ticket export and preview its first rows.
df = pd.read_csv(filepath_or_buffer='../data/raw_tickets.csv')
df.head(n=5)

Unnamed: 0,ticket_id,complaint_text,date
0,T1001,Order not delivered. No updates for 5 days,2025-06-01
1,T1002,Payment deducted but no confirmation. Please h...,2025-06-01
2,T1003,Product damaged on arrival. Need a replacement...,2025-06-02
3,T1004,Still waiting for refund. It's been 10 days!,2025-06-03
4,T1005,Received wrong item. Want full refund immediately,2025-06-03


In [2]:
import re


# Category name -> regular expression used to tag a complaint.
# Every pattern is anchored on word boundaries and is applied case-insensitively
# by extract_complaint_type(). Dict order is the order in which matching
# categories are listed in the output column.
# Common inflections (delayed, refunds, invoices, ...) are listed explicitly
# because the word-boundary anchors reject them otherwise.
# NOTE(review): 'not working' also fires on non-product phrases such as
# "Tracking info not working" (tagged 'damaged' in the output below) - confirm
# whether that is intended.
complaint_keywords = {
    'delivery_delay': r'\b(?:delay(?:s|ed)?|no updates?|not delivered|not arrived|late|no sign)\b',
    'payment_issue': r'\b(?:payments?|charged|deducted|confirmation)\b',
    'damaged': r'\b(?:damaged?|broken|not working)\b',
    'refund': r'\b(?:refund(?:s|ed)?|money back|credited)\b',
    'wrong_item': r'\b(?:wrong item|not ordered|incorrect item)\b',
    'invoice': r'\b(?:invoices?|bills?|receipts?|tax)\b'
}


# Words and phrases that flag a ticket as urgent. is_urgent() searches for them
# case-insensitively; the word-boundary anchors mean inflections such as
# "urgently" have to be listed explicitly.
urgency_keywords = r'\b(?:asap|immediately|quickly|please help|urgent(?:ly)?|no updates?)\b'


def extract_complaint_type(text, keywords=None):
    """Return the complaint categories whose keyword pattern occurs in ``text``.

    Parameters
    ----------
    text : str
        Free-form complaint text. A value that is not a string (for example
        the NaN that pandas stores for an empty CSV cell, or None) is treated
        as containing no keywords.
    keywords : mapping of str to str, optional
        Category name -> regular expression. When omitted, the notebook-level
        ``complaint_keywords`` mapping is used.

    Returns
    -------
    str
        The matching category names joined with ``', '`` in mapping order, or
        ``'general_inquiry'`` when no pattern matches.
    """
    if not isinstance(text, str):
        # re.search raises TypeError on NaN/None; missing text has no category.
        return 'general_inquiry'
    if keywords is None:
        keywords = complaint_keywords
    matched = [
        category
        for category, pattern in keywords.items()
        if re.search(pattern, text, re.IGNORECASE)
    ]
    return ', '.join(matched) if matched else 'general_inquiry'


def is_urgent(text, pattern=None):
    """Tell whether ``text`` contains an urgency keyword.

    Parameters
    ----------
    text : str
        Free-form complaint text. A value that is not a string (for example
        NaN from an empty CSV cell, or None) is reported as not urgent.
    pattern : str, optional
        Regular expression to search for. When omitted, the notebook-level
        ``urgency_keywords`` pattern is used.

    Returns
    -------
    bool
        True when the pattern is found anywhere in ``text`` (case-insensitive).
    """
    if not isinstance(text, str):
        # re.search raises TypeError on NaN/None; missing text cannot be urgent.
        return False
    if pattern is None:
        pattern = urgency_keywords
    return re.search(pattern, text, re.IGNORECASE) is not None


# Tag each ticket with the categories found in its text and an urgency flag.
df['complaint_type'] = df['complaint_text'].map(extract_complaint_type)
df['is_urgent'] = df['complaint_text'].map(is_urgent)

# Show only the columns relevant to the tagging result.
df.loc[:, ['ticket_id', 'complaint_text', 'complaint_type', 'is_urgent']]

Unnamed: 0,ticket_id,complaint_text,complaint_type,is_urgent
0,T1001,Order not delivered. No updates for 5 days,delivery_delay,True
1,T1002,Payment deducted but no confirmation. Please h...,payment_issue,True
2,T1003,Product damaged on arrival. Need a replacement...,damaged,True
3,T1004,Still waiting for refund. It's been 10 days!,refund,False
4,T1005,Received wrong item. Want full refund immediately,"refund, wrong_item",True
5,T1006,Item was supposed to arrive on Monday. No sign...,delivery_delay,False
6,T1007,Charged twice for same item. Please fix this q...,payment_issue,True
7,T1008,No invoice received. Need for tax claim,invoice,False
8,T1009,Refund processed but amount not credited yet,refund,False
9,T1010,Delay in shipping. Tracking info not working,"delivery_delay, damaged",False


In [3]:
# Persist the tagged tickets without the positional index column.
# to_csv does not create missing directories, so make sure the target folder
# exists first; otherwise this cell fails on a fresh checkout.
output_path = Path('../output/cleaned_complaints.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)