In [3]:
import functools

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from textblob import TextBlob

# Fetch the NLTK resources requested by this script, in the same order as
# before: the 'punkt' and 'punkt_tab' tokenizer data and the 'stopwords' corpus.
for _resource in ('punkt', 'stopwords', 'punkt_tab'):
    nltk.download(_resource)

# Category name -> lowercase keywords that count as evidence for that category.
# Insertion order matters to callers that pick the first best-scoring category.
# Note that "delay" is listed under both "Service" and "Delivery".
CATEGORY_KEYWORDS = {
    "Billing": [
        "bill",
        "charge",
        "payment",
        "fee",
        "invoice",
    ],
    "Product": [
        "product",
        "item",
        "quality",
        "defect",
        "damage",
    ],
    "Service": [
        "service",
        "support",
        "help",
        "delay",
        "response",
    ],
    "Delivery": [
        "delivery",
        "shipping",
        "late",
        "delay",
        "courier",
    ],
}

@functools.lru_cache(maxsize=None)
def _stop_words(language):
    """Return the NLTK stop words for *language* as a frozenset, loaded once."""
    # A failed load raises and is not cached, so a later call can retry.
    return frozenset(stopwords.words(language))


# Preprocessing function
def preprocess_text(text):
    """Lowercase and tokenize *text*, dropping punctuation and stop words.

    Args:
        text: The raw string to process.

    Returns:
        A list of tokens, in their original order, that are purely
        alphanumeric (``str.isalnum``) and are not English stop words.
    """
    # The stop-word set is cached so it is not re-read from the corpus and
    # rebuilt for every piece of text that gets processed.
    stop_words = _stop_words('english')
    return [
        word
        for word in word_tokenize(text.lower())
        if word.isalnum() and word not in stop_words
    ]

# Keyword-based classification
def classify_by_keywords(text):
    """Pick the category whose keywords occur most often in *text*.

    The text is run through ``preprocess_text`` and every resulting token that
    exactly equals one of a category's keywords scores one hit for that
    category. When several categories share the top score, the one defined
    first in ``CATEGORY_KEYWORDS`` is returned.

    Returns:
        The winning category name, or "Uncategorized" if no token matched
        any keyword.
    """
    words = preprocess_text(text)

    # One hit per (token, category) match; repeated tokens count repeatedly.
    hits_per_category = {
        name: sum(1 for word in words if word in vocabulary)
        for name, vocabulary in CATEGORY_KEYWORDS.items()
    }

    # max() keeps the first key among equals, i.e. dict insertion order.
    best = max(hits_per_category, key=hits_per_category.get)
    if hits_per_category[best] > 0:
        return best
    return "Uncategorized"

# Sentiment analysis
def analyze_sentiment(text, *, neutral_threshold=0.0):
    """Label *text* as "Positive", "Negative" or "Neutral".

    The label is derived from the polarity score TextBlob computes for the
    text.

    Args:
        text: The string to score.
        neutral_threshold: Non-negative half-width of the neutral band.
            Scores above ``neutral_threshold`` are "Positive", scores below
            ``-neutral_threshold`` are "Negative", and everything in between
            is "Neutral". The default of 0.0 labels only a score of exactly
            zero as "Neutral".

    Returns:
        One of the strings "Positive", "Negative" or "Neutral".

    Raises:
        ValueError: If ``neutral_threshold`` is negative.
    """
    # A negative band would make the positive and negative ranges overlap.
    if neutral_threshold < 0:
        raise ValueError("neutral_threshold must be non-negative")

    polarity = TextBlob(text).sentiment.polarity

    if polarity > neutral_threshold:
        return "Positive"
    if polarity < -neutral_threshold:
        return "Negative"
    return "Neutral"

# Classify complaints
def classify_complaints(data):
    """Classify each complaint by category and sentiment.

    Args:
        data: An iterable of complaint strings.

    Returns:
        A pandas DataFrame with one row per complaint and the columns
        "Complaint", "Category" and "Sentiment", in that order. The columns
        are present even when *data* is empty.
    """
    columns = ["Complaint", "Category", "Sentiment"]
    rows = [
        {
            "Complaint": complaint,
            "Category": classify_by_keywords(complaint),
            "Sentiment": analyze_sentiment(complaint),
        }
        for complaint in data
    ]
    # Passing the columns explicitly keeps the schema stable for empty input;
    # otherwise the frame would have no columns and df["Category"] would fail.
    return pd.DataFrame(rows, columns=columns)

# Example usage: runs only when this code is executed directly (not imported).
if __name__ == "__main__":
    # Five sample texts, one per line of the resulting table.
    complaints = [
        "I was overcharged on my bill this month.",
        "The product I received was defective and damaged.",
        "The delivery was delayed by two weeks.",
        "Customer service did not respond to my query.",
        "The quality of the item was excellent!",
    ]

    # Build the Complaint / Category / Sentiment table and print it.
    classified_complaints = classify_complaints(complaints)
    print(classified_complaints)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


                                           Complaint  Category Sentiment
0           I was overcharged on my bill this month.   Billing   Neutral
1  The product I received was defective and damaged.   Product   Neutral
2             The delivery was delayed by two weeks.  Delivery   Neutral
3      Customer service did not respond to my query.   Service   Neutral
4             The quality of the item was excellent!   Product  Positive
