<a href="https://colab.research.google.com/github/rapp2043/customer_service_sentiment_analysis/blob/main/Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Retrieve Classification Scores

In [None]:
# Download the NLTK corpora that TextBlob relies on (tokenizers, taggers, ...).
# This has to run before the TextBlob sentiment analysis further down.
!python -m textblob.download_corpora

import pandas as pd
from textblob import TextBlob

# Load the raw customer feedback; the analysis below reads its "feedback" column
df = pd.read_csv("Customer_Feedback_Dataset.csv")

# Function to analyze sentiment sentence by sentence
def get_sentence_sentiments(text):
    """Return the TextBlob polarity score of every sentence in ``text``.

    Args:
        text: Feedback text. Non-string values (for example the NaN that
            pandas produces for an empty CSV cell) have no sentences to score.

    Returns:
        list[float]: One polarity score per sentence, in sentence order;
        an empty list when ``text`` is not a string.
    """
    # TextBlob only accepts strings, so missing values must be handled here
    # instead of aborting the whole DataFrame.apply run.
    if not isinstance(text, str):
        return []
    return [sentence.sentiment.polarity for sentence in TextBlob(text).sentences]

# Score every feedback text; each cell of the new column holds a list with
# one polarity per sentence
df["sentence_sentiments"] = df["feedback"].apply(get_sentence_sentiments)

# Save results (the row index is not written to the file)
df.to_csv("customer_feedback_with_sentence_sentiment.csv", index=False)

# Display the first rows as a sample of the results
print(df.head())

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package conll2000 to /root/nltk_data...
[nltk_data]   Package conll2000 is already up-to-date!
[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!
Finished.
  order_number                                           feedback  score  \
0    ORD-83634  The experience was average. It works as descri...      7   
1    ORD-69012  Very dissatisfied. It didn't meet m

## Assign Sentiment Classifications

In [None]:
import pandas as pd
from textblob import TextBlob

# Reload the raw feedback CSV (same file as in the first cell), starting from
# a frame without the columns added earlier
df = pd.read_csv("Customer_Feedback_Dataset.csv")

# Function to analyze sentiment sentence by sentence
def get_sentence_sentiments(text):
    """Score and label every sentence of ``text``.

    Args:
        text: Feedback text. Non-string values (for example the NaN that
            pandas produces for an empty CSV cell) yield two empty lists.

    Returns:
        tuple[list[float], list[str]]: The per-sentence polarity scores and
        the matching labels, "Positive" for a score above 0, "Negative" for
        a score below 0 and "Neutral" for a score of exactly 0.
    """
    # TextBlob only accepts strings, so missing values must be handled here
    # instead of aborting the whole DataFrame.apply run.
    if not isinstance(text, str):
        return [], []

    sentiments = [sentence.sentiment.polarity for sentence in TextBlob(text).sentences]
    classifications = [
        "Positive" if score > 0 else "Negative" if score < 0 else "Neutral"
        for score in sentiments
    ]
    return sentiments, classifications  # Returning both lists

# Apply sentiment analysis. Every call returns a (scores, labels) pair, so both
# columns are built from the list of pairs in one DataFrame construction
# instead of creating a pd.Series for each row inside apply, which is much
# slower on large frames.
sentiment_pairs = [get_sentence_sentiments(text) for text in df["feedback"]]
df[["sentence_sentiments", "sentiment_classifications"]] = pd.DataFrame(
    sentiment_pairs,
    index=df.index,  # keep row alignment with df
    columns=["sentence_sentiments", "sentiment_classifications"],
)

# Save results (the row index is not written to the file)
df.to_csv("customer_feedback_with_sentiment_classifications.csv", index=False)

# Display sample results
print(df.head())


  order_number                                           feedback  score  \
0    ORD-83634  The experience was average. It works as descri...      7   
1    ORD-69012  Very dissatisfied. It didn't meet my needs. Th...      2   
2    ORD-54946  Delivery was on time, but packaging could be b...      5   
3    ORD-30642            The order arrived late and was damaged.      1   
4    ORD-26775  I'm disappointed with the product quality. I w...      3   

               sentence_sentiments  \
0              [-0.15, 0.2, -0.15]   
1  [0.2, 0.0, -0.3, 0.0, 0.2, 0.0]   
2                            [0.5]   
3                           [-0.3]   
4                     [-0.75, 0.0]   

                           sentiment_classifications  
0                     [Negative, Positive, Negative]  
1  [Positive, Neutral, Negative, Neutral, Positiv...  
2                                         [Positive]  
3                                         [Negative]  
4                                [Negat

## Assign Feedback Keywords

In [None]:
# Install KeyBERT
!pip install keybert

import pandas as pd
from keybert import KeyBERT

# Load the dataset written by the sentiment classification step. The
# list-valued columns come back from CSV as the string repr of each list.
df = pd.read_csv("customer_feedback_with_sentiment_classifications.csv")

# Initialize KeyBERT with its default settings (the recorded run fetched the
# model files from the Hugging Face Hub at this point)
kw_model = KeyBERT()

# Function to extract keywords
def extract_keywords(text):
    """Extract keyword phrases from ``text`` with the module-level ``kw_model``.

    Args:
        text: Feedback text. Non-string or blank values (for example the NaN
            that pandas produces for an empty CSV cell) produce no keywords.

    Returns:
        list[str]: One- and two-word keyphrases in the order the model returns
        them, with the relevance scores dropped.
    """
    # Guard missing/blank feedback so one bad row cannot abort the whole apply.
    if not isinstance(text, str) or not text.strip():
        return []
    keywords = kw_model.extract_keywords(
        text, keyphrase_ngram_range=(1, 2), stop_words="english"
    )
    return [kw[0] for kw in keywords]  # Keep only the phrase of each (phrase, score) pair

# Apply keyword extraction to every feedback text
df["keywords"] = df["feedback"].apply(extract_keywords)

# Concatenate the keyword lists of all rows that share the same value in
# "sentiment_classifications" (summing lists joins them).
# NOTE(review): after the CSV round-trip that column holds the string repr of
# the whole label list (e.g. "['Negative', 'Positive']"), so rows are grouped
# by their exact label sequence, not by the individual Positive / Negative /
# Neutral labels. Confirm that this is the intended grouping.
grouped_keywords = df.groupby("sentiment_classifications")["keywords"].sum()

# Save results (per-row keywords; the grouped view is only printed, not saved)
df.to_csv("customer_feedback_with_keywords.csv", index=False)

# Display sample output
print(grouped_keywords)

Collecting keybert
  Downloading keybert-0.9.0-py3-none-any.whl.metadata (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers>=0.3.8->keybert)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers>=0.3.8->keybert)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers>=0.3.8->keybert)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers>=0.3.8->keybert)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers>=0.3.8->keybert)
  Downloading nvi

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

sentiment_classifications
['Negative', 'Negative', 'Negative', 'Negative']               [product quality, quality, quality order, disa...
['Negative', 'Negative', 'Negative']                           [product quality, quality order, quality, disa...
['Negative', 'Negative', 'Neutral', 'Neutral']                 [quality, product quality, quality won, order ...
['Negative', 'Negative', 'Neutral']                            [quality disappointed, product quality, disapp...
['Negative', 'Negative', 'Positive']                           [packaging better, packaging, average delivery...
                                                                                     ...                        
['Positive', 'Positive', 'Positive', 'Positive', 'Neutral']    [quality support, satisfied quality, support s...
['Positive', 'Positive', 'Positive', 'Positive']               [packaging better, delivery time, time packagi...
['Positive', 'Positive', 'Positive']                           [featur

## Assign Categories Based on Feedback

In [None]:
import ast

import pandas as pd

# Example categories and their related keywords.
# assign_category returns the first category (in this insertion order) that has
# a word equal, ignoring case, to one of a row's extracted keywords, so the
# order of the entries sets their precedence.
category_keywords = {
    "Billing Issues": ["invoice", "charge", "refund", "payment"],
    "Product Quality": ["defective", "broken", "quality", "damaged"],
    "Support Experience": ["rude", "helpful", "support", "customer service"],
    "Delivery Concerns": ["delay", "shipping", "late", "lost"]
}

# Load dataset from the file containing the keywords column
df = pd.read_csv("customer_feedback_with_keywords.csv")

# Function to assign categories based on keywords
def assign_category(keywords, categories=None):
    """Map a row's extracted keywords to the first matching feedback category.

    Args:
        keywords: A list/tuple of keyword strings, or the string repr of one
            (which is what a list column becomes after a CSV round-trip).
        categories: Optional mapping of category name -> trigger words.
            Defaults to the module-level ``category_keywords``.

    Returns:
        str: The first category (in mapping order) that has a trigger word
        equal, ignoring case, to one of the keywords. "Other" when nothing
        matches or ``keywords`` cannot be interpreted as a list/tuple.
    """
    if categories is None:
        categories = category_keywords

    if isinstance(keywords, str):
        # literal_eval only accepts Python literals, so a crafted CSV cell
        # cannot execute code the way it could with eval().
        try:
            keywords = ast.literal_eval(keywords)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            return "Other"

    # Anything that is not a sequence of keywords (NaN, numbers, dicts, ...)
    if not isinstance(keywords, (list, tuple)):
        return "Other"

    # Lower-case once for case-insensitive matching; non-string items are
    # skipped because they can never equal a trigger word.
    normalized = {kw.lower() for kw in keywords if isinstance(kw, str)}

    for category, words in categories.items():
        if any(word.lower() in normalized for word in words):
            return category
    return "Other"  # Default category for unmatched feedback

# Apply categorization to the 'keywords' column (one category per feedback row)
df["categorized_feedback"] = df["keywords"].apply(assign_category)

# Save results (the row index is not written to the file)
df.to_csv("customer_feedback_categorized.csv", index=False)

# Display sample output
print(df.head())

  order_number                                           feedback  score  \
0    ORD-83634  The experience was average. It works as descri...      7   
1    ORD-69012  Very dissatisfied. It didn't meet my needs. Th...      2   
2    ORD-54946  Delivery was on time, but packaging could be b...      5   
3    ORD-30642            The order arrived late and was damaged.      1   
4    ORD-26775  I'm disappointed with the product quality. I w...      3   

               sentence_sentiments  \
0              [-0.15, 0.2, -0.15]   
1  [0.2, 0.0, -0.3, 0.0, 0.2, 0.0]   
2                            [0.5]   
3                           [-0.3]   
4                     [-0.75, 0.0]   

                           sentiment_classifications  \
0               ['Negative', 'Positive', 'Negative']   
1  ['Positive', 'Neutral', 'Negative', 'Neutral',...   
2                                       ['Positive']   
3                                       ['Negative']   
4                            ['Neg