In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the support-ticket export (path is relative to the notebook's working
# directory) and preview the first three rows.
df = pd.read_csv("customer_support_tickets.csv")
df.head(3)

Unnamed: 0,Ticket ID,Customer Name,Customer Email,Customer Age,Customer Gender,Product Purchased,Date of Purchase,Ticket Type,Ticket Subject,Ticket Description,Ticket Status,Resolution,Ticket Priority,Ticket Channel,First Response Time,Time to Resolution,Customer Satisfaction Rating
0,1,Marisa Obrien,carrollallison@example.com,32,Other,GoPro Hero,2021-03-22,Technical issue,Product setup,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Social media,2023-06-01 12:15:36,,
1,2,Jessica Rios,clarkeashley@example.com,42,Female,LG Smart TV,2021-05-22,Technical issue,Peripheral compatibility,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Chat,2023-06-01 16:45:38,,
2,3,Christopher Robbins,gonzalestracy@example.com,48,Other,Dell XPS,2020-07-14,Technical issue,Network problem,I'm facing a problem with my {product_purchase...,Closed,Case maybe show recently my computer follow.,Low,Social media,2023-06-01 11:14:38,2023-06-01 18:05:38,3.0


In [3]:
# Column-level overview: dtype and non-null count for every field.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8469 entries, 0 to 8468
Data columns (total 17 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Ticket ID                     8469 non-null   int64  
 1   Customer Name                 8469 non-null   object 
 2   Customer Email                8469 non-null   object 
 3   Customer Age                  8469 non-null   int64  
 4   Customer Gender               8469 non-null   object 
 5   Product Purchased             8469 non-null   object 
 6   Date of Purchase              8469 non-null   object 
 7   Ticket Type                   8469 non-null   object 
 8   Ticket Subject                8469 non-null   object 
 9   Ticket Description            8469 non-null   object 
 10  Ticket Status                 8469 non-null   object 
 11  Resolution                    2769 non-null   object 
 12  Ticket Priority               8469 non-null   object 
 13  Tic

In [4]:
# Number of missing values in each column.
df.isna().sum()

Ticket ID                          0
Customer Name                      0
Customer Email                     0
Customer Age                       0
Customer Gender                    0
Product Purchased                  0
Date of Purchase                   0
Ticket Type                        0
Ticket Subject                     0
Ticket Description                 0
Ticket Status                      0
Resolution                      5700
Ticket Priority                    0
Ticket Channel                     0
First Response Time             2819
Time to Resolution              5700
Customer Satisfaction Rating    5700
dtype: int64

In [5]:
# (rows, columns) of the loaded frame.
df.shape

(8469, 17)

In [10]:
# Distinct ticket types, ordered as value_counts() returns them (most frequent first).
list(df["Ticket Type"].value_counts().index)

['Refund request',
 'Technical issue',
 'Cancellation request',
 'Product inquiry',
 'Billing inquiry']

In [11]:
# Distinct ticket subjects, ordered as value_counts() returns them (most frequent first).
list(df["Ticket Subject"].value_counts().index)

['Refund request',
 'Software bug',
 'Product compatibility',
 'Delivery problem',
 'Hardware issue',
 'Battery life',
 'Network problem',
 'Installation support',
 'Product setup',
 'Payment issue',
 'Product recommendation',
 'Account access',
 'Peripheral compatibility',
 'Data loss',
 'Cancellation request',
 'Display issue']

In [8]:
# Resolution text for the tickets that have one (missing entries dropped).
df["Resolution"].dropna()

2            Case maybe show recently my computer follow.
3           Try capital clearly never color toward story.
4                             West decision evidence bit.
10                  Measure tonight surface feel forward.
11      Measure there house management pick knowledge ...
                              ...                        
8452                    Interesting show must successful.
8453                              Skill expect admit and.
8455                    Say position key appear behavior.
8466                Eight account century nature kitchen.
8467                                We seat culture plan.
Name: Resolution, Length: 2769, dtype: object

In [9]:
# How many tickets arrived through each channel.
df["Ticket Channel"].value_counts()

Ticket Channel
Email           2143
Phone           2132
Social media    2121
Chat            2073
Name: count, dtype: int64

In [10]:
# Preview ten resolved tickets (positions 10-19) that came in through the Chat
# channel. The filtered frame is built once up front instead of being recomputed
# for every printed field.
resolved_chat = df[df["Resolution"].notna() & (df["Ticket Channel"] == "Chat")]

for i, ticket in resolved_chat.iloc[10:20].iterrows():
    # `i` is the row's index label in df, `ticket` the row itself.
    print("Order: #", i)
    print(f"Product: {ticket['Product Purchased']}")
    print(f"Ticket Type: {ticket['Ticket Type']}")
    # This line was previously mislabeled "Ticket Type".
    print(f"Ticket Subject: {ticket['Ticket Subject']}")
    print("Description: ")
    print(ticket["Ticket Description"])
    print("Resolution: ")
    print(ticket["Resolution"])
    print("-" * 30)

Order: # 69
Product: Canon DSLR Camera
Ticket Type: Refund request
Ticket Type: Refund request
Description: 
I'm unable to access my {product_purchased} account. It keeps displaying an 'Invalid Credentials' error, even though I'm using the correct login information. How can I regain access to my account? If you don't I'm concerned about the security of my {product_purchased} and would like to ensure that my data is safe.
Resoultion: 
Answer story series imagine discover.
------------------------------
Order: # 72
Product: Dyson Vacuum Cleaner
Ticket Type: Billing inquiry
Ticket Type: Display issue
Description: 
I'm having an issue with the {product_purchased}. Please assist. When I first noticed {product_purchased} the new price of the book was too high. I've already sent out the refund request, but they will I've checked for software updates, and my {product_purchased} is already running the latest version.
Resoultion: 
Executive wear gun child.
------------------------------
Order: #

In [11]:
# Build one free-text "context" string per ticket: type, subject, description
# and product joined with single spaces.
df["context"] = df["Ticket Type"].str.cat(
    [df["Ticket Subject"], df["Ticket Description"], df["Product Purchased"]],
    sep=" ",
)

In [12]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [13]:
# English stop words from NLTK, kept in a set for membership tests.
stop_words = set(stopwords.words('english'))


def preprocess(text):
    """Normalize free text before TF-IDF vectorization.

    The input is cast to ``str``, lower-cased and split with NLTK's
    ``word_tokenize``. Only purely alphabetic tokens that are not in
    ``stop_words`` are kept; the survivors are joined with single spaces.
    """
    kept = []
    for token in word_tokenize(str(text).lower()):
        if not token.isalpha():
            continue
        if token in stop_words:
            continue
        kept.append(token)
    return " ".join(kept)


# Cleaned version of every ticket's context string.
df["processed_context"] = df["context"].apply(preprocess)

In [14]:
# Fit TF-IDF on the preprocessed ticket contexts and vectorize them in one pass.
# tfidf_matrix is built from df["processed_context"] in row order, and
# chatbot_response relies on that alignment when it looks rows up by position.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(df["processed_context"])

In [15]:
def chatbot_response(user_query, min_score=0.2):
    """Return the details of the stored ticket most similar to ``user_query``.

    The query goes through the same ``preprocess`` step as the ticket corpus,
    is projected with the fitted ``vectorizer`` and compared against every row
    of ``tfidf_matrix`` using cosine similarity.

    Parameters
    ----------
    user_query : str
        Free-text question from the user.
    min_score : float, default 0.2
        Minimum cosine similarity the best match must reach. Below it the
        fallback message is returned instead of a ticket.

    Returns
    -------
    str
        Either a four-line summary (product, ticket type, ticket subject,
        description) of the best-matching ticket, or a fixed apology message
        when no ticket is similar enough.
    """
    processed_query = preprocess(user_query)
    user_vec = vectorizer.transform([processed_query])

    # One query row against every ticket; flatten to a 1-D score vector.
    scores = cosine_similarity(user_vec, tfidf_matrix).ravel()
    idx = scores.argmax()
    if scores[idx] < min_score:
        return "Sorry, I couldn't find an answer to your question."

    # Positional lookup: tfidf_matrix rows follow df's row order. Fetch the
    # matching row once rather than once per field.
    match = df.iloc[idx]
    return (
        f"Product: {match['Product Purchased']}\n"
        f"Ticket Type: {match['Ticket Type']}\n"
        f"Ticket Subject: {match['Ticket Subject']}\n"
        f"Description: {match['Ticket Description']}"
    )

In [16]:
# Ticket count per product.
df["Product Purchased"].value_counts()

Product Purchased
Canon EOS                         240
GoPro Hero                        228
Nest Thermostat                   225
Amazon Echo                       221
Philips Hue Lights                221
LG Smart TV                       219
Sony Xperia                       217
Roomba Robot Vacuum               216
Apple AirPods                     213
LG OLED                           213
iPhone                            212
Sony 4K HDR TV                    210
LG Washing Machine                208
Garmin Forerunner                 208
Canon DSLR Camera                 206
Nikon D                           204
Nintendo Switch Pro Controller    203
Google Pixel                      203
Fitbit Charge                     202
Sony PlayStation                  202
Microsoft Office                  200
HP Pavilion                       200
Amazon Kindle                     198
Dyson Vacuum Cleaner              198
Google Nest                       198
Bose SoundLink Speaker          