In [2]:
import zipfile
import os

# Archive that holds the raw dataset
zip_path = "customer-support-ticket-dataset.zip"

# Unpack every member of the archive into the data folder
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path="support_ticket_data")

# Report what ended up in the data folder
extracted_names = os.listdir("support_ticket_data")
print("Extracted files:", extracted_names)

Extracted files: ['customer_support_tickets.csv']


In [5]:
import os

# Show the entries currently present in the extraction folder
folder_entries = os.listdir("support_ticket_data")
print(folder_entries)


['customer_support_tickets.csv']


In [6]:
import pandas as pd

# Location of the CSV inside the extraction folder
csv_path = "support_ticket_data/customer_support_tickets.csv"

# Read the tickets and preview the first rows
df = pd.read_csv(csv_path)
preview_rows = df.head()
print(preview_rows)

# Write a working copy (row index omitted) for Task 5
working_copy_path = "support_tickets.csv"
df.to_csv(working_copy_path, index=False)
print("Dataset saved as support_tickets.csv")


   Ticket ID        Customer Name              Customer Email  Customer Age  \
0          1        Marisa Obrien  carrollallison@example.com            32   
1          2         Jessica Rios    clarkeashley@example.com            42   
2          3  Christopher Robbins   gonzalestracy@example.com            48   
3          4     Christina Dillon    bradleyolson@example.org            27   
4          5    Alexander Carroll     bradleymark@example.com            67   

  Customer Gender Product Purchased Date of Purchase      Ticket Type  \
0           Other        GoPro Hero       2021-03-22  Technical issue   
1          Female       LG Smart TV       2021-05-22  Technical issue   
2           Other          Dell XPS       2020-07-14  Technical issue   
3          Female  Microsoft Office       2020-11-13  Billing inquiry   
4          Female  Autodesk AutoCAD       2020-02-04  Billing inquiry   

             Ticket Subject  \
0             Product setup   
1  Peripheral compatibil

In [8]:
# Show the column labels of the loaded tickets frame
column_labels = df.columns
print(column_labels)


Index(['Ticket ID', 'Customer Name', 'Customer Email', 'Customer Age',
       'Customer Gender', 'Product Purchased', 'Date of Purchase',
       'Ticket Type', 'Ticket Subject', 'Ticket Description', 'Ticket Status',
       'Resolution', 'Ticket Priority', 'Ticket Channel',
       'First Response Time', 'Time to Resolution',
       'Customer Satisfaction Rating'],
      dtype='object')


In [None]:
# Step 1: Install dependencies (only first time)
!pip install transformers datasets accelerate scikit-learn

# Step 2: Import libraries
import pandas as pd
from transformers import pipeline

# Step 3: Load dataset (from your extracted folder)
csv_path = "support_ticket_data/customer_support_tickets.csv"
df = pd.read_csv(csv_path)
# Print the heading and the preview as two calls: passing both to a single
# print() inserts its separator space right after "\n", which pushes the
# header row one column out of line with the data rows.
print("Dataset Sample:")
print(df.head())

# Step 4: Save a working copy
df.to_csv("support_tickets.csv", index=False)
print("\n Dataset saved as support_tickets.csv")

# Step 5: Labels the zero-shot model chooses between
candidate_labels = [
    "Technical Issue",
    "Billing",
    "Authentication",
    "General Inquiry",
]

# Step 6: Build the zero-shot classification pipeline on BART MNLI
zero_shot_model = "facebook/bart-large-mnli"
classifier = pipeline(
    "zero-shot-classification",
    model=zero_shot_model,
)

def get_tags(text, top_k=3):
    """Return the leading (label, score) pairs the classifier gives for a text.

    Parameters
    ----------
    text : str
        Ticket text to classify against ``candidate_labels``.
    top_k : int, default 3
        How many (label, score) pairs to keep from the front of the result.

    Returns
    -------
    list of (label, score) tuples
        At most ``top_k`` pairs, in the order the classifier returned them.
        An empty list when ``text`` is not a string or is the empty string.
    """
    # Missing values (None, or NaN coming from pandas) and empty strings carry
    # nothing to classify, so skip the model call for them.
    # NOTE(review): the pipeline is expected to reject such inputs - confirm.
    if not isinstance(text, str) or not text:
        return []

    result = classifier(text, candidate_labels, multi_label=True)

    # Pair labels with scores first, then cut once, so both stay in step.
    # NOTE(review): relies on the pipeline returning labels ordered by
    # descending score for the leading pairs to be the "top" ones - confirm.
    ranked = list(zip(result['labels'], result['scores']))
    return ranked[:top_k]

# Apply on Ticket Description column
# get_tags is invoked once per row, so the model runs once for every ticket.
df["predicted_tags"] = df["Ticket Description"].apply(get_tags)

# Step 7: Show predictions
print("\nPredictions (Zero-Shot):\n")
print(df[["Ticket Description", "predicted_tags"]].head())

# Step 8: (Optional) Create a single predicted label (top-1)
# A row with no tags gets None instead of raising IndexError on x[0].
df["predicted_label"] = df["predicted_tags"].apply(lambda x: x[0][0] if x else None)

# Evaluate only when a ground-truth 'category' column is really present,
# rather than reporting it as missing without looking.
# NOTE(review): the frame has a 'Ticket Type' column, but its values (for
# example 'Billing inquiry') are not the same strings as candidate_labels, so
# it is not compared here - a label mapping would be needed first.
if "category" in df.columns:
    accuracy = (df["predicted_label"] == df["category"]).mean()
    print(f"\n Top-1 accuracy against 'category': {accuracy:.3f}")
else:
    #  Note: No ground-truth labels available in this dataset
    print("\n No 'category' column found → skipping accuracy/F1 evaluation")

# Step 9: Save results
df.to_csv("ticket_predictions.csv", index=False)
print("\n Predictions saved as ticket_predictions.csv")




Dataset Sample:
    Ticket ID        Customer Name              Customer Email  Customer Age  \
0          1        Marisa Obrien  carrollallison@example.com            32   
1          2         Jessica Rios    clarkeashley@example.com            42   
2          3  Christopher Robbins   gonzalestracy@example.com            48   
3          4     Christina Dillon    bradleyolson@example.org            27   
4          5    Alexander Carroll     bradleymark@example.com            67   

  Customer Gender Product Purchased Date of Purchase      Ticket Type  \
0           Other        GoPro Hero       2021-03-22  Technical issue   
1          Female       LG Smart TV       2021-05-22  Technical issue   
2           Other          Dell XPS       2020-07-14  Technical issue   
3          Female  Microsoft Office       2020-11-13  Billing inquiry   
4          Female  Autodesk AutoCAD       2020-02-04  Billing inquiry   

             Ticket Subject  \
0             Product setup   
1  Peri

Device set to use cpu
