# Ticket Triage Exploration Notebook

The goal is to understand the data and run a few experiments testing the use of LLMs for our eventual app.

In [1]:
# Imports 

import pandas as pd # for dataframe handling, CSV reading, etc.
import requests     # for forming HTTP requests
import random       # random number generator

## Set up helper functions 

In [2]:
""" 
Helper functions
"""

def show_error(err_string: str):
    """
    Print an error message and stop execution.

    Args:
        err_string: Human-readable description of what went wrong.

    Raises:
        SystemExit: Always, with exit status 1, after the message has been printed.
    """
    print(f"Error: {err_string}")
    # The exception has to be *raised*: merely constructing SystemExit() is a
    # no-op, which let callers carry on as if nothing had failed.
    raise SystemExit(1)


def load_data(csv_path: str):
    """
    Read the support-ticket CSV at ``csv_path`` into a pandas DataFrame.

    Args:
        csv_path: Location of the CSV file to read.

    Returns:
        pd.DataFrame: The parsed tickets, one row per ticket, when the read succeeds.
        None: When the file is missing or cannot be parsed (after the failure
              has been reported through show_error).

    Example CSV structure:
        subject        | body            |
        ---------------|-----------------|
        Login issue    | Can't log in... |
    """
    try:
        tickets = pd.read_csv(csv_path)
    except FileNotFoundError:
        # Missing file: report the path that was tried
        show_error(f"CSV file not found at {csv_path}")
    except Exception as exc:
        # Anything else (parse errors, permissions, ...): report the details
        show_error(f"Error loading CSV: {str(exc)}")
    else:
        return tickets

    # Only reached when one of the handlers above ran
    return None
    


In [3]:
# Version 2 of the LLM provider call, using the OpenAI SDK rather than raw requests.

# Remember to `pip install "openai>=1.35"` if you want to use the SDK version
# (quote the requirement: an unquoted >= is treated as a redirect by the shell).
from openai import OpenAI
import os
from dotenv import load_dotenv


load_dotenv()  # reads .env in the current working directory (e.g. for provider API keys)


# Point the SDK at the LLM provider.
#
# The defaults target LM Studio running locally. To use another
# OpenAI-compatible provider without editing this cell, set LLM_BASE_URL and
# LLM_API_KEY in the environment or in .env. For example, to call a model on Groq:
#     LLM_BASE_URL=https://api.groq.com/openai/v1
#     LLM_API_KEY=<your Groq API key>
_client = OpenAI(
    base_url=os.environ.get("LLM_BASE_URL", "http://192.168.2.2:1234/v1"),
    api_key=os.environ.get("LLM_API_KEY", "lm-studio"),  # any non-empty string works for LM Studio
)

def call_llm_sdk(
    system_content: str,
    user_content: str,
    model: str = "openai/gpt-oss-20b",
    max_tokens: int = 2000,
    temperature: float = 0.1,
    reasoning_effort: str = "low",
) -> str:
    """
    Call the LLM provider via the OpenAI SDK (function call) instead of manual HTTP.

    Args:
        system_content: System prompt/instructions.
        user_content: User message.
        model: Model name exposed by the provider - defaults to 'openai/gpt-oss-20b'.
            Make sure the provider actually has this model or you'll get an error.
        max_tokens: Max tokens to generate - defaults to 2000. Generation stops when
            it hits this limit, so specify a size sufficient to allow a full response.
        temperature: Sampling temperature - defaults to 0.1, which is meant to be
            more 'deterministic' and less 'creative'.
        reasoning_effort: Passed straight through to the chat-completions call -
            defaults to 'low'; this notebook also uses 'medium' and 'high'.

    Returns:
        The assistant's reply text (the content of the first choice).
        If the call fails, the error is reported through show_error; should that
        helper return rather than stop execution, this function returns None.

    Raises:
        Exception: The original failure is re-raised unchanged when no show_error
            helper is defined in the notebook.
    """
    try:
        resp = _client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_content},
                {"role": "user", "content": user_content},
            ],
            max_tokens=max_tokens,
            temperature=temperature,
            reasoning_effort=reasoning_effort,
        )
        return resp.choices[0].message.content
    except Exception as e:
        # Use the notebook's error helper when it exists. Looking it up first
        # (instead of catching NameError around the call) means the bare `raise`
        # below re-raises the LLM failure itself, not a NameError.
        reporter = globals().get("show_error")
        if reporter is None:
            raise
        reporter(f"Error calling LLM: {e}")
        return None


In [7]:
""" 
Load the csv file into a pandas data frame
"""

# Relative path, so it is resolved against the notebook's working directory
csv_file = "../Week02/IT_Tickets/dataset-tickets-multi-lang_cleaned.csv"
df = load_data(csv_file)

# Model name handed to call_llm_sdk (as model=...) by the LLM cells further down
our_model = 'gpt-oss-20b'


In [8]:
# Display the loaded tickets; the rendered output is abbreviated for a frame this size
df

Unnamed: 0,subject,body
0,Problema crítico del servidor requiere atenció...,Es necesaria una investigación inmediata sobre...
1,Anfrage zur Verfügbarkeit des Dell XPS 13 9310,"Sehr geehrter Kundenservice,\n\nich hoffe, die..."
2,Erro na Autocompletação de Código do IntelliJ ...,"Prezado Suporte ao Cliente <name>,\n\nEstou es..."
3,Urgent Assistance Required: AWS Service,"Dear IT Services Support Team, \n\nI am reachi..."
4,Problème d'affichage de MacBook Air,Cher équipe de support du magasin en ligne Tec...
...,...,...
3993,Problem mit der HP DeskJet 3755 WLAN-Verbindung,"Sehr geehrter Tech Online Store Support, mein ..."
3994,Problemas com a impressora HP DeskJet 3755,"Olá Suporte ao Cliente,\n\nEstou escrevendo pa..."
3995,Problema urgente con el envío,"Estimado equipo de atención al cliente, me dir..."
3996,,"Cher Service Client, nous rencontrons des prob..."


# Now for calling the LLMs

We have our helper functions set up, we've sort of connected to the LLMs, now it's time to put them to use.

Our first attempt will be to simply determine the language of the email.  Let's grab a random email from the data frame and display it; then we can use an LLM 
with a good prompt to get the language it is written in.

In [5]:
# Pick a random ticket to work through in the cells below.
# random.randrange(n) yields 0..n-1. random.randint(0, n) also yields n itself,
# which is one past the last valid position and makes df.iloc raise IndexError.
ticket_number = random.randrange(len(df))
print(df.iloc[ticket_number])

subject    Dringend: Adressieren Sie das Problem mit dem ...
body       Sehr geehrter Kundenservice,\n\nIch wende mich...
Name: 1489, dtype: object


In [6]:
# ========== DETECT LANGUAGE ==========
# Prepare prompts for language detection
system_prompt = "You are a helpful assistant analyzing support tickets."

# Build a prompt asking the model to name the language of the selected ticket.
# The ### lines are delimiters that mark where the ticket text starts and ends,
# so the model can tell it apart from the instructions around it.
user_prompt = \
f"""Analyze the following support request email and return ONLY the name of the language it is written in, 
return one of the following languages:
German; English; French; Portuguese; Spanish; Unknown;

###
Subject: {df.iloc[ticket_number]['subject']}

Body: {df.iloc[ticket_number]['body']}
###

Your response should be simply one of [German, English, French, Portuguese, Spanish or Unknown], with no additional commentary or charachters; 
If there is more than one language present, choose the predominent one.
"""

# Call the AI to detect the language
# ai_response = call_lm_studio_requests(system_prompt, user_prompt)

ai_response = call_llm_sdk(system_prompt, user_prompt, model=our_model, max_tokens=1200, temperature=0.1)

# Normalise the reply to a lower-case language name (e.g. "german").
# An empty/None reply falls back to the placeholder "NO RESPONSE".

language_code = ai_response.strip().lower() if ai_response else "NO RESPONSE"

# Show the exact prompt that was sent, then the detected language
print(user_prompt)
print("="*50)
print(f"Detected: {language_code}")


Analyze the following support request email and return ONLY the name of the language it is written in, 
return one of the following languages:
German; English; French; Portuguese; Spanish; Unknown;

###
Subject: Dringend: Adressieren Sie das Problem mit dem Hochprioritäts-Cisco-Router

Body: Sehr geehrter Kundenservice,

Ich wende mich an Sie, um ein wiederkehrendes Problem mit unserem Cisco Router ISR4331 zu melden, das häufige Netzwerkunterbrechungen verursacht. Dies ist in den letzten Wochen ein anhaltendes Problem gewesen und wirkt sich kritisch auf unsere täglichen Abläufe aus. Unser Unternehmen ist stark auf eine stabile und sichere Netzwerkverbindung angewiesen, und die aktuellen Störungen beeinträchtigen unsere Produktivität und Servicequalität.

Der betroffene Router, Modell ISR4331, ist ein wesentliches Element unserer Netzwerkstruktur und benötigt daher sofortige Aufmerksamkeit. Wir haben versucht, intern eine Fehlerbehebung durchzuführen, konnten jedoch die Ursache des Leis

In [9]:
# Uses ticket_number from the random-ticket cell so we keep working on the same ticket.
# language_code from the language-detection cell is inserted into the prompt as the
# source language to help the LLM do a nice translation.

# ========== Translate the Subject ==========
# Prepare prompts for Subject translation
system_prompt = "You are a helpful assistant whose assigned the job of translating support tickets from the original language to English."

# Build a prompt asking the model to translate the subject line into English.
# The ### lines are delimiters that mark where the subject text starts and ends.
user_prompt = \
f"""Translate the following email subject line from {language_code} to English, while adapting it to American idioms and phrasing - 
your translation should faithfully match the meaning of the original: 

###
{df.iloc[ticket_number]['subject']}
###

your response should be simply be the English translation with no other information."""


# Call the AI to translate the subject
ai_response = call_llm_sdk(system_prompt, user_prompt, model=our_model, reasoning_effort='medium')

# Trim surrounding whitespace; an empty/None reply falls back to "NO SUBJECT"
subject = ai_response.strip() if ai_response else "NO SUBJECT"

# Show the exact prompt that was sent, then the translated subject
print(user_prompt)
print("="*50)
print(f"Subject Line: {subject}")

Translate the following email subject line from german to English, while adapting it to American idioms and phrasing - 
your translation should faithfully match the meaning of the original: 

###
Dringend: Adressieren Sie das Problem mit dem Hochprioritäts-Cisco-Router
###

your response should be simply be the English translation with no other information.
Subject Line: Urgent: Resolve the issue with the high‑priority Cisco router


In [10]:
# Uses ticket_number from the random-ticket cell so we keep working on the same ticket.
# language_code from the language-detection cell is inserted into the prompt as the
# source language to help the LLM do a nice translation.

# ========== Translate the Email Body ==========
# Prepare prompts for Email Body translation
system_prompt = "You are a helpful assistant whose assigned the job of translating support tickets from the original language to English."

# Build a prompt asking the model to translate the email body into English.
# The ### lines are delimiters that mark where the body text starts and ends.
user_prompt = \
f"""Translate the following email body from {language_code} to English, while adapting it to American idioms and phrasing - 
your translation should faithfully match the meaning of the original: 

###
{df.iloc[ticket_number]['body']}
###

Your response should be simply be the English translation with no other information.
"""

# Call the AI to translate the body
ai_response = call_llm_sdk(system_prompt, user_prompt, model=our_model, reasoning_effort='medium')

# Trim surrounding whitespace; an empty/None reply falls back to "NO BODY"
body = ai_response.strip() if ai_response else "NO BODY"

# Show the exact prompt that was sent, then the translated body
print(user_prompt)
print("="*50)
print(f"Email Body: {body}")

Translate the following email body from german to English, while adapting it to American idioms and phrasing - 
your translation should faithfully match the meaning of the original: 

###
Sehr geehrter Kundenservice,

Ich wende mich an Sie, um ein wiederkehrendes Problem mit unserem Cisco Router ISR4331 zu melden, das häufige Netzwerkunterbrechungen verursacht. Dies ist in den letzten Wochen ein anhaltendes Problem gewesen und wirkt sich kritisch auf unsere täglichen Abläufe aus. Unser Unternehmen ist stark auf eine stabile und sichere Netzwerkverbindung angewiesen, und die aktuellen Störungen beeinträchtigen unsere Produktivität und Servicequalität.

Der betroffene Router, Modell ISR4331, ist ein wesentliches Element unserer Netzwerkstruktur und benötigt daher sofortige Aufmerksamkeit. Wir haben versucht, intern eine Fehlerbehebung durchzuführen, konnten jedoch die Ursache des Leistungsproblems nicht identifizieren. Mögliche Ursachen, die wir vermuten, sind ein Hardwarefehler oder ein

In [11]:
# HOMEWORK 1
# Add a cell to classify tickets into 4 Types: Incident, Request, Change, Problem - you'll have to craft a prompt that explains to the LLM what these mean (or maybe not - you can try just asking first)

# Works on the same ticket as the previous cells, via the translated `subject` and `body`
# produced by the two translation cells.

# ========== Use the translated Email Subject and Body ==========
# Prepare prompts for ticket classification
system_prompt = "You are a helpful assistant whose assigned the job of classifying support tickets."

# Build a prompt asking the model to pick the ticket's category.
# The ### lines are delimiters that mark where the ticket text starts and ends.
user_prompt = \
f"""Cattegorize the following email into one of the following categories: Incident, Request, Change, or Problem: 

###
Subject: {subject}
Body: {body}
###

Your response should be simply be one of Incident, Request, Change, or Problem with no other information.
"""

# Call the AI to determine the category
ai_response = call_llm_sdk(system_prompt, user_prompt, model=our_model, reasoning_effort='high')

# Trim surrounding whitespace; an empty/None reply falls back to "NO CATEGORY"
category = ai_response.strip() if ai_response else "NO CATEGORY"

# Show the exact prompt that was sent, then the assigned category
print(user_prompt)
print("="*50)
print(f"Category: {category}")

Cattegorize the following email into one of the following categories: Incident, Request, Change, or Problem: 

###
Subject: Urgent: Resolve the issue with the high‑priority Cisco router
Body: Dear Customer Support,

I’m writing to report a recurring issue with our Cisco ISR4331 router that’s been causing frequent network outages. This problem has persisted over the past few weeks and is critically impacting our day‑to‑day operations. Our company relies heavily on a stable, secure network connection, and these current disruptions are affecting both productivity and service quality.

The affected router—model ISR4331—is a critical component of our network infrastructure and therefore requires immediate attention. We’ve attempted internal troubleshooting but couldn’t pinpoint the root cause of the performance issue. Possible causes we suspect include a hardware fault or a software configuration error, though we don’t rule out other potential problems that may need your expert assessment.


In [12]:
# Homework 2
# Add a cell to determine what departmental queue the ticket should be routed to: Billing & Payments, Customer Service, General Inquiry, Human Resources,
# IT Support, Product Support, Returns & Exchanges, Sales and Pre-Sales, Service Outages, Technical Support
# Remember the concept of one-shot/multi-shot: You could craft a long prompt that gives an example of an email and assignment for each of these categories...
# Examples are a GREAT way to show an LLM what you want.

# Works on the same ticket as the previous cells, via the translated `subject` and `body`
# produced by the two translation cells.
# ========== Use the translated Email Subject and Body ==========
# Prepare prompts for department routing
system_prompt = "You are a helpful assistant whose assigned the job of routing support tickets to the correct department."

# Build a prompt asking the model to pick the department the ticket should be routed to.
# The ### lines are delimiters that mark where the ticket text starts and ends.
user_prompt = \
f"""Determine the correct department to route the following email. The available departments are:
Billing & Payments, Customer Service, General Inquiry, Human Resources, 
IT Support, Product Support, Returns & Exchanges, Sales and Pre-Sales, 
Service Outages, Technical Support. 

###
Subject: {subject}
Body: {body}
###

Your response should be simply be one of Billing & Payments, Customer Service, General Inquiry, Human Resources, 
IT Support, Product Support, Returns & Exchanges, Sales and Pre-Sales, 
Service Outages, Technical Support with no other information.
"""

# Call the AI to determine the proper department to route to
ai_response = call_llm_sdk(system_prompt, user_prompt, model=our_model, reasoning_effort='high')

# Trim surrounding whitespace; an empty/None reply falls back to "NO DEPARTMENT"
department = ai_response.strip() if ai_response else "NO DEPARTMENT"

# Show the exact prompt that was sent, then the chosen department
print(user_prompt)
print("="*50)
print(f"Routing: {department}")

Determine the correct department to route the following email. The available departments are:
Billing & Payments, Customer Service, General Inquiry, Human Resources, 
IT Support, Product Support, Returns & Exchanges, Sales and Pre-Sales, 
Service Outages, Technical Support. 

###
Subject: Urgent: Resolve the issue with the high‑priority Cisco router
Body: Dear Customer Support,

I’m writing to report a recurring issue with our Cisco ISR4331 router that’s been causing frequent network outages. This problem has persisted over the past few weeks and is critically impacting our day‑to‑day operations. Our company relies heavily on a stable, secure network connection, and these current disruptions are affecting both productivity and service quality.

The affected router—model ISR4331—is a critical component of our network infrastructure and therefore requires immediate attention. We’ve attempted internal troubleshooting but couldn’t pinpoint the root cause of the performance issue. Possible 

In [17]:
# Homework 3
# Add a cell to determine the priority of the ticket: P1, P2, P3 – read the rules for the priorities and use those rules in your
# prompt to have the LLM assign the correct priority.
# Again, you can explain the priority levels to the LLM and/or you can provide examples in the prompt.
# I sketched in the prioritization rules below from the slides

# Works on the same ticket as the previous cells, via the translated `subject` and `body`
# produced by the two translation cells.

# ========== Use the translated Email Subject and Body ==========
# Prepare prompts for priority assignment
system_prompt = "You are a helpful assistant who is assigned the job of determining the priority of support tickets so they can be handled in the proper order."

# Build a prompt that spells out the priority rules and asks the model to apply them.
# The ### lines are delimiters that mark where the ticket text starts and ends.
user_prompt = \
f"""Determine the correct prioritization for the ticket subject and body accoring to the rules below; the possible priorities are P1, P2 or P3:
Rules for Priority Assignment

P1 - Critical
Security or privacy incident: data breach, ransomware, malware outbreak, phishing success, compromised account, stolen device with sensitive data.
Payment/revenue blocking: checkout/payment API down, failed customer transactions at scale.
Company-wide or regional outage: SSO, VPN, email, network, authentication unavailable.
Safety/legal risk: regulatory or compliance breach, urgent legal exposure.
High impact blocking: department/region/companywide/external customers cannot work, and urgency is blocking or deadline today.

P2 - Major
Medium impact blocking or deadline: a team is blocked, or a single user is blocked with a hard deadline.
Degraded shared service: email delays, slow VPN, partial outage affecting multiple users.
Single user blocking: cannot log in, device will not boot, locked account (no explicit deadline).

P3 - Minor
Informational requests: “how do I…”, “please provide access,” feature requests.
Cosmetic issues, minor bugs, or general inquiries.
Non-blocking tickets with no deadline.

Tie-Breakers
If multiple rules match, select the highest priority (P1 > P2 > P3).
If signals conflict, assign the lower (safer) priority and reduce confidence.

###
Determine the priority of the following support request:

Subject: {subject}
Body: {body}
### 

Your response should be simply be one of P1, P2 or P3 according to the rules. 
"""

# Call the AI to determine the priority, using the notebook-wide model setting
# like the other cells rather than a separately hard-coded model name.
ai_response = call_llm_sdk(system_prompt, user_prompt, model=our_model, reasoning_effort='high')

# Trim surrounding whitespace; an empty/None reply falls back to "NO PRIORITY"
priority = ai_response.strip() if ai_response else "NO PRIORITY"

# Show the exact prompt that was sent, then the assigned priority
print(user_prompt)
print("="*50)
print(f"Priority: {priority}")

Determine the correct prioritization for the ticket subject and body accoring to the rules below; the possible priorities are P1, P2 or P3:
Rules for Priority Assignment

P1 - Critical
Security or privacy incident: data breach, ransomware, malware outbreak, phishing success, compromised account, stolen device with sensitive data.
Payment/revenue blocking: checkout/payment API down, failed customer transactions at scale.
Company-wide or regional outage: SSO, VPN, email, network, authentication unavailable.
Safety/legal risk: regulatory or compliance breach, urgent legal exposure.
High impact blocking: department/region/companywide/external customers cannot work, and urgency is blocking or deadline today.

P2 - Major
Medium impact blocking or deadline: a team is blocked, or a single user is blocked with a hard deadline.
Degraded shared service: email delays, slow VPN, partial outage affecting multiple users.
Single user blocking: cannot log in, device will not boot, locked account (no ex

In [18]:
# Homework 4
# Add a "final output" that wraps all of the fields (translated subject, translated body, incident type, queue and priority)
# and readies it to send on to the appropriate queue...
# If your primary language is not English, try having the LLM translate this final bundle into your language.
# How was the quality of the translation?
# If the model isn’t doing a great job, search for a model that’s more tuned for your language (it would also have to know the source languages of course).

import json

# Gather every field produced by the earlier cells into a single record
ticket_data = {
    "ticket": ticket_number,
    "language": language_code,
    "subject": subject,
    "email_body": body,
    "category": category,
    "queue": department,
    "priority": priority,
}

# Serialise with 2-space indentation; ensure_ascii=False writes non-ASCII
# characters as-is instead of \u escapes
json_output = json.dumps(ticket_data, indent=2, ensure_ascii=False)

# Show the finished bundle
print(json_output)

{
  "ticket": 1489,
  "language": "german",
  "subject": "Urgent: Resolve the issue with the high‑priority Cisco router",
  "email_body": "Dear Customer Support,\n\nI’m writing to report a recurring issue with our Cisco ISR4331 router that’s been causing frequent network outages. This problem has persisted over the past few weeks and is critically impacting our day‑to‑day operations. Our company relies heavily on a stable, secure network connection, and these current disruptions are affecting both productivity and service quality.\n\nThe affected router—model ISR4331—is a critical component of our network infrastructure and therefore requires immediate attention. We’ve attempted internal troubleshooting but couldn’t pinpoint the root cause of the performance issue. Possible causes we suspect include a hardware fault or a software configuration error, though we don’t rule out other potential problems that may need your expert assessment.\n\nOur service agreement with your company has al

In [None]:
# Homework 5
# Success criteria:  
# How long does it take YOU to read a ticket and assign these items?  How long does it take the LLM?  
# What’s the speedup?  
# How is the accuracy? 
# Can you find an example where you feel the LLM did a poor job of assigning an Incident type, Queue or Priority?  
# Can you fix it by adjusting your prompt?
