In [1]:
import os

import mindsdb_sdk as mdb
import pandas as pd

In [90]:
# Models and endpoint used for both the agents (LLM) and the knowledge base (embeddings).
LLM_MODEL_NAME = "Qwen/Qwen3-235B-A22B-Thinking-2507"
EMBEDDING_MODEL_NAME = "Qwen/Qwen3-Embedding-8B"
NEBIUS_BASE_URL = "https://api.studio.nebius.com/v1/"
# Read the secret from the NEBIUS_API_KEY environment variable so it never has to be
# typed into (and saved with) the notebook; "..." stays as the placeholder fallback.
NEBIUS_API_KEY = os.environ.get("NEBIUS_API_KEY", "...")

## Preprocess Data and Upload to MindsDB Files db

Load Dataset

In [None]:
# Read the ticket CSV; the path is relative to the notebook's working directory.
df = pd.read_csv(r"data/dataset-tickets-multi-lang3-4k.csv")
df.head()

Unnamed: 0,subject,body,answer,type,queue,priority,language,business_type,tag_1,tag_2,tag_3,tag_4,tag_5,tag_6,tag_7,tag_8,tag_9
0,Problema crítico del servidor requiere atenció...,Es necesaria una investigación inmediata sobre...,Estamos investigando urgentemente el problema ...,Incident,Technical Support,high,es,IT Services,Urgent Issue,Service Disruption,Incident Report,Service Recovery,System Maintenance,,,,
1,Anfrage zur Verfügbarkeit des Dell XPS 13 9310,"Sehr geehrter Kundenservice,\n\nich hoffe, die...","Sehr geehrter <name>,\n\nvielen Dank, dass Sie...",Request,Customer Service,low,de,Tech Online Store,Sales Inquiry,Product Support,Customer Service,Order Issue,Returns and Exchanges,,,,
2,Erro na Autocompletação de Código do IntelliJ ...,"Prezado Suporte ao Cliente <name>,\n\nEstou es...","Prezado <name>,\n\nObrigado por entrar em cont...",Incident,Technical Support,high,pt,IT Services,Technical Support,Software Bug,Problem Resolution,Urgent Issue,IT Support,,,,
3,Urgent Assistance Required: AWS Service,"Dear IT Services Support Team, \n\nI am reachi...","Dear <name>,\n\nThank you for reaching out reg...",Request,IT Support,high,en,IT Services,IT Support,Urgent Issue,Service Notification,Cloud Services,Problem Resolution,Technical Guidance,Performance Tuning,,
4,Problème d'affichage de MacBook Air,Cher équipe de support du magasin en ligne Tec...,"Cher <name>,\n\nMerci de nous avoir contactés ...",Incident,Product Support,low,fr,Tech Online Store,Technical Support,Product Support,Hardware Failure,Service Recovery,Routine Request,,,,


In [11]:
# Column dtypes and non-null counts, to see which fields have gaps.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4000 entries, 0 to 3999
Data columns (total 17 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   subject        3533 non-null   object 
 1   body           3999 non-null   object 
 2   answer         4000 non-null   object 
 3   type           4000 non-null   object 
 4   queue          4000 non-null   object 
 5   priority       4000 non-null   object 
 6   language       4000 non-null   object 
 7   business_type  4000 non-null   object 
 8   tag_1          4000 non-null   object 
 9   tag_2          4000 non-null   object 
 10  tag_3          4000 non-null   object 
 11  tag_4          3999 non-null   object 
 12  tag_5          3363 non-null   object 
 13  tag_6          2181 non-null   object 
 14  tag_7          1045 non-null   object 
 15  tag_8          269 non-null    object 
 16  tag_9          0 non-null      float64
dtypes: float64(1), object(16)
memory usage: 531.4+ KB


Preprocess

In [15]:
# Keep English-language tickets only, restricted to the text and label columns
# used later on, and expose the support "queue" under the name "category".
# The "language" column is gone afterwards, so this cell cannot be re-run
# without reloading the CSV first.
df = df.loc[
    lambda frame: frame["language"] == "en",
    ["subject", "body", "answer", "priority", "type", "queue", "tag_1", "tag_2"],
].rename(columns={"queue": "category"})

df.head()

Unnamed: 0,subject,body,answer,priority,type,category,tag_1,tag_2
3,Urgent Assistance Required: AWS Service,"Dear IT Services Support Team, \n\nI am reachi...","Dear <name>,\n\nThank you for reaching out reg...",high,Request,IT Support,IT Support,Urgent Issue
5,Urgent: Issue with Zoom Screen Sharing Feature,"Dear Customer Support,\n\nI am experiencing a ...","Dear <name>,\n\nThank you for reaching out. We...",high,Incident,Technical Support,Technical Support,Urgent Issue
7,Service outage resolution requested for ongoin...,"Dear IT Services Customer Support,\n\nI am wri...","Dear <name>,\n\nThank you for reaching out to ...",high,Change,Service Outages and Maintenance,Service Outage,IT Support
8,Surface Pro 7 Issue,"Dear Tech Online Store Support Team,\n\nI hope...","Dear <name>,\n\nThank you for reaching out to ...",medium,Incident,Product Support,Technical Support,Product Support
10,Request for software development consultation ...,"Dear Customer Support,\n\nI hope this message ...","Dear <name>,\n\nThank you for reaching out reg...",medium,Request,Technical Support,IT Support,Technical Guidance


Remove rows with a missing subject or body

In [19]:
# Drop tickets lacking either a body or a subject; both are later used as
# knowledge-base content columns.
df = df.dropna(subset=["body", "subject"])

In [20]:
# (rows, columns) left after filtering.
df.shape

(1219, 8)

Add id column

In [37]:
# Renumber the remaining rows from 0 and publish that position as an explicit
# "id" column, which later serves as the knowledge-base id column.
df = df.reset_index(drop=True)
df = df.assign(id=df.index)
df.head()

Unnamed: 0,subject,body,answer,priority,type,category,tag_1,tag_2,id
0,Urgent Assistance Required: AWS Service,"Dear IT Services Support Team, \n\nI am reachi...","Dear <name>,\n\nThank you for reaching out reg...",high,Request,IT Support,IT Support,Urgent Issue,0
1,Urgent: Issue with Zoom Screen Sharing Feature,"Dear Customer Support,\n\nI am experiencing a ...","Dear <name>,\n\nThank you for reaching out. We...",high,Incident,Technical Support,Technical Support,Urgent Issue,1
2,Service outage resolution requested for ongoin...,"Dear IT Services Customer Support,\n\nI am wri...","Dear <name>,\n\nThank you for reaching out to ...",high,Change,Service Outages and Maintenance,Service Outage,IT Support,2
3,Surface Pro 7 Issue,"Dear Tech Online Store Support Team,\n\nI hope...","Dear <name>,\n\nThank you for reaching out to ...",medium,Incident,Product Support,Technical Support,Product Support,3
4,Request for software development consultation ...,"Dear Customer Support,\n\nI hope this message ...","Dear <name>,\n\nThank you for reaching out reg...",medium,Request,Technical Support,IT Support,Technical Guidance,4


Select 600 records as our sample

In [39]:
# A fixed random_state keeps the 600-row sample identical across runs.
# Sampled rows keep their original index labels and "id" values.
df_sample = df.sample(n=600, random_state=42)

In [40]:
# Row count and per-column null counts of the sample.
df_sample.info()

<class 'pandas.core.frame.DataFrame'>
Index: 600 entries, 542 to 694
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   subject   600 non-null    object
 1   body      600 non-null    object
 2   answer    600 non-null    object
 3   priority  600 non-null    object
 4   type      600 non-null    object
 5   category  600 non-null    object
 6   tag_1     600 non-null    object
 7   tag_2     600 non-null    object
 8   id        600 non-null    int64 
dtypes: int64(1), object(8)
memory usage: 46.9+ KB


In [53]:
# Print the distinct values of every label column in the sample; these lists
# feed the classifier prompt further down.
for column in ("type", "priority", "category", "tag_1", "tag_2"):
    print(column)
    distinct_values = df_sample[column].unique().tolist()
    print(",".join(distinct_values))

type
Incident,Request,Problem,Change
priority
high,medium,low
category
Technical Support,Returns and Exchanges,Customer Service,Product Support,Billing and Payments,Sales and Pre-Sales,IT Support,Service Outages and Maintenance,General Inquiry,Human Resources
tag_1
Urgent Issue,IT Support,Technical Support,Returns and Exchanges,Billing Issue,Payment Processing,Sales Inquiry,Service Disruption,Software Bug,Customer Feedback,Account Security,Service Outage,Product Support,Network Issue,Customer Service,Performance Tuning,Warranty Claim,Technical Guidance,Data Breach,Incident Report,Consulting Services,Account Assistance,Login Issue,General Inquiry
tag_2
Technical Support,Urgent Issue,Billing Issue,Product Support,IT Support,Network Issue,Service Disruption,Performance Tuning,Payment Processing,Incident Report,Refund Request,Customer Service,Technical Guidance,Problem Resolution,Account Assistance,Software Bug,Returns and Exchanges,System Crash,System Maintenance,Sales Inquiry,Order Issue

Upload the dataset to MindsDB

In [23]:
# Connect using the SDK's defaults (no URL or credentials are passed);
# presumably this targets a locally running MindsDB instance — confirm.
server = mdb.connect()
# Handle to the "files" database, where the tickets table is created and queried.
files = server.get_database("files")

In [41]:
# Upload the sampled tickets as table "tickets" in the files database.
# NOTE(review): re-running this cell presumably fails or conflicts if the table
# already exists — confirm and drop the table first if needed.
files.create_table("tickets", df_sample)

Table(files.tickets)

In [42]:
# Read the table back to check the upload.
files.get_table("tickets").fetch().head()

Unnamed: 0,subject,body,answer,priority,type,category,tag_1,tag_2,id
0,Urgent Assistance Required: Zoom Issue,"Greetings Customer Support Team,\n\nI trust th...","Dear <name>,\n\nWe appreciate you reaching out...",high,Incident,Technical Support,Urgent Issue,Technical Support,542
1,Urgent Assistance Needed for AWS Service,"Dear IT Services Support,\n\nI am reaching out...","Dear <name>,\n\nThank you for reaching out. To...",high,Request,Technical Support,IT Support,Urgent Issue,259
2,Assistance Required: AWS Cost Optimization and...,"Dear Customer Support Team, \n\nI hope this me...","Dear <name>,\n\nThank you for reaching out to ...",high,Request,Technical Support,Technical Support,Billing Issue,43
3,Urgent Exchange Needed for Defective MacBook,"Dear Tech Online Store Customer Support Team,\...","Dear <name>,\n\nThank you for reaching out to ...",high,Problem,Returns and Exchanges,Returns and Exchanges,Product Support,1009
4,Urgent Change Request for Consulting Service I...,"Dear IT Services Customer Support Team, \n\nI ...","Dear <name>,\n\nThank you for reaching out wit...",high,Change,Customer Service,Urgent Issue,Technical Support,753


## Save ticket to tickets table

In [62]:
def get_max_id():
    """Return the largest ``id`` stored in ``files.tickets``.

    Returns:
        The maximum id as a plain Python ``int``, or ``None`` when the query
        yields no row or a NULL/NaN maximum (e.g. the table has no rows).
    """
    query = "SELECT MAX(id) as max_id FROM files.tickets"
    result = server.query(query).fetch()

    if len(result) == 0:
        return None

    # Positional access: does not rely on the result frame having a 0 label.
    max_id = result["max_id"].iloc[0]

    # MAX() over no rows comes back as NULL, which pandas surfaces as None/NaN.
    if pd.isna(max_id):
        return None

    # Hand back a built-in int rather than a numpy scalar.
    return int(max_id)


# Sanity check: highest id currently stored in files.tickets.
get_max_id()

np.int64(1216)

In [86]:
def save_ticket(
    subject,
    body,
    answer,
    priority,
    type,
    category,
    tag_1,
    tag_2,
):
    """Append one ticket to ``files.tickets`` and return its new id.

    The id is one more than the current maximum id, or 0 when there is no
    maximum yet. The read-then-insert sequence is not atomic, so concurrent
    callers could obtain the same id.

    Args:
        subject: Ticket subject line.
        body: Ticket body text.
        answer: Reply stored alongside the ticket.
        priority: Priority label.
        type: Ticket type label (the name shadows the builtin; kept for
            interface compatibility).
        category: Category label.
        tag_1: First tag.
        tag_2: Second tag.

    Returns:
        The id assigned to the inserted row, as a plain ``int``.
    """
    max_id = get_max_id()
    # A missing maximum may arrive as None or as NaN; NaN + 1 would otherwise
    # silently become the new id.
    if max_id is None or pd.isna(max_id):
        new_id = 0
    else:
        new_id = int(max_id) + 1

    tickets_table = files.get_table("tickets")
    tickets_table.insert(
        pd.DataFrame(
            {
                "id": [new_id],
                "subject": [subject],
                "body": [body],
                "answer": [answer],
                "priority": [priority],
                "type": [type],
                "category": [category],
                "tag_1": [tag_1],
                "tag_2": [tag_2],
            }
        )
    )

    return new_id

In [73]:
def fetch_ticket(id: int):
    """Fetch one ticket from ``files.tickets`` as a dict.

    Args:
        id: Ticket id; anything ``int()`` accepts (e.g. a numpy integer).

    Returns:
        The first matching row as a ``{column: value}`` dict.

    Raises:
        ValueError, TypeError: If ``id`` cannot be converted to an integer.
        IndexError: If no row has this id.
    """
    # The value is interpolated into SQL text, so force it to an integer first;
    # arbitrary strings can then never reach the query.
    ticket_id = int(id)

    fetch_query = f"""
    SELECT * FROM files.tickets
    WHERE id = {ticket_id}
    """

    rows = server.query(fetch_query).fetch()
    if len(rows) == 0:
        # Same exception type a bare .iloc[0] would raise, with a usable message.
        raise IndexError(f"No ticket with id {ticket_id} in files.tickets")

    return rows.iloc[0].to_dict()

## Create a Knowledge Base from the tickets

In [45]:
# Create the knowledge base. Embeddings are requested through the "openai"
# provider pointed at the Nebius base URL, using the Nebius embedding model.
server.knowledge_bases.create(
    "tickets_kb",
    embedding_model={
        "provider": "openai",
        "model_name": EMBEDDING_MODEL_NAME,
        "base_url": NEBIUS_BASE_URL,
        "api_key": NEBIUS_API_KEY,
    },
    # Text columns that make up the searchable content.
    content_columns=["subject", "body", "answer"],
    # Label columns kept as metadata; search_kb filters on these.
    metadata_columns=["type", "priority", "category", "tag_1", "tag_2"],
    id_column="id",
)

KnowledgeBase(mindsdb.tickets_kb)

In [None]:
kb = server.knowledge_bases.get("tickets_kb")
# Insert the uploaded "tickets" file into the knowledge base.
kb.insert_files(["tickets"])

In [48]:
def search_kb(
    content: str,
    filters: dict = None,  # pyright: ignore[reportArgumentType]
    top_k: int = 3,
):
    """Search ``tickets_kb`` for rows matching ``content``.

    Args:
        content: Free-text search string compared against the ``content`` column.
        filters: Optional mapping with any of the keys ``type``, ``priority``,
            ``category``, ``tag_1``, ``tag_2``. Keys with falsy values and
            unknown keys are ignored.
        top_k: Number of leading result rows to keep.

    Returns:
        The first ``top_k`` rows of the fetched query result.
    """

    def quote(value) -> str:
        # Render value as a single-quoted SQL literal. Backslashes and single
        # quotes are backslash-escaped (MySQL style) so text such as "can't"
        # neither breaks the statement nor injects extra SQL.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    conditions = [f"content = {quote(content)}"]

    # Fixed key order keeps the generated SQL deterministic.
    for key in ["type", "priority", "category", "tag_1", "tag_2"]:
        value = filters.get(key) if filters else None
        if value:
            conditions.append(f"{key} = {quote(value)}")

    query = "SELECT * FROM tickets_kb WHERE " + " AND ".join(conditions)

    return server.query(query).fetch()[:top_k]


# Example lookup, keeping the first two result rows.
search_kb("Dead pixels on screen", top_k=2)

Unnamed: 0,id,chunk_id,chunk_content,distance,relevance,tag_2,priority,type,category,tag_1,metadata
0,622,622:subject:1of1:0to23,Smart TV Screen Flicker,0.29901,0.769817,Technical Support,high,Incident,Product Support,Product Support,"{'_chunk_index': 0, '_content_column': 'subjec..."
1,864,864:subject:1of1:0to47,Display Flickering Issue on Dell XPS 13 Product,0.349539,0.740994,Product Support,high,Incident,Product Support,Technical Support,"{'_chunk_index': 0, '_content_column': 'subjec..."


## Create Agents for Classification and Support

Classifier Agent

In [54]:
# Prompt for the classifier agent. It lists the allowed values for each label and
# asks for a bare JSON object so the reply can be parsed with json.loads.
# The tag_2 list here is longer than the values printed for the 600-row sample.
classification_prompt = """
You are a support ticket classification model.
Given a customer's ticket, predict:

type - Incident,Request,Problem,Change
priority - high,medium,low
category - Technical Support,Returns and Exchanges,Customer Service,Product Support,Billing and Payments,Sales and Pre-Sales,IT Support,Service Outages and Maintenance,General Inquiry,Human Resources
tag_1 - Urgent Issue,IT Support,Technical Support,Returns and Exchanges,Billing Issue,Payment Processing,Sales Inquiry,Service Disruption,Software Bug,Customer Feedback,Account Security,Service Outage,Product Support,Network Issue,Customer Service,Performance Tuning,Warranty Claim,Technical Guidance,Data Breach,Incident Report,Consulting Services,Account Assistance,Login Issue,General Inquiry
tag_2 - Technical Support,Urgent Issue,Billing Issue,Product Support,IT Support,Network Issue,Service Disruption,Performance Tuning,Payment Processing,Incident Report,Refund Request,Customer Service,Technical Guidance,Problem Resolution,Account Assistance,Software Bug,Returns and Exchanges,System Crash,System Maintenance,Sales Inquiry,Order Issue,Hardware Failure,Login Issue,Service Outage,Cloud Services,Product Replacement,Feature Request,Account Security,Service Recovery,Data Breach,Shipping Inquiry,Warranty Claim

Output strictly in parsable JSON:
{
  "type": "...",
  "category": "...",
  "priority": "...",
  "tag_1": "...",
  "tag_2": "..."
}
"""

# Register the agent; the LLM is reached through the "openai" provider at the Nebius base URL.
server.agents.create(
    name="ticket_classifier",
    model={
        "provider": "openai",
        "model_name": LLM_MODEL_NAME,
        "api_key": NEBIUS_API_KEY,
        "base_url": NEBIUS_BASE_URL,
    },
    # NOTE(review): the MindsDB docs describe a "knowledge_bases" list under `data`;
    # this singular key may be ignored — confirm whether the KB is attached at all.
    data={"knowledge_base": "tickets_kb"},
    prompt_template=classification_prompt,
)

Agent(name: ticket_classifier)

In [None]:
import json


def classify_ticket(subject: str, body: str):
    """Ask the ``ticket_classifier`` agent to label a ticket.

    Args:
        subject: Ticket subject line.
        body: Ticket body text.

    Returns:
        The parsed JSON reply of the agent (per the classifier prompt, a dict
        with ``type``, ``category``, ``priority``, ``tag_1`` and ``tag_2``).

    Raises:
        json.JSONDecodeError: If no JSON can be parsed from the reply.
    """
    text = f"Subject: {subject}\nBody: {body}"

    agent = server.agents.get("ticket_classifier")
    response = agent.completion([{"question": text, "answer": None}])

    raw = response.content
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # Models sometimes wrap the object in code fences or surrounding text;
        # fall back to the outermost {...} span before giving up.
        start = raw.find("{")
        end = raw.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(raw[start : end + 1])


{'type': 'Incident',
 'category': 'Technical Support',
 'priority': 'high',
 'tag_1': 'Login Issue',
 'tag_2': 'Account Assistance'}

Support Agent

In [57]:
# Prompt for the customer-facing agent. The knowledge base is referred to by its
# real name, tickets_kb, so the instructions match what the agent is given.
support_agent_prompt = """
You are a customer support AI agent.

Use the knowledge base (tickets_kb) to answer customer queries.
Retrieve relevant entries using subject, body, and existing resolutions.

### Rules
1. Reference similar past issues for accurate answers.
2. Maintain professional, concise, friendly tone.
3. Never expose system details or internal metadata.
4. If insufficient context, ask clarifying questions.

### Output
A helpful, human-like response ready to send to the customer.
"""

# Register the agent; the LLM is reached through the "openai" provider at the Nebius base URL.
server.agents.create(
    name="support_agent",
    model={
        "provider": "openai",
        "model_name": LLM_MODEL_NAME,
        "api_key": NEBIUS_API_KEY,
        "base_url": NEBIUS_BASE_URL,
    },
    # `data` takes a "knowledge_bases" list of project-qualified names; the KB
    # created above lives in the "mindsdb" project.
    data={"knowledge_bases": ["mindsdb.tickets_kb"]},
    prompt_template=support_agent_prompt,
)

Agent(name: support_agent)

In [None]:
def chat(
    message: str,
    message_history: list[dict] = None,  # pyright: ignore[reportArgumentType]
):
    """Send ``message`` to the ``support_agent`` agent and return its reply text.

    Args:
        message: The new user message.
        message_history: Earlier turns as ``{"question": ..., "answer": ...}``
            dicts; the new message is appended after them. ``None`` or an empty
            list starts a fresh conversation.

    Returns:
        The ``content`` attribute of the agent's completion.
    """
    agent = server.agents.get("support_agent")

    # The pending turn carries the question with no answer yet.
    turn = {"question": message, "answer": None}
    conversation = message_history + [turn] if message_history else [turn]

    return agent.completion(conversation).content

In [77]:
# Example ticket used for the chat, classification and save steps below.
subject = "Unable to access account"
body = "I am having trouble logging into my account. It says my password is incorrect, but I am sure it's right. Please help!"

In [76]:
# Ask the support agent to draft a reply for the example ticket.
answer = chat(f"Subject: {subject}\nBody: {body}")
answer

'I\'m sorry to hear you\'re having trouble accessing your account—that\'s frustrating! Since you\'re certain your password is correct, let\'s try these quick steps:  \n\n1️⃣ **Use the "Forgot Password" link** on the login page (this sends a secure reset link to your email).  \n2️⃣ **Check for typos**—ensure Caps Lock is off and you\'re using the correct keyboard layout.  \n3️⃣ **Clear your browser cache** or try logging in via incognito mode.  \n\nIf these don’t work, reply with:  \n- The email associated with your account  \n- Any error messages you see (e.g., "Invalid credentials")  \n\nI’ll help you resolve this right away! 🔒'

In [78]:
# Predict the type, category, priority and tags for the same ticket.
metadata = classify_ticket(subject, body)
metadata

{'type': 'Incident',
 'category': 'Technical Support',
 'priority': 'high',
 'tag_1': 'Login Issue',
 'tag_2': 'Account Assistance'}

## Demo

In [87]:
# Store the example ticket together with the agent's reply; the label
# arguments are taken from the classifier output in save_ticket's parameter order.
id_ = save_ticket(
    subject,
    body,
    answer,
    *(metadata[field] for field in ("priority", "type", "category", "tag_1", "tag_2")),
)
id_

np.int64(1217)

In [88]:
# Read the stored row back by its id.
fetch_ticket(id_)

{'subject': 'Unable to access account',
 'body': "I am having trouble logging into my account. It says my password is incorrect, but I am sure it's right. Please help!",
 'answer': 'I\'m sorry to hear you\'re having trouble accessing your account—that\'s frustrating! Since you\'re certain your password is correct, let\'s try these quick steps:  \\n\\n1️⃣ **Use the "Forgot Password" link** on the login page (this sends a secure reset link to your email).  \\n2️⃣ **Check for typos**—ensure Caps Lock is off and you\'re using the correct keyboard layout.  \\n3️⃣ **Clear your browser cache** or try logging in via incognito mode.  \\n\\nIf these don’t work, reply with:  \\n- The email associated with your account  \\n- Any error messages you see (e.g., "Invalid credentials")  \\n\\nI’ll help you resolve this right away! 🔒',
 'priority': 'high',
 'type': 'Incident',
 'category': 'Technical Support',
 'tag_1': 'Login Issue',
 'tag_2': 'Account Assistance',
 'id': 1217}