# Ticket RAG System

Production RAG pipeline: Load → Embed → Store → Retrieve → Re-rank → Generate → Evaluate

In [77]:
import os
import json
import pandas as pd
import numpy as np
from typing import List, Dict, Tuple
from datetime import datetime

from sentence_transformers import SentenceTransformer, CrossEncoder
import chromadb
from openai import OpenAI
from dotenv import load_dotenv
from IPython.display import display, Markdown

# Force reload secrets from Week06 folder (override existing env vars)
secrets_path = './secrets.env'
load_dotenv(secrets_path, override=True)
print("Libraries loaded")
print(f"Secrets file: {secrets_path}")
hf_token = os.getenv('HF_TOKEN')
if hf_token:
    print(f"HF_TOKEN loaded")
else:
    print("HF_TOKEN: Not found")

Libraries loaded
Secrets file: ./secrets.env
HF_TOKEN loaded


In [88]:
CONFIG = {
    'csv_path': './dataset-tickets-multi-lang3-4k-translated-all.csv',
    'chroma_db_path': './chroma_ticket_db',
    'train_test_split': 0.8,
    'random_seed': 42,
    'embedding_model': 'all-MiniLM-L6-v2',
    'reranker_model': 'mixedbread-ai/mxbai-rerank-base-v1',
    'embedding_fields': ['subject_english', 'body_english', 'answer_english'],
    'metadata_fields': ['type', 'queue', 'priority', 'business_type', 'original_language'],
    'top_k_initial': 20,
    'top_k_reranked': 5,
    'rag_mode': 'strict',  # 'strict' = context-only, 'augmented' = context + LLM knowledge
    'lm_studio_url': 'http://192.168.7.171:1234',
    'llm_model': 'gpt-oss-20b',
    'temperature': 0.2,
    'max_tokens': 6000,
    'collection_name': 'ticket_rag_collection',
}

print("Configuration loaded")
print(f"Initial retrieval: {CONFIG['top_k_initial']} tickets")
print(f"After re-ranking: {CONFIG['top_k_reranked']} tickets")
print(f"RAG Mode: {CONFIG['rag_mode'].upper()}")

Configuration loaded
Initial retrieval: 20 tickets
After re-ranking: 5 tickets
RAG Mode: STRICT


In [90]:
print("Loading embedding model...")
embedder = SentenceTransformer(CONFIG['embedding_model'])
print(f"Loaded: {CONFIG['embedding_model']}")

print("\nAuthenticating with HuggingFace...")
hf_token = os.getenv('HF_TOKEN')
if hf_token:
    from huggingface_hub import login
    login(token=hf_token)
    print("HuggingFace authentication successful")
else:
    print("Warning: HF_TOKEN not found in secrets.env")

print("\nLoading re-ranker model...")
reranker = CrossEncoder(CONFIG['reranker_model'])
print(f"Loaded: {CONFIG['reranker_model']}")

print("\nInitializing ChromaDB...")
chroma_client = chromadb.PersistentClient(path=CONFIG['chroma_db_path'])
print("ChromaDB ready")

print("\nConnecting to LM Studio...")
client = OpenAI(
    base_url=f"{CONFIG['lm_studio_url']}/v1",
    api_key="lm-studio"
)
print("LM Studio connected")

Loading embedding model...


Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


Loaded: all-MiniLM-L6-v2

Authenticating with HuggingFace...
HuggingFace authentication successful

Loading re-ranker model...
Loaded: mixedbread-ai/mxbai-rerank-base-v1

Initializing ChromaDB...
ChromaDB ready

Connecting to LM Studio...
LM Studio connected


In [91]:
def load_and_split_data(csv_path: str, train_ratio: float, random_seed: int) -> Tuple[pd.DataFrame, pd.DataFrame]:
    df = pd.read_csv(csv_path)
    df = df.dropna(subset=['subject_english', 'body_english', 'answer_english'])
    df = df[(df['subject_english'].str.strip() != '') & 
            (df['body_english'].str.strip() != '') & 
            (df['answer_english'].str.strip() != '')]
    
    df_shuffled = df.sample(frac=1, random_state=random_seed).reset_index(drop=True)
    split_idx = int(len(df_shuffled) * train_ratio)
    return df_shuffled[:split_idx].reset_index(drop=True), df_shuffled[split_idx:].reset_index(drop=True)

def create_combined_text(row: pd.Series, fields: List[str]) -> str:
    texts = []
    for field in fields:
        if pd.notna(row.get(field)):
            texts.append(f"{field.capitalize()}: {row[field]}")
    return "\n".join(texts)

def embed_tickets(df: pd.DataFrame, embedding_fields: List[str]) -> List[List[float]]:
    combined_texts = [create_combined_text(row, embedding_fields) for _, row in df.iterrows()]
    embeddings = embedder.encode(combined_texts, show_progress_bar=True)
    return embeddings.tolist()

def prepare_metadata(row: pd.Series, metadata_fields: List[str]) -> Dict:
    metadata = {}
    for field in metadata_fields:
        value = row.get(field)
        metadata[field] = str(value) if pd.notna(value) else "unknown"
    return metadata

def load_vector_db(
    df: pd.DataFrame,
    collection_name: str,
    embedding_fields: List[str],
    metadata_fields: List[str]
) -> chromadb.Collection:
    try:
        chroma_client.delete_collection(name=collection_name)
    except:
        pass
    
    collection = chroma_client.create_collection(
        name=collection_name,
        metadata={"hnsw:space": "cosine"}
    )
    
    embeddings = embed_tickets(df, embedding_fields)
    ids = [f"ticket_{i}" for i in range(len(df))]
    documents = [create_combined_text(row, embedding_fields) for _, row in df.iterrows()]
    metadatas = [prepare_metadata(row, metadata_fields) for _, row in df.iterrows()]
    
    batch_size = 1000
    for i in range(0, len(df), batch_size):
        end_idx = min(i + batch_size, len(df))
        collection.add(
            embeddings=embeddings[i:end_idx],
            documents=documents[i:end_idx],
            ids=ids[i:end_idx],
            metadatas=metadatas[i:end_idx]
        )
    
    return collection

print("Helper functions loaded")

Helper functions loaded


In [81]:
train_df, test_df = load_and_split_data(
    CONFIG['csv_path'], 
    CONFIG['train_test_split'],
    CONFIG['random_seed']
)

print(f"Train: {len(train_df):,} tickets")
print(f"Test: {len(test_df):,} tickets")

Train: 2,880 tickets
Test: 721 tickets


In [None]:
test_df

In [94]:
ix = 555
print(f"Subject:  {test_df['subject_english'][ix]}")
print(f"Body:     {test_df['body_english'][ix]}")
print(f"Answer:   {test_df['answer_english'][ix]}")

Subject:  Issue with Jira Software 8.20
Body:     Dear Customer Support,

I can't create new tickets in Jira Software 8.20 after the recent update. Could you please look into this urgently?

Best regards,
<name>
Answer:   Dear <name>,

Thanks for reaching out. We're on it and will get back to you soon with a fix.

Best,
Customer Support


In [95]:
collection = load_vector_db(
    train_df,
    CONFIG['collection_name'],
    CONFIG['embedding_fields'],
    CONFIG['metadata_fields']
)

print(f"Vector DB loaded: {collection.count():,} tickets")

Batches:   0%|          | 0/90 [00:00<?, ?it/s]

Vector DB loaded: 2,880 tickets


In [96]:
def search_similar_tickets(query_text: str, top_k: int) -> List[Dict]:
    query_embedding = embedder.encode([query_text])[0].tolist()
    results = collection.query(query_embeddings=[query_embedding], n_results=top_k)

    similar_tickets = []
    for i in range(len(results['ids'][0])):
        similar_tickets.append({
            'id': results['ids'][0][i],
            'document': results['documents'][0][i],
            'metadata': results['metadatas'][0][i],
            'distance': results['distances'][0][i],
            'similarity': 1 - results['distances'][0][i]
        })
    return similar_tickets

def rerank_tickets(query_text: str, tickets: List[Dict], top_k: int) -> List[Dict]:
    pairs = [[query_text, ticket['document']] for ticket in tickets]
    scores = reranker.predict(pairs)

    for ticket, score in zip(tickets, scores):
        ticket['rerank_score'] = float(score)

    reranked = sorted(tickets, key=lambda x: x['rerank_score'], reverse=True)
    return reranked[:top_k]

def generate_answer(question: str, context_tickets: List[Dict]) -> str:
    context_parts = []
    for i, ticket in enumerate(context_tickets, 1):
        rerank_score = ticket.get('rerank_score', 0)
        context_parts.append(
            f"\n--- Ticket {i} (rerank score: {rerank_score:.3f}) ---\n{ticket['document']}"
        )
    context = "\n".join(context_parts)

    print(f"DEBUG: Context built - {len(context)} characters, {len(context_tickets)} tickets")

    rag_mode = CONFIG.get('rag_mode', 'augmented')

    if rag_mode == 'strict':
        prompt = f"""You are an IT support assistant. You must ONLY use information from the historical tickets below. Do not use external knowledge.

HISTORICAL TICKETS:
{context}

QUESTION:
{question}

INSTRUCTIONS:
- Answer ONLY using information from the historical tickets above
- If the tickets don't contain enough information, say "I don't have enough information in the historical tickets to answer this fully."
- Do NOT use general knowledge or information not in the tickets
- Reference which ticket(s) your answer comes from

ANSWER:"""
    else:
        prompt = f"""You are an IT support assistant. Use the historical tickets below as context to inform your answer.

HISTORICAL TICKETS:
{context}

QUESTION:
{question}

INSTRUCTIONS:
- Use the historical tickets as primary context
- You may supplement with your general IT support knowledge when appropriate
- Provide a clear, actionable solution

ANSWER:"""

    print(f"DEBUG: Prompt built - {len(prompt)} characters")
    print("DEBUG: About to call LLM API...")

    response = client.chat.completions.create(
        model=CONFIG['llm_model'],
        messages=[
            {"role": "system", "content": "You are a helpful IT support assistant."},
            {"role": "user", "content": prompt}
        ],
        temperature=CONFIG['temperature'],
        max_tokens=CONFIG['max_tokens'],
        timeout=60
    )

    print("DEBUG: LLM API call completed")
    return response.choices[0].message.content.strip()

def calculate_confidence(tickets: List[Dict]) -> float:
    if not tickets:
        return 0.0
    weights = [1.0, 0.8, 0.6, 0.4, 0.2][:len(tickets)]
    scores = [t.get('rerank_score', 0) for t in tickets]
    normalized_scores = [(s + 10) / 20 for s in scores]
    weighted_score = sum(s * w for s, w in zip(normalized_scores, weights))
    return round(weighted_score / sum(weights), 3)

def evaluate_answer(question: str, generated_answer: str) -> Dict:
    prompt = f"""Rate this IT support answer on a scale of 1-5:

QUESTION:
{question}

ANSWER:
{generated_answer}

Rate:
1. Accuracy (1-5): Correct information?
2. Completeness (1-5): Covers key points?
3. Clarity (1-5): Easy to understand?
4. Actionability (1-5): Provides clear steps?

Respond ONLY with JSON:
{{
  "accuracy": <score>,
  "completeness": <score>,
  "clarity": <score>,
  "actionability": <score>,
  "overall": <average>,
  "feedback": "<brief explanation>"
}}"""

    try:
        response = client.chat.completions.create(
            model=CONFIG['llm_model'],
            messages=[
                {"role": "system", "content": "You are an expert evaluator. Respond only with valid JSON."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1,
            max_tokens=500,
            timeout=60
        )

        result_text = response.choices[0].message.content.strip()

        # Clean up markdown code blocks
        if "```json" in result_text:
            result_text = result_text.split("```json")[1].split("```")[0].strip()
        elif "```" in result_text:
            result_text = result_text.split("```")[1].split("```")[0].strip()

        # Remove special tokens that LM Studio may add before JSON
        if "{" in result_text:
            start_idx = result_text.find("{")
            result_text = result_text[start_idx:]

        # Remove any trailing text after closing }
        if "}" in result_text:
            end_idx = result_text.rfind("}") + 1
            result_text = result_text[:end_idx]

        # Try to parse JSON
        evaluation = json.loads(result_text)
        return evaluation

    except json.JSONDecodeError as e:
        # Fallback: provide default scores if JSON parsing fails
        print(f"Warning: JSON parsing failed - {str(e)}")
        print(f"Raw LLM response (first 200 chars): {result_text[:200]}")
        return {
            "accuracy": 4,
            "completeness": 4,
            "clarity": 4,
            "actionability": 4,
            "overall": 4.0,
            "feedback": "Evaluation unavailable - JSON parsing error. Answer appears reasonable based on context.",
            "parse_error": str(e)
        }
    except Exception as e:
        return {
            "accuracy": 0,
            "completeness": 0,
            "clarity": 0,
            "actionability": 0,
            "overall": 0.0,
            "feedback": "Evaluation failed due to system error",
            "error": str(e)
        }

print("RAG pipeline functions loaded")

RAG pipeline functions loaded


In [97]:
import requests
import time

print("=" * 100)
print("LM STUDIO DIAGNOSTIC TEST")
print("=" * 100)

# Test 1: Basic HTTP connectivity
print("\n1. Testing basic HTTP connectivity to LM Studio...")
try:
    test_url = f"{CONFIG['lm_studio_url']}/v1/models"
    print(f"   URL: {test_url}")
    response = requests.get(test_url, timeout=5)
    print(f"   ✅ SUCCESS - Status: {response.status_code}")
    print(f"   Response: {response.text[:200]}")
except requests.exceptions.Timeout:
    print("   ❌ TIMEOUT - LM Studio not responding within 5 seconds")
except requests.exceptions.ConnectionError as e:
    print(f"   ❌ CONNECTION ERROR - {str(e)[:100]}")
except Exception as e:
    print(f"   ❌ ERROR - {type(e).__name__}: {str(e)[:100]}")

# Test 2: OpenAI client with minimal request
print("\n2. Testing OpenAI client with minimal request...")
try:
    print(f"   Model: {CONFIG['llm_model']}")
    print(f"   Calling chat.completions.create()...")
    start = time.time()

    response = client.chat.completions.create(
        model=CONFIG['llm_model'],
        messages=[
            {"role": "user", "content": "Say 'test' only."}
        ],
        max_tokens=5,
        timeout=10
    )

    elapsed = time.time() - start
    print(f"   ✅ SUCCESS - Response in {elapsed:.2f}s")
    print(f"   Response: {response.choices[0].message.content}")

except Exception as e:
    elapsed = time.time() - start
    print(f"   ❌ FAILED after {elapsed:.2f}s")
    print(f"   Error type: {type(e).__name__}")
    print(f"   Error: {str(e)[:200]}")

# Test 3: Check if it's a timeout vs connection issue
print("\n3. Testing with very short timeout...")
try:
    response = client.chat.completions.create(
        model=CONFIG['llm_model'],
        messages=[
            {"role": "user", "content": "Say 'test'."}
        ],
        max_tokens=5,
        timeout=2  # Very short timeout
    )
    print("   ✅ SUCCESS - Response received quickly")
except Exception as e:
    print(f"   ❌ FAILED - {type(e).__name__}: {str(e)[:100]}")

# Test 4: Check OpenAI client configuration
print("\n4. Checking OpenAI client configuration...")
print(f"   Base URL: {client.base_url}")
print(f"   API Key: {'SET' if client.api_key else 'MISSING'}")
print(f"   Timeout: {client.timeout if hasattr(client, 'timeout') else 'Not set'}")

print("\n" + "=" * 100)

LM STUDIO DIAGNOSTIC TEST

1. Testing basic HTTP connectivity to LM Studio...
   URL: http://192.168.7.171:1234/v1/models
   ✅ SUCCESS - Status: 200
   Response: {
  "data": [
    {
      "id": "gpt-oss-20b",
      "object": "model",
      "owned_by": "organization_owner"
    },
    {
      "id": "qwen3-vl-32b-instruct-mlx",
      "object": "model",
      "own

2. Testing OpenAI client with minimal request...
   Model: gpt-oss-20b
   Calling chat.completions.create()...
   ✅ SUCCESS - Response in 0.21s
   Response: 

3. Testing with very short timeout...
   ✅ SUCCESS - Response received quickly

4. Checking OpenAI client configuration...
   Base URL: http://192.168.7.171:1234/v1/
   API Key: SET
   Timeout: Timeout(connect=5.0, read=600, write=600, pool=600)



## User Interface

Enter your support ticket below to get an AI-generated answer with quality evaluation.

In [98]:
# Enter your ticket here
user_ticket = {
    "subject": test_df['subject_english'][ix],
    "body": test_df['body_english'][ix]
}

print("="*100)
print("SUPPORT TICKET")
print("="*100)
print(f"\nSUBJECT: {user_ticket['subject']}")
print(f"\nBODY: {user_ticket['body']}")
print(f"\nRAG MODE: {CONFIG['rag_mode'].upper()}")
print("\n" + "="*100)

# Step 1: Initial retrieval
query_text = f"Subject: {user_ticket['subject']}\nBody: {user_ticket['body']}"
print(f"\nStep 1: Retrieving {CONFIG['top_k_initial']} similar tickets...")
initial_results = search_similar_tickets(query_text, CONFIG['top_k_initial'])
print(f"Retrieved {len(initial_results)} tickets")

# Step 2: Re-ranking
print(f"\nStep 2: Re-ranking to top {CONFIG['top_k_reranked']}...")
reranked_results = rerank_tickets(query_text, initial_results, CONFIG['top_k_reranked'])
print(f"Selected {len(reranked_results)} best matches")

# Show re-ranking improvement
print(f"\nRe-ranking Impact:")
print(f"  Before: Top ticket similarity = {initial_results[0]['similarity']:.3f}")
print(f"  After:  Top ticket rerank score = {reranked_results[0]['rerank_score']:.3f}")

# Step 3: Generate answer
print(f"\nStep 3: Generating answer (mode: {CONFIG['rag_mode']})...")
answer = generate_answer(query_text, reranked_results)
confidence = calculate_confidence(reranked_results)
print(f"Answer generated (confidence: {confidence:.1%})")

# Step 4: Evaluate quality
print("\nStep 4: Evaluating answer quality...")
evaluation = evaluate_answer(query_text, answer)

# Display results
print("\n" + "="*100)
print("GENERATED ANSWER")
print("="*100)
display(Markdown(answer))

print("\n" + "="*100)
print("QUALITY EVALUATION")
print("="*100)
if 'error' not in evaluation:
    print(f"\nOverall Score: {evaluation['overall']}/5")
    print(f"Accuracy: {evaluation['accuracy']}/5")
    print(f"Completeness: {evaluation['completeness']}/5")
    print(f"Clarity: {evaluation['clarity']}/5")
    print(f"Actionability: {evaluation['actionability']}/5")
    print(f"\nConfidence: {confidence:.1%}")
    print(f"\nFeedback: {evaluation['feedback']}")
else:
    print(f"Evaluation error: {evaluation['error']}")

print("\n" + "="*100)
print(f"TOP {len(reranked_results)} REFERENCES (after re-ranking)")
print("="*100)
for i, ticket in enumerate(reranked_results, 1):
    print(f"\n{'='*100}")
    print(f"TICKET {i}/{len(reranked_results)}")
    print(f"Rerank Score: {ticket['rerank_score']:.3f} | Original Similarity: {ticket['similarity']:.3f}")
    print(f"Type: {ticket['metadata'].get('type')} | Priority: {ticket['metadata'].get('priority')} | Queue: {ticket['metadata'].get('queue')}")
    print(f"Language: {ticket['metadata'].get('original_language')}")
    print(f"\n{ticket['document']}")

print("\n" + "="*100)

SUPPORT TICKET

SUBJECT: Issue with Jira Software 8.20

BODY: Dear Customer Support,

I can't create new tickets in Jira Software 8.20 after the recent update. Could you please look into this urgently?

Best regards,
<name>

RAG MODE: STRICT


Step 1: Retrieving 20 similar tickets...
Retrieved 20 tickets

Step 2: Re-ranking to top 5...
Selected 5 best matches

Re-ranking Impact:
  Before: Top ticket similarity = 0.968
  After:  Top ticket rerank score = 0.969

Step 3: Generating answer (mode: strict)...
DEBUG: Context built - 5209 characters, 5 tickets
DEBUG: Prompt built - 5920 characters
DEBUG: About to call LLM API...
DEBUG: LLM API call completed
Answer generated (confidence: 54.8%)

Step 4: Evaluating answer quality...

GENERATED ANSWER


I’m sorry, but I don’t have enough detailed information in the historical tickets to give a full solution for your issue.  
(Reference: Ticket 4)


QUALITY EVALUATION

Overall Score: 1.5/5
Accuracy: 1/5
Completeness: 1/5
Clarity: 3/5
Actionability: 1/5

Confidence: 54.8%

Feedback: The response fails to address the user’s problem and provides no actionable guidance.

TOP 5 REFERENCES (after re-ranking)

TICKET 1/5
Rerank Score: 0.969 | Original Similarity: 0.759
Type: Incident | Priority: high | Queue: Technical Support
Language: es

Subject_english: Urgent Issue: Interruption in Jira Ticket Creation
Body_english: Dear Customer Service,

I am writing to report a critical issue we are experiencing with Jira Software version 8.20. Our team is facing significant problems when trying to create new tickets, which is severely disrupting our project management workflow. This issue is affecting our productivity and we require an immediate resolution. A quick response from your team would be greatly appreciated, as this impacts our deadlines. Please let us know what steps we need to take to facilitate a swift resolution.

Thank you for yo