In [83]:
import numpy as np
import pandas as pd
import re
from datetime import datetime
from typing import List, Dict, Any
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [94]:
# Read the local file
VeociEntries = pd.read_json('Get_Veoci_Form_Entries.json')

> **Note:** This file simulates Veoci API output and will later be replaced by an HTTP payload.

In [95]:
incoming_entry_raw = [
  {
    "headers": {
      "x-forwarded-for": "3.208.214.5",
      "x-forwarded-proto": "https",
      "x-forwarded-port": "443",
      "host": "n8n.stg.veoci.com",
      "x-amzn-trace-id": "Root=1-6933355c-32df78546884c9ae2876637f",
      "content-length": "1692",
      "content-type": "application/json; charset=UTF-8",
      "user-agent": "Apache-HttpClient/4.5.13 (Java/21.0.9)",
      "accept-encoding": "gzip,deflate"
    },
    "params": {},
    "query": {},
    "body": {
      "5": "Fri Dec 05 2025 14:41:07 GMT-0500 (Eastern Standard Time): User logged out",
      "6": "/",
      "8": "Stacktrace ionic://localhost/js/app.js:1 - /******/ (function() { // webpackBootstrap src-cordova/www/js/webpack:/node_modules/chart.js/dist/chart.mjs:2726 - if ((reverse && position !== 'right') || (!reverse && position === 'right')) { src-cordova/www/js/webpack:/node_modules/chart.js/dist/chart.mjs:2726 - if ((reverse && position !== 'right') || (!reverse && position === 'right')) { src-cordova/www/js/webpack:/node_modules/chart.js/dist/chart.mjs:2758 - titleX = offsetFromEdge(scale, position, offset); src-cordova/www/js/webpack:/node_modules/chart.js/dist/chart.mjs:2758 - titleX = offsetFromEdge(scale, position, offset); src-cordova/www/js/webpack:/node_modules/chart.js/dist/chart.mjs:2758 - titleX = offsetFromEdge(scale, position, offset);",
      "16": "Mobile Safari UI/WKWebView - 18.6.2",
      "18": "6.0.545322-ios - PROD-MOBILE",
      "21": "5f1875273e1741000a1a4a25",
      "24": [],
      "25": "PROD-MOBILE",
      "26": "veoci-mobile-client",
      "27": "Error",
      "34": "Error ID: 693335535d860bbd20b41487 Error:Fri Dec 05 2025 14:41:07 GMT-0500 (Eastern Standard Time): User logged out Error occurred at: ionic://localhost/js/app.js:1 - /******/ (function() { // webpackBootstrap View error in Bugsnag",
      "id": "1580841464",
      "formId": "35430484",
      "name": "2025-Dec-05 14:41: Bugsnag ( open) -  PROD-MOBILE:Fri Dec 05 2025 14:41:07 GMT-0500 (Eastern Standard Time): User logged out",
      "lastModified": "2025-12-05T19:41:11Z",
      "created": "2025-12-05T19:41:11Z",
      "containerName": "Veoci Ticketing",
      "containerId": "67813"
    },
    "webhookUrl": "https://flows.stg.veoci.com/webhook/bugsnag-triage",
    "executionMode": "production"
  }
]

In [75]:
# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_colwidth', None)

# VeociEntries

In [84]:
# --- 1. HELPER FUNCTIONS ---

def is_vendor_file(file_path: str) -> bool:
    if not file_path: return False
    vendor_patterns = [
        r"^node_modules/", r"^@vue/", r"vuetify/", r"core-js", r"zone\.js",
        r"runtime-core\.esm-bundler\.js", r"reactivity\.esm-bundler\.js",
        r"proxiedModel\.mjs", r"app\..*\.js", r"vendor\..*\.js", r"chunk-.*\.js",
        r"LogbackBugsnagAppender\.java", r"AppenderBase\.java",
        r"AppenderAttachableImpl\.java", r"Logger\.java", r"ch\.qos\.logback\.",
        r"org\.springframework\.", r"org\.apache\.commons\.", r"java\.util\.",
        r"javax?\.", r"sun\.reflect\."
    ]
    return any(re.search(p, file_path) for p in vendor_patterns)

def parse_stack_frames(input_str: Any) -> List[Dict[str, Any]]:
    if not input_str or not isinstance(input_str, str): return []
    frames = []
    seen = set()

    # HTML format: <strong>file:line</strong> - code
    html_matches = re.findall(r'<strong>(.*?)</strong>', input_str)
    for raw in html_matches:
        raw = raw.strip()
        if raw in seen: continue
        seen.add(raw)
        
        # Handle "file:line - code"
        parts = raw.split(' - ', 1)
        file_line = parts[0]
        
        last_colon = file_line.rfind(':')
        if last_colon == -1: continue
        
        file_path = file_line[:last_colon]
        line_str = file_line[last_colon+1:]
        file_name = file_path.split('/')[-1]
        
        frames.append({
            'file': file_name,
            'line': int(line_str) if line_str.isdigit() else None,
            'vendor': is_vendor_file(file_path),
            'full_path': file_path
        })

    # Fallback: Plain text format
    if not frames:
        plain_matches = re.findall(r'([\w@:\/\.\-]+?\.(?:vue|js|ts|mjs|jsx|tsx|java)):(\d+)', input_str)
        for full_path, line_num in plain_matches:
            raw = f"{full_path}:{line_num}"
            if raw in seen: continue
            seen.add(raw)
            
            file_name = full_path.split('/')[-1]
            frames.append({
                'file': file_name,
                'line': int(line_num),
                'vendor': is_vendor_file(full_path),
                'full_path': full_path
            })
            
    return frames

In [85]:
# --- 2. NORMALIZATION ---

def normalize_veoci_entry(entry: Dict[str, Any]) -> Dict[str, Any]:
    """Extracts fields from Veoci API format (nested in 'values') into Canonical Schema"""
    values = entry.get('values', {})
    
    def get_val(key):
        field = values.get(key, {})
        if not field: return None
        val = field.get('data', {}).get('value')
        if isinstance(val, list): return ", ".join(val) # Handle multi-value
        return val

    return {
        'entry_id': str(entry.get('id')),
        'project': get_val('21'),          # Project ID
        'release_stage': get_val('25'),    # Release Stage
        'app_version': get_val('18'),      # App Version
        'timestamp': entry.get('lastModified'), # Using lastModified as timestamp
        'error_message': get_val('5'),     # Error message
        'stack_frames': parse_stack_frames(get_val('8')), # Stack trace
        'name': entry.get('name')          # Keep name for reference
    }

def normalize_incoming_entry(entry_wrapper: Any) -> Dict[str, Any]:
    """Extracts fields from the Incoming_entry structure into Canonical Schema"""
    # Incoming_entry is a list containing a dict with 'body'
    if isinstance(entry_wrapper, list):
        entry = entry_wrapper[0].get('body', {})
    else:
        entry = entry_wrapper.get('body', {})

    return {
        'entry_id': str(entry.get('id')),
        'project': entry.get('21'),
        'release_stage': entry.get('25'),
        'app_version': entry.get('18'),
        'timestamp': entry.get('lastModified'),
        'error_message': entry.get('5'),
        'stack_frames': parse_stack_frames(entry.get('8')),
        'name': entry.get('name')
    }

In [96]:
# --- 3. APPLY NORMALIZATION ---

# Normalize Incoming Entry
incoming_entry = normalize_incoming_entry(incoming_entry_raw)
print(f"Normalized Incoming Entry: {incoming_entry['entry_id']}")

# Normalize Candidate Entries
candidate_entries = []
if "json" in VeociEntries.columns and not VeociEntries.empty:
    raw_entries = VeociEntries["json"][0]["entries"]
    candidate_entries = [normalize_veoci_entry(e) for e in raw_entries]

print(f"Normalized {len(candidate_entries)} Candidate Entries")

Normalized Incoming Entry: 1580841464
Normalized 213 Candidate Entries


### Canonical ErrorEntry schema

```json
{
  "entry_id": "str",
  "project": "str",
  "release_stage": "str",
  "app_version": "str",
  "timestamp": "str (ISO8601) or int",
  "error_message": "str",
  "stack_frames": [
    {"file": "str", "line": "int", "vendor": "bool"}
  ]
}
```

In [87]:
# --- 4. HARD GATES ---

def passes_hard_gates(incoming: Dict[str, Any], candidate: Dict[str, Any]) -> tuple[bool, List[str]]:
    """
    Returns (passed, reasons)
    passed: True if the candidate should be considered for scoring.
    reasons: List of reasons for failure (empty if passed).
    """
    reasons = []

    # 1. Self-comparison check
    if incoming['entry_id'] == candidate['entry_id']:
        reasons.append("Self-comparison")

    # 2. Project Mismatch (Critical)
    if incoming['project'] and candidate['project']:
        if incoming['project'] != candidate['project']:
            reasons.append(f"Project mismatch: {incoming['project']} != {candidate['project']}")
            
    # 3. Release Stage Mismatch (Optional)
    # if incoming['release_stage'] and candidate['release_stage']:
    #     if incoming['release_stage'] != candidate['release_stage']:
    #         reasons.append(f"Release stage mismatch: {incoming['release_stage']} != {candidate['release_stage']}")

    return len(reasons) == 0, reasons

In [106]:
# --- 5. SCORING LOGIC ---

def calculate_stack_score(incoming_frames: List[Dict], candidate_frames: List[Dict]) -> Dict[str, Any]:
    """Computes stack trace similarity score (0-40) and reasons."""
    score = 0
    reasons = []
    
    # Helper to select top N non-vendor frames
    def select_frames(frames):
        non_vendor = [f for f in frames if not f['vendor']]
        vendor = [f for f in frames if f['vendor']]
        chosen = non_vendor if non_vendor else vendor
        unique = []
        seen = set()
        for f in chosen:
            if f['file'] not in seen:
                seen.add(f['file'])
                unique.append(f)
            if len(unique) >= (3 if non_vendor else 2): break
        return unique

    inc_frames = select_frames(incoming_frames)
    cand_frames = select_frames(candidate_frames)
    
    matched_files = set()
    
    if inc_frames and cand_frames:
        # Top frame match (Critical)
        if inc_frames[0]['file'] == cand_frames[0]['file']:
            score += 25
            reasons.append(f"Top frame match: {inc_frames[0]['file']}")
            matched_files.add(inc_frames[0]['file'])
            
        # Secondary frame match
        if len(inc_frames) > 1 and len(cand_frames) > 1:
            if inc_frames[1]['file'] == cand_frames[1]['file']:
                score += 10
                reasons.append(f"Secondary frame match: {inc_frames[1]['file']}")
                matched_files.add(inc_frames[1]['file'])
                
        # Overlap
        for f in inc_frames:
            for cf in cand_frames:
                if f['file'] == cf['file'] and f['file'] not in matched_files:
                    score += 5
                    reasons.append(f"Frame overlap: {f['file']}")
                    matched_files.add(f['file'])

    return {"score": min(score, 40), "reasons": reasons}

def calculate_time_score(incoming_ts: str, candidate_ts: str) -> Dict[str, Any]:
    """Computes time decay score (-15 to +5) and reasons."""
    score = 0
    reasons = []
    
    if incoming_ts and candidate_ts:
        try:
            dt_inc = pd.to_datetime(incoming_ts)
            dt_cand = pd.to_datetime(candidate_ts)
            days_diff = abs((dt_inc - dt_cand).days)
            
            if days_diff <= 7:
                score += 5
                reasons.append("Recent (<= 7 days)")
            
            if days_diff > 180: score -= 15
            elif days_diff > 90: score -= 8
        except:
            pass
            
    return {"score": score, "reasons": reasons}

def calculate_context_score(incoming: Dict, candidate: Dict) -> Dict[str, Any]:
    """Computes metadata context score (0-30) and reasons."""
    score = 0
    reasons = []
    
    if incoming['project'] and candidate['project'] and incoming['project'] == candidate['project']:
        score += 15
        reasons.append("Same Project")
        
    if incoming['app_version'] and candidate['app_version'] and incoming['app_version'] == candidate['app_version']:
        score += 10
        reasons.append("Same App Version")
        
    if incoming['release_stage'] and candidate['release_stage'] and incoming['release_stage'] == candidate['release_stage']:
        score += 5
        reasons.append("Same Release Stage")
        
    return {"score": score, "reasons": reasons}

def calculate_total_score(
    incoming: Dict[str, Any], 
    candidate: Dict[str, Any], 
    message_similarity: float = 0.0
) -> Dict[str, Any]:
    """
    Pure function to calculate the total similarity score between an incoming error and a candidate.
    Aggregates Stack, Message, Context, and Time scores.
    """
    # 1. Stack Score (0-40)
    stack_res = calculate_stack_score(incoming['stack_frames'], candidate['stack_frames'])
    stack_score = stack_res['score']
    
    # 2. Message Score (Raw 0-1 input -> Scaled 0-30)
    # We store the RAW score for diagnostics, but use SCALED for total
    message_score_scaled = message_similarity * 30
    
    # 3. Context Score (0-30)
    context_res = calculate_context_score(incoming, candidate)
    context_score = context_res['score']
    
    # 4. Time Score (-15 to +5)
    time_res = calculate_time_score(incoming['timestamp'], candidate['timestamp'])
    time_score = time_res['score']
    
    # Aggregate
    total_score = (
        stack_score + 
        message_score_scaled + 
        context_score + 
        time_score
    )
    
    # Cap at 100, Floor at 0
    final_score = max(0, min(total_score, 100))
    
    all_reasons = (
        stack_res['reasons'] + 
        [f"Message similarity: {message_similarity:.2f}"] + 
        context_res['reasons'] + 
        time_res['reasons']
    )
    
    return {
        "entry_id": candidate['entry_id'],
        "name": candidate['name'],
        "final_score": final_score,
        "scores": {
            "stack": stack_score,
            "message": message_similarity, # Raw 0-1
            "context": context_score,
            "time": time_score
        },
        "reasons": all_reasons
    }

In [97]:
# --- 6. BUILD TF-IDF CORPUS ---

# Build TF-IDF corpus (incoming + gated candidates)
corpus = [incoming_entry["error_message"] or ""]

eligible_candidates = []
candidate_index_map = []  # maps TF-IDF index -> candidate index

for idx, candidate in enumerate(candidate_entries):
    passed, _ = passes_hard_gates(incoming_entry, candidate)
    if passed:
        corpus.append(candidate["error_message"] or "")
        eligible_candidates.append(candidate)
        candidate_index_map.append(idx)

print(f"TF-IDF Corpus Size: {len(corpus)} (1 Incoming + {len(eligible_candidates)} Candidates)")

TF-IDF Corpus Size: 213 (1 Incoming + 212 Candidates)


In [98]:
# --- 7. FIT TF-IDF ---

if len(corpus) > 1:
    vectorizer = TfidfVectorizer(
        ngram_range=(1, 2),
        stop_words="english",
        min_df=1,
        max_df=0.95
    )

    tfidf_matrix = vectorizer.fit_transform(corpus)
    print("TF-IDF Matrix Shape:", tfidf_matrix.shape)
else:
    print("Not enough data for TF-IDF.")
    tfidf_matrix = None

TF-IDF Matrix Shape: (213, 1109)


In [102]:
# --- 8. COMPUTE COSINE SIMILARITIES ---

if tfidf_matrix is not None:
    incoming_vector = tfidf_matrix[0]
    candidate_vectors = tfidf_matrix[1:]

    message_similarities = cosine_similarity(
        incoming_vector,
        candidate_vectors
    )[0]
    print(f"Computed {len(message_similarities)} similarity scores.")
else:
    print("Skipping TF-IDF computation (no eligible candidates or corpus too small).")
    message_similarities = []

Computed 212 similarity scores.


### Context score
Context score captures metadata alignment (e.g., app version, release stage).
- **Range**: 0–30
- **Weight**: Low (supporting signal only)
- **Purpose**: Boosts candidates that match the environment of the incoming error, even if the stack/message match is imperfect.

In [107]:
# --- 9. EXECUTION PIPELINE (INTEGRATED) ---

print("Starting Integrated Similarity Pipeline...")

results = []

for i, candidate in enumerate(eligible_candidates):
    # Get pre-computed message similarity
    raw_msg_sim = message_similarities[i] if i < len(message_similarities) else 0.0
    
    # Calculate Total Score using the pure function
    result = calculate_total_score(incoming_entry, candidate, raw_msg_sim)
    
    if result['final_score'] > 0:
        results.append(result)

# Sort and Display
results.sort(key=lambda x: x['final_score'], reverse=True)
print(f"Found {len(results)} matches.")

Starting Integrated Similarity Pipeline...
Found 212 matches.


In [108]:
# --- 10. SANITY CHECK ---

if results:
    display_df = pd.DataFrame(results)
    
    # Flatten scores for display
    display_df['stack_score'] = display_df['scores'].apply(lambda x: x['stack'])
    display_df['message_raw'] = display_df['scores'].apply(lambda x: x['message']) # Raw 0-1
    display_df['context_score'] = display_df['scores'].apply(lambda x: x['context'])
    display_df['time_score'] = display_df['scores'].apply(lambda x: x['time'])
    
    cols = ['entry_id', 'final_score', 'stack_score', 'message_raw', 'context_score', 'time_score', 'name']
    display(display_df[cols].head(10))
else:
    print("No matches found.")

Unnamed: 0,entry_id,final_score,stack_score,message_raw,context_score,time_score,name
0,1577493686,80.549355,35,0.518312,30,0,2025-Dec-01 21:36: Bugsnag ( open) - PROD-MOBILE:Tue Dec 02 2025 14:42:36 GMT-0500 (Eastern Standard Time): User logged out
1,1573468818,72.746747,35,0.258225,30,0,2025-Nov-26 10:22: Bugsnag ( open) - PROD-MOBILE:Fri Nov 28 2025 02:56:17 GMT-0800 (Pacific Standard Time): User logged out
2,1574883386,66.411104,35,0.047037,30,0,"2025-Nov-28 20:41: Bugsnag ( ignored) - PROD-MOBILE:Fri Nov 28 2025 07:11:44 GMT-0800 (Pacific Standard Time): Ref Entry Fetch fail LOOKUP 6481158 fetchRefEntries , {""message"":""Network Error"",""name"":""AxiosError"",""stack"":""@ionic://localhost/js/vendor.js:…"
3,1578456898,65.934662,35,0.031155,30,0,"2025-Dec-03 11:15: Bugsnag ( ignored) - PROD-MOBILE:Wed Dec 03 2025 16:45:01 GMT+1100 (Australian Eastern Daylight Time): Ref Entry Fetch fail LOOKUP 1419471 fetchRefEntries , {""message"":""Network Error"",""name"":""AxiosError"",""stack"":""@ionic://localhost/js…"
4,1577908710,65.0,35,0.0,30,0,2025-Dec-02 20:41: Bugsnag ( ignored) - PROD-MOBILE:Location could not be accessed
5,1574414416,65.0,35,0.0,30,0,2025-Nov-28 04:43: Bugsnag ( open) - PROD-MOBILE:null is not an object (evaluating 'this.$refs.dialog.hide')
6,1530220994,61.269356,35,0.208979,20,0,2025-Oct-09 06:28: Bugsnag ( open) - PROD-MOBILE:Tue Oct 14 2025 12:56:08 GMT-0700 (Pacific Daylight Time): User logged out
7,1574820389,55.0,25,0.0,30,0,2025-Nov-28 07:55: Bugsnag ( open) - PROD-MOBILE:App didn't start in 60 seconds
8,1575214338,55.0,25,0.0,30,0,2025-Nov-29 08:20: Bugsnag ( open) - PROD-MOBILE:Loading chunk 40996 failed.\n(timeout: ionic://localhost/js/chunk-common.js)
9,1537105308,55.0,35,0.0,20,0,2025-Oct-16 13:59: Bugsnag ( fixed) - PROD-MOBILE:undefined is not an object (evaluating 't.address')


In [110]:
# --- 11. SIGNAL DISAGREEMENT ANALYSIS (FALSE POSITIVE REVIEW) ---

if results:
    analysis_df = pd.DataFrame(results)
    
    # Normalize scores to 0-1 for comparison
    # Stack max = 40, Message max = 1 (raw)
    analysis_df['stack_norm'] = analysis_df['scores'].apply(lambda x: x['stack']) / 40.0
    analysis_df['message_norm'] = analysis_df['scores'].apply(lambda x: x['message']) 
    
    # Calculate Disagreement (Message - Stack)
    # Positive: Message is stronger signal
    # Negative: Stack is stronger signal
    analysis_df['signal_diff'] = analysis_df['message_norm'] - analysis_df['stack_norm']
    
    # Prepare display columns
    analysis_df['stack_score'] = analysis_df['scores'].apply(lambda x: x['stack'])
    analysis_df['message_raw'] = analysis_df['scores'].apply(lambda x: x['message'])
    cols = ['entry_id', 'final_score', 'stack_score', 'message_raw', 'signal_diff', 'name']
    
    print("--- High Message / Low Stack (Potential Text-Driven Matches) ---")
    print("Look for: Different errors that happen to have similar words.")
    high_msg = analysis_df[analysis_df['signal_diff'] > 0.3].sort_values('signal_diff', ascending=False)
    if not high_msg.empty:
        display(high_msg[cols].head(5))
    else:
        print("No significant High Message / Low Stack cases found.")
        
    print("\n--- High Stack / Low Message (Potential Generic Stack Matches) ---")
    print("Look for: Same error code path, but different error message (e.g. dynamic error strings).")
    high_stack = analysis_df[analysis_df['signal_diff'] < -0.3].sort_values('signal_diff', ascending=True)
    if not high_stack.empty:
        display(high_stack[cols])
    else:
        print("No significant High Stack / Low Message cases found.")

--- High Message / Low Stack (Potential Text-Driven Matches) ---
Look for: Different errors that happen to have similar words.
No significant High Message / Low Stack cases found.

--- High Stack / Low Message (Potential Generic Stack Matches) ---
Look for: Same error code path, but different error message (e.g. dynamic error strings).


Unnamed: 0,entry_id,final_score,stack_score,message_raw,signal_diff,name
5,1574414416,65.000000,35,0.000000,-0.875000,2025-Nov-28 04:43: Bugsnag ( open) - PROD-MOBILE:null is not an object (evaluating 'this.$refs.dialog.hide')
4,1577908710,65.000000,35,0.000000,-0.875000,2025-Dec-02 20:41: Bugsnag ( ignored) - PROD-MOBILE:Location could not be accessed
13,1512402078,55.000000,35,0.000000,-0.875000,2025-Sep-18 10:46: Bugsnag ( fixed) - PROD-MOBILE:undefined is not an object (evaluating 't.address')
14,1321167631,55.000000,35,0.000000,-0.875000,2025-Feb-24 07:03: Bugsnag ( fixed) - PROD-MOBILE:Invalid URL: /api/v2/actions/undefined/evaluate/form?parentId=1225710747&c=111424
12,1369970745,55.000000,35,0.000000,-0.875000,2025-Apr-16 15:26: Bugsnag ( fixed) - PROD-MOBILE:undefined is not an object (evaluating 'e.place_name.split')
...,...,...,...,...,...,...
121,1548648440,40.000000,25,0.000000,-0.625000,2025-Oct-30 06:14: Bugsnag ( open) - STAGE-MOBILE:Cannot read properties of undefined (reading 'initialise')
1,1573468818,72.746747,35,0.258225,-0.616775,2025-Nov-26 10:22: Bugsnag ( open) - PROD-MOBILE:Fri Nov 28 2025 02:56:17 GMT-0800 (Pacific Standard Time): User logged out
0,1577493686,80.549355,35,0.518312,-0.356688,2025-Dec-01 21:36: Bugsnag ( open) - PROD-MOBILE:Tue Dec 02 2025 14:42:36 GMT-0500 (Eastern Standard Time): User logged out
39,1561273567,53.183985,25,0.272799,-0.352201,2025-Nov-13 13:54: Bugsnag ( open) - PROD-MOBILE:Thu Nov 13 2025 10:54:41 GMT-0800 (Pacific Standard Time): User logged out


In [114]:
# --- 12. GENERATE TRIAGE REPORT (AGENT-STYLE) ---
import json

def generate_triage_report(results: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Generates a report similar to the Bugsnag Triage Agent's output.
    """
    
    report = {
        "batchSummary": {
            "totalAnalyzed": len(eligible_candidates),
            "relatedFound": len(results),
            "confidenceCounts": {"High": 0, "Medium": 0, "Low": 0}
        },
        "relatedEntries": []
    }
    
    for res in results:
        # Map 0-100 score to Confidence
        score = res['final_score']
        if score >= 85: confidence = "High"
        elif score >= 70: confidence = "Medium"
        else: confidence = "Low"
        
        report["batchSummary"]["confidenceCounts"][confidence] += 1
        
        # Generate Explanation from Reasons
        explanation = f"Match found with {confidence} confidence ({score:.1f}/100). "
        explanation += "Key signals: " + "; ".join(res['reasons'][:3]) + "."
        
        entry = {
            "entryId": res['entry_id'],
            "score": score / 100.0, # Normalize to 0-1 for compatibility
            "confidence": confidence,
            "signals": list(res['scores'].keys()),
            "explanation": explanation,
            "breakdown": res['scores']
        }
        report["relatedEntries"].append(entry)
        
    return report

# Generate and Display Report
triage_report = generate_triage_report(results)

print("--- TRIAGE REPORT (AGENT STYLE) ---")
# Print only the first 2 entries to avoid token limits
short_report = triage_report.copy()
short_report['relatedEntries'] = short_report['relatedEntries'][:2]
print(json.dumps(short_report, indent=2))

--- TRIAGE REPORT (AGENT STYLE) ---
{
  "batchSummary": {
    "totalAnalyzed": 212,
    "relatedFound": 212,
    "confidenceCounts": {
      "High": 0,
      "Medium": 2,
      "Low": 210
    }
  },
  "relatedEntries": [
    {
      "entryId": "1577493686",
      "score": 0.8054935456015423,
      "confidence": "Medium",
      "signals": [
        "stack",
        "message",
        "context",
        "time"
      ],
      "explanation": "Match found with Medium confidence (80.5/100). Key signals: Top frame match: app.js; Secondary frame match: chart.mjs; Message similarity: 0.52.",
      "breakdown": {
        "stack": 35,
        "message": 0.5183118186718078,
        "context": 30,
        "time": 0
      }
    },
    {
      "entryId": "1573468818",
      "score": 0.7274674666421115,
      "confidence": "Medium",
      "signals": [
        "stack",
        "message",
        "context",
        "time"
      ],
      "explanation": "Match found with Medium confidence (72.7/100). Key 