In [1]:
# pip install sentence-transformers

In [2]:
# data loading
import pandas as pd
import re

df = pd.read_csv("production_grade_incident_rcas.csv")
df['date_reported'] = pd.to_datetime(df['date_reported']) # for time-based filtering

In [3]:
# load semantic embedding model

from sentence_transformers import SentenceTransformer

# Load a lightweight, efficient semantic model
# model = SentenceTransformer("all-MiniLM-L6-v2")
model = SentenceTransformer("distiluse-base-multilingual-cased-v2")

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install h

In [4]:
# generate RCA embeddings

incident_embeddings = model.encode(df['rca_summary'].tolist(), show_progress_bar=True)

Batches: 100%|██████████| 4/4 [00:00<00:00,  8.98it/s]


In [5]:
# Define semantic search function

from sklearn.metrics.pairwise import cosine_similarity

def context_search(query_text, top_n=5):
    query_embedding = model.encode([query_text])
    similarities = cosine_similarity(query_embedding, incident_embeddings).flatten()
    
    df['similarity'] = similarities
    return df.sort_values(by='similarity', ascending=False).head(top_n)

In [6]:
# Add contexual filters


def extract_context_filters(query_text):
    filters = {}

    # Define mapping of units to pandas-compatible keywords
    valid_units = {
        "minute": "minutes",
        "minutes": "minutes",
        "hour": "hours",
        "hours": "hours",
        "day": "days",
        "days": "days",
        "week": "weeks",
        "weeks": "weeks"
    }

    # Match time expressions like "last 2 days", "past 5 hours"
    time_match = re.search(r"(last|past)\s+(\d+)\s+(minute|minutes|hour|hours|day|days|week|weeks)", query_text.lower())
    if time_match:
        qty = int(time_match.group(2))
        unit = valid_units.get(time_match.group(3))
        if unit:
            filters['date_reported'] = pd.Timestamp.now() - pd.Timedelta(**{unit: qty})

    # Optional filters
    if "severity 0" in query_text:
        filters['severity'] = 0
    for team in df['team_owning'].unique():
        if team.lower() in query_text.lower():
            filters['team_owning'] = team

    return filters



In [7]:
# Combine semantic + metadata filtering

def smart_context_query(query_text, top_n=5):
    base = context_search(query_text, top_n=100)  # fetch broader match first
    filters = extract_context_filters(query_text)
    
    for key, value in filters.items():
        if key == 'date_reported':
            base = base[base['date_reported'] >= value]
        else:
            base = base[base[key] == value]
    return base.head(top_n)

In [8]:
# Test query
results = smart_context_query("certificate failures in last week by Team13", top_n=5)
print(results[['incident_id', 'rca_summary', 'similarity', 'date_reported']])

                             incident_id  \
67  89694620-583b-4056-b1f4-52139534408a   
70  e7cb0e38-dcf2-43c3-aa44-6ba0627a4ad8   
44  352d8341-c853-41b9-a1a9-76731a9c421b   
37  af9c33d5-dfa1-480e-aa46-c7c64ad4a163   
89  519a5d73-918a-4454-a306-94891a5129b9   

                                          rca_summary  similarity  \
67  Report generation failed due to unhandled exce...    0.194632   
70  External API latency caused user profile sync ...    0.085721   
44  Kafka consumer in the pricing engine stalled d...    0.083172   
37  External API latency caused user profile sync ...    0.053149   
89  External API latency caused user profile sync ...    0.045424   

         date_reported  
67 2025-05-09 19:06:00  
70 2025-01-22 04:46:00  
44 2025-02-05 12:58:00  
37 2025-04-09 18:02:00  
89 2025-02-19 12:47:00  


In [9]:
# Sample queries
smart_context_query("migration issues in infra")

Unnamed: 0,incident_id,date_reported,app_name,client_name,impacted_clients,reported_by,ttd,ttk,ttm,ttc,rca_summary,rca_tags,team_owning,severity,category,incident_type,similarity
22,2a3fd6de-0823-43a1-aff5-dc1ae6f1e205,2025-04-01 01:59:00,App22,Client3,"Client6, Client2, Client7",client,37,17,85,92,A malformed database migration script caused c...,"DB, Migration, Backup",Team3,0,Infra,Schema Corruption,0.131779
79,b496488c-b861-44c0-96fa-83213294f898,2025-04-06 23:46:00,App28,Client2,"Client9, Client6, Client8",client,3,11,80,26,A malformed database migration script caused c...,"DB, Migration, Backup",Team14,0,Infra,Schema Corruption,0.129596
60,4187c155-ea7a-4e75-b8c8-1f35e3c7b76a,2025-03-31 02:42:00,App10,Client9,"Client4, Client9, Client8, Client2",client,12,14,11,111,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team13,0,Network,Network Isolation,0.114876
71,1d35e379-f93b-4fa1-a4c9-2cea02004356,2025-03-16 18:26:00,App3,Client7,"Client4, Client5, Client8, Client3",client,24,18,29,55,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team13,0,Network,Network Isolation,0.110048
3,206e905d-b25d-4134-989e-53e1d7e6e1ac,2025-01-29 19:39:00,App6,Client10,"Client9, Client6, Client2",client,20,4,38,154,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team5,0,Network,Network Isolation,0.109286


In [10]:
smart_context_query("repeated API timeouts")

Unnamed: 0,incident_id,date_reported,app_name,client_name,impacted_clients,reported_by,ttd,ttk,ttm,ttc,rca_summary,rca_tags,team_owning,severity,category,incident_type,similarity
13,5c0a5f0f-4f9f-48f5-9ae8-fdec19a5164a,2025-04-01 17:12:00,App4,Client1,Client2,client,6,15,28,81,External API latency caused user profile sync ...,"API, Timeout, External",Team8,2,Integration,Upstream Latency,0.395934
2,3008f803-a358-437a-96aa-8996ed2b498c,2025-04-30 08:13:00,App11,Client5,"Client8, Client1",client,25,2,19,49,External API latency caused user profile sync ...,"API, Timeout, External",Team6,2,Integration,Upstream Latency,0.395315
52,dcb71cc3-b7af-4ea6-86a3-7ed7aa464ff6,2025-03-13 07:26:00,App17,Client1,"Client8, Client6, Client2",client,25,19,18,50,External API latency caused user profile sync ...,"API, Timeout, External",Team14,2,Integration,Upstream Latency,0.383025
59,a7c22043-0139-49d7-b0a6-20cfe4e2b552,2025-02-20 09:21:00,App24,Client3,"Client2, Client10, Client4, Client3",client,13,3,43,107,External API latency caused user profile sync ...,"API, Timeout, External",Team15,2,Integration,Upstream Latency,0.382295
80,12a64234-6242-40f2-8b54-db6236cfa083,2025-01-19 20:53:00,App20,Client5,"Client10, Client7",client,33,9,40,56,External API latency caused user profile sync ...,"API, Timeout, External",Team9,2,Integration,Upstream Latency,0.380776


In [11]:
smart_context_query("OAuth failure severity 0")

Unnamed: 0,incident_id,date_reported,app_name,client_name,impacted_clients,reported_by,ttd,ttk,ttm,ttc,rca_summary,rca_tags,team_owning,severity,category,incident_type,similarity
79,b496488c-b861-44c0-96fa-83213294f898,2025-04-06 23:46:00,App28,Client2,"Client9, Client6, Client8",client,3,11,80,26,A malformed database migration script caused c...,"DB, Migration, Backup",Team14,0,Infra,Schema Corruption,0.180392
22,2a3fd6de-0823-43a1-aff5-dc1ae6f1e205,2025-04-01 01:59:00,App22,Client3,"Client6, Client2, Client7",client,37,17,85,92,A malformed database migration script caused c...,"DB, Migration, Backup",Team3,0,Infra,Schema Corruption,0.155977
85,63722fb8-3a8c-4944-9e00-60c0f44189da,2025-02-27 23:35:00,App5,Client4,"Client7, Client4",client,3,3,82,31,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team6,0,Network,Network Isolation,0.098665
60,4187c155-ea7a-4e75-b8c8-1f35e3c7b76a,2025-03-31 02:42:00,App10,Client9,"Client4, Client9, Client8, Client2",client,12,14,11,111,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team13,0,Network,Network Isolation,0.096282
90,491f4425-3299-476f-9645-cdc1494737e0,2025-01-10 04:25:00,App16,Client7,"Client7, Client8",client,30,12,76,168,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team8,0,Network,Network Isolation,0.081924


In [12]:
smart_context_query("schema corruption in last week")

Unnamed: 0,incident_id,date_reported,app_name,client_name,impacted_clients,reported_by,ttd,ttk,ttm,ttc,rca_summary,rca_tags,team_owning,severity,category,incident_type,similarity
79,b496488c-b861-44c0-96fa-83213294f898,2025-04-06 23:46:00,App28,Client2,"Client9, Client6, Client8",client,3,11,80,26,A malformed database migration script caused c...,"DB, Migration, Backup",Team14,0,Infra,Schema Corruption,0.201189
22,2a3fd6de-0823-43a1-aff5-dc1ae6f1e205,2025-04-01 01:59:00,App22,Client3,"Client6, Client2, Client7",client,37,17,85,92,A malformed database migration script caused c...,"DB, Migration, Backup",Team3,0,Infra,Schema Corruption,0.181857
53,1d8e6aa3-9b25-43b3-b0c8-ac824783785d,2025-02-22 22:37:00,App22,Client9,"Client1, Client7, Client5",client,21,12,60,25,Kafka consumer in the pricing engine stalled d...,"Kafka, Schema, Consumer",Team8,1,Application,Stale Data Pipeline,0.155276
3,206e905d-b25d-4134-989e-53e1d7e6e1ac,2025-01-29 19:39:00,App6,Client10,"Client9, Client6, Client2",client,20,4,38,154,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team5,0,Network,Network Isolation,0.148089
15,e2f2b2ae-9144-4ec8-bcd1-f2a03ec41252,2025-05-07 03:33:00,App9,Client1,"Client1, Client5, Client8",client,38,20,75,164,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team12,0,Network,Network Isolation,0.145915


In [13]:
smart_context_query("schema corruption in last 20 days")

Unnamed: 0,incident_id,date_reported,app_name,client_name,impacted_clients,reported_by,ttd,ttk,ttm,ttc,rca_summary,rca_tags,team_owning,severity,category,incident_type,similarity
49,82d63060-e0f5-4ab7-a41b-5cfd8e5f8730,2025-05-24 12:44:00,App14,Client10,"Client1, Client4",client,33,9,56,176,Kafka consumer in the pricing engine stalled d...,"Kafka, Schema, Consumer",Team11,1,Application,Stale Data Pipeline,0.127899
31,779793e2-94cd-4ec8-b1ca-a69ba9d1e596,2025-05-18 13:14:00,App26,Client4,"Client5, Client8",client,37,19,27,38,Kafka consumer in the pricing engine stalled d...,"Kafka, Schema, Consumer",Team10,1,Application,Stale Data Pipeline,0.12427
30,e0269264-61cf-4640-83c5-f2320a7a95ef,2025-05-31 04:25:00,App10,Client2,"Client4, Client1, Client7",client,6,16,31,34,Report generation failed due to unhandled exce...,"CSV, Exception, Bug",Team12,2,Application,Unhandled Exception,0.083002
23,4ee57e24-c90e-4602-95b6-97ccfb121ccc,2025-05-24 06:34:00,App10,Client9,"Client9, Client10, Client4",client,11,7,34,61,Report generation failed due to unhandled exce...,"CSV, Exception, Bug",Team8,2,Application,Unhandled Exception,0.079615
56,5344f285-4d60-4b2b-9e4b-e2f46abbf21f,2025-05-28 21:01:00,App14,Client6,"Client5, Client6",client,36,3,88,87,Grafana dashboard query optimization needed. I...,"Grafana, Query, Dashboard",Team7,3,Infra,Dashboard Slowness,0.056069


In [14]:
smart_context_query("most repeated incidents for last 10 days")

Unnamed: 0,incident_id,date_reported,app_name,client_name,impacted_clients,reported_by,ttd,ttk,ttm,ttc,rca_summary,rca_tags,team_owning,severity,category,incident_type,similarity
30,e0269264-61cf-4640-83c5-f2320a7a95ef,2025-05-31 04:25:00,App10,Client2,"Client4, Client1, Client7",client,6,16,31,34,Report generation failed due to unhandled exce...,"CSV, Exception, Bug",Team12,2,Application,Unhandled Exception,0.118511
36,e45b37e0-d7cc-4ca9-b59d-c85c7548e1a9,2025-05-30 23:48:00,App21,Client5,"Client10, Client1, Client7",client,33,8,63,91,Report generation failed due to unhandled exce...,"CSV, Exception, Bug",Team15,2,Application,Unhandled Exception,0.063773
56,5344f285-4d60-4b2b-9e4b-e2f46abbf21f,2025-05-28 21:01:00,App14,Client6,"Client5, Client6",client,36,3,88,87,Grafana dashboard query optimization needed. I...,"Grafana, Query, Dashboard",Team7,3,Infra,Dashboard Slowness,-0.023858


In [16]:
smart_context_query("team with most incidents in last 20 days")

Unnamed: 0,incident_id,date_reported,app_name,client_name,impacted_clients,reported_by,ttd,ttk,ttm,ttc,rca_summary,rca_tags,team_owning,severity,category,incident_type,similarity
30,e0269264-61cf-4640-83c5-f2320a7a95ef,2025-05-31 04:25:00,App10,Client2,"Client4, Client1, Client7",client,6,16,31,34,Report generation failed due to unhandled exce...,"CSV, Exception, Bug",Team12,2,Application,Unhandled Exception,0.085731
23,4ee57e24-c90e-4602-95b6-97ccfb121ccc,2025-05-24 06:34:00,App10,Client9,"Client9, Client10, Client4",client,11,7,34,61,Report generation failed due to unhandled exce...,"CSV, Exception, Bug",Team8,2,Application,Unhandled Exception,0.067576
36,e45b37e0-d7cc-4ca9-b59d-c85c7548e1a9,2025-05-30 23:48:00,App21,Client5,"Client10, Client1, Client7",client,33,8,63,91,Report generation failed due to unhandled exce...,"CSV, Exception, Bug",Team15,2,Application,Unhandled Exception,0.041266
49,82d63060-e0f5-4ab7-a41b-5cfd8e5f8730,2025-05-24 12:44:00,App14,Client10,"Client1, Client4",client,33,9,56,176,Kafka consumer in the pricing engine stalled d...,"Kafka, Schema, Consumer",Team11,1,Application,Stale Data Pipeline,0.027902
31,779793e2-94cd-4ec8-b1ca-a69ba9d1e596,2025-05-18 13:14:00,App26,Client4,"Client5, Client8",client,37,19,27,38,Kafka consumer in the pricing engine stalled d...,"Kafka, Schema, Consumer",Team10,1,Application,Stale Data Pipeline,0.019073
