In [1]:
# pip install sentence-transformers

In [3]:
# data loading
import pandas as pd
import re

df = pd.read_csv("production_grade_incident_rcas.csv")
df['date_reported'] = pd.to_datetime(df['date_reported']) # for time-based filtering

In [4]:
# load semantic embedding model

from sentence_transformers import SentenceTransformer

# Load a lightweight, efficient semantic model
model = SentenceTransformer("all-MiniLM-L6-v2")
# model = SentenceTransformer("distiluse-base-multilingual-cased-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# generate RCA embeddings

incident_embeddings = model.encode(df['rca_summary'].tolist(), show_progress_bar=True)

Batches: 100%|██████████| 4/4 [00:01<00:00,  3.89it/s]


In [6]:
# Define semantic search function

from sklearn.metrics.pairwise import cosine_similarity

def context_search(query_text, top_n=5):
    query_embedding = model.encode([query_text])
    similarities = cosine_similarity(query_embedding, incident_embeddings).flatten()
    
    df['similarity'] = similarities
    return df.sort_values(by='similarity', ascending=False).head(top_n)

In [7]:
# Add contexual filters


def extract_context_filters(query_text):
    filters = {}

    # Define mapping of units to pandas-compatible keywords
    valid_units = {
        "minute": "minutes",
        "minutes": "minutes",
        "hour": "hours",
        "hours": "hours",
        "day": "days",
        "days": "days",
        "week": "weeks",
        "weeks": "weeks"
    }

    # Match time expressions like "last 2 days", "past 5 hours"
    time_match = re.search(r"(last|past)\s+(\d+)\s+(minute|minutes|hour|hours|day|days|week|weeks)", query_text.lower())
    if time_match:
        qty = int(time_match.group(2))
        unit = valid_units.get(time_match.group(3))
        if unit:
            filters['date_reported'] = pd.Timestamp.now() - pd.Timedelta(**{unit: qty})

    # Optional filters
    if "severity 0" in query_text:
        filters['severity'] = 0
    for team in df['team_owning'].unique():
        if team.lower() in query_text.lower():
            filters['team_owning'] = team

    return filters



In [8]:
# Combine semantic + metadata filtering

def smart_context_query(query_text, top_n=5):
    base = context_search(query_text, top_n=100)  # fetch broader match first
    filters = extract_context_filters(query_text)
    
    for key, value in filters.items():
        if key == 'date_reported':
            base = base[base['date_reported'] >= value]
        else:
            base = base[base[key] == value]
    return base.head(top_n)

In [9]:
# Test query
results = smart_context_query("certificate failures in last week by Team13", top_n=5)
print(results[['incident_id', 'rca_summary', 'similarity', 'date_reported']])

                             incident_id  \
44  352d8341-c853-41b9-a1a9-76731a9c421b   
77  9271447b-8b26-4563-b56a-0501cfdbbe32   
67  89694620-583b-4056-b1f4-52139534408a   
89  519a5d73-918a-4454-a306-94891a5129b9   
37  af9c33d5-dfa1-480e-aa46-c7c64ad4a163   

                                          rca_summary  similarity  \
44  Kafka consumer in the pricing engine stalled d...    0.250316   
77  Kafka consumer in the pricing engine stalled d...    0.249031   
67  Report generation failed due to unhandled exce...    0.241469   
89  External API latency caused user profile sync ...    0.222646   
37  External API latency caused user profile sync ...    0.218034   

         date_reported  
44 2025-02-05 12:58:00  
77 2025-04-03 12:51:00  
67 2025-05-09 19:06:00  
89 2025-02-19 12:47:00  
37 2025-04-09 18:02:00  


In [10]:
# Sample queries
smart_context_query("migration issues in infra")

Unnamed: 0,incident_id,date_reported,app_name,client_name,impacted_clients,reported_by,ttd,ttk,ttm,ttc,rca_summary,rca_tags,team_owning,severity,category,incident_type,similarity
79,b496488c-b861-44c0-96fa-83213294f898,2025-04-06 23:46:00,App28,Client2,"Client9, Client6, Client8",client,3,11,80,26,A malformed database migration script caused c...,"DB, Migration, Backup",Team14,0,Infra,Schema Corruption,0.436523
22,2a3fd6de-0823-43a1-aff5-dc1ae6f1e205,2025-04-01 01:59:00,App22,Client3,"Client6, Client2, Client7",client,37,17,85,92,A malformed database migration script caused c...,"DB, Migration, Backup",Team3,0,Infra,Schema Corruption,0.429777
60,4187c155-ea7a-4e75-b8c8-1f35e3c7b76a,2025-03-31 02:42:00,App10,Client9,"Client4, Client9, Client8, Client2",client,12,14,11,111,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team13,0,Network,Network Isolation,0.184431
40,ef465b06-6eca-4c69-8fbb-a04b5dc7212c,2025-03-13 02:17:00,App10,Client1,"Client10, Client7, Client8, Client5",client,38,8,28,64,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team10,0,Network,Network Isolation,0.181695
71,1d35e379-f93b-4fa1-a4c9-2cea02004356,2025-03-16 18:26:00,App3,Client7,"Client4, Client5, Client8, Client3",client,24,18,29,55,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team13,0,Network,Network Isolation,0.181605


In [11]:
smart_context_query("repeated API timeouts")

Unnamed: 0,incident_id,date_reported,app_name,client_name,impacted_clients,reported_by,ttd,ttk,ttm,ttc,rca_summary,rca_tags,team_owning,severity,category,incident_type,similarity
70,e7cb0e38-dcf2-43c3-aa44-6ba0627a4ad8,2025-01-22 04:46:00,App14,Client4,"Client4, Client2",client,15,16,76,121,External API latency caused user profile sync ...,"API, Timeout, External",Team1,2,Integration,Upstream Latency,0.586093
46,a61cf0ca-ba4c-455d-a7e2-1c4768b35133,2025-01-23 20:35:00,App3,Client4,Client7,client,36,8,7,135,External API latency caused user profile sync ...,"API, Timeout, External",Team5,2,Integration,Upstream Latency,0.580589
88,ee910baf-06d5-4310-aa4c-d1d5eb56112f,2025-03-15 07:34:00,App16,Client1,"Client2, Client6, Client10",client,8,10,24,165,External API latency caused user profile sync ...,"API, Timeout, External",Team5,2,Integration,Upstream Latency,0.576363
7,f49f936e-6023-4b10-8d8a-c83b04ea5b41,2025-01-18 16:11:00,App18,Client1,"Client4, Client6",client,11,12,80,65,External API latency caused user profile sync ...,"API, Timeout, External",Team14,2,Integration,Upstream Latency,0.574934
35,bc8d3d41-02fd-4f77-9053-ee59d535687e,2025-03-21 05:15:00,App30,Client2,"Client3, Client2, Client1",client,16,15,49,146,External API latency caused user profile sync ...,"API, Timeout, External",Team7,2,Integration,Upstream Latency,0.573736


In [12]:
smart_context_query("OAuth failure severity 0")

Unnamed: 0,incident_id,date_reported,app_name,client_name,impacted_clients,reported_by,ttd,ttk,ttm,ttc,rca_summary,rca_tags,team_owning,severity,category,incident_type,similarity
40,ef465b06-6eca-4c69-8fbb-a04b5dc7212c,2025-03-13 02:17:00,App10,Client1,"Client10, Client7, Client8, Client5",client,38,8,28,64,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team10,0,Network,Network Isolation,0.348562
85,63722fb8-3a8c-4944-9e00-60c0f44189da,2025-02-27 23:35:00,App5,Client4,"Client7, Client4",client,3,3,82,31,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team6,0,Network,Network Isolation,0.347851
60,4187c155-ea7a-4e75-b8c8-1f35e3c7b76a,2025-03-31 02:42:00,App10,Client9,"Client4, Client9, Client8, Client2",client,12,14,11,111,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team13,0,Network,Network Isolation,0.346888
3,206e905d-b25d-4134-989e-53e1d7e6e1ac,2025-01-29 19:39:00,App6,Client10,"Client9, Client6, Client2",client,20,4,38,154,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team5,0,Network,Network Isolation,0.343419
71,1d35e379-f93b-4fa1-a4c9-2cea02004356,2025-03-16 18:26:00,App3,Client7,"Client4, Client5, Client8, Client3",client,24,18,29,55,A misconfigured firewall rule on the edge gate...,"Firewall, DNS, Edge Gateway",Team13,0,Network,Network Isolation,0.342381


In [13]:
smart_context_query("schema corruption in last week")

Unnamed: 0,incident_id,date_reported,app_name,client_name,impacted_clients,reported_by,ttd,ttk,ttm,ttc,rca_summary,rca_tags,team_owning,severity,category,incident_type,similarity
79,b496488c-b861-44c0-96fa-83213294f898,2025-04-06 23:46:00,App28,Client2,"Client9, Client6, Client8",client,3,11,80,26,A malformed database migration script caused c...,"DB, Migration, Backup",Team14,0,Infra,Schema Corruption,0.511686
22,2a3fd6de-0823-43a1-aff5-dc1ae6f1e205,2025-04-01 01:59:00,App22,Client3,"Client6, Client2, Client7",client,37,17,85,92,A malformed database migration script caused c...,"DB, Migration, Backup",Team3,0,Infra,Schema Corruption,0.500993
17,82a15bc0-64d1-452c-bcdc-27248d5b4a77,2025-05-03 10:28:00,App25,Client8,"Client6, Client5, Client3",client,35,13,48,36,Kafka consumer in the pricing engine stalled d...,"Kafka, Schema, Consumer",Team13,1,Application,Stale Data Pipeline,0.421589
31,779793e2-94cd-4ec8-b1ca-a69ba9d1e596,2025-05-18 13:14:00,App26,Client4,"Client5, Client8",client,37,19,27,38,Kafka consumer in the pricing engine stalled d...,"Kafka, Schema, Consumer",Team10,1,Application,Stale Data Pipeline,0.420502
53,1d8e6aa3-9b25-43b3-b0c8-ac824783785d,2025-02-22 22:37:00,App22,Client9,"Client1, Client7, Client5",client,21,12,60,25,Kafka consumer in the pricing engine stalled d...,"Kafka, Schema, Consumer",Team8,1,Application,Stale Data Pipeline,0.418783


In [14]:
smart_context_query("schema corruption in last 20 days")

Unnamed: 0,incident_id,date_reported,app_name,client_name,impacted_clients,reported_by,ttd,ttk,ttm,ttc,rca_summary,rca_tags,team_owning,severity,category,incident_type,similarity
49,82d63060-e0f5-4ab7-a41b-5cfd8e5f8730,2025-05-24 12:44:00,App14,Client10,"Client1, Client4",client,33,9,56,176,Kafka consumer in the pricing engine stalled d...,"Kafka, Schema, Consumer",Team11,1,Application,Stale Data Pipeline,0.401724
31,779793e2-94cd-4ec8-b1ca-a69ba9d1e596,2025-05-18 13:14:00,App26,Client4,"Client5, Client8",client,37,19,27,38,Kafka consumer in the pricing engine stalled d...,"Kafka, Schema, Consumer",Team10,1,Application,Stale Data Pipeline,0.397569
36,e45b37e0-d7cc-4ca9-b59d-c85c7548e1a9,2025-05-30 23:48:00,App21,Client5,"Client10, Client1, Client7",client,33,8,63,91,Report generation failed due to unhandled exce...,"CSV, Exception, Bug",Team15,2,Application,Unhandled Exception,0.275416
30,e0269264-61cf-4640-83c5-f2320a7a95ef,2025-05-31 04:25:00,App10,Client2,"Client4, Client1, Client7",client,6,16,31,34,Report generation failed due to unhandled exce...,"CSV, Exception, Bug",Team12,2,Application,Unhandled Exception,0.274965
23,4ee57e24-c90e-4602-95b6-97ccfb121ccc,2025-05-24 06:34:00,App10,Client9,"Client9, Client10, Client4",client,11,7,34,61,Report generation failed due to unhandled exce...,"CSV, Exception, Bug",Team8,2,Application,Unhandled Exception,0.274188


In [15]:
smart_context_query("most repeated incidents for last 10 days")

Unnamed: 0,incident_id,date_reported,app_name,client_name,impacted_clients,reported_by,ttd,ttk,ttm,ttc,rca_summary,rca_tags,team_owning,severity,category,incident_type,similarity
36,e45b37e0-d7cc-4ca9-b59d-c85c7548e1a9,2025-05-30 23:48:00,App21,Client5,"Client10, Client1, Client7",client,33,8,63,91,Report generation failed due to unhandled exce...,"CSV, Exception, Bug",Team15,2,Application,Unhandled Exception,0.143699
30,e0269264-61cf-4640-83c5-f2320a7a95ef,2025-05-31 04:25:00,App10,Client2,"Client4, Client1, Client7",client,6,16,31,34,Report generation failed due to unhandled exce...,"CSV, Exception, Bug",Team12,2,Application,Unhandled Exception,0.139428
56,5344f285-4d60-4b2b-9e4b-e2f46abbf21f,2025-05-28 21:01:00,App14,Client6,"Client5, Client6",client,36,3,88,87,Grafana dashboard query optimization needed. I...,"Grafana, Query, Dashboard",Team7,3,Infra,Dashboard Slowness,0.086785


In [16]:
smart_context_query("team with most incidents in last 20 days")

Unnamed: 0,incident_id,date_reported,app_name,client_name,impacted_clients,reported_by,ttd,ttk,ttm,ttc,rca_summary,rca_tags,team_owning,severity,category,incident_type,similarity
36,e45b37e0-d7cc-4ca9-b59d-c85c7548e1a9,2025-05-30 23:48:00,App21,Client5,"Client10, Client1, Client7",client,33,8,63,91,Report generation failed due to unhandled exce...,"CSV, Exception, Bug",Team15,2,Application,Unhandled Exception,0.183685
23,4ee57e24-c90e-4602-95b6-97ccfb121ccc,2025-05-24 06:34:00,App10,Client9,"Client9, Client10, Client4",client,11,7,34,61,Report generation failed due to unhandled exce...,"CSV, Exception, Bug",Team8,2,Application,Unhandled Exception,0.180828
30,e0269264-61cf-4640-83c5-f2320a7a95ef,2025-05-31 04:25:00,App10,Client2,"Client4, Client1, Client7",client,6,16,31,34,Report generation failed due to unhandled exce...,"CSV, Exception, Bug",Team12,2,Application,Unhandled Exception,0.180421
56,5344f285-4d60-4b2b-9e4b-e2f46abbf21f,2025-05-28 21:01:00,App14,Client6,"Client5, Client6",client,36,3,88,87,Grafana dashboard query optimization needed. I...,"Grafana, Query, Dashboard",Team7,3,Infra,Dashboard Slowness,0.10062
49,82d63060-e0f5-4ab7-a41b-5cfd8e5f8730,2025-05-24 12:44:00,App14,Client10,"Client1, Client4",client,33,9,56,176,Kafka consumer in the pricing engine stalled d...,"Kafka, Schema, Consumer",Team11,1,Application,Stale Data Pipeline,0.032822
