In [167]:
import pandas as pd
import numpy as np
from elasticsearch import Elasticsearch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from datetime import datetime

In [168]:
es_cloud_url="https://e6115e8808864a34b8547836bb0925b6.canadacentral.azure.elastic-cloud.com"
es_user="elastic"
es_pass="nI0YJM3GK202ejrodU52x0rP"

In [169]:
es = Elasticsearch(es_cloud_url,basic_auth=(es_user, es_pass))

In [170]:
# 2️⃣ Query ServiceNow Ticket Data from Elasticsearch
index_name = "snow-prd-task*"  
query = {
    "query": {
            "bool": {
                "must": [                            
                            {
                            "bool": {
                                "should": [
                                    { "match" : { "sys_class_name": "Incident"} }
                                    ] 
                            } },                            
                            ],
                "must_not":[
                       { "match": { "assigned_to.keyword" : "" }} ,
                       { "match": { "assigned_to.keyword" : "None" }} ,
                       { "match": { "contact_type": "Auto-Generated Event" }} ,
                       { "match": { "company": "Accenture" }} ,
                       { "match": { "sys_class_name.keyword": "Problem" }} ,
                    #    { "match" : { "state" : "Resolved Closed Cancelled Closed Complete Closed/Resolved Closed Incomplete Fulfilled" } }
                       ]
                       }                        
                       },  
    "_source": [
        "priority", "impact", "urgency", "category", "assignment_group",
        "incident_state", "opened_at", "resolved_at", "made_sla"
    ]
}

# Set scroll duration & batch size
scroll_time = "2m"  # Keep the scroll context open for 2 minutes
batch_size = 5000  # Fetch 5000 records per batch

# First request to initialize scroll
response = es.search(index=index_name, body=query, scroll=scroll_time, size=batch_size)

# Extract the scroll ID & first batch of results
scroll_id = response["_scroll_id"]
all_hits = response["hits"]["hits"]

# Fetch data in batches until we reach 100,000 records or no more data is available
while len(all_hits) < 100000 and len(response["hits"]["hits"]) > 0:
    response = es.scroll(scroll_id=scroll_id, scroll=scroll_time)
    scroll_id = response["_scroll_id"]
    all_hits.extend(response["hits"]["hits"])  # Append new records

    # Stop if we reach 100,000 records
    if len(all_hits) >= 100000:
        break

# Print total records fetched
print(f"Total records fetched: {len(all_hits)}")

# Extract actual data
data = [hit["_source"] for hit in all_hits]

  response = es.search(index=index_name, body=query, scroll=scroll_time, size=batch_size)


Total records fetched: 73650


In [171]:
# data

In [172]:
# 3️⃣ Convert Data into a Pandas DataFrame
df = pd.DataFrame(data)

In [173]:
df.shape

(73650, 9)

In [174]:
df

Unnamed: 0,opened_at,made_sla,assignment_group,incident_state,urgency,resolved_at,impact,priority,category
0,2018-11-27 08:27:26,true,AO-Solutions,Closed,3 - Low,2019-01-09T18:31:21,3 - Low,4 - Low,Data Center Services
1,2018-09-04 12:00:20,true,IO-Service Desk,Closed,3 - Low,2018-09-10T14:46:55,3 - Low,4 - Low,Application / Software
2,2018-07-26 15:22:02,true,IO-Service Desk,Closed,3 - Low,2018-08-01T09:29:28,3 - Low,4 - Low,Application / Software
3,2018-09-05 10:38:44,true,IO-Service Desk,Closed,3 - Low,2018-09-05T11:18:44,3 - Low,4 - Low,Networks
4,2018-05-08 10:06:18,true,IO-Service Desk,Closed,3 - Low,2018-05-08T10:34:26,3 - Low,4 - Low,
...,...,...,...,...,...,...,...,...,...
73645,2024-09-20 15:38:51,true,IO-Technology Services,Awaiting User Info,3 - Low,1991-01-01T00:00:00,3 - Low,4 - Low,Application / Software / Platform
73646,2024-12-11 19:29:50,true,AO-emili Application Dev,Closed,3 - Low,2025-02-28T17:20:50,3 - Low,4 - Low,Application / Software / Platform
73647,2024-06-13 10:07:39,true,IO-Technology Services,Awaiting User Info,3 - Low,1991-01-01T00:00:00,3 - Low,4 - Low,Application / Software / Platform
73648,2024-11-07 20:38:18,true,AO-CRM,Awaiting User Info,1 - High,1991-01-01T00:00:00,3 - Low,3 - Moderate,Application / Software / Platform


In [175]:
# 4️⃣ Data Preprocessing
# Convert timestamps to datetime
df["opened_at"] = pd.to_datetime(df["opened_at"], errors='coerce')
df["resolved_at"] = pd.to_datetime(df["resolved_at"], errors='coerce')

In [176]:
# Calculate ticket age (in hours)
df["ticket_age"] = (df["resolved_at"] - df["opened_at"]).dt.total_seconds() / 3600  # Convert to hours


In [177]:
# Drop null SLA values
df = df.dropna(subset=["made_sla"])

In [178]:
# ✅ Convert SLA to binary (0 = Met SLA, 1 = Breached SLA)
df["sla_breach"] = df["made_sla"].map({"true": 0, "false": 1})

In [179]:
# ✅ Remove rows where `sla_breach` is still NaN or invalid
df = df.dropna(subset=["sla_breach"])

In [180]:
# ✅ Ensure `sla_breach` contains only 0 or 1
df = df[df["sla_breach"].isin([0, 1])]

In [181]:
# Drop `made_sla` since it's no longer needed
df = df.drop(columns=["made_sla"])

In [182]:
# Fill missing values in categorical columns with "Unknown"
for col in ["priority", "impact", "urgency", "category", "assignment_group", "incident_state"]:
    df[col] = df[col].fillna("Unknown")

In [183]:
# Fill missing values in numerical column (ticket_age) with median
df["ticket_age"] = df["ticket_age"].fillna(df["ticket_age"].median())

In [184]:
# df.loc[df["sla_breach"] ==1]

In [185]:
# Encode categorical variables
label_encoders = {}
for col in ["priority", "impact", "urgency", "category", "assignment_group", "incident_state"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

In [186]:
# Standardize numerical features
scaler = StandardScaler()
df["ticket_age"] = scaler.fit_transform(df[["ticket_age"]])

In [187]:
df

Unnamed: 0,opened_at,assignment_group,incident_state,urgency,resolved_at,impact,priority,category,ticket_age,sla_breach
0,2018-11-27 08:27:26,18,9,2,2019-01-09 18:31:21,2,3,4,0.172582,0
1,2018-09-04 12:00:20,66,9,2,2018-09-10 14:46:55,2,3,1,0.151156,0
2,2018-07-26 15:22:02,66,9,2,2018-08-01 09:29:28,2,3,1,0.150949,0
3,2018-09-05 10:38:44,66,9,2,2018-09-05 11:18:44,2,3,9,0.147659,0
4,2018-05-08 10:06:18,66,9,2,2018-05-08 10:34:26,2,3,10,0.147654,0
...,...,...,...,...,...,...,...,...,...,...
73645,2024-09-20 15:38:51,70,6,2,1991-01-01 00:00:00,2,3,2,-6.926776,0
73646,2024-12-11 19:29:50,19,9,2,2025-02-28 17:20:50,2,3,2,0.192967,0
73647,2024-06-13 10:07:39,70,6,2,1991-01-01 00:00:00,2,3,2,-6.869780,0
73648,2024-11-07 20:38:18,2,6,0,1991-01-01 00:00:00,2,2,2,-6.954465,0


In [188]:
# Define features and target variable
X = df[["priority", "impact", "urgency", "category", "assignment_group", "incident_state", "ticket_age"]]
y = df["sla_breach"]

In [189]:
# 5️⃣ Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [190]:
# 6️⃣ Train Logistic Regression Model
model = LogisticRegression()
model.fit(X_train, y_train)

In [191]:
# 7️⃣ Evaluate Model
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.99979633401222
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     14727
           1       0.00      0.00      0.00         3

    accuracy                           1.00     14730
   macro avg       0.50      0.50      0.50     14730
weighted avg       1.00      1.00      1.00     14730



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [192]:
# Function to encode categorical features, handling unseen labels
def encode_with_fallback(label_encoder, value):
    if value in label_encoder.classes_:
        return label_encoder.transform([value])[0]
    else:
        return label_encoder.transform(["Unknown"])[0]  # Use fallback "Unknown"

# Example prediction function
def predict_sla_breach(priority, impact, urgency, category, assignment_group, incident_state, ticket_age):
    # Ensure input is in the correct format
    priority = encode_with_fallback(label_encoders["priority"], priority)
    impact = encode_with_fallback(label_encoders["impact"], impact)
    urgency = encode_with_fallback(label_encoders["urgency"], urgency)
    category = encode_with_fallback(label_encoders["category"], category)
    assignment_group = encode_with_fallback(label_encoders["assignment_group"], assignment_group)
    incident_state = encode_with_fallback(label_encoders["incident_state"], incident_state)

    # Scale ticket age
    ticket_age = scaler.transform([[ticket_age]])[0][0]

    # Prepare input data
    X_new = [[priority, impact, urgency, category, assignment_group, incident_state, ticket_age]]

    # Predict SLA breach
    prediction = model.predict(X_new)[0]
    return "SLA Breached" if prediction == 1 else "SLA Met"

# Example Prediction
# print(predict_sla_breach("3 - Moderate", "3 - Low", "1 - High", "Monitoring Event", "IO-Network Data Support", "Closed", 10))


In [193]:
# Example Prediction
print(predict_sla_breach("3 - Moderate", "3 - Low", "1 - High", "Monitoring Event", "IO-Network Data Support", "Closed", 10))



SLA Met




In [194]:
import joblib

# Save trained model
joblib.dump(model, "sla_breach_model.pkl")

# Save label encoders
joblib.dump(label_encoders, "label_encoders.pkl")

# Save scaler
joblib.dump(scaler, "scaler.pkl")

print("Model, encoders, and scaler saved successfully!")


Model, encoders, and scaler saved successfully!


In [195]:
# Load model, encoders, and scaler
model = joblib.load("sla_breach_model.pkl")
label_encoders = joblib.load("label_encoders.pkl")
scaler = joblib.load("scaler.pkl")

print("Model, encoders, and scaler loaded successfully!")


Model, encoders, and scaler loaded successfully!


In [212]:
def predict_sla_breach(priority, impact, urgency, category, assignment_group, incident_state, ticket_age):
    # Function to handle unseen labels
    def encode_with_fallback(label_encoder, value):
        if value in label_encoder.classes_:
            return label_encoder.transform([value])[0]
        else:
            return label_encoder.transform(["Unknown"])[0]  # Use fallback "Unknown"

    # Encode categorical features
    priority = encode_with_fallback(label_encoders["priority"], priority)
    impact = encode_with_fallback(label_encoders["impact"], impact)
    urgency = encode_with_fallback(label_encoders["urgency"], urgency)
    category = encode_with_fallback(label_encoders["category"], category)
    assignment_group = encode_with_fallback(label_encoders["assignment_group"], assignment_group)
    incident_state = encode_with_fallback(label_encoders["incident_state"], incident_state)

    # Scale numerical feature
    ticket_age = scaler.transform([[ticket_age]])[0][0]

    # Prepare input data
    X_new = [[priority, impact, urgency, category, assignment_group, incident_state, ticket_age]]

    # Predict SLA breach
    prediction = model.predict(X_new)[0]
    return "SLA Breached" if prediction == 1 else "SLA Met"



In [213]:
# Example Prediction
print(predict_sla_breach("3 - Moderate", "3 - Low", "1 - High", "Monitoring Event", "IO-Network Data Support", "Closed", 20))


SLA Met




In [232]:
# from elasticsearch import Elasticsearch
import numpy as np
from datetime import datetime

# # Connect to Elasticsearch
# es = Elasticsearch(["http://your-elasticsearch-host:9200"])  # Update with your ES host

def fetch_incident_details(ticket_number):
    """Fetches incident details from Elasticsearch using the ticket number."""
    index_name = "snow-prd-task*"  # Your Elasticsearch index
    
    query = {
        "query": {
            "bool": {
                        "must": [
                            
                            {"match":{"number.keyword": ticket_number}}
                        ]
                    }
            },
        "_source": [
            "priority", "impact", "urgency", "category",
            "assignment_group", "incident_state", "sys_created_on"        ]
    }
    
    response = es.search(index=index_name, body=query, size=1)
    
    if response["hits"]["hits"]:
        return response["hits"]["hits"][0]["_source"]
    else:
        return None  # No incident found

from datetime import datetime, timezone
def calculate_ticket_age(creation_time):
    """Calculates ticket age in hours from creation time to now."""
    try:
        created_time = datetime.strptime(creation_time, "%Y-%m-%dT%H:%M:%S")  # Adjust format if needed
        # resolved_at = datetime.strptime(resolved_at, "%Y-%m-%dT%H:%M:%S")  # Adjust format if needed
        now1 = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S")  # Timezone-aware UTC datetime
        now = datetime.strptime(now1, "%Y-%m-%dT%H:%M:%S")
        ticket_age = (now - created_time).total_seconds() / 3600  # Convert seconds to hours
        return round(ticket_age, 2)
    except Exception as e:
        print(f"Error calculating ticket age: {e}")
        return None

def predict_sla_breach(ticket_number, model, label_encoders):
    """Predicts SLA breach for a given incident ticket number."""
    
    # Fetch incident details
    incident = fetch_incident_details(ticket_number)
    if not incident:
        return f"No incident found for ticket: {ticket_number}"
    
    # Calculate ticket age
    ticket_age = calculate_ticket_age(incident["sys_created_on"])
    if ticket_age is None:
        return "Error in ticket age calculation"
    
    # Extract categorical features
    categorical_features = ["priority", "impact", "urgency", "category", "assignment_group", "incident_state"]
    
    # Encode categorical features safely (handle unseen labels)
    try:
        encoded_features = []
        for feature in categorical_features:
            le = label_encoders.get(feature)  # Get the label encoder for the feature
            if le:
                if incident[feature] in le.classes_:
                    encoded_features.append(le.transform([incident[feature]])[0])  # Encode known values
                else:
                    encoded_features.append(len(le.classes_))  # Assign a new index for unseen values
            else:
                return f"Error: No encoder found for {feature}"
        
        # Convert to numpy array and combine with ticket age
        input_features = np.array(encoded_features + [ticket_age]).reshape(1, -1)

    except ValueError as e:
        return f"Encoding error: {e}"
    
    # Predict SLA breach
    prediction = model.predict(input_features)[0]
    
    return f"Predicted SLA Breach Time: {prediction} hours"


In [233]:
print(predict_sla_breach("INC7428098", model, label_encoders))

  response = es.search(index=index_name, body=query, size=1)


Predicted SLA Breach Time: 0 hours


