In [136]:
import pandas as pd
import numpy as np
from elasticsearch import Elasticsearch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from datetime import datetime

In [137]:
es_cloud_url="https://e6115e8808864a34b8547836bb0925b6.canadacentral.azure.elastic-cloud.com"
es_user="elastic"
es_pass="nI0YJM3GK202ejrodU52x0rP"

In [138]:
es = Elasticsearch(es_cloud_url,basic_auth=(es_user, es_pass))

In [139]:
# 2️⃣ Query ServiceNow Ticket Data from Elasticsearch
index_name = "snow-prd-task*"  
query = {
    "query": {"match_all": {}},  # Fetch all tickets (Modify as needed)
    "_source": [
        "priority", "impact", "urgency", "category", "assignment_group",
        "incident_state", "opened_at", "resolved_at", "made_sla"
    ]
}

# Set scroll duration & batch size
scroll_time = "2m"  # Keep the scroll context open for 2 minutes
batch_size = 5000  # Fetch 5000 records per batch

# First request to initialize scroll
response = es.search(index=index_name, body=query, scroll=scroll_time, size=batch_size)

# Extract the scroll ID & first batch of results
scroll_id = response["_scroll_id"]
all_hits = response["hits"]["hits"]

# Fetch data in batches until we reach 100,000 records or no more data is available
while len(all_hits) < 100000 and len(response["hits"]["hits"]) > 0:
    response = es.scroll(scroll_id=scroll_id, scroll=scroll_time)
    scroll_id = response["_scroll_id"]
    all_hits.extend(response["hits"]["hits"])  # Append new records

    # Stop if we reach 100,000 records
    if len(all_hits) >= 100000:
        break

# Print total records fetched
print(f"Total records fetched: {len(all_hits)}")

# Extract actual data
data = [hit["_source"] for hit in all_hits]

  response = es.search(index=index_name, body=query, scroll=scroll_time, size=batch_size)


Total records fetched: 100000


In [140]:
# data

In [141]:
# 3️⃣ Convert Data into a Pandas DataFrame
df = pd.DataFrame(data)

In [142]:
df.shape

(100000, 9)

In [143]:
df

Unnamed: 0,opened_at,made_sla,assignment_group,urgency,impact,priority,category,incident_state,resolved_at
0,2017-03-03 13:28:47,true,IO-Platform Services,3 - Medium,3 - Medium,3 - Moderate,,,
1,2017-02-24 13:11:09,true,IO-ASG,4 - Low,4 - Low,4 - Low,,,
2,2017-03-02 10:18:15,true,IO-System Integration Services,4 - Low,4 - Low,4 - Low,,,
3,2017-03-20 13:13:36,true,IO-Network Telecom,3 - Medium,3 - Medium,3 - Moderate,,,
4,2017-02-28 11:55:23,true,IO-ASG,3 - Medium,3 - Medium,3 - Moderate,,,
...,...,...,...,...,...,...,...,...,...
99995,2022-09-06 11:02:39,true,BPO-Asset Management,,,,,,
99996,2022-11-07 16:06:16,true,IO-Access Admin,,,,,,
99997,2022-11-07 15:38:53,true,IO-Access Admin,,,,,,
99998,2022-11-07 15:37:42,true,IO-Access Admin,,,,,,


In [144]:
# 4️⃣ Data Preprocessing
# Convert timestamps to datetime
df["opened_at"] = pd.to_datetime(df["opened_at"], errors='coerce')
df["resolved_at"] = pd.to_datetime(df["resolved_at"], errors='coerce')

In [145]:
# Calculate ticket age (in hours)
df["ticket_age"] = (df["resolved_at"] - df["opened_at"]).dt.total_seconds() / 3600  # Convert to hours


In [146]:
# Drop null SLA values
df = df.dropna(subset=["made_sla"])

In [147]:
# ✅ Convert SLA to binary (0 = Met SLA, 1 = Breached SLA)
df["sla_breach"] = df["made_sla"].map({"true": 0, "false": 1})

In [148]:
# ✅ Remove rows where `sla_breach` is still NaN or invalid
df = df.dropna(subset=["sla_breach"])

In [149]:
# ✅ Ensure `sla_breach` contains only 0 or 1
df = df[df["sla_breach"].isin([0, 1])]

In [150]:
# Drop `made_sla` since it's no longer needed
df = df.drop(columns=["made_sla"])

In [151]:
# Fill missing values in categorical columns with "Unknown"
for col in ["priority", "impact", "urgency", "category", "assignment_group", "incident_state"]:
    df[col] = df[col].fillna("Unknown")

In [152]:
# Fill missing values in numerical column (ticket_age) with median
df["ticket_age"] = df["ticket_age"].fillna(df["ticket_age"].median())

In [153]:
# df.loc[df["sla_breach"] ==1]

In [154]:
# Encode categorical variables
label_encoders = {}
for col in ["priority", "impact", "urgency", "category", "assignment_group", "incident_state"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

In [155]:
# Standardize numerical features
scaler = StandardScaler()
df["ticket_age"] = scaler.fit_transform(df[["ticket_age"]])

In [156]:
df

Unnamed: 0,opened_at,assignment_group,urgency,impact,priority,category,incident_state,resolved_at,ticket_age,sla_breach
0,2017-03-03 13:28:47,81,6,6,3,9,3,NaT,0.174641,0
1,2017-02-24 13:11:09,60,7,7,4,9,3,NaT,0.174641,0
2,2017-03-02 10:18:15,94,7,7,4,9,3,NaT,0.174641,0
3,2017-03-20 13:13:36,74,6,6,3,9,3,NaT,0.174641,0
4,2017-02-28 11:55:23,60,6,6,3,9,3,NaT,0.174641,0
...,...,...,...,...,...,...,...,...,...,...
99995,2022-09-06 11:02:39,26,0,0,0,16,3,NaT,0.174641,0
99996,2022-11-07 16:06:16,61,0,0,0,16,3,NaT,0.174641,0
99997,2022-11-07 15:38:53,61,0,0,0,16,3,NaT,0.174641,0
99998,2022-11-07 15:37:42,61,0,0,0,16,3,NaT,0.174641,0


In [157]:
# Define features and target variable
X = df[["priority", "impact", "urgency", "category", "assignment_group", "incident_state", "ticket_age"]]
y = df["sla_breach"]

In [158]:
# 5️⃣ Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [159]:
# 6️⃣ Train Logistic Regression Model
model = LogisticRegression()
model.fit(X_train, y_train)

In [160]:
# 7️⃣ Evaluate Model
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.99985
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     19997
           1       0.00      0.00      0.00         3

    accuracy                           1.00     20000
   macro avg       0.50      0.50      0.50     20000
weighted avg       1.00      1.00      1.00     20000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [161]:
# Function to encode categorical features, handling unseen labels
def encode_with_fallback(label_encoder, value):
    if value in label_encoder.classes_:
        return label_encoder.transform([value])[0]
    else:
        return label_encoder.transform(["Unknown"])[0]  # Use fallback "Unknown"

# Example prediction function
def predict_sla_breach(priority, impact, urgency, category, assignment_group, incident_state, ticket_age):
    # Ensure input is in the correct format
    priority = encode_with_fallback(label_encoders["priority"], priority)
    impact = encode_with_fallback(label_encoders["impact"], impact)
    urgency = encode_with_fallback(label_encoders["urgency"], urgency)
    category = encode_with_fallback(label_encoders["category"], category)
    assignment_group = encode_with_fallback(label_encoders["assignment_group"], assignment_group)
    incident_state = encode_with_fallback(label_encoders["incident_state"], incident_state)

    # Scale ticket age
    ticket_age = scaler.transform([[ticket_age]])[0][0]

    # Prepare input data
    X_new = [[priority, impact, urgency, category, assignment_group, incident_state, ticket_age]]

    # Predict SLA breach
    prediction = model.predict(X_new)[0]
    return "SLA Breached" if prediction == 1 else "SLA Met"

# Example Prediction
# print(predict_sla_breach("3 - Moderate", "3 - Low", "1 - High", "Monitoring Event", "IO-Network Data Support", "Closed", 10))


In [162]:
# Example Prediction
print(predict_sla_breach("3 - Moderate", "3 - Low", "1 - High", "Monitoring Event", "IO-Network Data Support", "Closed", 10))



SLA Met




In [163]:
import joblib

# Save trained model
joblib.dump(model, "sla_breach_model.pkl")

# Save label encoders
joblib.dump(label_encoders, "label_encoders.pkl")

# Save scaler
joblib.dump(scaler, "scaler.pkl")

print("Model, encoders, and scaler saved successfully!")


Model, encoders, and scaler saved successfully!


In [164]:
# Load model, encoders, and scaler
model = joblib.load("sla_breach_model.pkl")
label_encoders = joblib.load("label_encoders.pkl")
scaler = joblib.load("scaler.pkl")

print("Model, encoders, and scaler loaded successfully!")


Model, encoders, and scaler loaded successfully!


In [165]:
def predict_sla_breach(priority, impact, urgency, category, assignment_group, incident_state, ticket_age):
    # Function to handle unseen labels
    def encode_with_fallback(label_encoder, value):
        if value in label_encoder.classes_:
            return label_encoder.transform([value])[0]
        else:
            return label_encoder.transform(["Unknown"])[0]  # Use fallback "Unknown"

    # Encode categorical features
    priority = encode_with_fallback(label_encoders["priority"], priority)
    impact = encode_with_fallback(label_encoders["impact"], impact)
    urgency = encode_with_fallback(label_encoders["urgency"], urgency)
    category = encode_with_fallback(label_encoders["category"], category)
    assignment_group = encode_with_fallback(label_encoders["assignment_group"], assignment_group)
    incident_state = encode_with_fallback(label_encoders["incident_state"], incident_state)

    # Scale numerical feature
    ticket_age = scaler.transform([[ticket_age]])[0][0]

    # Prepare input data
    X_new = [[priority, impact, urgency, category, assignment_group, incident_state, ticket_age]]

    # Predict SLA breach
    prediction = model.predict(X_new)[0]
    return "SLA Breached" if prediction == 1 else "SLA Met"



In [166]:
# Example Prediction
print(predict_sla_breach("3 - Moderate", "3 - Low", "1 - High", "Monitoring Event", "IO-Network Data Support", "Closed", 10))


SLA Met


