In [76]:
import pandas as pd
import numpy as np
from elasticsearch import Elasticsearch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from datetime import datetime

In [77]:
es_cloud_url="https://e6115e8808864a34b8547836bb0925b6.canadacentral.azure.elastic-cloud.com"
es_user="elastic"
es_pass="nI0YJM3GK202ejrodU52x0rP"

In [78]:
es = Elasticsearch(es_cloud_url,basic_auth=(es_user, es_pass))

In [81]:
# 2️⃣ Query ServiceNow Ticket Data from Elasticsearch
index_name = "snow-prd-task*"  
query = {
    "query": {"match_all": {}},  # Fetch all tickets (Modify as needed)
    "_source": [
        "priority", "impact", "urgency", "category", "assignment_group",
        "incident_state", "opened_at", "resolved_at", "made_sla"
    ]
}

response = es.search(index=index_name, body=query, size=20000)  # Fetch data
data = [hit["_source"] for hit in response["hits"]["hits"]]

  response = es.search(index=index_name, body=query, size=20000)  # Fetch data


ConnectionTimeout: Connection timed out

In [None]:
# data

In [None]:
# 3️⃣ Convert Data into a Pandas DataFrame
df = pd.DataFrame(data)

In [None]:
df.shape

(20000, 9)

In [None]:
df

Unnamed: 0,opened_at,made_sla,assignment_group,urgency,impact,priority,category,incident_state,resolved_at
0,2017-03-03 13:28:47,true,IO-Platform Services,3 - Medium,3 - Medium,3 - Moderate,,,
1,2017-02-24 13:11:09,true,IO-ASG,4 - Low,4 - Low,4 - Low,,,
2,2017-03-02 10:18:15,true,IO-System Integration Services,4 - Low,4 - Low,4 - Low,,,
3,2017-03-20 13:13:36,true,IO-Network Telecom,3 - Medium,3 - Medium,3 - Moderate,,,
4,2017-02-28 11:55:23,true,IO-ASG,3 - Medium,3 - Medium,3 - Moderate,,,
...,...,...,...,...,...,...,...,...,...
19995,2020-11-29 08:28:32,true,IO-Service Desk,3 - Low,3 - Low,4 - Low,Access Management,Closed,2020-12-02T09:16:09
19996,2020-11-29 00:42:19,true,IO-Platform Services,2 - Medium,3 - Low,4 - Low,Monitoring Event,Closed,2020-12-01T15:14:09
19997,2020-11-30 11:07:07,true,Compugen-DSSNAT,3 - Low,3 - Low,4 - Low,Networks,Closed,2020-11-30T14:36:07
19998,2020-11-30 08:09:19,true,IO-Platform Services,3 - Low,1 - High,3 - Moderate,Monitoring Event,Closed,2021-01-07T08:58:59


In [None]:
# 4️⃣ Data Preprocessing
# Convert timestamps to datetime
df["opened_at"] = pd.to_datetime(df["opened_at"])
df["resolved_at"] = pd.to_datetime(df["resolved_at"])

In [None]:
# Calculate ticket age (in hours)
df["ticket_age"] = (df["resolved_at"] - df["opened_at"]).dt.total_seconds() / 3600  # Convert to hours


In [None]:
# Drop null SLA values
df = df.dropna(subset=["made_sla"])

In [None]:
# Convert SLA result to binary (1 = Breached, 0 = Met SLA)
df["sla_breach"] = df["made_sla"].apply(lambda x: 0 if x == "true" else 1)
df = df.drop(columns=["made_sla"])

In [None]:
# Fill missing values in categorical columns with "Unknown"
for col in ["priority", "impact", "urgency", "category", "assignment_group", "incident_state"]:
    df[col] = df[col].fillna("Unknown")

In [None]:
# Fill missing values in numerical column (ticket_age) with median
df["ticket_age"] = df["ticket_age"].fillna(df["ticket_age"].median())

In [None]:
# df.loc[df["sla_breach"] ==1]

In [None]:
# Encode categorical variables
label_encoders = {}
for col in ["priority", "impact", "urgency", "category", "assignment_group", "incident_state"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

In [None]:
# Standardize numerical features
scaler = StandardScaler()
df["ticket_age"] = scaler.fit_transform(df[["ticket_age"]])

In [None]:
df

Unnamed: 0,opened_at,assignment_group,urgency,impact,priority,category,incident_state,resolved_at,ticket_age,sla_breach
0,2017-03-03 13:28:47,64,6,6,3,9,2,NaT,0.110366,0
1,2017-02-24 13:11:09,45,7,7,4,9,2,NaT,0.110366,0
2,2017-03-02 10:18:15,73,7,7,4,9,2,NaT,0.110366,0
3,2017-03-20 13:13:36,57,6,6,3,9,2,NaT,0.110366,0
4,2017-02-28 11:55:23,45,6,6,3,9,2,NaT,0.110366,0
...,...,...,...,...,...,...,...,...,...,...
19995,2020-11-29 08:28:32,71,5,5,4,1,1,2020-12-02 09:16:09,0.112885,0
19996,2020-11-29 00:42:19,64,4,5,4,7,1,2020-12-01 15:14:09,0.112520,0
19997,2020-11-30 11:07:07,39,5,5,4,8,1,2020-11-30 14:36:07,0.110417,0
19998,2020-11-30 08:09:19,64,5,2,3,7,1,2021-01-07 08:58:59,0.142798,0


In [None]:
# Define features and target variable
X = df[["priority", "impact", "urgency", "category", "assignment_group", "incident_state", "ticket_age"]]
y = df["sla_breach"]

In [None]:
# 5️⃣ Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# 6️⃣ Train Logistic Regression Model
model = LogisticRegression()
model.fit(X_train, y_train)

In [None]:
# 7️⃣ Evaluate Model
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9995
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      3998
           1       0.00      0.00      0.00         2

    accuracy                           1.00      4000
   macro avg       0.50      0.50      0.50      4000
weighted avg       1.00      1.00      1.00      4000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# 8️⃣ SLA Breach Prediction for a New Ticket
def predict_sla_breach(priority, impact, urgency, category, assignment_group, incident_state, ticket_age):
    """Predicts SLA breach for a given ticket."""
    input_data = pd.DataFrame([[priority, impact, urgency, category, assignment_group, incident_state, ticket_age]],
                              columns=["priority", "impact", "urgency", "category", "assignment_group", "incident_state", "ticket_age"])
    
    # Handle unseen categorical values by replacing with "Unknown"
    for col in ["priority", "impact", "urgency", "category", "assignment_group", "incident_state"]:
        if priority not in label_encoders[col].classes_:
            input_data[col] = "Unknown"
    
    # Encode categorical values
    for col in ["priority", "impact", "urgency", "category", "assignment_group", "incident_state"]:
        input_data[col] = label_encoders[col].transform([input_data[col][0]])

    # Scale ticket_age
    input_data["ticket_age"] = scaler.transform(input_data[["ticket_age"]])

    # Predict
    prediction = model.predict(input_data)[0]
    return "SLA Breach Expected" if prediction == 1 else "No SLA Breach"

In [None]:
# Example Prediction
print(predict_sla_breach("3 - Moderate", "3 - Low", "1 - High", "Monitoring Event", "IO-Network Data Support", "Closed", 10))



ValueError: y contains previously unseen labels: 'Unknown'