In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder

In [4]:
df=pd.read_csv('synthetic_dataset_V2_10000.csv')

In [6]:
df.head()

Unnamed: 0,Ticket ID,Customer Query,Summary,Assigned Department,Action Type,Priority,Sentiment,Timestamp,Resolution_Steps,Complexity_Score,Resolution_Time_Actual
0,T10001,My internet has been down for over 5 hours. Th...,My internet has been down for over 5 hours. Th...,Technical Support,Follow-Up,Medium,Negative,14-01-2024 02:55,Ran remote diagnostic on connection status. Fo...,5.62,81
1,T10002,"I'm interested in your Pro services, can you s...","I'm interested in your Pro services, can you s...",General Queries,Resolve at L1,Low,Positive,23-05-2024 04:59,Provided link to pricing page. Confirmed servi...,4.12,25
2,T10003,I demand to know why my promotional discount w...,I demand to know why my promotional discount w...,Billing,Resolve at L1,Low,Negative,30-06-2024 10:44,Updated credit card details on file. Confirmed...,7.0,27
3,T10004,"I'm interested in your Pro services, can you s...","I'm interested in your Pro services, can you s...",General Queries,Resolve at L1,Low,Positive,05-04-2024 03:25,Answered question about operating hours. Close...,4.12,21
4,T10005,What are your general customer support operati...,What are your general customer support operati...,General Queries,Resolve at L1,Medium,Neutral,06-01-2024 22:47,Answered question about operating hours. Close...,4.12,27


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 11 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Ticket ID               10000 non-null  object 
 1   Customer Query          10000 non-null  object 
 2   Summary                 10000 non-null  object 
 3   Assigned Department     10000 non-null  object 
 4   Action Type             10000 non-null  object 
 5   Priority                10000 non-null  object 
 6   Sentiment               10000 non-null  object 
 7   Timestamp               10000 non-null  object 
 8   Resolution_Steps        10000 non-null  object 
 9   Complexity_Score        10000 non-null  float64
 10  Resolution_Time_Actual  10000 non-null  int64  
dtypes: float64(1), int64(1), object(9)
memory usage: 859.5+ KB


In [8]:
df.shape


(10000, 11)

In [9]:
df.isnull().sum()

Ticket ID                 0
Customer Query            0
Summary                   0
Assigned Department       0
Action Type               0
Priority                  0
Sentiment                 0
Timestamp                 0
Resolution_Steps          0
Complexity_Score          0
Resolution_Time_Actual    0
dtype: int64

In [10]:
df.duplicated().sum()

np.int64(0)

In [11]:
df['Customer Query'] = df['Customer Query'].fillna('')

In [12]:
TARGET_COLS = ['Assigned Department', 'Priority', 'Sentiment', 'Action Type']
label_encoders = {}

for col in TARGET_COLS:
    le = LabelEncoder()
    df[col + '_Encoded'] = le.fit_transform(df[col])
    label_encoders[col] = le

In [13]:
X = df['Customer Query']
Y1 = df[['Assigned Department_Encoded', 'Priority_Encoded', 'Sentiment_Encoded']]

In [None]:
X_train, X_test, Y1_train, Y1_test = train_test_split(X, Y1, test_size=0.2, random_state=42)

model_1_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english', max_features=5000)),
    ('clf', MultiOutputClassifier(LogisticRegression(solver='liblinear', random_state=42)))
])

model_1_pipeline.fit(X_train, Y1_train)
#  model performance using model_1_pipeline.score(X_test, Y1_test)

In [15]:
Y2 = df['Action Type_Encoded']

In [16]:
X_train_act, X_test_act, Y2_train, Y2_test = train_test_split(X, Y2, test_size=0.2, random_state=42)

model_2_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english', max_features=5000)),
    ('clf', LogisticRegression(solver='liblinear', random_state=42))
])

model_2_pipeline.fit(X_train_act, Y2_train)

In [17]:
SLA_POLICY = {
    'High': "CRITICAL - SLA Breach Risk",
    'Medium': "Standard SLA Target",
    'Low': "Relaxed SLA Target"
}

In [None]:
da

In [18]:
def intelligent_router(query):
    # 1. Triage (Agent 1)
    pred_Y1 = model_1_pipeline.predict([query])[0]

    # Inverse transform the numeric output back to human-readable labels
    dept = label_encoders['Assigned Department'].inverse_transform([pred_Y1[0]])[0]
    priority = label_encoders['Priority'].inverse_transform([pred_Y1[1]])[0]
    sentiment = label_encoders['Sentiment'].inverse_transform([pred_Y1[2]])[0]

    # 2. Action Prediction (Agent 2)
    pred_Y2 = model_2_pipeline.predict([query])[0]
    action = label_encoders['Action Type'].inverse_transform([pred_Y2])[0]

    # 3. SLA Check (Agent 4 Logic)
    sla_status = SLA_POLICY.get(priority, "Undefined")

    # --- Basic Override Logic ---
    if priority == 'High' and action != 'Escalate':
        action_final = 'Escalate (SLA OVERRIDE)'
        sla_status = "URGENT OVERRIDE: High Priority Ticket Forced Escalation."
    else:
        action_final = action

    return {
        'Query': query,
        'Department': dept,
        'Priority': priority,
        'Sentiment': sentiment,
        'Action_Type_Final': action_final,
        'SLA_Status': sla_status
    }

In [19]:
NEW_TICKETS = [
    "My entire server is down, this is a CRITICAL emergency!",
    "I'd like to update my address and change my basic plan to premium."
]

for ticket in NEW_TICKETS:
    result = intelligent_router(ticket)
    print("\n")
    print(f" Query: {result['Query']}")
    print(f"Triage Tags (Agent 1):")
    print(f"Department: {result['Department']}")
    print(f"Priority: {result['Priority']}")
    print(f"Sentiment: {result['Sentiment']}")
    print("\n Final Routing (Agent 2 & 4):")
    print(f"Predicted Action: {result['Action_Type_Final']}")
    print(f"SLA Conclusion: {result['SLA_Status']}")



 Query: My entire server is down, this is a CRITICAL emergency!
Triage Tags (Agent 1):
Department: Technical Support
Priority: Medium
Sentiment: Negative

 Final Routing (Agent 2 & 4):
Predicted Action: Follow-Up
SLA Conclusion: Standard SLA Target


 Query: I'd like to update my address and change my basic plan to premium.
Triage Tags (Agent 1):
Department: Account Management
Priority: Low
Sentiment: Neutral

 Final Routing (Agent 2 & 4):
Predicted Action: Resolve at L1
SLA Conclusion: Relaxed SLA Target


In [11]:
# --- Agent 3: Resolution Recommendation (Similarity-Based) ---
from sklearn.neighbors import NearestNeighbors

# For this agent, we will model Resolution_Steps based on the Customer Query
# We use the entire dataset for the knowledge base
X_rec = df['Customer Query']
Y_rec = df['Resolution_Steps']

# 1. Vectorize the entire knowledge base (KB)
tfidf_rec = TfidfVectorizer(stop_words='english', max_features=5000)
X_rec_vectorized = tfidf_rec.fit_transform(X_rec)

# 2. Train a Nearest Neighbors model on the vectorized KB
# 'metric=cosine' makes it a Content-Based Recommender using cosine similarity
nn_model = NearestNeighbors(n_neighbors=5, metric='cosine', algorithm='brute')
nn_model.fit(X_rec_vectorized)

def resolution_recommender(query, n=3):
    """Predicts the top N most similar resolution steps."""
    query_vectorized = tfidf_rec.transform([query])
    # Find the N nearest neighbors (indices)
    # print(query_vectorized.shape())
    distances, indices = nn_model.kneighbors(query_vectorized, n_neighbors=n)

    recommendations = []
    for i, index in enumerate(indices[0]):
        recommendations.append({
            'Similarity_Score': 1 - distances[0][i], # 1 - distance = similarity
            'Ticket_ID': df.iloc[index]['Ticket ID'],
            'Recommended_Steps': df.iloc[index]['Resolution_Steps']
        })
    return recommendations



In [None]:
resolution_recommender("I need to reset my password immediately and I can't find the link.")

[{'Similarity_Score': np.float64(0.4023264638420738),
  'Ticket_ID': 'T12996',
  'Recommended_Steps': 'Checked plan details and billing cycle. Explained fee structure. No further action required.'},
 {'Similarity_Score': np.float64(0.4023264638420738),
  'Ticket_ID': 'T19804',
  'Recommended_Steps': 'Awaiting confirmation of address change from validation team. Contact customer for documentation.'},
 {'Similarity_Score': np.float64(0.4023264638420738),
  'Ticket_ID': 'T12980',
  'Recommended_Steps': 'Verified payment history; confirmed single charge was correct. Sent customer the payment confirmation receipt.'}]

: 

In [None]:
# --- Agent 4: Resolution Time Estimation (Regression) ---
from sklearn.linear_model import LinearRegression
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

# Define features for the regression model
REGRESSION_FEATURES = ['Complexity_Score', 'Priority_Encoded', 'Assigned Department_Encoded', 'Sentiment_Encoded']
TARGET_REG = 'Resolution_Time_Actual'

# Prepare the data
X_reg = df[REGRESSION_FEATURES]
Y_reg = df[TARGET_REG]

# Define preprocessor: Scale numerical
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['Complexity_Score']),
        ('cat', 'passthrough', ['Priority_Encoded', 'Assigned Department_Encoded', 'Sentiment_Encoded'])
    ],
    # remainder='passthrough'
)

# Build and train the regression pipeline
model_3_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()) # Simple Linear Regression for demonstration
])

model_3_pipeline.fit(X_reg, Y_reg)

def time_estimator(dept_enc, priority_enc, sentiment_enc, complexity):
    """Predicts Resolution Time in minutes."""
    input_data = pd.DataFrame([[complexity, priority_enc, dept_enc, sentiment_enc]],
                              columns=['Complexity_Score', 'Priority_Encoded', 'Assigned Department_Encoded', 'Sentiment_Encoded'])
    
    # Predict and ensure time is positive
    predicted_time = model_3_pipeline.predict(input_data)[0]
    return max(0, predicted_time)

In [22]:
# --- Final Multi-Agent Orchestration ---

def intelligent_router_multi_agent(query, complexity_score=5.0): # Default complexity for new query
    # 1. Triage (Agent 1)
    pred_Y1 = model_1_pipeline.predict([query])[0]
    dept_enc, priority_enc, sentiment_enc = pred_Y1[0], pred_Y1[1], pred_Y1[2]

    dept = label_encoders['Assigned Department'].inverse_transform([dept_enc])[0]
    priority = label_encoders['Priority'].inverse_transform([priority_enc])[0]
    sentiment = label_encoders['Sentiment'].inverse_transform([sentiment_enc])[0]

    # 2. Action Prediction (Agent 2)
    pred_Y2 = model_2_pipeline.predict([query])[0]
    action = label_encoders['Action Type'].inverse_transform([pred_Y2])[0]

    # 3. Resolution Recommendation (Agent 3)
    recommendations = resolution_recommender(query, n=1) # Get top 1 recommendation
    
    # 4. Resolution Time Estimation (Agent 4)
    # The complexity score is estimated/inputted manually here; in a real system, another agent would predict this.
    est_time = time_estimator(dept_enc, priority_enc, sentiment_enc, complexity_score)
    
    # 5. SLA Override Logic (Agent 4)
    sla_status = SLA_POLICY.get(priority, "Undefined")
    action_final = action
    
    if priority == 'High' and action != 'Escalate':
        action_final = 'Escalate (SLA OVERRIDE)'
        sla_status = "URGENT OVERRIDE: High Priority Ticket Forced Escalation."
    
    return {
        'Query': query,
        'Department': dept,
        'Priority': priority,
        'Sentiment': sentiment,
        'Predicted_Action': action_final,
        'Predicted_Resolution_Time_Minutes': f"{est_time:.2f}",
        'SLA_Status': sla_status,
        'Recommended_Resolution': recommendations[0]['Recommended_Steps'] if recommendations else "No historical match found."
    }



In [25]:
# --- Example of running the new multi-agent system ---

NEW_QUERY = "My internet has been down for over 5 hours. This is unacceptable service, especially since I work from home"
result = intelligent_router_multi_agent(NEW_QUERY, complexity_score=4.5)

print(f"\n Query: {result['Query']}")
print("----------------------------------------------------------------")
print(f" Department & Priority (Agent 1): {result['Department']} / {result['Priority']}")
print(f" Predicted Action (Agent 2): {result['Predicted_Action']}")
print(f" Estimated Time (Agent 4): {result['Predicted_Resolution_Time_Minutes']} minutes")
print(f" SLA Status: {result['SLA_Status']}")
print(f"\n Recommended Steps (Agent 3):\n{result['Recommended_Resolution']}")


 Query: My internet has been down for over 5 hours. This is unacceptable service, especially since I work from home
----------------------------------------------------------------
 Department & Priority (Agent 1): Technical Support / High
 Predicted Action (Agent 2): Escalate (SLA OVERRIDE)
 Estimated Time (Agent 4): 73.49 minutes
 SLA Status: URGENT OVERRIDE: High Priority Ticket Forced Escalation.

 Recommended Steps (Agent 3):
Identified potential bug with latest patch. Logged issue in Jira and provided workaround.
