<a href="https://colab.research.google.com/github/rohanb1985/failover_decision/blob/main/failover_decision_try_02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install pandas numpy scikit-learn



In [2]:
import logging
import numpy as np
import pandas as pd
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

## Configurations

### Logging configuration

In [3]:
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger("poc_phase1")

### Config: actions & tiers

In [4]:
ACTIONS = [
    "failover_to_secondary",
    "stay_in_primary_with_resilience",
    "queue_in_primary",
    "route_to_chatbot_or_self_service"
]

TIER_CONFIG = {
    "Bronze":   {"tier_weight": 1.0, "gamma": 0.4, "delta": 0.3, "zeta": 0.3},
    "Silver":   {"tier_weight": 1.2, "gamma": 0.6, "delta": 0.5, "zeta": 0.5},
    "Gold":     {"tier_weight": 1.5, "gamma": 0.8, "delta": 0.7, "zeta": 0.6},
    "Platinum": {"tier_weight": 2.0, "gamma": 1.0, "delta": 0.9, "zeta": 0.8},
}

INTENT_LABELS = ["payment_issue","cancellation","technical","billing","general_query"]
INTENT_WEIGHT = {"payment_issue":1.5,"cancellation":1.8,"technical":1.2,"billing":1.0,"general_query":0.6}

### Business value alphas

In [5]:
ALPHAS = {
    "alpha_order_value": 0.6,
    "alpha_clv": 0.2,
    "alpha_tier": 50.0,
    "alpha_urgency": 200.0,
    "alpha_sentiment": 100.0,
    "alpha_intent": 300.0
}

### Cost betas

In [6]:
BETAS = {"beta_handling_cost": 1.0, "beta_latency": 0.05, "beta_agent_avail": 100.0, "beta_queue": 0.5}

### Action dependent cost and latency overheads

In [7]:
ACTION_BASE_COST = {
    "failover_to_secondary": 50.0,
    "stay_in_primary_with_resilience": 20.0,
    "queue_in_primary": 5.0,
    "route_to_chatbot_or_self_service": 2.0
}
ACTION_LATENCY_OVERHEAD = {
    "failover_to_secondary": 150.0,
    "stay_in_primary_with_resilience": 50.0,
    "queue_in_primary": 200.0,
    "route_to_chatbot_or_self_service": 80.0
}
ACTION_AGENT_AVAIL_SHIFT = {
    "failover_to_secondary": -0.1,
    "stay_in_primary_with_resilience": 0.0,
    "queue_in_primary": 0.0,
    "route_to_chatbot_or_self_service": +0.2
}

### Escalation model weights

In [8]:
ESC_W = {"w_sentiment": 2.0, "w_urgency": 2.5, "w_intent": 1.5, "w_agent_exp": 1.0}

## Read input CSV

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [71]:
def load_csv_from_drive(file_path):
  """Loads a CSV file from Google Drive into a pandas DataFrame.

  Args:
    file_path: The path to the CSV file in Google Drive (e.g., 'My Drive/data/my_data.csv').

  Returns:
    A pandas DataFrame containing the data from the CSV file.
  """
  try:
    df = pd.read_csv(f'/content/drive/{file_path}', dtype=str).fillna("")  # load as string first
    print(f"Successfully loaded CSV from {file_path}")
    return df
  except FileNotFoundError:
    print(f"Error: File not found at {file_path}. Please check the path.")
    return None
  except Exception as e:
    print(f"Error: An error occurred while loading the CSV: {e}")
    return None

In [72]:
INPUT_CSV = "MyDrive/sample_interactions.csv"
df = load_csv_from_drive(INPUT_CSV)

Successfully loaded CSV from MyDrive/sample_interactions.csv


In [19]:
df.head(5)

Unnamed: 0,interaction_id,tenant,tenant_tier,channel,raw_text,order_value,clv,sla_time_left,base_queue_wait,region_health_primary,region_health_secondary,agent_experience
0,I_01,AcmeTel,Gold,voice,My payment failed twice and I need this order ...,2500.0,15000.0,120,30,0.45,0.88,0.6
1,I_02,QuickShop,Silver,chat,I want to change my delivery address for order...,150.0,1200.0,600,40,0.95,0.92,0.7
2,I_03,LearnX,Bronze,chat,How do I reset my password? I can't login to m...,,200.0,1800,15,0.98,0.95,0.8
3,I_04,AcmeTel,Gold,voice,My subscription was renewed but I didn't recei...,,,300,60,0.7,0.85,0.5
4,I_05,HealthNow,Platinum,voice,The system shows a billing error for claim 345...,5000.0,78000.0,90,10,0.65,0.8,0.9


In [73]:
# Convert numeric columns (some may be empty)
numeric_cols = ["order_value","clv","sla_time_left","base_queue_wait","region_health_primary","region_health_secondary","agent_experience"]
for c in numeric_cols:
    df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0.0)

print(f"Input CSV loaded: {len(df)} rows")

Input CSV loaded: 30 rows


## Apply pre-trained ML models for NLP feature extraction

In [23]:
from transformers import pipeline
import torch

In [24]:
print("Loading pre-trained Transformer models for intent, sentiment, and urgency...")

Loading pre-trained Transformer models for intent, sentiment, and urgency...


In [25]:
# Sentiment analysis model (returns 'label' and 'score')
# We'll map HuggingFace output: label ∈ {POSITIVE, NEGATIVE, NEUTRAL}, score ∈ [0,1]
sentiment_pipe = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Device set to use cpu


In [26]:
# Intent detection model — since no universal open intent model exists, we'll
#    use a zero-shot classification model and specify our known intent labels.
intent_labels = ["payment_issue", "cancellation", "technical", "billing", "general_query"]
intent_pipe = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


In [27]:
# Urgency detection — use a text-classification model fine-tuned on urgency detection,
#    or fallback to a zero-shot classification model with labels ["low", "medium", "high"].
urg_labels = ["low", "medium", "high"]
urg_pipe = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

Device set to use cpu


In [28]:
print("Pre-trained NLP pipelines loaded successfully.")

Pre-trained NLP pipelines loaded successfully.


## Inference and feature extraction

In [29]:
print("Deriving intent_type, sentiment_score, and urgency_score from raw_text...")

Deriving intent_type, sentiment_score, and urgency_score from raw_text...


### Derive sentiment

In [30]:
def get_sentiment_features(text):
    """Use pre-trained sentiment model to return sentiment label and numeric score."""
    try:
        res = sentiment_pipe(text, truncation=True)[0]
        lbl = res["label"].lower()
        scr = res["score"]
    except Exception as e:
        lbl, scr = "neutral", 0.0
    # Convert label and score into a signed sentiment score in [-1, 1]
    if "neg" in lbl:
        return lbl, -scr
    elif "pos" in lbl:
        return lbl, scr
    else:
        return lbl, 0.0

### Derive Intent

In [31]:
def get_intent_features(text):
    """Use zero-shot classification for intent detection."""
    try:
        res = intent_pipe(text, candidate_labels=intent_labels)
        best_idx = int(torch.tensor(res["scores"]).argmax())
        lbl = res["labels"][best_idx]
        scr = float(res["scores"][best_idx])
    except Exception as e:
        lbl, scr = "general_query", 0.0
    return lbl, scr

### Derive Urgency

In [32]:
def get_urgency_features(text):
    """Use zero-shot classification to derive urgency label and scaled numeric score."""
    try:
        res = urg_pipe(text, candidate_labels=urg_labels)
        best_idx = int(torch.tensor(res["scores"]).argmax())
        lbl = res["labels"][best_idx]
        scr = float(res["scores"][best_idx])
    except Exception as e:
        lbl, scr = "medium", 0.5
    # Map to [0,1] for internal model
    scale = {"low": 0.2, "medium": 0.5, "high": 0.9}
    return lbl, scale.get(lbl, 0.5)

### Apply models to dataset

In [74]:
# Apply models to dataset
intents, intent_scores, sents, sent_scores, urg_labels_out, urg_scores = [], [], [], [], [], []
for t in df["raw_text"]:
    i_lbl, i_scr = get_intent_features(t)
    s_lbl, s_scr = get_sentiment_features(t)
    u_lbl, u_scr = get_urgency_features(t)
    intents.append(i_lbl)
    intent_scores.append(i_scr)
    sents.append(s_lbl)
    sent_scores.append(s_scr)
    urg_labels_out.append(u_lbl)
    urg_scores.append(u_scr)

df["intent_type"] = intents
df["intent_conf"] = intent_scores
df["sentiment_label"] = sents
df["sentiment_score"] = sent_scores
df["urgency_label"] = urg_labels_out
df["urgency_score"] = urg_scores

In [75]:
print("NLP features derived using pre-trained models. Sample:")

NLP features derived using pre-trained models. Sample:


In [76]:
df[["interaction_id","raw_text","intent_type","intent_conf","sentiment_label","sentiment_score","urgency_label","urgency_score"]].head(6)

Unnamed: 0,interaction_id,raw_text,intent_type,intent_conf,sentiment_label,sentiment_score,urgency_label,urgency_score
0,I_01,My payment failed twice and I need this order ...,cancellation,0.696861,negative,-0.899431,high,0.9
1,I_02,I want to change my delivery address for order...,payment_issue,0.34609,neutral,0.0,medium,0.5
2,I_03,How do I reset my password? I can't login to m...,technical,0.890899,neutral,0.0,high,0.9
3,I_04,My subscription was renewed but I didn't recei...,billing,0.5883,neutral,0.0,medium,0.5
4,I_05,The system shows a billing error for claim 345...,billing,0.7744,negative,-0.798688,high,0.9
5,I_06,Is there a discount code I can use? Also what ...,payment_issue,0.425195,neutral,0.0,high,0.9


## Feature group calculators

### Calculate Intent weight

In [37]:
def calc_intent_weight(intent):
    return INTENT_WEIGHT.get(intent, 0.5)

### Calculate business value

    Calculate the expected business value for a given interaction and action.

    This function computes the total business value by aggregating multiple weighted factors
    that represent the strategic importance and revenue potential of an interaction. The
    calculation considers customer tier, order value, lifetime value, urgency, sentiment,
    and intent type, then applies action-specific penalties.

    The business value formula is:
        E[V] = alpha_order_value * order_value
             + alpha_clv * customer_lifetime_value
             + alpha_tier * tier_weight
             + alpha_urgency * urgency_score
             + alpha_sentiment * sentiment_score
             + alpha_intent * intent_weight
             - action_specific_penalty

    Parameters
    ----------
    row : dict or pandas.Series
        Interaction data containing:
        - tenant_tier: Customer tier level (Bronze, Silver, Gold, Platinum)
        - order_value: Current order/transaction value in currency units
        - clv: Customer lifetime value
        - urgency_score: Urgency score (0.0-1.0) derived from NLP model
        - sentiment_score: Sentiment score (-1.0 to 1.0) derived from NLP model
        - intent_type: Customer intent category (e.g., payment_issue, cancellation)
    action : str
        The action being evaluated, must be one of:
        - "failover_to_secondary"
        - "stay_in_primary_with_resilience"
        - "queue_in_primary"
        - "route_to_chatbot_or_self_service"

    Returns
    -------
    float
        The expected business value for this interaction-action pair.
        Higher values indicate greater strategic importance and revenue potential.

    Notes
    -----
    - If order_value is missing or zero, it's estimated as 10% of CLV
    - Action penalties reflect reduced value delivery for certain routing choices:
      * failover_to_secondary: -50 (minor service degradation)
      * stay_in_primary_with_resilience: 0 (no penalty)
      * queue_in_primary: -20 (delay impact)
      * route_to_chatbot_or_self_service: -200 (limited personalization)
    - Intent weights are applied via calc_intent_weight() to reflect issue complexity
    - Tier weights amplify value for premium customers (Bronze: 1.0, Platinum: 2.0)

In [60]:
def calculate_business_value(row, action):
    """
    E[V_i(a)] = alpha1*order_value + alpha2*clv + alpha3*tier_weight
                 + alpha4*urgency + alpha5*sentiment + alpha6*intent_weight
    Note: order_value may be NaN — use proxies
    """
    al = ALPHAS
    tier_w = TIER_CONFIG[row["tenant_tier"]]["tier_weight"]
    order_v = row["order_value"] if not np.isnan(row["order_value"]) else 0.0
    # proxy: if order missing, use 0.1*clv as instant value proxy
    if np.isnan(row["order_value"]):
        order_v = 0.1 * row["clv"]

    intent_w = calc_intent_weight(row["intent_type"])
    val = (al["alpha_order_value"] * order_v +
           al["alpha_clv"] * row["clv"] +
           al["alpha_tier"] * tier_w +
           al["alpha_urgency"] * row["urgency_score"] +
           al["alpha_sentiment"] * row["sentiment_score"] +
           al["alpha_intent"] * intent_w)
    # Optionally small action modifier (e.g., some actions slightly reduce realized value)
    action_value_penalty = {"failover_to_secondary": -50, "stay_in_primary_with_resilience": 0,
                            "queue_in_primary": -20, "route_to_chatbot_or_self_service": -200}
    val += action_value_penalty.get(action, 0)
    return float(val)

### Calculate Handling cost

    Calculate the expected handling cost for a given interaction and action.

    This function computes the total cost of handling an interaction based on multiple factors:
    - Base cost associated with the action
    - Queue wait time estimates
    - Latency overhead
    - Agent availability
    - Regional health conditions
    - Intent complexity

    The cost formula includes:
        E[C] = base_cost
             + beta_handling_cost * (handling_time_min * rate_per_min)
             + beta_latency * (latency_ms / 100.0)
             + beta_agent_avail * (1.0 - availability)
             + beta_queue * (queue_wait / 60.0)
        
        Final cost is adjusted by regional health factor.

    Parameters
    ----------
    row : dict or pandas.Series
        Interaction data containing:
        - base_queue_wait: Base queue wait time in seconds
        - region_health_primary: Health score of primary region (0.0-1.0)
        - region_health_secondary: Health score of secondary region (0.0-1.0)
        - intent_type: Type of customer intent (affects handling complexity)
    action : str
        The action being evaluated

    Returns
    -------
    float
        The expected handling cost for this interaction-action pair.
        Always returns a non-negative value.

    Notes
    -----
    - Different actions have different queue multipliers, latency overheads, and agent availability shifts
    - Handling time is influenced by intent complexity (weighted by calc_intent_weight)
    - Regional health degradation increases costs through the region_health_factor
    - Cost rates vary by action: chatbot/self-service (2/min), queue (5/min), others (10/min)

In [62]:
def calculate_handling_cost(row, action):
    """
    Simple cost model:
    expected_handling_cost = ACTION_BASE_COST[action] + (some function of clv/agent_availability/latency)
    E[C] = beta_handling_cost * base_cost + beta_latency * latency_ms + beta_agent_avail*(1-availability)
    We'll derive action-specific latency and availability from base_queue and region health.
    """
    base_cost = ACTION_BASE_COST[action]
    # latency: base queue wait (s) -> convert to ms roughly, plus action overhead
    # but queue_wait depends on whether action routes to primary or secondary or chatbot
    if action == "failover_to_secondary":
        queue_wait = row["base_queue_wait"] * 0.5  # assume failover has shorter queue in our sim
        latency_ms = queue_wait * 1000/2 + ACTION_LATENCY_OVERHEAD[action]  # scaled
        avail = max(0.05, 0.5 + ACTION_AGENT_AVAIL_SHIFT[action])  # synthetic
        # but account for region health effect
        avail *= row["region_health_secondary"]
    elif action == "stay_in_primary_with_resilience":
        queue_wait = row["base_queue_wait"]
        latency_ms = queue_wait * 1000/1 + ACTION_LATENCY_OVERHEAD[action]
        avail = max(0.05, 0.6 + ACTION_AGENT_AVAIL_SHIFT[action]) * row["region_health_primary"]
    elif action == "queue_in_primary":
        queue_wait = min(1800.0, row["base_queue_wait"] * 2)  # queuing longer
        latency_ms = queue_wait * 1000 + ACTION_LATENCY_OVERHEAD[action]
        avail = max(0.01, 0.5 + ACTION_AGENT_AVAIL_SHIFT[action]) * row["region_health_primary"]
    else:  # chatbot
        queue_wait = max(0.0, row["base_queue_wait"] * 0.2)
        latency_ms = queue_wait * 1000/4 + ACTION_LATENCY_OVERHEAD[action]
        avail = min(1.0, 0.9 + ACTION_AGENT_AVAIL_SHIFT[action])  # mostly available
        # chatbot less affected by region health (assume distributed)
        avail *= (0.9 + 0.1 * row["region_health_primary"])

    # Estimate handling_time minutes from queue_wait (s) and intent complexity
    handling_time_min = (queue_wait / 60.0) * (1.0 + (calc_intent_weight(row["intent_type"]) - 1.0) * 0.3)
    # cost per minute proxy (higher for backups due to infra)
    rate_per_min = 2.0 if action == "route_to_chatbot_or_self_service" else (5.0 if action == "queue_in_primary" else 10.0)
    # add region health penalty into cost
    region_health_factor = (1.0 + (1.0 - (row["region_health_secondary"] if action == "failover_to_secondary" else row["region_health_primary"])) * 0.5)
    expected_handling_cost = base_cost + BETAS["beta_handling_cost"] * (handling_time_min * rate_per_min) \
                             + BETAS["beta_latency"] * (latency_ms / 100.0) \
                             + BETAS["beta_agent_avail"] * (1.0 - avail) \
                             + BETAS["beta_queue"] * (queue_wait / 60.0)
    # scale by region health factor
    expected_handling_cost *= region_health_factor
    return float(max(0.0, expected_handling_cost))

### Calculate P_SLA

    Calculate the probability of SLA (Service Level Agreement) violation for a given interaction and action.

    This function estimates the likelihood that an interaction will violate its SLA based on
    the expected queue wait time versus the remaining SLA time. The probability increases
    as the ratio of estimated queue time to remaining SLA time grows, using a sigmoid
    function for smooth transition.

    The calculation involves:
    1. Estimating queue wait time based on action and base queue conditions
    2. Computing the ratio of estimated queue time to remaining SLA time
    3. Applying a sigmoid transformation to convert the ratio to a probability

    The sigmoid formula used is:
        P(SLA_violation) = 1 / (1 + exp(-3.0 * (ratio - 0.8)))

    Parameters
    ----------
    row : dict or pandas.Series
        Interaction data containing:
        - base_queue_wait: Base queue wait time in seconds
        - sla_time_left: Remaining time before SLA breach in seconds
    action : str
        The action being evaluated.

    Returns
    -------
    float
        Probability of SLA violation, bounded between 0.0 and 1.0.
        - Values near 0.0 indicate low risk of SLA breach
        - Values near 1.0 indicate high risk of SLA breach
        - The inflection point occurs at ratio ≈ 0.8 (80% of SLA time consumed)

    Notes
    -----
    - Default base_queue_wait is 30.0 seconds if missing or NaN
    - Default sla_time_left is 60.0 seconds if missing or NaN
    - The sigmoid steepness parameter (3.0) controls how sharply probability increases
    - The threshold (0.8) means 50% probability when estimated queue is 80% of remaining SLA time

In [40]:
def calculate_p_sla(row, action):
    """
    Heuristic for SLA miss probability:
    - compute sla_ratio = estimated_queue_wait / sla_time_left
    - map to probability: logistic-ish or min(1, ratio)
    We'll also reduce P_SLA for failover actions (assume they improve SLA).
    """
    base_queue = row["base_queue_wait"]
    if action == "failover_to_secondary":
        est_queue = base_queue * 0.5
    elif action == "queue_in_primary":
        est_queue = base_queue * 2.0
    elif action == "route_to_chatbot_or_self_service":
        est_queue = base_queue * 0.2
    else:
        est_queue = base_queue

    sla_ratio = est_queue / max(1.0, row["sla_time_left"])
    # logistic transform for nicer probabilities:
    p = 1.0 / (1.0 + np.exp(-3.0 * (sla_ratio - 0.8)))  # shift so ratio ~0.8 -> 50%
    p = float(np.clip(p, 0.0, 1.0))
    return p

### Calculate P_escalation

    Calculate the probability of escalation for a given interaction and action.

    This function estimates the likelihood that an interaction will require escalation to
    higher-tier support based on multiple risk factors including negative sentiment, urgency,
    intent complexity, and agent experience. The probability is computed using a weighted
    logistic model that combines these factors with action-specific adjustments.

    The escalation risk formula is:
        risk_input = w_sentiment * (-sentiment_score)
                   + w_urgency * urgency_score
                   + w_intent * (intent_weight - 1.0)
                   + w_agent_exp * (1.0 - agent_experience)
                   + action_specific_adjustments
        
        P(escalation) = 1 / (1 + exp(-0.8 * (risk_input - 1.0)))

    Parameters
    ----------
    row : dict or pandas.Series
        Interaction data containing:
        - intent_type: Type of customer intent (affects complexity weight)
        - sentiment_score: Sentiment score (-1.0 to 1.0) where negative indicates frustration
        - urgency_score: Urgency score (0.0-1.0) indicating time sensitivity
        - agent_experience: Agent experience level (0.0-1.0) where 1.0 is highly experienced
    action : str
        The action being evaluated

    Returns
    -------
    float
        Probability of escalation, bounded between 0.0 and 1.0.
        - Values near 0.0 indicate low escalation risk
        - Values near 1.0 indicate high escalation risk
        - The inflection point occurs at risk_input ≈ 1.0 (50% probability)

    Notes
    -----
    - Negative sentiment increases risk (inverted in calculation)
    - Intent weights from calc_intent_weight() reflect issue complexity
    - Less experienced agents increase escalation probability
    - Self-service routing increases risk for complex intents (e.g., payment_issue, cancellation)
    - Queuing increases risk proportionally to urgency
    - Default agent_experience is 0.8 if missing or NaN
    - The sigmoid steepness (0.8) controls how sharply probability increases with risk

In [66]:
def calculate_p_escalation(row, action):
    """
    Heuristic logistic model for escalation probability:
      input = w1*(-sentiment) + w2*urgency + w3*intent_weight + w4*(1-agent_experience)
      p = sigmoid(input_scaled)
    Some actions (chatbot) may increase escalation probability for complex intents.
    """
    #intent_w = calc_intent_weight(row["intent"])
    intent_w = calc_intent_weight(row["intent_type"])
    inp = (ESC_W["w_sentiment"] * (-row["sentiment_score"]) +
           ESC_W["w_urgency"] * row["urgency_score"] +
           ESC_W["w_intent"] * (intent_w - 1.0) +
           ESC_W["w_agent_exp"] * (1.0 - row["agent_experience"]))
    # action modifier: chatbots have higher risk for complex intents; queue may increase risk slightly
    if action == "route_to_chatbot_or_self_service":
        inp += 0.5 * max(0.0, intent_w - 1.0)
    elif action == "queue_in_primary":
        inp += 0.3 * row["urgency_score"]
    # map to probability
    p = 1.0 / (1.0 + np.exp(-0.8 * (inp - 1.0)))  # shifted logistic
    p = float(np.clip(p, 0.0, 1.0))
    return p

### Calculate Health Penalty

    Calculate the health penalty score for a given interaction and action based on regional health.

    This function computes a penalty score that reflects the degradation or unavailability
    of infrastructure in the target region for the specified action. The penalty increases
    as regional health decreases, representing additional risk, latency, or service quality
    issues when routing to degraded regions.

    The health penalty is calculated as:
        penalty = 1.0 - regional_health_score
    
    where regional_health_score ranges from 0.0 (complete outage) to 1.0 (fully healthy).

    Parameters
    ----------
    row : dict or pandas.Series
        Interaction data containing:
        - region_health_primary: Health score of primary region (0.0-1.0)
        - region_health_secondary: Health score of secondary region (0.0-1.0)
    action : str
        The action being evaluated. Determines which region's health to consider:
        - "failover_to_secondary": Uses region_health_secondary
        - All other actions: Use region_health_primary

    Returns
    -------
    float
        Health penalty score bounded between 0.0 and 1.0.
        - 0.0 indicates perfect health (no penalty)
        - 1.0 indicates complete outage (maximum penalty)
        - Higher values indicate greater infrastructure risk

    Notes
    -----
    - Default health score is 1.0 (fully healthy) if the value is missing or NaN
    - This penalty is scaled by the tier-specific zeta coefficient in the final reward calculation
    - Failover actions are penalized based on secondary region health
    - Primary-based actions (stay, queue, chatbot) are penalized based on primary region health

In [42]:
def calculate_health_penalty(row, action):
    """
    Health score is action-dependent: target region health used.
    Health_i(a) in our formula is normalized 0..1 (higher = worse? earlier we used 1 healthy)
    NOTE: In earlier slides we used Health where 0 healthy -> 1 bad; here we will
    compute HealthPenalty such that larger value reduces reward (so use (1 - health) scale).
    We'll return Health_i(a) = (1 - region_health) scaled.
    """
    if action == "failover_to_secondary":
        health = row["region_health_secondary"]
    else:
        health = row["region_health_primary"]
    # convert health to penalty-scale: 0 (healthy) -> 0, 1 (down) -> 1
    score = float(np.clip(1.0 - health, 0.0, 1.0))
    return score

## Reward calculator

In [50]:
def compute_reward_for_row(row_action):
    """
    row_action: a Series-like dict that includes original row fields and 'action'
    Returns detailed components and total reward
    """
    tenant = row_action["tenant"]
    cfg = TIER_CONFIG.get(row_action["tenant_tier"], TIER_CONFIG["Bronze"])
    gamma = cfg["gamma"]
    delta = cfg["delta"]
    zeta = cfg["zeta"]
    tier_weight = cfg["tier_weight"]

    # Business value
    ev = calculate_business_value(row_action, row_action["action"])

    # Cost
    ec = calculate_handling_cost(row_action, row_action["action"])

    # SLA miss probability and penalty term
    p_sla = calculate_p_sla(row_action, row_action["action"])
    sla_penalty = gamma * p_sla * tier_weight * 1000.0  # multiply to convert to rupee-like penalty scale

    # Escalation probability and penalty
    p_esc = calculate_p_escalation(row_action, row_action["action"])
    esc_penalty = delta * p_esc * 1000.0  # scale

    # Health penalty
    health_score = calculate_health_penalty(row_action, row_action["action"])
    health_penalty = zeta * health_score * 1000.0  # scale

    # Total reward
    R = ev - ec - sla_penalty - esc_penalty - health_penalty

    return {
        "E_V": round(ev, 2),
        "E_C": round(ec, 2),
        "P_SLA": round(p_sla, 4),
        "SLA_penalty": round(sla_penalty, 2),
        "P_Esc": round(p_esc, 4),
        "Esc_penalty": round(esc_penalty, 2),
        "Health_score": round(health_score, 4),
        "Health_penalty": round(health_penalty, 2),
        "Reward": round(R, 2)
    }

## Main demo flow

In [48]:
df.head(3)

Unnamed: 0,interaction_id,tenant,tenant_tier,channel,raw_text,order_value,clv,sla_time_left,base_queue_wait,region_health_primary,region_health_secondary,agent_experience,intent_type,intent_conf,sentiment_label,sentiment_score,urgency_label,urgency_score
0,I_01,AcmeTel,Gold,voice,My payment failed twice and I need this order ...,2500.0,15000.0,120,30,0.45,0.88,0.6,cancellation,0.696861,negative,-0.899431,high,0.9
1,I_02,QuickShop,Silver,chat,I want to change my delivery address for order...,150.0,1200.0,600,40,0.95,0.92,0.7,payment_issue,0.34609,neutral,0.0,medium,0.5
2,I_03,LearnX,Bronze,chat,How do I reset my password? I can't login to m...,,200.0,1800,15,0.98,0.95,0.8,technical,0.890899,neutral,0.0,high,0.9


In [77]:
rows = []
total_interactions = len(df)
for idx, r in df.iterrows():
    # Convert Series to dict for calculators
    base_row = r.to_dict()
    # Provide keys as expected by calculate functions
    base_row["tenant_tier"] = r["tenant_tier"]
    base_row["tenant"] = r["tenant"]
    for action in ACTIONS:
        row_action = base_row.copy()
        row_action["action"] = action
        comps = compute_reward_for_row(row_action)
        entry = {
            "interaction_id": r["interaction_id"],
            "tenant": r["tenant"],
            "tenant_tier": r["tenant_tier"],
            "channel": r["channel"],
            "raw_text": r["raw_text"],
            "action": action,
            "order_value": r["order_value"],
            "clv": r["clv"],
            "intent_type": r["intent_type"],
            "intent_conf": r["intent_conf"],
            "sentiment_score": r["sentiment_score"],
            "urgency_score": r["urgency_score"],
            "sla_time_left": r["sla_time_left"],
            "base_queue_wait": r["base_queue_wait"],
            "region_health_primary": r["region_health_primary"],
            "region_health_secondary": r["region_health_secondary"],
            "agent_experience": r["agent_experience"],
            **comps
        }
        rows.append(entry)

    # Print progress every 10 interactions or at the last one
    if (idx + 1) % 10 == 0 or (idx + 1) == total_interactions:
        logger.info(f"Processed {idx + 1}/{total_interactions} interactions...")

In [78]:
interaction_action_df = pd.DataFrame(rows)

In [79]:
interaction_action_df.head(3)

Unnamed: 0,interaction_id,tenant,tenant_tier,channel,raw_text,action,order_value,clv,intent_type,intent_conf,...,agent_experience,E_V,E_C,P_SLA,SLA_penalty,P_Esc,Esc_penalty,Health_score,Health_penalty,Reward
0,I_01,AcmeTel,Gold,voice,My payment failed twice and I need this order ...,failover_to_secondary,2500.0,15000.0,cancellation,0.696861,...,0.6,5155.06,129.16,0.1166,139.92,0.9763,683.42,0.12,72.0,4130.55
1,I_01,AcmeTel,Gold,voice,My payment failed twice and I need this order ...,stay_in_primary_with_resilience,2500.0,15000.0,cancellation,0.696861,...,0.6,5205.06,145.96,0.1611,193.33,0.9763,683.42,0.55,330.0,3852.35
2,I_01,AcmeTel,Gold,voice,My payment failed twice and I need this order ...,queue_in_primary,2500.0,15000.0,cancellation,0.696861,...,0.6,5185.06,152.11,0.2891,346.86,0.9808,686.58,0.55,330.0,3669.51


In [80]:
# -----------------------
# Select best action per interaction
# -----------------------
best = interaction_action_df.loc[interaction_action_df.groupby("interaction_id")["Reward"].idxmax()].copy()
best = best.sort_values("interaction_id").reset_index(drop=True)
best = best.rename(columns={"action": "chosen_action", "Reward": "chosen_reward"})

In [81]:
best

Unnamed: 0,interaction_id,tenant,tenant_tier,channel,raw_text,chosen_action,order_value,clv,intent_type,intent_conf,...,agent_experience,E_V,E_C,P_SLA,SLA_penalty,P_Esc,Esc_penalty,Health_score,Health_penalty,chosen_reward
0,I_01,AcmeTel,Gold,voice,My payment failed twice and I need this order ...,failover_to_secondary,2500.0,15000.0,cancellation,0.696861,...,0.6,5155.06,129.16,0.1166,139.92,0.9763,683.42,0.12,72.0,4130.55
1,I_02,QuickShop,Silver,chat,I want to change my delivery address for order...,stay_in_primary_with_resilience,150.0,1200.0,payment_issue,0.34609,...,0.7,940.0,93.3,0.0998,71.82,0.7389,369.43,0.05,25.0,380.45
2,I_03,LearnX,Bronze,chat,How do I reset my password? I can't login to m...,stay_in_primary_with_resilience,0.0,200.0,technical,0.890899,...,0.8,630.0,72.22,0.0851,34.04,0.8022,240.66,0.02,6.0,277.09
3,I_04,AcmeTel,Gold,voice,My subscription was renewed but I didn't recei...,failover_to_secondary,0.0,0.0,billing,0.5883,...,0.5,425.0,138.49,0.1091,130.92,0.6457,451.96,0.15,90.0,-386.36
4,I_05,HealthNow,Platinum,voice,The system shows a billing error for claim 345...,failover_to_secondary,5000.0,78000.0,billing,0.7744,...,0.9,19050.13,132.22,0.0968,193.59,0.9136,822.2,0.2,160.0,17742.11
5,I_06,QuickShop,Silver,chat,Is there a discount code I can use? Also what ...,stay_in_primary_with_resilience,100.0,320.0,payment_issue,0.425195,...,0.7,814.0,85.62,0.0881,63.41,0.8629,431.47,0.08,40.0,193.5
6,I_07,FoodZoom,Bronze,chat,My order is missing an item. Please arrange re...,stay_in_primary_with_resilience,400.0,800.0,cancellation,0.349186,...,0.6,1096.03,105.36,0.102,40.81,0.9696,290.89,0.12,36.0,622.97
7,I_08,LearnX,Bronze,voice,The application crashes when I click 'Submit' ...,stay_in_primary_with_resilience,0.0,300.0,technical,0.802969,...,0.4,562.73,80.65,0.1091,43.64,0.9576,287.27,0.07,21.0,130.17
8,I_09,AcmeTel,Gold,chat,I need to cancel my subscription immediately,failover_to_secondary,0.0,22000.0,cancellation,0.777091,...,0.4,5058.26,144.4,0.1166,139.92,0.9787,685.1,0.3,180.0,3908.84
9,I_10,HealthNow,Platinum,voice,"Claim denied incorrectly, this is urgent — I n...",failover_to_secondary,0.0,0.0,payment_issue,0.327318,...,0.95,598.49,152.64,0.1043,208.66,0.95,854.99,0.4,320.0,-937.79
