<a href="https://colab.research.google.com/github/rohanb1985/failover_decision/blob/main/try_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd

# Seed NumPy's global RNG so the np.random draws that build the synthetic
# dataset produce the same values on every fresh run.
np.random.seed(42)

## Configuration & constants

Actions

In [4]:
# Candidate routing actions that are scored for every interaction.
# Order is significant: interactions are expanded in this order, and when two
# actions tie on Reward the earlier one is chosen (idxmax keeps the first maximum).
ACTIONS = [
    "failover_to_secondary",
    "stay_in_primary_with_resilience",
    "queue_in_primary",
    "route_to_chatbot_or_self_service"
]

Tenant tiers and their policy coefficients (γ, δ, ζ), plus a per-tier `tier_weight`:
- gamma (γ): SLA sensitivity
- delta (δ): escalation sensitivity
- zeta (ζ): health sensitivity


In [5]:
# Per-tier policy knobs: tier_weight plus the penalty sensitivities
# gamma (SLA), delta (escalation) and zeta (region health).
# Key order is significant: the synthetic-data generator pairs its tenant
# sampling probabilities with these keys by position.
TIER_CONFIG = {
    "Bronze": dict(tier_weight=1.0, gamma=0.4, delta=0.3, zeta=0.3),
    "Silver": dict(tier_weight=1.2, gamma=0.6, delta=0.5, zeta=0.5),
    "Gold": dict(tier_weight=1.5, gamma=0.8, delta=0.7, zeta=0.6),
    "Platinum": dict(tier_weight=2.0, gamma=1.0, delta=0.9, zeta=0.8),
}

Intent weights: how important each intent is to the business-value and escalation heuristics.

In [6]:
# Relative importance of each intent; 1.0 is the neutral reference used by the
# cost and escalation heuristics (they work with `weight - 1.0`).
# Key order is significant: the synthetic-data generator pairs its intent
# sampling probabilities with these keys by position.
INTENT_WEIGHT = dict(
    payment_issue=1.5,
    cancellation=1.8,
    technical=1.2,
    billing=1.0,
    general_query=0.6,
)

Coefficients for the simplified business value linear model (alpha)

In [7]:
# Linear coefficients of the business-value model E[V]; each one multiplies the
# feature named in its key inside calculate_business_value.
ALPHAS = {
    "alpha_order_value": 0.6,  # applied to order_value (or its CLV-based proxy when missing)
    "alpha_clv": 0.2,  # applied to customer lifetime value
    "alpha_tier": 50.0,    # multiplied by the tenant's tier_weight
    "alpha_urgency": 200.0, # urgency is drawn from 0..1
    "alpha_sentiment": 100.0, # sentiment is drawn from -1..1, so negative sentiment lowers value
    "alpha_intent": 300.0    # multiplied by the intent weight
}

Coefficients for cost model (betas) — these are used to compute E[C]

In [8]:
# Coefficients of the handling-cost model E[C] (see calculate_handling_cost).
BETAS = {
    "beta_handling_cost": 1.0,  # multiplies handling_time_min * rate_per_min
    "beta_latency": 0.05,   # multiplies latency_ms / 100
    "beta_agent_avail": 100.0, # multiplies (1 - availability)
    "beta_queue": 0.5  # multiplies the queue wait expressed in minutes
}

Weights of the escalation-probability logistic model (sentiment, urgency, intent, agent experience)

In [9]:
# Weights of the linear score fed to the escalation sigmoid.
ESC_W = {
    "w_sentiment": 2.0,   # applied to -sentiment (unhappier -> higher risk)
    "w_urgency": 2.5,     # applied to urgency
    "w_intent": 1.5,      # applied to (intent weight - 1.0)
    "w_agent_exp": 1.0,   # applied to (1 - agent_experience)
}

Value ranges used when generating the synthetic dataset

In [10]:
# Bounds for the uniform draws that populate the synthetic interactions.
ORDER_MIN, ORDER_MAX = 100, 10000  # rupees
CLV_MIN, CLV_MAX = 500, 200000  # customer lifetime value; presumably rupees too — TODO confirm
URG_MIN, URG_MAX = 0.0, 1.0  # urgency score
SENT_MIN, SENT_MAX = -1.0, 1.0  # sentiment score
# NOTE(review): HEALTH_MIN/HEALTH_MAX are not referenced in the visible cells; the
# generator draws region health from hard-coded 0.2..1.0 / 0.3..1.0 ranges instead.
HEALTH_MIN, HEALTH_MAX = 0.0, 1.0  # 1 = healthy, 0 = down

Per-action base cost, latency overhead (ms), and agent-availability shift (all action-dependent)

In [11]:
# Fixed cost added once to the expected handling cost of each action.
ACTION_BASE_COST = {
    "failover_to_secondary": 50.0,
    "stay_in_primary_with_resilience": 20.0,
    "queue_in_primary": 5.0,
    "route_to_chatbot_or_self_service": 2.0
}
# Additive latency overhead (not a multiplier) added to each action's latency estimate.
ACTION_LATENCY_OVERHEAD = {
    "failover_to_secondary": 150.0,   # ms extra
    "stay_in_primary_with_resilience": 50.0,
    "queue_in_primary": 200.0,
    "route_to_chatbot_or_self_service": 80.0
}
# Additive shift applied to each action's baseline agent availability
# before it is clamped and scaled by region health.
ACTION_AGENT_AVAIL_SHIFT = {
    "failover_to_secondary": -0.1,  # maybe slightly fewer agents in backup
    "stay_in_primary_with_resilience": 0.0,
    "queue_in_primary": 0.0,
    "route_to_chatbot_or_self_service": +0.2  # chatbot often has higher "availability"
}

## Synthetic dataset creation

In [12]:
def generate_synthetic_interactions(n_interactions=30):
    """
    Build a DataFrame with one synthetic interaction per row.

    Every interaction gets a random tenant tier, intent and channel plus
    uniformly drawn numeric context (CLV, urgency, sentiment, SLA time left,
    queue wait, region health, agent experience). Roughly 40% of rows carry an
    order_value; the rest hold NaN. All randomness comes from NumPy's global
    RNG, and the draws happen in a fixed order per row so that a given seed
    always yields the same dataset.

    Args:
        n_interactions: number of rows to generate.

    Returns:
        pandas.DataFrame with ids "I_1" .. "I_<n_interactions>".
    """
    tier_names = list(TIER_CONFIG.keys())
    intent_names = list(INTENT_WEIGHT.keys())
    channel_names = ["voice", "chat", "email", "bot"]

    def draw(low, high):
        # One uniform sample, rounded to 2 decimals, as a plain Python float.
        return float(np.round(np.random.uniform(low, high), 2))

    records = []
    for idx in range(n_interactions):
        record = {"interaction_id": f"I_{idx+1}"}
        # Probabilities are positional: they follow the key order of the config dicts.
        record["tenant"] = np.random.choice(tier_names, p=[0.25, 0.3, 0.25, 0.2])
        record["intent"] = np.random.choice(intent_names, p=[0.2, 0.15, 0.25, 0.25, 0.15])
        record["channel"] = np.random.choice(channel_names)

        # Decide first whether this interaction has an order (40%), then draw its value.
        has_order_value = np.random.rand() < 0.4
        record["order_value"] = draw(ORDER_MIN, ORDER_MAX) if has_order_value else np.nan

        record["clv"] = draw(CLV_MIN, CLV_MAX)
        record["urgency"] = draw(URG_MIN, URG_MAX)
        record["sentiment"] = draw(SENT_MIN, SENT_MAX)
        record["sla_time_left"] = draw(10, 600)  # seconds left
        record["base_queue_wait"] = draw(5, 400)  # seconds
        record["region_health_primary"] = draw(0.2, 1.0)
        record["region_health_secondary"] = draw(0.3, 1.0)
        # agent experience (0..1, 1 = most experienced)
        record["agent_experience"] = draw(0.3, 1.0)

        records.append(record)
    return pd.DataFrame(records)

## Feature group calculators

### Calculate Intent weight

In [13]:
def calc_intent_weight(intent):
    """Return the configured weight for `intent`; unknown intents fall back to 0.5."""
    fallback_weight = 0.5
    return INTENT_WEIGHT.get(intent, fallback_weight)

### Calculate business value

In [14]:
def calculate_business_value(row, action):
    """
    Expected business value E[V_i(a)] of handling one interaction with `action`.

    E[V_i(a)] = alpha_order_value*order_value + alpha_clv*clv + alpha_tier*tier_weight
                + alpha_urgency*urgency + alpha_sentiment*sentiment
                + alpha_intent*intent_weight + action_modifier(a)

    Args:
        row: mapping (dict or Series) with keys "tenant", "intent", "order_value",
            "clv", "urgency" and "sentiment".
        action: action name; actions without a configured modifier get 0.

    Returns:
        The value as a plain float.

    Raises:
        KeyError: if a required field is missing or the tenant is not in TIER_CONFIG.
    """
    al = ALPHAS
    tier_w = TIER_CONFIG[row["tenant"]]["tier_weight"]

    # A missing order value (NaN or None) is replaced by a proxy: 10% of the CLV.
    order_v = row["order_value"]
    if pd.isna(order_v):
        order_v = 0.1 * row["clv"]

    intent_w = calc_intent_weight(row["intent"])
    val = (al["alpha_order_value"] * order_v +
           al["alpha_clv"] * row["clv"] +
           al["alpha_tier"] * tier_w +
           al["alpha_urgency"] * row["urgency"] +
           al["alpha_sentiment"] * row["sentiment"] +
           al["alpha_intent"] * intent_w)

    # Small action modifier: some actions slightly reduce the realized value.
    action_value_penalty = {"failover_to_secondary": -50, "stay_in_primary_with_resilience": 0,
                            "queue_in_primary": -20, "route_to_chatbot_or_self_service": -200}
    val += action_value_penalty.get(action, 0)
    return float(val)

### Calculate Handling cost

In [15]:
def calculate_handling_cost(row, action):
    """
    Expected handling cost E[C] of serving one interaction with `action`.

    As implemented:
        E[C] = (base_cost
                + beta_handling_cost * handling_time_min * rate_per_min
                + beta_latency * latency_ms / 100
                + beta_agent_avail * (1 - avail)
                + beta_queue * queue_wait / 60) * region_health_factor

    queue_wait (seconds), latency_ms and avail are derived per action from the
    row's base_queue_wait and the health of the region the action targets.

    Args:
        row: mapping with "base_queue_wait", "intent", "region_health_primary"
            and "region_health_secondary".
        action: must be a key of ACTION_BASE_COST (otherwise KeyError); any key
            other than the three named branches is treated as the chatbot route.

    Returns:
        Non-negative cost as a plain float.
    """
    base_cost = ACTION_BASE_COST[action]
    # Latency is derived from the queue wait (s -> ms, with an action-specific
    # divisor) plus the action's fixed overhead. The queue wait itself depends on
    # whether the action routes to primary, secondary or the chatbot.
    if action == "failover_to_secondary":
        queue_wait = row["base_queue_wait"] * 0.5  # assume failover has shorter queue in our sim
        latency_ms = queue_wait * 1000/2 + ACTION_LATENCY_OVERHEAD[action]  # scaled
        avail = max(0.05, 0.5 + ACTION_AGENT_AVAIL_SHIFT[action])  # synthetic baseline, floored at 0.05
        # availability scales with the health of the secondary region
        avail *= row["region_health_secondary"]
    elif action == "stay_in_primary_with_resilience":
        queue_wait = row["base_queue_wait"]
        latency_ms = queue_wait * 1000/1 + ACTION_LATENCY_OVERHEAD[action]
        avail = max(0.05, 0.6 + ACTION_AGENT_AVAIL_SHIFT[action]) * row["region_health_primary"]
    elif action == "queue_in_primary":
        queue_wait = min(1800.0, row["base_queue_wait"] * 2)  # queuing longer, capped at 1800 s
        latency_ms = queue_wait * 1000 + ACTION_LATENCY_OVERHEAD[action]
        avail = max(0.01, 0.5 + ACTION_AGENT_AVAIL_SHIFT[action]) * row["region_health_primary"]
    else:  # chatbot
        queue_wait = max(0.0, row["base_queue_wait"] * 0.2)
        latency_ms = queue_wait * 1000/4 + ACTION_LATENCY_OVERHEAD[action]
        avail = min(1.0, 0.9 + ACTION_AGENT_AVAIL_SHIFT[action])  # mostly available; capped at 1.0
        # chatbot less affected by region health (assume distributed)
        avail *= (0.9 + 0.1 * row["region_health_primary"])

    # Estimate handling time (minutes) from queue_wait (s); intents with weight
    # above 1.0 lengthen it, those below 1.0 shorten it.
    handling_time_min = (queue_wait / 60.0) * (1.0 + (calc_intent_weight(row["intent"]) - 1.0) * 0.3)
    # cost-per-minute proxy: 2.0 for the chatbot, 5.0 for queueing, 10.0 for every other action
    rate_per_min = 2.0 if action == "route_to_chatbot_or_self_service" else (5.0 if action == "queue_in_primary" else 10.0)
    # region health penalty: 1.0 when the target region is fully healthy, up to 1.5 when it is down
    region_health_factor = (1.0 + (1.0 - (row["region_health_secondary"] if action == "failover_to_secondary" else row["region_health_primary"])) * 0.5)
    # Note: base_cost is added as-is; beta_handling_cost only scales the time-based term.
    expected_handling_cost = base_cost + BETAS["beta_handling_cost"] * (handling_time_min * rate_per_min) \
                             + BETAS["beta_latency"] * (latency_ms / 100.0) \
                             + BETAS["beta_agent_avail"] * (1.0 - avail) \
                             + BETAS["beta_queue"] * (queue_wait / 60.0)
    # scale the whole cost by the region health factor
    expected_handling_cost *= region_health_factor
    return float(max(0.0, expected_handling_cost))

### Calculate P_SLA

In [16]:
def calculate_p_sla(row, action):
    """
    Heuristic probability that the SLA is missed when `action` is taken.

    The base queue wait is scaled per action (failover halves it, queueing
    doubles it, the chatbot cuts it to 20%, anything else leaves it unchanged),
    divided by the SLA time left (floored at 1 second) and passed through a
    logistic curve centred on a ratio of 0.8.

    Args:
        row: mapping with "base_queue_wait" and "sla_time_left".
        action: action name.

    Returns:
        Probability in [0, 1] as a plain float.
    """
    queue_multiplier = {
        "failover_to_secondary": 0.5,
        "queue_in_primary": 2.0,
        "route_to_chatbot_or_self_service": 0.2,
    }.get(action, 1.0)

    expected_wait = row["base_queue_wait"] * queue_multiplier
    time_budget = max(1.0, row["sla_time_left"])
    ratio = expected_wait / time_budget

    # Logistic transform: a ratio of 0.8 maps to a 50% miss probability.
    miss_probability = 1.0 / (1.0 + np.exp(-3.0 * (ratio - 0.8)))
    return float(np.clip(miss_probability, 0.0, 1.0))

### Calculate P_escalation

In [17]:
def calculate_p_escalation(row, action):
    """
    Heuristic logistic model for the probability that an interaction escalates.

    score = w_sentiment*(-sentiment) + w_urgency*urgency
            + w_intent*(intent_weight - 1) + w_agent_exp*(1 - agent_experience)
            + action-specific extra risk
    p = sigmoid(0.8 * (score - 1))

    The chatbot route adds risk for intents weighted above 1.0, and queueing in
    primary adds risk proportional to urgency.

    Args:
        row: mapping with "intent", "sentiment", "urgency" and "agent_experience".
        action: action name.

    Returns:
        Probability in [0, 1] as a plain float.
    """
    complexity = calc_intent_weight(row["intent"]) - 1.0

    sentiment_term = ESC_W["w_sentiment"] * (-row["sentiment"])
    urgency_term = ESC_W["w_urgency"] * row["urgency"]
    intent_term = ESC_W["w_intent"] * complexity
    experience_term = ESC_W["w_agent_exp"] * (1.0 - row["agent_experience"])
    score = sentiment_term + urgency_term + intent_term + experience_term

    # Action modifier: chatbots are riskier for complex intents; queueing is
    # slightly riskier for urgent interactions.
    if action == "route_to_chatbot_or_self_service":
        score += 0.5 * max(0.0, complexity)
    elif action == "queue_in_primary":
        score += 0.3 * row["urgency"]

    # Shifted logistic mapping of the score to a probability.
    probability = 1.0 / (1.0 + np.exp(-0.8 * (score - 1.0)))
    return float(np.clip(probability, 0.0, 1.0))

### Calculate Health Penalty

In [18]:
def calculate_health_penalty(row, action):
    """
    Health penalty score for the region that `action` would use.

    Region health is stored as 1 = healthy, 0 = down. The reward formula needs a
    value that grows as things get worse, so this returns (1 - health) clipped
    to [0, 1]. Failover reads the secondary region's health; every other action
    reads the primary region's.

    Args:
        row: mapping with "region_health_primary" and "region_health_secondary".
        action: action name.

    Returns:
        Penalty score in [0, 1] as a plain float (0 = healthy, 1 = down).
    """
    health_field = (
        "region_health_secondary"
        if action == "failover_to_secondary"
        else "region_health_primary"
    )
    unhealthiness = 1.0 - row[health_field]
    return float(np.clip(unhealthiness, 0.0, 1.0))

## Reward calculator

In [19]:
def compute_reward_for_row(row_action):
    """
    Score one (interaction, action) pair.

    Reward = E_V - E_C - SLA_penalty - Esc_penalty - Health_penalty, where each
    penalty is a probability/score weighted by the tenant tier's sensitivity and
    multiplied by 1000 to bring it onto a rupee-like scale. The SLA penalty is
    additionally multiplied by the tier weight.

    Args:
        row_action: mapping holding the interaction fields plus an "action" key.

    Returns:
        dict with the rounded components (E_V, E_C, P_SLA, SLA_penalty, P_Esc,
        Esc_penalty, Health_score, Health_penalty) and the total "Reward".
    """
    policy = TIER_CONFIG[row_action["tenant"]]
    gamma, delta, zeta = policy["gamma"], policy["delta"], policy["zeta"]
    tier_weight = policy["tier_weight"]
    action = row_action["action"]

    # Value and cost of taking this action.
    value = calculate_business_value(row_action, action)
    cost = calculate_handling_cost(row_action, action)

    # Risk terms, each converted to a rupee-like penalty.
    sla_prob = calculate_p_sla(row_action, action)
    sla_cost = gamma * sla_prob * tier_weight * 1000.0

    esc_prob = calculate_p_escalation(row_action, action)
    esc_cost = delta * esc_prob * 1000.0

    health = calculate_health_penalty(row_action, action)
    health_cost = zeta * health * 1000.0

    reward = value - cost - sla_cost - esc_cost - health_cost

    components = {}
    components["E_V"] = round(value, 2)
    components["E_C"] = round(cost, 2)
    components["P_SLA"] = round(sla_prob, 4)
    components["SLA_penalty"] = round(sla_cost, 2)
    components["P_Esc"] = round(esc_prob, 4)
    components["Esc_penalty"] = round(esc_cost, 2)
    components["Health_score"] = round(health, 4)
    components["Health_penalty"] = round(health_cost, 2)
    components["Reward"] = round(reward, 2)
    return components

## Main demo flow

In [20]:
def build_interaction_action_table(interactions_df):
    """
    Expand each interaction into one row per action, score every row and pick
    the highest-reward action per interaction.

    Args:
        interactions_df: DataFrame with one interaction per row (the columns
            produced by generate_synthetic_interactions).

    Returns:
        (taf, best):
          taf  - one row per (interaction, action) with the context columns, the
                 computed reward components, and the interaction's
                 "chosen_action" / "chosen_reward" merged in.
          best - one row per interaction: its winning row, with "action" and
                 "Reward" renamed to "chosen_action" and "chosen_reward".
        If there is nothing to score (no interactions, or no actions), both
        frames are empty; they carry the identifier/context/choice columns but
        no computed component columns.
    """
    id_cols = ["interaction_id", "tenant", "intent", "channel"]
    context_cols = ["order_value", "clv", "urgency", "sentiment", "sla_time_left",
                    "base_queue_wait", "region_health_primary", "region_health_secondary",
                    "agent_experience"]

    rows = []
    for _, r in interactions_df.iterrows():
        # The interaction's fields are the same for every action: convert once.
        base_fields = r.to_dict()
        for action in ACTIONS:
            comps = compute_reward_for_row({**base_fields, "action": action})
            entry = {col: r[col] for col in id_cols}
            entry["action"] = action
            # context values
            for col in context_cols:
                entry[col] = r[col]
            # computed
            entry.update(comps)
            rows.append(entry)

    if not rows:
        # Without this guard the frame below would have no columns and the
        # groupby on "interaction_id" would raise KeyError.
        choice_cols = ["chosen_action", "chosen_reward"]
        empty_taf = pd.DataFrame(columns=id_cols + ["action"] + context_cols + choice_cols)
        empty_best = pd.DataFrame(columns=id_cols + ["chosen_action"] + context_cols + ["chosen_reward"])
        return empty_taf, empty_best

    taf = pd.DataFrame(rows)
    # choose best action per interaction (ties go to the action listed first)
    best = taf.loc[taf.groupby("interaction_id")["Reward"].idxmax()].copy()
    # NOTE: ids are strings, so this ordering is lexicographic ("I_10" sorts before "I_2").
    best = best.sort_values("interaction_id").reset_index(drop=True)
    best = best.rename(columns={"action": "chosen_action", "Reward": "chosen_reward"})
    # merge best choice back onto every (interaction, action) row
    taf = taf.merge(best[["interaction_id", "chosen_action", "chosen_reward"]], on="interaction_id", how="left")
    return taf, best

## Run demo

### 1. Generate synthetic interactions

In [21]:
# 30 synthetic interactions; the values depend on the state of NumPy's global RNG.
interactions = generate_synthetic_interactions(n_interactions=30)

### 2. Expand and compute

In [22]:
# taf: one scored row per (interaction, action); best_actions: the winning row per interaction.
taf, best_actions = build_interaction_action_table(interactions)

Compute and display chosen actions summary

In [23]:
# Widen the column limit so the scored table prints without column truncation.
# Note: pd.set_option changes the setting for the rest of the session.
pd.set_option("display.max_columns", 40)
print("\n=== Example: All computed (interaction x action) rows (first 20 rows) ===\n")
# First 20 rows = the first 5 interactions, since each one expands to 4 actions.
print(taf.head(20).to_string(index=False))

print("\n=== Chosen action per interaction (summary) ===\n")
print(best_actions[["interaction_id", "tenant", "intent", "chosen_action", "chosen_reward"]].to_string(index=False))


=== Example: All computed (interaction x action) rows (first 20 rows) ===

interaction_id tenant        intent channel                           action  order_value       clv  urgency  sentiment  sla_time_left  base_queue_wait  region_health_primary  region_health_secondary  agent_experience      E_V    E_C  P_SLA  SLA_penalty  P_Esc  Esc_penalty  Health_score  Health_penalty   Reward                    chosen_action  chosen_reward
           I_1 Silver general_query   email            failover_to_secondary          NaN 119571.61     0.45      -0.80         280.96           136.81                   0.31                     0.76              0.34 31288.62 165.06 0.1585       114.10 0.8066       403.29          0.24           120.0 30486.15            failover_to_secondary       30486.15
           I_1 Silver general_query   email  stay_in_primary_with_resilience          NaN 119571.61     0.45      -0.80         280.96           136.81                   0.31                     0.76   

### Display aggregate counts

In [24]:
print("\n=== Aggregate chosen action counts ===\n")
# How often each action won; actions that never won do not appear in the counts.
print(best_actions["chosen_action"].value_counts())


=== Aggregate chosen action counts ===

chosen_action
route_to_chatbot_or_self_service    17
failover_to_secondary                9
stay_in_primary_with_resilience      4
Name: count, dtype: int64
