In [13]:
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Folder where your 10 datasets are saved
folder_path = "simulated_datasets"
csv_files = sorted([f for f in os.listdir(folder_path) if f.endswith(".csv")])

# Resolution probabilities for agent-topic matching
RESOLUTION_PROB = {
    ("Agent 1", "Management"): 0.9,
    ("Agent 1", "IT issue"): 0.5,
    ("Agent 2", "Management"): 0.5,
    ("Agent 2", "IT issue"): 0.9,
    ("Agent 3", "Management"): 0.8,
    ("Agent 3", "IT issue"): 0.8,
}

# Action mapping
agent_to_action = {"Agent 1": 0, "Agent 2": 1, "Agent 3": 2, "": 3}

# Satisfaction-based reward
def satisfaction_reward(sat):
    if sat >= 7:
        return 7
    elif sat >= 4:
        return 2
    else:
        return -8

# Scoring function to choose best available agent
def agent_score(agent, topic, wait_time):
    match_quality = RESOLUTION_PROB.get((agent, topic), 0)
    return match_quality - (wait_time.total_seconds() / 60.0) * 0.1  # penalty for wait

# Store MDP records
all_mdp_records = []

for file in csv_files:
    file_path = os.path.join(folder_path, file)
    df = pd.read_csv(file_path)

    # Parse call time correctly
    df["Time"] = pd.to_datetime(df["Time"], format="%H:%M:%S.%f", errors="coerce")
    df.sort_values("Time", inplace=True)

    # Initialize agent availability
    agent_free_at = {agent: df["Time"].min() for agent in ["Agent 1", "Agent 2", "Agent 3"]}

    for _, row in df.iterrows():
        topic = row["Topic"]
        call_time = row["Time"]
        agent_assigned = None
        best_score = -np.inf
        wait_time_dict = {}

        # Evaluate all agents for this call
        for agent in ["Agent 1", "Agent 2", "Agent 3"]:
            wait_time = agent_free_at[agent] - call_time if agent_free_at[agent] > call_time else timedelta(0)
            wait_time_dict[agent] = wait_time
            score = agent_score(agent, topic, wait_time)
            if wait_time.total_seconds() <= 120 and score > best_score:
                best_score = score
                agent_assigned = agent

        # Compute outcome
        if agent_assigned:
            action = agent_to_action[agent_assigned]
            assigned = 1
            resolved = np.random.rand() < RESOLUTION_PROB[(agent_assigned, topic)]
            base_reward = 2
            resolution_reward = 10 if resolved else -5
            satisfaction = np.random.randint(7, 11) if resolved else np.random.randint(1, 4)
            satisfaction_adj = satisfaction_reward(satisfaction)
            wait_minutes = wait_time_dict[agent_assigned].total_seconds() / 60
            reward = base_reward + resolution_reward + satisfaction_adj - wait_minutes * 1.5
            talk_duration = np.random.exponential(4)
            agent_free_at[agent_assigned] = call_time + timedelta(minutes=talk_duration)
        else:
            action = 3
            agent_assigned = "None"
            assigned = 0
            resolved = False
            satisfaction = 0
            reward = -10
            wait_minutes = np.nan

        # Construct state and next state
        state = (
            topic,
            int(agent_free_at["Agent 1"] > call_time),
            int(agent_free_at["Agent 2"] > call_time),
            int(agent_free_at["Agent 3"] > call_time),
        )
        next_state = state  # Next state is based on updated agent status

        # Log episode
        all_mdp_records.append([
            int(row["Call ID"]),
            topic,
            state,
            action,
            reward,
            resolved,
            satisfaction,
            wait_minutes,
            next_state,
            file
        ])

# Convert to DataFrame
mdp_df = pd.DataFrame(all_mdp_records, columns=[
    "Call ID", "Topic", "State", "Action", "Reward", "Resolved",
    "Satisfaction", "Wait Time (min)", "Next State", "Dataset"
])

# Save final enhanced MDP
mdp_df.to_csv("mdp_enhanced_waittime_policy.csv", index=False)
print("✅ Saved: mdp_enhanced_waittime_policy.csv with", len(mdp_df), "records")
print(mdp_df.head())



✅ Saved: mdp_enhanced_waittime_policy.csv with 5000 records
   Call ID       Topic                  State  Action  Reward  Resolved  \
0      495    IT issue    (IT issue, 0, 1, 0)       1    19.0      True   
1      496  Management  (Management, 1, 0, 0)       0    19.0      True   
2      172    IT issue    (IT issue, 1, 1, 0)       1    19.0      True   
3      497  Management  (Management, 1, 1, 1)       2    19.0      True   
4      173    IT issue    (IT issue, 1, 1, 1)       1    19.0      True   

   Satisfaction  Wait Time (min)             Next State        Dataset  
0             8              0.0    (IT issue, 0, 1, 0)  dataset_1.csv  
1             8              0.0  (Management, 1, 0, 0)  dataset_1.csv  
2            10              0.0    (IT issue, 1, 1, 0)  dataset_1.csv  
3            10              0.0  (Management, 1, 1, 1)  dataset_1.csv  
4            10              0.0    (IT issue, 1, 1, 1)  dataset_1.csv  
