# In [4]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
import os

# Simulation parameters
NUM_DATASETS = 10       # number of CSV files to generate
N = 500                 # calls simulated per dataset
arrival_rate = 1 / 5    # calls per minute (mean gap between calls: 5 minutes)
service_rate = 1 / 4    # calls finished per minute (mean talk time: 4 minutes)
MAX_QUEUE_LENGTH = 25   # upper bound on the number of calls tracked as waiting

AGENTS = [
    "Agent 1",
    "Agent 2",
    "Agent 3",
]
TOPICS = [
    "Management",
    "IT issue",
]

# Probability that a call is resolved, keyed by (agent, topic).
# Written per agent for readability and flattened into tuple keys.
RESOLUTION_PROB = {
    (agent_name, topic_name): probability
    for agent_name, per_topic in {
        "Agent 1": {"Management": 0.9, "IT issue": 0.5},
        "Agent 2": {"Management": 0.5, "IT issue": 0.9},
        "Agent 3": {"Management": 0.8, "IT issue": 0.8},
    }.items()
    for topic_name, probability in per_topic.items()
}

def satisfaction_score(resolved, speed_of_answer, talk_duration):
    """Draw a random satisfaction rating for one answered call.

    The rating band depends on the call outcome:

    * 7-10 when the call was resolved, answered in under 60 and talked
      for under 6;
    * 4-6 when the call was resolved and answered in under 120 (but did
      not qualify for the top band);
    * 1-3 otherwise (unresolved, or answered in 120 or more).

    Args:
        resolved: Truthy if the call was resolved.
        speed_of_answer: Time taken to answer the call.
        talk_duration: Length of the conversation.

    Returns:
        An integer drawn uniformly from the selected band via
        ``np.random.randint`` (upper bound exclusive).
    """
    if resolved and speed_of_answer < 60 and talk_duration < 6:
        low, high = 7, 11
    elif resolved and speed_of_answer < 120:
        low, high = 4, 7
    else:
        low, high = 1, 4
    return np.random.randint(low, high)

# Generate NUM_DATASETS independent simulated call logs and write each one to
# its own CSV file inside OUTPUT_DIR.
OUTPUT_DIR = "simulated_datasets"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Column layout shared by every dataset (loop-invariant, so built once).
columns = ["Call ID", "Agent", "Date", "Time", "Topic", "Answered (Y/N)",
           "Resolved (Y/N)", "Speed of answer", "Talk duration", "Satisfaction rating"]

# Every dataset starts its clock at 09:00 on today's date.
start_time = datetime.combine(datetime.today(), datetime.strptime("09:00", "%H:%M").time())

for i in range(NUM_DATASETS):
    data = []
    current_time = start_time
    queue = []
    # Maps each agent to the moment they become free again.
    active_agents = {agent: start_time for agent in AGENTS}

    for call_id in range(1, N + 1):
        # Gaps between calls are exponential with mean 1 / arrival_rate minutes.
        inter_arrival = np.random.exponential(1 / arrival_rate)
        current_time += timedelta(minutes=inter_arrival)
        date = current_time.date()
        time = current_time.time()
        topic = random.choice(TOPICS)

        # Hand the call to the first free agent; AGENTS order is the priority.
        assigned = False
        for agent in AGENTS:
            if active_agents[agent] > current_time:
                continue  # still busy with an earlier call

            # Talk time is exponential with mean 1 / service_rate minutes.
            service_time = np.random.exponential(1 / service_rate)
            resolved_prob = RESOLUTION_PROB[(agent, topic)]
            resolved = "Y" if random.random() < resolved_prob else "N"
            # Drawn independently of the agents' state (uniform on 5..120).
            speed_of_answer = random.randint(5, 120)
            talk_duration = round(service_time, 2)
            satisfaction = satisfaction_score(resolved == "Y", speed_of_answer, talk_duration)

            data.append([
                call_id,
                agent,
                date,
                time,
                topic,
                "Y",
                resolved,
                speed_of_answer,
                talk_duration,
                satisfaction,
            ])
            active_agents[agent] = current_time + timedelta(minutes=service_time)
            assigned = True
            break

        if not assigned:
            # NOTE(review): the queue is only ever appended to; nothing here
            # dequeues it, so a queued caller is logged as unanswered exactly
            # like one turned away because the queue is full. Confirm whether
            # queued calls were meant to be served once an agent frees up.
            if len(queue) < MAX_QUEUE_LENGTH:
                queue.append((call_id, topic, date, time))
            # Unanswered calls carry empty agent/outcome fields.
            data.append([
                call_id,
                "",
                date,
                time,
                topic,
                "N",
                "",
                "",
                "",
                "",
            ])

    df = pd.DataFrame(data, columns=columns)

    # Save to CSV
    df.to_csv(f"{OUTPUT_DIR}/dataset_{i+1}.csv", index=False)

# Report the number of datasets actually generated rather than a fixed "10".
print(f"✅ {NUM_DATASETS} datasets successfully generated and saved in '{OUTPUT_DIR}/' folder.")


# Output: ✅ 10 datasets successfully generated and saved in 'simulated_datasets/' folder.
