In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import matplotlib.ticker as mtick


# Define function to run the epsilon-greedy simulation
def run_epsilon_greedy(expected_rewards, gamma, epsilon, steps=100000, rng=None):
    """Simulate a two-armed epsilon-greedy bandit with Bernoulli rewards.

    Arm 0 is reported as "bank" and arm 1 as "fintech". At every step the
    agent explores (picks one of the two arms uniformly at random) with
    probability ``epsilon`` and otherwise exploits the arm with the highest
    running-average reward.

    Parameters
    ----------
    expected_rewards : sequence of float
        Success probability of each arm; only indices 0 and 1 are used.
    gamma : float
        Per-step discount factor. A reward earned at step ``t`` adds
        ``reward * gamma ** t`` to that arm's reported total.
    epsilon : float
        Probability of exploring at each step.
    steps : int, default 100000
        Number of pulls to simulate.
    rng : optional
        Random source exposing ``random()`` and ``choice(n)``, e.g.
        ``np.random.default_rng(seed)`` or ``np.random.RandomState(seed)``.
        Defaults to NumPy's global ``np.random`` state, so ``np.random.seed``
        still governs the outcome when this argument is omitted.

    Returns
    -------
    dict
        ``bank_trials`` / ``fintech_trials``: number of pulls per arm.
        ``bank_total_reward`` / ``fintech_total_reward``: discounted reward
        accumulated per arm.
        ``reward_gap``: fintech total minus bank total.
    """
    if rng is None:
        # Fall back to the global NumPy random state (legacy behaviour).
        rng = np.random

    avg_rewards = [0.0, 0.0]
    counts = [0, 0]
    rewards = [0, 0]

    for t in range(steps):
        if rng.random() < epsilon:
            # Explore: pick one of the two arms uniformly at random.
            choice = rng.choice(2)
        else:
            # Exploit. np.argmax returns the first maximal index, so ties go
            # to arm 0. Both estimates start at 0.0 and never drop below it,
            # hence with epsilon == 0 arm 1 is never tried.
            choice = np.argmax(avg_rewards)

        # Bernoulli reward: 1 with probability expected_rewards[choice].
        reward = int(rng.random() < expected_rewards[choice])
        counts[choice] += 1

        # Reported totals are discounted by the global step index...
        rewards[choice] += reward * (gamma ** t)
        # ...while the value estimate is an incremental mean of the raw,
        # undiscounted rewards.
        avg_rewards[choice] += (reward - avg_rewards[choice]) / counts[choice]

    return {
        "bank_trials": counts[0],
        "fintech_trials": counts[1],
        "bank_total_reward": rewards[0],
        "fintech_total_reward": rewards[1],
        "reward_gap": rewards[1] - rewards[0]
    }

# Set parameters
SEED = 42  # fixed seed so the table and figures are reproducible
epsilons = [0, 0.0001, 0.001, 0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.2, 0.3, 0.4]
expected_rewards = [0.05, 0.06]  # per-pull success probability: [bank, fintech]
gamma = .9999
steps = 500000
runs_per_epsilon = 100

# Seed NumPy's global random state, which run_epsilon_greedy draws from, so a
# fresh "Restart Kernel -> Run All" reproduces the same numbers.
np.random.seed(SEED)

# Run and average simulations.
# NOTE: this is len(epsilons) * runs_per_epsilon * steps = 600 million
# simulated pulls in a Python loop; expect a long run. Lower `steps` or
# `runs_per_epsilon` while iterating.
metric_names = [
    "bank_trials",
    "fintech_trials",
    "bank_total_reward",
    "fintech_total_reward",
    "reward_gap",
]
results = []
for epsilon in epsilons:
    temp_results = [
        run_epsilon_greedy(expected_rewards, gamma, epsilon, steps)
        for _ in range(runs_per_epsilon)
    ]
    # Average every metric over the independent runs for this epsilon.
    avg_result = {"epsilon": epsilon}
    for metric in metric_names:
        avg_result[metric] = np.mean([r[metric] for r in temp_results])
    results.append(avg_result)

# Convert to DataFrame (one row per epsilon)
df = pd.DataFrame(results)

# Long-format views for plotting with a hue column
df_trials = df.melt(id_vars="epsilon", value_vars=["bank_trials", "fintech_trials"], var_name="Type", value_name="Trials")
df_rewards = df.melt(id_vars="epsilon", value_vars=["bank_total_reward", "fintech_total_reward"], var_name="Type", value_name="Total Reward")
df_gap = df[["epsilon", "reward_gap"]]

In [None]:
# Display the averaged simulation results: one row per epsilon value.
df

In [None]:
sns.set(style="whitegrid", context="notebook")

# The epsilon grid includes 0 (pure greedy), which a plain log axis cannot
# place. A symlog axis is linear on [0, linthresh] and logarithmic beyond it,
# so the greedy baseline stays on the plot. linthresh is the smallest positive
# epsilon, so every non-zero point is log-spaced.
positive_epsilons = df.loc[df["epsilon"] > 0, "epsilon"]

fig, (ax_rewards, ax_gap) = plt.subplots(1, 2, figsize=(18, 5))

# Left panel: discounted total reward per option
sns.lineplot(
    data=df_rewards,
    x="epsilon",
    y="Total Reward",
    hue="Type",
    marker='o',
    palette={
        "bank_total_reward": "mediumseagreen",
        "fintech_total_reward": "steelblue"
    },
    ax=ax_rewards,
)
ax_rewards.legend(title="Option", loc='upper left')
ax_rewards.set_title("Discounted Total Rewards vs Epsilon")
ax_rewards.set_ylabel("Discounted Total Reward")

# Right panel: reward gap (fintech minus bank), with a zero reference line
sns.lineplot(
    data=df,
    x="epsilon",
    y="reward_gap",
    marker='o',
    label="Reward Gap (Fintech − Bank)",
    color="tomato",
    ax=ax_gap,
)
ax_gap.axhline(0, linestyle="--", color="gray")
ax_gap.legend(title="Metric", loc='upper left')
ax_gap.set_title("Reward Gap vs Epsilon")
ax_gap.set_ylabel("Reward Gap")

# Shared x-axis treatment for both panels
for ax in (ax_rewards, ax_gap):
    ax.set_xlabel("Exploration Rate (ε)")
    if positive_epsilons.empty:
        # Nothing to spread out logarithmically; keep a linear axis.
        ax.set_xscale("linear")
    else:
        ax.set_xscale("symlog", linthresh=positive_epsilons.min())
    ax.grid(True)

fig.tight_layout()
plt.show()