In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Load the experiment log; this section reads its 'best_arm', 'trial' and
# 'arm_name' columns.
df = pd.read_csv('alps-2025-icrl/exp1.0_openai_gpt_4o_mini_arms-5_delta-0.2_trials-100_v20250402120000.csv')

# Arm names mapped to integer indices; the dict's insertion order also fixes
# the row order of the matrix built below.
ARM_NAME_TO_IDX = {'blue': 0, 'green': 1, 'red': 2, 'yellow': 3, 'purple': 4}
arms = list(ARM_NAME_TO_IDX)

# The 'best_arm' column holds the arm's *name*; read it from the first row
# and translate it to the corresponding integer index.
best_arm_name = df['best_arm'].iloc[0]
best_arm = ARM_NAME_TO_IDX[best_arm_name]

# Indicator matrix: one row per arm, one column per trial, initialised to 0.
# The column count is max(trial) + 1 (assumes trials are numbered from 0 --
# TODO confirm against the logger).
n_trials = df['trial'].max() + 1
pivot_df = pd.DataFrame(0, index=arms, columns=range(n_trials))

# Flag every (arm, trial) pair that appears in the log with a 1.
for arm_name, trial in zip(df['arm_name'], df['trial']):
    pivot_df.loc[arm_name, trial] = 1

# Heatmap of which arm was pressed on each trial (rows = arms, columns = trials).
plt.figure(figsize=(8, 4))
ax = sns.heatmap(
    pivot_df,
    cmap=['white', 'black'],  # 0 = not pressed (white), 1 = pressed (black)
    vmin=0,                   # pin the colour scale so 0 and 1 always map to
    vmax=1,                   # white and black, whatever values are present
    cbar=False,
    linewidths=0.5,
    linecolor='lightgray',
)

ax.set_xlabel('t')
ax.set_ylabel('Arm')
ax.set_title('Arms Pressed Over Time')

# NOTE: seaborn's heatmap already inverts the y-axis so that the first row of
# `pivot_df` (the first arm) is drawn at the top. Inverting it a second time
# would flip the rows upside down, so the axis is left as seaborn set it.

# Heatmap cells span [col, col + 1] x [row, row + 1] in data coordinates, so
# the best arm's row starts at x = 0 and at y = its position in `arms`
# (which is also the row order of `pivot_df`).
row_idx = arms.index(best_arm_name)
n_cols = pivot_df.shape[1]

rect = patches.Rectangle(
    (0, row_idx),          # data-coordinate origin of the best arm's row
    n_cols,                # width covering all trials
    1,                     # height of one row
    facecolor='blue',      # filled with blue color
    alpha=0.2,             # semi-transparent
    edgecolor='blue',      # blue border
    linewidth=2,
)
ax.add_patch(rect)

plt.show()
# Line plot of the logged cumulative reward against the trial number,
# drawn on a fresh figure with one marker per logged row.
plt.figure(figsize=(8, 4))
plt.plot(df['trial'], df['cumulative_reward'], marker='o')

# Label the axes and title in one call on the current axes, then add a grid.
plt.gca().set(
    title='Cumulative Reward Over Time',
    xlabel='Trial',
    ylabel='Cumulative Reward',
)
plt.gca().grid(True)
plt.show()
