In [1]:
import wandb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm, trange
from ddopai.experiments.meta_experiment_functions import *
# Assuming 'df' is your DataFrame
pd.set_option('display.max_rows', 1000)  # Show all rows
pd.set_option('display.max_columns', 1000)  # Show all columns
pd.set_option('display.width', None)  # Adjust width to avoid truncation
pd.set_option('display.max_colwidth', None)  # Show full content of each column


In [18]:
api = wandb.Api()

In [19]:
entity = 'timlachner' 
project = 'CMDP-Bandit'
sweep_id = "aelzufgr"

In [20]:
sweep = api.sweep(f"{entity}/{project}/{sweep_id}")

In [21]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd

def fetch_run_history(run):
    try:
        history_iter = run.scan_history()
        df = pd.DataFrame(list(history_iter))
        df['agent'] = run.config.get('config_train-agent', None)
        df['artifact'] = run.config.get('artifact', None)
        print(f"Fetched run {run.id} with df shape {df.shape}")
        return df
    except Exception as e:
        print(f"Error loading history for run {run.id}: {e}")
        return None

dfs_list = []
with ThreadPoolExecutor(max_workers=10) as executor:
    futures = {executor.submit(fetch_run_history, run): run for run in sweep.runs}
    for future in as_completed(futures):
        df = future.result()
        if df is not None:
            dfs_list.append(df)

dfs = pd.concat(dfs_list, ignore_index=True) if dfs_list else pd.DataFrame()
backup_dfs = dfs.copy()
dfs

Fetched run 795bo7wg with df shape (240000, 13)
Fetched run 6iur76dd with df shape (240000, 13)
Fetched run lwy0f4no with df shape (240000, 13)
Fetched run 7vekvnpo with df shape (240000, 13)
Fetched run uun8tg1v with df shape (240000, 13)
Fetched run pv5t2rbq with df shape (240000, 13)
Fetched run 2sp7zfxm with df shape (240000, 13)
Fetched run jfxywyqk with df shape (240000, 13)
Fetched run glao1qut with df shape (240000, 13)
Fetched run yo4mcnng with df shape (240000, 13)
Fetched run bddyctua with df shape (240000, 13)
Fetched run hrpa657f with df shape (240000, 13)
Fetched run y3vjabm6 with df shape (240000, 13)
Fetched run 8pl6ovui with df shape (240000, 13)
Fetched run 0vtoazw2 with df shape (240000, 13)
Fetched run 3cez66qb with df shape (240000, 13)
Fetched run vocwj60k with df shape (240000, 13)
Fetched run uv94c2zm with df shape (240000, 13)
Fetched run wm8xvsyj with df shape (240000, 13)
Fetched run vejjsbjv with df shape (240000, 13)
Fetched run e2r0tryr with df shape (2400



Fetched run jbcyc4z9 with df shape (240000, 13)
Fetched run p8ankdg5 with df shape (240000, 13)


Unnamed: 0,Epoch,_step,t,True_Cumulative_Reward,_runtime,Cumulative_Reward,Action,Inventory,True_Reward,Reward,_timestamp,agent,artifact
0,0,0,0,16.191893,3.983975,16.383483,2.00,30000,16.191893,16.383483,1.742894e+09,TS,raw_data:v28
1,0,1,1,37.699022,3.984144,43.280986,5.00,30000,21.507129,26.897503,1.742894e+09,TS,raw_data:v28
2,0,2,2,37.699022,3.984210,43.280986,0.00,30000,0.000000,0.000000,1.742894e+09,TS,raw_data:v28
3,0,3,3,59.815253,3.984245,69.072361,6.84,30000,22.116231,25.791375,1.742894e+09,TS,raw_data:v28
4,0,4,4,89.798017,3.984298,104.033701,6.04,30000,29.982764,34.961340,1.742894e+09,TS,raw_data:v28
...,...,...,...,...,...,...,...,...,...,...,...,...,...
35999995,599,239995,395,9221.491462,28.903500,9261.763316,4.84,30000,23.641812,27.647586,1.742834e+09,Clairvoyant,raw_data:v0
35999996,599,239996,396,9244.167730,28.903525,9285.636602,4.63,30000,22.676268,23.873287,1.742834e+09,Clairvoyant,raw_data:v0
35999997,599,239997,397,9273.679523,28.903580,9318.043400,5.99,30000,29.511793,32.406798,1.742834e+09,Clairvoyant,raw_data:v0
35999998,599,239998,398,9296.518082,28.903606,9338.212229,4.62,30000,22.838559,20.168829,1.742834e+09,Clairvoyant,raw_data:v0


In [2]:
csv_path = "/Users/timlachner/Library/CloudStorage/OneDrive-Personal/Work/DDOP/ddopai_pricing_experiments/visualisation/data/timlachner_CMDP-Bandit_aelzufgr.csv"
dfs = pd.read_csv(csv_path)

In [3]:
df = dfs.copy()
df

Unnamed: 0,Epoch,t,True_Cumulative_Reward,Cumulative_Reward,Action,Inventory,True_Reward,Reward,agent,artifact
0,0,0,16.191893,16.383483,2.00,30000,16.191893,16.383483,TS,raw_data:v28
1,0,1,37.699022,43.280986,5.00,30000,21.507129,26.897503,TS,raw_data:v28
2,0,2,37.699022,43.280986,0.00,30000,0.000000,0.000000,TS,raw_data:v28
3,0,3,59.815253,69.072361,6.84,30000,22.116231,25.791375,TS,raw_data:v28
4,0,4,89.798017,104.033701,6.04,30000,29.982764,34.961340,TS,raw_data:v28
...,...,...,...,...,...,...,...,...,...,...
35999995,599,395,9221.491462,9261.763316,4.84,30000,23.641812,27.647586,Clairvoyant,raw_data:v0
35999996,599,396,9244.167730,9285.636602,4.63,30000,22.676268,23.873287,Clairvoyant,raw_data:v0
35999997,599,397,9273.679523,9318.043400,5.99,30000,29.511793,32.406798,Clairvoyant,raw_data:v0
35999998,599,398,9296.518082,9338.212229,4.62,30000,22.838559,20.168829,Clairvoyant,raw_data:v0


In [4]:


# --- Step 1: Extract clairvoyant rewards for both normal and true rewards
clairvoyant_df = df[df['agent'] == 'Clairvoyant'][['artifact', 'Epoch', 't', 'Reward', 'True_Reward', 'Action']]
clairvoyant_df = clairvoyant_df.rename(columns={
    'Reward': 'clairvoyant_reward', 
    'True_Reward': 'clairvoyant_true_reward',
    'Action': 'clairvoyant_action'
})

# --- Step 2: Merge the clairvoyant rewards back into the original dataframe on artifact, Epoch, and t.
merged_df = df.merge(clairvoyant_df, on=['artifact', 'Epoch', 't'], how='left')

# --- Step 3: Compute regrets for both reward types
merged_df['regret_reward'] = merged_df['clairvoyant_reward'] - merged_df['Reward']
merged_df['regret_true']   = merged_df['clairvoyant_true_reward'] - merged_df['True_Reward']

# --- Step 4: Sort the dataframe to ensure correct ordering for cumulative sums (including agent in the sort)
merged_df = merged_df.sort_values(by=['artifact', 'Epoch', 'agent', 't'])

# --- Step 5: Compute cumulative sum of regret per episode (for each artifact, Epoch, and agent)
merged_df['cumsum_regret_reward_episode'] = merged_df.groupby(['artifact', 'Epoch', 'agent'])['regret_reward'].cumsum()
merged_df['cumsum_regret_true_episode']   = merged_df.groupby(['artifact', 'Epoch', 'agent'])['regret_true'].cumsum()

# --- Step 6: Compute cumulative sum of regret over all epochs for each agent (within each artifact)
merged_df['cumsum_regret_reward_all'] = merged_df.groupby(['artifact', 'agent'])['regret_reward'].cumsum()
merged_df['cumsum_regret_true_all']   = merged_df.groupby(['artifact', 'agent'])['regret_true'].cumsum()



In [59]:
vis = merged_df[['artifact', 'Epoch', 'agent', 't', 'cumsum_regret_reward_episode', 'cumsum_regret_true_episode', 'cumsum_regret_reward_all', 'cumsum_regret_true_all', 'clairvoyant_action', 'Action']]
vis = vis[vis['agent'] != 'Greedy']

In [73]:
colors = { "ILQX": "#B66D0D", "SAC": "#1982C4", "TS": "#3A3335", "Clairvoyant": "#D81E5B"}

In [None]:
from lets_plot import *
import pandas as pd

# Initialize Lets-Plot
LetsPlot.setup_html()

# Group by 'Epoch' and 'agent' and get the last value for each group
last_values = vis.groupby(['Epoch', 'agent']).last().reset_index()
# Filter out the agent 'Clairvoyant'
last_values['agent'] = pd.Categorical(last_values['agent'], categories=['Clairvoyant', 'SAC', 'ILQX', 'TS'], ordered=True)
last_values = last_values.sort_values('agent')

# Create the plot with the specified colors
plot = ggplot(last_values, aes(x='Epoch', y='cumsum_regret_true_all', color='agent')) + \
    geom_line() + \
    scale_color_manual(values=colors) + \
    labs(x='N', y='Meta Regret') + \
    ggsize(1000, 400)
ggsave(plot, filename='lets-plot-images/meta_regret_plot.svg')
plot.show()

In [88]:

# Group by 'Epoch' and 'agent' and get the last value for each group
last_values = vis.groupby(['Epoch', 'agent']).last().reset_index()
last_values = last_values[last_values['Epoch'] < 200]
last_values['agent'] = pd.Categorical(last_values['agent'], categories=['Clairvoyant', 'SAC', 'ILQX', 'TS'], ordered=True)
last_values = last_values.sort_values('agent')
# Create the plot with the specified colors and a wider aspect ratio
plot = ggplot(last_values, aes(x='Epoch', y='cumsum_regret_true_all', color='agent')) + \
    geom_line() + \
    scale_color_manual(values=colors) + \
    labs(x='N', y='Meta Regret') + \
    ggsize(1000, 400)
ggsave(plot, filename='lets-plot-images/meta_regret_short_plot.svg')
plot.show()

In [91]:
# Remove the "Clairvoyant" key from the colors dictionary
colors_without_clairvoyant = {k: v for k, v in colors.items() if k != "Clairvoyant"}
last_values = vis.groupby(['Epoch', 'agent']).last().reset_index()
last_values = last_values[last_values['Epoch'] < 200]
# Filter out the Clairvoyant agent
filtered_last_values = last_values[last_values['agent'] != 'Clairvoyant']

# Re-run the plotting code
plot = ggplot(filtered_last_values, aes(x='Epoch', y='cumsum_regret_true_episode', color='agent')) + \
    geom_point() + \
    scale_color_manual(values=colors_without_clairvoyant) + \
    labs(x='N', y='Regret at n', color='Agent') + \
    ggsize(1000, 400)
ggsave(plot, filename='lets-plot-images/scatter_regret_plot.svg')
plot.show()

In [95]:
# Remove the "Clairvoyant" key from the colors dictionary
colors_without_clairvoyant = {k: v for k, v in colors.items() if k != "TS" or k != "ILQX"}

# Add a column to categorize epochs into four groups
filtered_agents_first_100_epochs['Epoch_Group'] = pd.cut(
    filtered_agents_first_100_epochs['Epoch'],
    bins=[-1, 25, 50, 75, 100],
    labels=['0-25', '26-50', '51-75', '76-100+']
)

# Create the facet plot with y-axis reset for each facet
plot = ggplot(filtered_agents_first_100_epochs, aes(x='Action', y='Epoch', fill='agent')) + \
    geom_area_ridges() + \
    facet_wrap('Epoch_Group', ncol=4, scales='free_y') + \
    labs(x='Charged Prices', y='N', fill='Agent') + \
    scale_color_manual(values=colors_without_clairvoyant) + \
    ggsize(1500, 800)
ggsave(plot, filename='lets-plot-images/ridge_action_plot.svg')
plot.show()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_agents_first_100_epochs['Epoch_Group'] = pd.cut(
