In [None]:
%load_ext autoreload
%autoreload 2


import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
import numpy as np
import matplotlib.font_manager as fm
import statsmodels.formula.api as smf
from data.loader import load_data
import json
from sklearn.metrics import mean_absolute_error
from pathlib import Path

### Matplotlib config

In [None]:
# Register the Computer Modern Unicode font files with matplotlib's font
# manager so they can be selected by family name later on.
for font_file in (
    "/usr/share/fonts/truetype/cmu/cmunrm.ttf",  # regular
    "/usr/share/fonts/truetype/cmu/cmunbx.ttf",  # bold
):
    fm.fontManager.addfont(font_file)

# Bold face handle; passed explicitly as `fontproperties` to plot titles.
bold_font = fm.FontProperties(fname="/usr/share/fonts/truetype/cmu/cmunbx.ttf")

In [None]:
# Read the family name stored in the regular CMU font file; it is used below
# as the global "font.family" setting.
regular_face = fm.FontProperties(fname="/usr/share/fonts/truetype/cmu/cmunrm.ttf")
cmu_serif = regular_face.get_name()
print("Font name:", cmu_serif)  # should be "CMU Serif"

In [None]:
# Global matplotlib styling applied to every figure created after this cell.
figure_style = {
    "text.usetex": False,       # do not call an external LaTeX installation
    "mathtext.fontset": "cm",   # Computer Modern glyphs for math text
    "font.family": cmu_serif,   # family name resolved from the CMU font file
    "font.size": 16,            # base font size
    "axes.titlesize": 18,       # subplot titles
    "axes.labelsize": 16,       # axis labels
    "xtick.labelsize": 14,      # x-axis tick labels
    "ytick.labelsize": 14,      # y-axis tick labels
    "legend.fontsize": 16,      # legend entries
}
plt.rcParams.update(figure_style)

In [None]:
# Registry of rating tables, filled in under string keys by later cells.
all_ratings = dict()

### Dialogue eval

In [None]:
# Judge ratings for the interview dialogues.
ratings = pd.read_csv("./results/interview_ratings.csv")

# Split the table on whether the role name contains "shuffle".
is_shuffled = ratings.role.str.contains("shuffle")
ratings_original = ratings[~is_shuffled].copy()
ratings_shuffled = ratings[is_shuffled].copy()

# Average the two subsets row by row. `.values` discards the index, so rows
# are paired purely by position.
# NOTE(review): this assumes both subsets list the same items in the same
# order — confirm against the CSV layout.
judge_col = "Selene-1-Mini-Llama-3.1-8B"
ratings_original[judge_col] = (ratings_original[judge_col] + ratings_shuffled[judge_col].values)/2
persona_directed_all = ratings_original

In [None]:
# Judge ratings for the instructions dialogues.
ratings = pd.read_csv("./results/instructions_ratings.csv")

# Split the table on whether the role name contains "shuffle".
is_shuffled = ratings.role.str.contains("shuffle")
ratings_original = ratings[~is_shuffled].copy()
ratings_shuffled = ratings[is_shuffled].copy()

# Average the two subsets row by row. `.values` discards the index, so rows
# are paired purely by position.
# NOTE(review): this assumes both subsets list the same items in the same
# order — confirm against the CSV layout.
judge_col = "Selene-1-Mini-Llama-3.1-8B"
ratings_original[judge_col] = (ratings_original[judge_col] + ratings_shuffled[judge_col].values)/2
goal_oriented_all = ratings_original

In [None]:
# Collapse each table to one row per (metric, role, round) by averaging every
# numeric column within the group.
group_keys = ['metric', 'role', 'idx']
persona_directed = (
    persona_directed_all
    .groupby(group_keys)
    .mean(numeric_only=True)
    .reset_index()
)
goal_oriented = (
    goal_oriented_all
    .groupby(group_keys)
    .mean(numeric_only=True)
    .reset_index()
)

In [None]:
# Segment boundaries (values of the round index) at which the smoothed curves
# are evaluated.
fixed_indices = [0, 11, 22, 34, 45, 56, 68, 79, 90, 101]

def smooth_segmented(x, y, indices):
    """Average ``y`` over consecutive segments of ``x`` delimited by ``indices``.

    The first boundary yields the mean of ``y`` where ``x == indices[0]``
    (NaN when no sample sits exactly there). Every later boundary
    ``indices[i]`` yields the mean of ``y`` over the closed range
    ``indices[i-1] + 1 <= x <= indices[i]``; a boundary whose range contains
    no sample is left out of the result.

    Parameters
    ----------
    x, y : array-like
        Sample positions and values of equal length. Lists, NumPy arrays and
        pandas Series are accepted; they are compared by value, never by
        index label.
    indices : sequence
        Ascending segment boundaries.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x_smooth, y_smooth)``: the retained boundaries and the segment
        means that belong to them.
    """
    # Coerce once so membership tests and boolean masks always act on values
    # (``idx in series`` would otherwise test the Series index labels).
    x = np.asarray(x)
    y = np.asarray(y)

    x_smooth, y_smooth = [], []
    for i, idx in enumerate(indices):
        if i == 0:
            # First point: exact match only; several samples there are averaged.
            at_start = x == idx
            y_val = y[at_start].mean() if at_start.any() else np.nan
            x_smooth.append(idx)
            y_smooth.append(y_val)
        else:
            start = indices[i - 1] + 1
            mask = (x >= start) & (x <= idx)
            if mask.any():
                x_smooth.append(idx)
                y_smooth.append(y[mask].mean())
    return np.array(x_smooth), np.array(y_smooth)

# One figure with three side-by-side subplots (one per metric) sharing the y-axis.
fig, axes = plt.subplots(1, 3, figsize=(18, 6), sharey=True)

judge_col = "Selene-1-Mini-Llama-3.1-8B"

# Grouping the non-baseline persona-directed rows by metric fixes which
# metrics are plotted and in which (sorted) order.
metric_groups = (
    persona_directed[~persona_directed.role.str.contains("empty")]
    .groupby(['metric', 'idx'])[judge_col]
    .mean()
    .reset_index()
    .groupby("metric")
)


def _round_means(ratings_df, metric, baseline):
    """Mean judge rating per round for one metric.

    ``baseline=True`` keeps only roles containing "empty"; ``baseline=False``
    keeps all other roles. Both conditions are combined into a single mask so
    pandas never has to re-align a boolean key against a filtered frame.
    """
    is_baseline = ratings_df.role.str.contains("empty")
    role_mask = is_baseline if baseline else ~is_baseline
    selected = ratings_df[(ratings_df.metric == metric) & role_mask]
    return selected.groupby("idx")[judge_col].mean()


def _plot_raw_and_smoothed(ax, per_round, label, **style):
    """Draw the faint per-round curve and, on top, its segment-averaged version."""
    x = per_round.index.values
    y = per_round.values
    x_smooth, y_smooth = smooth_segmented(x, y, fixed_indices)
    ax.plot(x, y, alpha=0.3, label='_nolegend_', **style)
    ax.plot(x_smooth, y_smooth, label=label, **style)


# (source table, baseline?, legend label, line style) in drawing order.
series_specs = [
    (persona_directed, False, 'Persona-directed (persona)', dict(marker='o', color="orange", linestyle='-')),
    (goal_oriented, False, 'Goal-oriented (persona)', dict(marker='x', color="blue", linestyle='-')),
    (persona_directed, True, 'Persona-directed (baseline)', dict(marker='o', color="orange", linestyle='--')),
    (goal_oriented, True, 'Goal-oriented (baseline)', dict(marker='x', color="blue", linestyle='--')),
]

for i, (metric, _) in enumerate(metric_groups):
    ax = axes[i]
    ax.set_title(metric.capitalize().replace("_", "-") + " (↑)", fontproperties=bold_font, fontsize=16)
    ax.set_xlabel('Conversation round')
    ax.grid(True)

    for source_df, baseline, series_label, style in series_specs:
        _plot_raw_and_smoothed(ax, _round_means(source_df, metric, baseline), series_label, **style)

# Set common y-axis label
axes[0].set_ylabel('Rating')

# Single legend for the whole figure: collect the entries of every subplot
# and de-duplicate them by label. The names avoid `labels`, which other cells
# use for the character ground-truth dictionary.
legend_lines, legend_labels = [], []
for ax in axes:
    ax_lines, ax_labels = ax.get_legend_handles_labels()
    legend_lines.extend(ax_lines)
    legend_labels.extend(ax_labels)
unique_labels = dict(zip(legend_labels, legend_lines))
fig.legend(unique_labels.values(), unique_labels.keys(), loc='upper center', bbox_to_anchor=(0.5, 1.05), ncol=4)

# Leave the top 5% of the canvas free for the legend.
plt.tight_layout(rect=[0, 0, 1, 0.95])

plt.show()

In [None]:
# Write the current `fig` to the paper's media folder as a tightly cropped PDF.
dialogue_metrics_pdf = '../persistent-personas-paper/media/dialogue_metrics.pdf'
fig.savefig(dialogue_metrics_pdf, dpi=300, bbox_inches="tight")

In [None]:
# Start again from the un-aggregated interview ratings and call the judge
# score column "persona"; every other column keeps its name.
persona_directed = persona_directed_all.rename(
    columns={"Selene-1-Mini-Llama-3.1-8B": "persona"}
)

In [None]:
# Add the judge scores of the instructions dialogues as a "goal" column.
# NOTE(review): pandas aligns this assignment on row index labels, so rows are
# paired correctly only if both tables carry the same labels for matching
# items — confirm the two CSVs share their row layout.
persona_directed["goal"] = goal_oriented_all["Selene-1-Mini-Llama-3.1-8B"]

In [None]:
def _strip_role_affixes(role):
    """Remove the dialogue-type prefixes and the file suffix from a role name."""
    for token in ("interview_", "instructions_", ".json.csv"):
        role = role.replace(token, "")
    return role

persona_directed["role"] = persona_directed["role"].map(_strip_role_affixes)

In [None]:
# Register the dialogue table; the same object is stored, so columns added to
# it afterwards are visible through the registry as well.
all_ratings.update(dialogue=persona_directed)

In [None]:
# Per-row gap between the two conditions (positive: "persona" scored higher).
persona_directed["diff"] = persona_directed["persona"].sub(persona_directed["goal"])

In [None]:
# Keep only rows whose role does not contain "empty" for the analyses below.
is_empty_role = persona_directed.role.str.contains("empty")
persona_directed = persona_directed[~is_empty_role]

In [None]:
# Let pandas print up to 102 rows before truncating displayed tables.
pd.options.display.max_rows = 102

In [None]:
# Seed NumPy's global RNG before resampling.
np.random.seed(42)

# Number of bootstrap resamples drawn per conversation round.
n_bootstraps = 1000

# "style" rows grouped by conversation round (`idx`).
grouped_pos = persona_directed[persona_directed.metric=="style"].groupby('idx')

# One summary record per round: observed mean of `diff` and a 95% percentile
# bootstrap interval around it.
results = []
for pos, group in grouped_pos:
    diffs = group['diff']

    # Resample with replacement at the original size and keep each mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # 2.5th and 97.5th percentiles bound the 95% interval.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# Rounds whose interval excludes zero (both bounds share a sign) and whose
# mean difference is positive.
ci_excludes_zero = results_df["lower_bound_95_ci"]*results_df["upper_bound_95_ci"] > 0
display(ci_excludes_zero & (results_df["mean_difference"] > 0))

In [None]:
# Mean style difference per round with its bootstrap confidence band.
plt.figure(figsize=(8,3))
df = results_df
rounds = df['pos']

# Mean line first, then the shaded 95% interval.
plt.plot(rounds, df['mean_difference'], color='magenta')
plt.fill_between(rounds, df['lower_bound_95_ci'], df['upper_bound_95_ci'], color='pink', alpha=0.4)

# Titles and axis text.
plt.title('Avg. style difference (Persona-directed - Goal-oriented)', fontproperties=bold_font, fontsize=14)
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)

# Zero reference line and a light dashed grid.
plt.axhline(y=0, color="black", linestyle="--")
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

# Export for the paper.
plt.savefig('../persistent-personas-paper/media/style_dialogue_diff.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Seed NumPy's global RNG before resampling.
np.random.seed(42)

# Number of bootstrap resamples drawn per conversation round.
n_bootstraps = 1000

# "knowledge" rows grouped by conversation round (`idx`).
grouped_pos = persona_directed[persona_directed.metric=="knowledge"].groupby('idx')

# One summary record per round: observed mean of `diff` and a 95% percentile
# bootstrap interval around it.
results = []
for pos, group in grouped_pos:
    diffs = group['diff']

    # Resample with replacement at the original size and keep each mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # 2.5th and 97.5th percentiles bound the 95% interval.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# Rounds whose interval excludes zero (both bounds share a sign) and whose
# mean difference is positive.
ci_excludes_zero = results_df["lower_bound_95_ci"]*results_df["upper_bound_95_ci"] > 0
display(ci_excludes_zero & (results_df["mean_difference"] > 0))

In [None]:
# Mean knowledge difference per round with its bootstrap confidence band.
plt.figure(figsize=(8,3))
df = results_df
rounds = df['pos']

# Mean line first, then the shaded 95% interval.
plt.plot(rounds, df['mean_difference'], color='magenta')
plt.fill_between(rounds, df['lower_bound_95_ci'], df['upper_bound_95_ci'], color='pink', alpha=0.4)

# Titles and axis text.
plt.title('Avg. knowledge difference (Persona-directed - Goal-oriented)', fontproperties=bold_font, fontsize=14)
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)

# Light dashed grid and zero reference line.
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.axhline(y=0, color="black", linestyle="--")

# Export for the paper.
plt.savefig('../persistent-personas-paper/media/knowledge_dialogue_diff.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Seed NumPy's global RNG before resampling.
np.random.seed(42)

# Number of bootstrap resamples drawn per conversation round.
n_bootstraps = 1000

# "in_character" rows grouped by conversation round (`idx`).
grouped_pos = persona_directed[persona_directed.metric=="in_character"].groupby('idx')

# One summary record per round: observed mean of `diff` and a 95% percentile
# bootstrap interval around it.
results = []
for pos, group in grouped_pos:
    diffs = group['diff']

    # Resample with replacement at the original size and keep each mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # 2.5th and 97.5th percentiles bound the 95% interval.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# Rounds whose interval excludes zero (both bounds share a sign) and whose
# mean difference is positive.
ci_excludes_zero = results_df["lower_bound_95_ci"]*results_df["upper_bound_95_ci"] > 0
display(ci_excludes_zero & (results_df["mean_difference"] > 0))

In [None]:
# Mean in-character difference per round with its bootstrap confidence band.
plt.figure(figsize=(8, 3))
df = results_df
rounds = df['pos']

# Mean line first, then the shaded 95% interval.
plt.plot(rounds, df['mean_difference'], color='magenta')
plt.fill_between(rounds, df['lower_bound_95_ci'], df['upper_bound_95_ci'], color='pink', alpha=0.4)

# Titles and axis text.
plt.title('Avg. in-character difference (Persona-directed - Goal-oriented)', fontproperties=bold_font, fontsize=14)
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)

# Light dashed grid and zero reference line.
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.axhline(y=0, color="black", linestyle="--")

# Export for the paper.
plt.savefig('../persistent-personas-paper/media/in_character_dialogue_diff.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Work on an independent deep copy so the table above stays untouched.
df = persona_directed.copy(deep=True)

In [None]:
# Rows of the first round (idx == 0) are the reference each later round is
# compared against; `role` becomes the index of this lookup table.
baseline_df = df.loc[df['idx'] == 0].set_index('role')

# Left-join the reference rows onto every row with the same role, metric and
# model. Columns that exist on both sides keep their name on the left and get
# a "_baseline" suffix on the right.
df_merged = df.merge(
    baseline_df,
    how="left",
    on=['role', "metric", "model"],
    suffixes=('', '_baseline'),
)

# Change of each condition relative to its own round-0 value.
for condition in ("persona", "goal"):
    df_merged[f'{condition}_diff_from_idx_0'] = df_merged[condition] - df_merged[f'{condition}_baseline']

In [None]:
# Seed NumPy's global RNG before resampling.
np.random.seed(42)

# Number of bootstrap resamples drawn per conversation round.
n_bootstraps = 1000

# "style" rows grouped by conversation round (`idx`).
grouped_pos = df_merged[df_merged.metric=="style"].groupby('idx')

# Per round: observed mean change of the "persona" score since round 0 and a
# 95% percentile bootstrap interval around it.
results = []
for pos, group in grouped_pos:
    diffs = group['persona_diff_from_idx_0']

    # Resample with replacement at the original size and keep each mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # 2.5th and 97.5th percentiles bound the 95% interval.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# Rounds whose interval excludes zero (both bounds share a sign) and whose
# mean change is negative.
ci_excludes_zero = results_df["lower_bound_95_ci"]*results_df["upper_bound_95_ci"] > 0
display(ci_excludes_zero & (results_df["mean_difference"] < 0))
persona_df = results_df

In [None]:
# Seed NumPy's global RNG before resampling.
np.random.seed(42)

# Number of bootstrap resamples drawn per conversation round.
n_bootstraps = 1000

# "style" rows grouped by conversation round (`idx`).
grouped_pos = df_merged[df_merged.metric=="style"].groupby('idx')

# Per round: observed mean change of the "goal" score since round 0 and a
# 95% percentile bootstrap interval around it.
results = []
for pos, group in grouped_pos:
    diffs = group['goal_diff_from_idx_0']

    # Resample with replacement at the original size and keep each mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # 2.5th and 97.5th percentiles bound the 95% interval.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# Rounds whose interval excludes zero (both bounds share a sign) and whose
# mean change is negative.
ci_excludes_zero = results_df["lower_bound_95_ci"]*results_df["upper_bound_95_ci"] > 0
display(ci_excludes_zero & (results_df["mean_difference"] < 0))
goal_df = results_df

In [None]:
# Change of the style score since round 0 for both conditions.
plt.figure(figsize=(8, 3))
df = results_df

# For each condition: mean line, then its shaded 95% interval.
for summary, shade, series_name in (
    (persona_df, 'orange', "Persona-directed"),
    (goal_df, 'blue', "Goal Oriented"),
):
    plt.plot(summary['pos'], summary['mean_difference'], color=shade, label=series_name)
    plt.fill_between(summary['pos'], summary['lower_bound_95_ci'], summary['upper_bound_95_ci'], color=shade, alpha=0.4)

# Titles and axis text.
plt.title('Avg. style progression', fontproperties=bold_font, fontsize=14)
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)

# Zero reference line and a light dashed grid.
plt.axhline(y=0, color="black", linestyle="--")
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

# Legend, then export for the paper.
plt.legend()
plt.savefig('../persistent-personas-paper/media/style_dialogue_degradation.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Seed NumPy's global RNG before resampling.
np.random.seed(42)

# Number of bootstrap resamples drawn per conversation round.
n_bootstraps = 1000

# "knowledge" rows grouped by conversation round (`idx`).
grouped_pos = df_merged[df_merged.metric=="knowledge"].groupby('idx')

# Per round: observed mean change of the "persona" score since round 0 and a
# 95% percentile bootstrap interval around it.
results = []
for pos, group in grouped_pos:
    diffs = group['persona_diff_from_idx_0']

    # Resample with replacement at the original size and keep each mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # 2.5th and 97.5th percentiles bound the 95% interval.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# Rounds whose interval excludes zero (both bounds share a sign) and whose
# mean change is negative.
ci_excludes_zero = results_df["lower_bound_95_ci"]*results_df["upper_bound_95_ci"] > 0
display(ci_excludes_zero & (results_df["mean_difference"] < 0))
persona_df = results_df

In [None]:
# Seed NumPy's global RNG before resampling.
np.random.seed(42)

# Number of bootstrap resamples drawn per conversation round.
n_bootstraps = 1000

# "knowledge" rows grouped by conversation round (`idx`).
grouped_pos = df_merged[df_merged.metric=="knowledge"].groupby('idx')

# Per round: observed mean change of the "goal" score since round 0 and a
# 95% percentile bootstrap interval around it.
results = []
for pos, group in grouped_pos:
    diffs = group['goal_diff_from_idx_0']

    # Resample with replacement at the original size and keep each mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # 2.5th and 97.5th percentiles bound the 95% interval.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# Rounds whose interval excludes zero (both bounds share a sign) and whose
# mean change is negative.
ci_excludes_zero = results_df["lower_bound_95_ci"]*results_df["upper_bound_95_ci"] > 0
display(ci_excludes_zero & (results_df["mean_difference"] < 0))
goal_df = results_df

In [None]:
# Change of the knowledge score since round 0 for both conditions.
plt.figure(figsize=(8, 3))
df = results_df

# For each condition: mean line, then its shaded 95% interval.
for summary, shade, series_name in (
    (persona_df, 'orange', "Persona-directed"),
    (goal_df, 'blue', "Goal Oriented"),
):
    plt.plot(summary['pos'], summary['mean_difference'], color=shade, label=series_name)
    plt.fill_between(summary['pos'], summary['lower_bound_95_ci'], summary['upper_bound_95_ci'], color=shade, alpha=0.4)

# Titles and axis text.
plt.title('Avg. knowledge progression', fontproperties=bold_font, fontsize=14)
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)

# Zero reference line and a light dashed grid.
plt.axhline(y=0, color="black", linestyle="--")
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

# Legend, then export for the paper.
plt.legend()
plt.savefig('../persistent-personas-paper/media/knowledge_dialogue_degradation.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Seed NumPy's global RNG before resampling.
np.random.seed(42)

# Number of bootstrap resamples drawn per conversation round.
n_bootstraps = 1000

# "in_character" rows grouped by conversation round (`idx`).
grouped_pos = df_merged[df_merged.metric=="in_character"].groupby('idx')

# Per round: observed mean change of the "persona" score since round 0 and a
# 95% percentile bootstrap interval around it.
results = []
for pos, group in grouped_pos:
    diffs = group['persona_diff_from_idx_0']

    # Resample with replacement at the original size and keep each mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # 2.5th and 97.5th percentiles bound the 95% interval.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# Rounds whose interval excludes zero (both bounds share a sign) and whose
# mean change is negative.
ci_excludes_zero = results_df["lower_bound_95_ci"]*results_df["upper_bound_95_ci"] > 0
display(ci_excludes_zero & (results_df["mean_difference"] < 0))
persona_df = results_df

In [None]:
# Seed NumPy's global RNG before resampling.
np.random.seed(42)

# Number of bootstrap resamples drawn per conversation round.
n_bootstraps = 1000

# "in_character" rows grouped by conversation round (`idx`).
grouped_pos = df_merged[df_merged.metric=="in_character"].groupby('idx')

# Per round: observed mean change of the "goal" score since round 0 and a
# 95% percentile bootstrap interval around it.
results = []
for pos, group in grouped_pos:
    diffs = group['goal_diff_from_idx_0']

    # Resample with replacement at the original size and keep each mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # 2.5th and 97.5th percentiles bound the 95% interval.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# Rounds whose interval excludes zero (both bounds share a sign) and whose
# mean change is negative.
ci_excludes_zero = results_df["lower_bound_95_ci"]*results_df["upper_bound_95_ci"] > 0
display(ci_excludes_zero & (results_df["mean_difference"] < 0))
goal_df = results_df

In [None]:
# Change of the in-character score since round 0 for both conditions.
plt.figure(figsize=(8, 3))
df = results_df

# For each condition: mean line, then its shaded 95% interval.
for summary, shade, series_name in (
    (persona_df, 'orange', "Persona-directed"),
    (goal_df, 'blue', "Goal Oriented"),
):
    plt.plot(summary['pos'], summary['mean_difference'], color=shade, label=series_name)
    plt.fill_between(summary['pos'], summary['lower_bound_95_ci'], summary['upper_bound_95_ci'], color=shade, alpha=0.4)

# Titles and axis text.
plt.title('Avg. in-character progression', fontproperties=bold_font, fontsize=14)
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)

# Light dashed grid and zero reference line.
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.axhline(y=0, color="black", linestyle="--")

# Legend, then export for the paper.
plt.legend()
plt.savefig('../persistent-personas-paper/media/in_character_dialogue_degradation.pdf', dpi=300, bbox_inches="tight")

# BFI

In [None]:
# Questionnaire ratings, one row per answered BFI item.
ratings = pd.read_csv("./results/bfi_ratings.csv")

# Ground-truth annotations per character. The files are read inside `with`
# blocks so their handles are closed right after parsing.
with open("./data/characters_labels.json", "r") as labels_file:
    labels = json.load(labels_file)

with open("./data/BFI.json", "r") as bfi_file:
    bfi = json.load(bfi_file)

# Trait name of every questionnaire item, in file order; `idx` indexes this list.
dimensions = [x["dimension"] for x in bfi["questions"].values()]
ratings["dimension"] = ratings.idx.apply(lambda x: dimensions[x])

# Dialogue type: roles containing "inst-" are instructions dialogues, all
# others interview dialogues.
ratings["base_conversation"] = ratings.role.apply(lambda x: "instructions" if "inst-" in x else "interview")

# The round number is the last "_"-separated token once the trailing nine
# characters are cut off.
# NOTE(review): presumably those nine characters are a ".json.csv" suffix — confirm.
ratings["pos"] = ratings.role.str[:-9].str.split("_").str[-1].astype(int)

# Reduce the role to the bare character name: drop the last "_" token, drop
# everything up to the first "_", then turn remaining underscores into spaces.
ratings["role"] = ratings["role"].str.rsplit("_", n=1).str[0]
ratings["role"] = ratings["role"].str.split("_",n=1).str[-1].str.replace("_", " ")

# Wide table: one row per (model, role, round, dialogue type), one column per
# trait holding the mean item rating; traits without any rating become 0.
ratings = ratings.groupby(['model', 'role', 'pos', "base_conversation", 'dimension'])['rating'].mean().unstack(fill_value=0).reset_index()

In [None]:
# Rows of the "empty" role serve as the baseline; filtering them once avoids
# re-scanning the whole table for every row.
empty_rows = ratings[ratings.role == "empty"]


def _mae_vs_empty(row):
    """Mean absolute trait difference (divided by 4) between `row` and its baseline.

    The baseline is the first "empty" row with the same model, round and
    dialogue type. All conditions are combined into one mask, so pandas never
    has to re-align a boolean key against an already filtered frame.

    NOTE(review): `dimensions` is read from the notebook namespace. When it is
    the per-item list (one entry per questionnaire item) trait names repeat,
    which weights each trait by its item count — confirm this is intended.
    """
    matches = empty_rows[
        (empty_rows.model == row["model"])
        & (empty_rows.pos == row["pos"])
        & (empty_rows.base_conversation == row["base_conversation"])
    ]
    return mean_absolute_error(row[dimensions], matches.iloc[0][dimensions]) / 4


ratings["MAE_empty"] = ratings.apply(_mae_vs_empty, axis=1)

In [None]:
# Average every numeric column per (role, round, dialogue type).
by_role_round = ratings.groupby(['role', 'pos', 'base_conversation'])
ratings_avg = by_role_round.mean(numeric_only=True).reset_index()

In [None]:
# Keep roles that have a ground-truth entry in `labels`, plus every role
# containing "empty"; copy so later column writes do not touch `ratings`.
has_label = ratings.role.isin(labels.keys())
is_empty_role = ratings.role.str.contains("empty")
labelled_ratings = ratings[has_label | is_empty_role].copy()

In [None]:
# From here on `dimensions` holds the five trait names once each.
dimensions = ["Agreeableness", "Conscientiousness", "Extraversion", "Neuroticism", "Openness"]


def _mae_vs_ground_truth(row):
    """Mean absolute difference (divided by 4) between a row's traits and its character's labelled scores."""
    target = [labels[row["role"]]["BFI"][dim]["score"] for dim in dimensions]
    return mean_absolute_error(row[dimensions], target)/4


# Only rows whose role does not contain "empty" have a ground truth of their own.
is_persona_row = ~labelled_ratings.role.str.contains("empty")
labelled_ratings.loc[is_persona_row, "MAE"] = labelled_ratings[is_persona_row].apply(_mae_vs_ground_truth, axis=1)

In [None]:
def _mae_vs_every_character(row):
    """Map each labelled character to the MAE (divided by 4) between `row` and that character's scores."""
    per_character = {}
    for role in labels.keys():
        target = [labels[role]["BFI"][dim]["score"] for dim in dimensions]
        per_character[role] = mean_absolute_error(row[dimensions], target)/4
    return per_character


# Rows whose role contains "empty" are compared against every labelled
# character; their "MAE" cell therefore holds a dict instead of a number.
is_empty_row = labelled_ratings.role.str.contains("empty")
labelled_ratings.loc[is_empty_row, "MAE"] = labelled_ratings[is_empty_row].apply(_mae_vs_every_character, axis=1)

In [None]:
# Register the labelled BFI table in the shared registry.
all_ratings.update(bfi=labelled_ratings)

In [None]:
def _mean_mae_by_round(frame, base_conversation, baseline):
    """Mean "MAE" per round (`pos`) for one dialogue type.

    With ``baseline=True`` only rows whose role contains "empty" are used;
    their "MAE" cell holds a dict of per-character values, which is averaged
    into one number first. With ``baseline=False`` the remaining rows are
    used as they are.
    """
    is_baseline = frame.role.str.contains("empty")
    keep = (frame.base_conversation == base_conversation) & (is_baseline if baseline else ~is_baseline)
    # Copy so the column overwrite below acts on an independent frame instead
    # of a slice of `frame` (avoids SettingWithCopyWarning).
    subset = frame[keep].copy()
    if baseline:
        subset["MAE"] = subset.MAE.apply(lambda per_role: np.mean(list(per_role.values())))
    return subset.groupby('pos')["MAE"].mean().reset_index()


# (dialogue type, legend name, colour, marker) in drawing order.
conditions = (
    ("interview", "Persona-directed", "orange", 'o'),
    ("instructions", "Goal-oriented", "blue", 'x'),
)

fig, axes = plt.subplots(2, 1, figsize=(8, 6), sharex=True)

# --- First Plot (Top Subplot): distance to the labelled ground truth ---
ax1 = axes[0]
for base_conversation, series_name, color, marker in conditions:
    df = _mean_mae_by_round(labelled_ratings, base_conversation, baseline=False)
    ax1.plot(df['pos'], df["MAE"], color=color, marker=marker, label=series_name)

    df = _mean_mae_by_round(labelled_ratings, base_conversation, baseline=True)
    if base_conversation == "interview":
        # Table of the persona-directed baseline curve, shown above the figure.
        display(df)
    ax1.plot(df['pos'], df["MAE"], color=color, marker=marker, label=f"{series_name} (baseline)", linestyle='--')

ax1.set_title('Big Five Traits: Roles vs. Ground Truth (↓)', fontproperties=bold_font, fontsize=14)
ax1.set_ylabel('')
ax1.grid(True)

# --- Second Plot (Bottom Subplot): distance to the "empty" baseline ---
ax2 = axes[1]
for base_conversation, series_name, color, marker in conditions:
    selected = ratings[
        (ratings.base_conversation == base_conversation) &
        (~ratings.role.str.contains("empty"))
    ]
    df = selected.groupby('pos')["MAE_empty"].mean().reset_index()
    ax2.plot(df['pos'], df["MAE_empty"], marker=marker, color=color, label=series_name)

ax2.set_title('Big Five Traits: Roles vs. Baseline', fontproperties=bold_font, fontsize=14)
ax2.set_xlabel('Conversation round')
ax2.set_ylabel('')
ax2.grid(True)

# Shared y-axis caption, placed on the figure because both subplots use it.
fig.text(0.01, 0.5, 'Normalized mean absolute difference', va='center', rotation='vertical')

# --- Combined Legend and Layout ---
# The top subplot carries all four series, so its entries feed the figure
# legend. The names avoid `labels`, which holds the character ground-truth
# dictionary used by the cells above.
legend_handles, legend_labels = ax1.get_legend_handles_labels()
fig.legend(legend_handles, legend_labels, loc='upper center', bbox_to_anchor=(0.55, 1.09), ncol=2)

plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

In [None]:
# Write the current `fig` to the paper's media folder as a tightly cropped PDF.
bfi_mae_pdf = '../persistent-personas-paper/media/bfi_mae.pdf'
fig.savefig(bfi_mae_pdf, dpi=300, bbox_inches="tight")

In [None]:
# Drop the "empty" role, whose "MAE" cells hold dicts rather than numbers.
# The column is object-typed because of those dicts, and mean aggregation of
# object columns depends on the pandas version, so cast to float explicitly
# before pivoting.
non_empty = labelled_ratings[labelled_ratings.role != "empty"].astype({"MAE": float})

# One row per (model, role, round) with the mean MAE of each dialogue type in
# its own column.
pivoted_df = non_empty.pivot_table(index=['model', 'role', 'pos'],
                            columns='base_conversation',
                            values='MAE').reset_index()

# Calculate the difference (MAE_interview - MAE_instructions)
pivoted_df['MAE_difference'] = pivoted_df['interview'] - pivoted_df['instructions']

In [None]:
# Seed NumPy's global RNG before resampling.
np.random.seed(42)

# Number of bootstrap resamples drawn per conversation round.
n_bootstraps = 1000

# Rows grouped by conversation round (`pos`).
grouped_pos = pivoted_df.groupby('pos')

# One summary record per round: observed mean of `MAE_difference` and a 95%
# percentile bootstrap interval around it.
results = []
for pos, group in grouped_pos:
    diffs = group['MAE_difference']

    # Resample with replacement at the original size and keep each mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # 2.5th and 97.5th percentiles bound the 95% interval.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

# Summary table, indexed by the sorted unique round numbers.
results_df = pd.DataFrame(results, index=np.unique(pivoted_df.pos))
df = results_df

In [None]:
plt.figure(figsize=(8, 3))
# Plot the mean differences as a line
plt.plot(df['pos'], df['mean_difference'], color='magenta')

# Fill the area for the 95% confidence interval
plt.fill_between(df['pos'], df['lower_bound_95_ci'], df['upper_bound_95_ci'], color='pink', alpha=0.4)

# Add labels and title
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)
plt.title('Mean Absolute difference from Ground Truth (Persona-directed - Goal-oriented)', fontproperties=bold_font, fontsize=14)

# Add a grid for better readability
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

# Zero reference line. It has to be drawn before the figure is written to
# disk, otherwise the exported PDF lacks it.
plt.axhline(y=0, color="black", linestyle="--")

# Export for the paper.
plt.savefig('../persistent-personas-paper/media/mae_dialogue_diff.pdf', dpi=300, bbox_inches="tight")


In [None]:
# Reshape to one row per (model, role, pos) with one 'MAE_empty' column per
# base conversation type; rows whose role is exactly "empty" are left out.
non_empty_ratings = ratings[ratings.role != "empty"]
pivoted_df = non_empty_ratings.pivot_table(
    values='MAE_empty',
    index=['model', 'role', 'pos'],
    columns='base_conversation',
).reset_index()

# Per-row gap between the two conversation types (interview - instructions).
pivoted_df['MAE_difference'] = pivoted_df['interview'] - pivoted_df['instructions']

In [None]:
# Bootstrap a 95% confidence interval, per conversation round ('pos'), for the
# mean of 'MAE_difference' (interview minus instructions).
np.random.seed(42)  # fixed seed so the resampled intervals are reproducible

# Number of bootstrap resamples per round
n_bootstraps = 1000

# One summary dict per round that has enough data
results = []

grouped_pos = pivoted_df.groupby('pos')

for pos, group in grouped_pos:
    diffs = group['MAE_difference']

    # Skip groups with too few samples to perform meaningful bootstrapping
    if len(diffs) < 2:
        print(f"Skipping 'pos' {pos} due to insufficient data.")
        continue

    # Resample with replacement (same size as the original sample) and keep
    # the mean of every resample.
    bootstrap_means = []
    for _ in range(n_bootstraps):
        resampled_diffs = diffs.sample(n=len(diffs), replace=True)
        bootstrap_means.append(resampled_diffs.mean())

    # Percentile interval: the central 95% of the bootstrap means.
    lower_bound = np.percentile(bootstrap_means, 2.5)
    upper_bound = np.percentile(bootstrap_means, 97.5)

    results.append({
        'pos': pos,
        'mean_difference': np.mean(diffs),
        'lower_bound_95_ci': lower_bound,
        'upper_bound_95_ci': upper_bound
    })

# Build the summary frame and index it by the rounds that were actually kept.
# Indexing by np.unique(pivoted_df.pos) would raise a length-mismatch
# ValueError as soon as one round is skipped above. Naming the columns
# explicitly keeps the frame usable even if every round was skipped.
summary_columns = ['pos', 'mean_difference', 'lower_bound_95_ci', 'upper_bound_95_ci']
results_df = pd.DataFrame(results, columns=summary_columns)
results_df.index = results_df['pos'].to_numpy()

df = results_df


In [None]:
# Plot the per-round mean difference of 'MAE_empty' (persona-directed minus
# goal-oriented) with its bootstrapped 95% confidence band, then export it.
plt.figure(figsize=(8, 3))

# Mean difference as a line, confidence interval as a shaded band.
plt.plot(df['pos'], df['mean_difference'], color='magenta')
plt.fill_between(df['pos'], df['lower_bound_95_ci'], df['upper_bound_95_ci'], color='pink', alpha=0.4)

# Labels and title
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)
plt.title('Mean Absolute Difference from Baseline (Persona-directed - Goal-oriented)', fontproperties=bold_font, fontsize=14)

# Grid and zero reference line ("no difference between the two conditions").
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.axhline(y=0, color="black", linestyle="--")

# No legend: none of the artists above carries a label, so plt.legend() would
# only emit a "no artists with labels" warning and add an empty legend.
plt.savefig('../persistent-personas-paper/media/mae_empty_dialogue_diff.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Reshape to one row per (model, role, pos) with one 'MAE' column per base
# conversation type; rows whose role is exactly "empty" are left out.
labelled_non_empty = labelled_ratings[labelled_ratings.role != "empty"]
pivoted_df = labelled_non_empty.pivot_table(
    values='MAE',
    index=['model', 'role', 'pos'],
    columns='base_conversation',
).reset_index()

In [None]:
# Express every round's value relative to round 0 of the same (role, model).
df = pivoted_df.copy()

# Round-0 rows act as the baseline.
is_first_round = df['pos'] == 0
baseline_df = df[is_first_round].set_index('role')

# Left join: every row keeps its own values and gains the matching round-0
# values in '*_baseline' columns.
df_merged = pd.merge(
    left=df,
    right=baseline_df,
    how="left",
    on=['role', "model"],
    suffixes=('', '_baseline'),
)

# Change since round 0, separately for each conversation type.
df_merged['persona_diff_from_idx_0'] = (
    df_merged['interview'] - df_merged['interview_baseline']
)
df_merged['goal_diff_from_idx_0'] = (
    df_merged['instructions'] - df_merged['instructions_baseline']
)

In [None]:
# Bootstrap a 95% confidence interval, per conversation round ('pos'), for the
# mean of 'persona_diff_from_idx_0' (interview value minus its round-0 value).
np.random.seed(42)  # fixed seed so the resampled intervals are reproducible

n_bootstraps = 1000  # number of resamples drawn for every round

grouped_pos = df_merged.groupby('pos')
results = []

for pos, group in grouped_pos:
    diffs = group['persona_diff_from_idx_0']

    # Resample with replacement (same size as the original sample) and keep
    # the mean of every resample.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: the central 95% of the bootstrap means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append({
        'pos': pos,
        'mean_difference': diffs.mean(),
        'lower_bound_95_ci': lower_bound,
        'upper_bound_95_ci': upper_bound,
    })

results_df = pd.DataFrame(results)

# Flag rounds whose interval bounds share a sign (product > 0, i.e. the CI
# does not straddle zero) and whose mean change is negative.
ci_same_sign = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_same_sign & mean_is_negative)

persona_df = results_df

In [None]:
# Bootstrap a 95% confidence interval, per conversation round ('pos'), for the
# mean of 'goal_diff_from_idx_0' (instructions value minus its round-0 value).
np.random.seed(42)  # fixed seed so the resampled intervals are reproducible

n_bootstraps = 1000  # number of resamples drawn for every round

grouped_pos = df_merged.groupby('pos')
results = []

for pos, group in grouped_pos:
    diffs = group['goal_diff_from_idx_0']

    # Resample with replacement (same size as the original sample) and keep
    # the mean of every resample.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: the central 95% of the bootstrap means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append({
        'pos': pos,
        'mean_difference': diffs.mean(),
        'lower_bound_95_ci': lower_bound,
        'upper_bound_95_ci': upper_bound,
    })

results_df = pd.DataFrame(results)

# Flag rounds whose interval bounds share a sign (product > 0, i.e. the CI
# does not straddle zero) and whose mean change is negative.
ci_same_sign = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_same_sign & mean_is_negative)

goal_df = results_df

In [None]:
# Compare how the change since round 0 evolves for the two conversation types.
plt.figure(figsize=(8,3))
df = results_df

# For each condition: mean curve first, then its shaded 95% confidence band.
for curve_df, curve_color, curve_label in (
    (persona_df, 'orange', "Persona-directed"),
    (goal_df, 'blue', "Goal Oriented"),
):
    plt.plot(curve_df['pos'], curve_df['mean_difference'], color=curve_color, label=curve_label)
    plt.fill_between(
        curve_df['pos'],
        curve_df['lower_bound_95_ci'],
        curve_df['upper_bound_95_ci'],
        color=curve_color,
        alpha=0.4,
    )

# Title and axis labels
plt.title('Mean Absolute Difference from Ground Truth progression', fontproperties=bold_font, fontsize=14)
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)

# Zero reference line ("no change since round 0") and grid
plt.axhline(y=0, color="black", linestyle="--")
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

plt.legend()
plt.savefig('../persistent-personas-paper/media/mae_ground_truth_degradation.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Reshape to one row per (model, role, pos) with one 'MAE_empty' column per
# base conversation type; rows whose role is exactly "empty" are left out.
non_empty_ratings = ratings[ratings.role != "empty"]
pivoted_df = non_empty_ratings.pivot_table(
    values='MAE_empty',
    index=['model', 'role', 'pos'],
    columns='base_conversation',
).reset_index()

In [None]:
# Express every round's value relative to round 0 of the same (role, model).
df = pivoted_df.copy()

# Round-0 rows act as the baseline.
is_first_round = df['pos'] == 0
baseline_df = df[is_first_round].set_index('role')

# Left join: every row keeps its own values and gains the matching round-0
# values in '*_baseline' columns.
df_merged = pd.merge(
    left=df,
    right=baseline_df,
    how="left",
    on=['role', "model"],
    suffixes=('', '_baseline'),
)

# Change since round 0, separately for each conversation type.
df_merged['persona_diff_from_idx_0'] = (
    df_merged['interview'] - df_merged['interview_baseline']
)
df_merged['goal_diff_from_idx_0'] = (
    df_merged['instructions'] - df_merged['instructions_baseline']
)

In [None]:
# Bootstrap a 95% confidence interval, per conversation round ('pos'), for the
# mean of 'persona_diff_from_idx_0' (interview value minus its round-0 value).
np.random.seed(42)  # fixed seed so the resampled intervals are reproducible

n_bootstraps = 1000  # number of resamples drawn for every round

grouped_pos = df_merged.groupby('pos')
results = []

for pos, group in grouped_pos:
    diffs = group['persona_diff_from_idx_0']

    # Resample with replacement (same size as the original sample) and keep
    # the mean of every resample.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: the central 95% of the bootstrap means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append({
        'pos': pos,
        'mean_difference': diffs.mean(),
        'lower_bound_95_ci': lower_bound,
        'upper_bound_95_ci': upper_bound,
    })

results_df = pd.DataFrame(results)

# Flag rounds whose interval bounds share a sign (product > 0, i.e. the CI
# does not straddle zero) and whose mean change is negative.
ci_same_sign = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_same_sign & mean_is_negative)

persona_df = results_df

In [None]:
# Bootstrap a 95% confidence interval, per conversation round ('pos'), for the
# mean of 'goal_diff_from_idx_0' (instructions value minus its round-0 value).
np.random.seed(42)  # fixed seed so the resampled intervals are reproducible

n_bootstraps = 1000  # number of resamples drawn for every round

grouped_pos = df_merged.groupby('pos')
results = []

for pos, group in grouped_pos:
    diffs = group['goal_diff_from_idx_0']

    # Resample with replacement (same size as the original sample) and keep
    # the mean of every resample.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: the central 95% of the bootstrap means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append({
        'pos': pos,
        'mean_difference': diffs.mean(),
        'lower_bound_95_ci': lower_bound,
        'upper_bound_95_ci': upper_bound,
    })

results_df = pd.DataFrame(results)

# Flag rounds whose interval bounds share a sign (product > 0, i.e. the CI
# does not straddle zero) and whose mean change is negative.
ci_same_sign = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_same_sign & mean_is_negative)

goal_df = results_df

In [None]:
# Compare how the change since round 0 evolves for the two conversation types.
plt.figure(figsize=(8, 3))
df = results_df

# For each condition: mean curve first, then its shaded 95% confidence band.
for curve_df, curve_color, curve_label in (
    (persona_df, 'orange', "Persona-directed"),
    (goal_df, 'blue', "Goal Oriented"),
):
    plt.plot(curve_df['pos'], curve_df['mean_difference'], color=curve_color, label=curve_label)
    plt.fill_between(
        curve_df['pos'],
        curve_df['lower_bound_95_ci'],
        curve_df['upper_bound_95_ci'],
        color=curve_color,
        alpha=0.4,
    )

# Title and axis labels
plt.title('Mean Absolute Difference from Baseline progression', fontproperties=bold_font, fontsize=14)
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)

# Zero reference line ("no change since round 0") and grid
plt.axhline(y=0, color="black", linestyle="--")
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

plt.legend()
plt.savefig('../persistent-personas-paper/media/mae_empty_degradation.pdf', dpi=300, bbox_inches="tight")

# Instruction persona-specific

In [None]:
# Load the pairwise ratings for the persona-specific instruction task and
# derive the columns used by the analysis below: 'reference', a 0/1 'rating',
# 'pos', 'base_conversation' and a cleaned-up 'role'.
ratings = pd.read_csv("./results/instruction_role_specific_ratings.csv")

data, _ = load_data("instruction_role_specific")

# The kind of reference answer is encoded as a marker inside `role`; move it
# into its own column and strip the marker from the role string.
# (A leftover debugging expression that indexed the first "previous" role was
# removed here: it displayed nothing and raised IndexError when no role matched.)
ratings["reference"] = "dataset"
ratings.loc[ratings.role.str.contains("_zero_reference"), "reference"] = "0_answer"
ratings.loc[ratings.role.str.contains("_previous_reference"), "reference"] = "previous_answer"
ratings["role"] = ratings["role"].str.replace("_zero_reference", "", regex=False)
ratings["role"] = ratings["role"].str.replace("_previous_reference", "", regex=False)

# Convert the judge's "A"/"B" verdict to 0/1. The meaning of A and B flips
# with the parity of `idx`.
# NOTE(review): presumably the two answers are shown in swapped order on odd
# indices to counter position bias - confirm against the rating script.
even_mapping = {"A": 0, "B": 1}
odd_mapping = {"A": 1, "B": 0}
ratings["rating"] = ratings.apply(lambda x: even_mapping[x["rating"]] if x["idx"]%2==0 else odd_mapping[x["rating"]], axis=1)

# Conversation round: the number directly in front of ".json" in the role string.
ratings["pos"] = ratings.role.str.extract(r"(\d+)(?=\.json)").astype(int)

# Roles containing "inst-" come from the instructions conversation, all others
# are treated as interview conversations.
ratings["base_conversation"] = ratings.role.apply(lambda x: "instructions" if "inst-" in x else "interview")

# Normalise "empty_<n>.json" to "empty" (dot escaped so only a literal
# ".json" matches).
ratings["role"] = ratings.role.str.replace(r"empty_\d+\.json", "empty", regex=True)

# Give "empty" roles a trailing "_" segment so that the rsplit below removes
# the same number of segments from them as from the other roles.
ratings["role"] = ratings.role.apply(lambda x: x.replace(".csv", "_.csv") if "empty" in x else x)

# Drop everything after the last "_".
ratings["role"] = ratings["role"].str.rsplit("_", n=1).str[0]

# Drop the first three "_"-separated prefix segments and use spaces in what remains.
ratings["role"] = ratings["role"].str.split("_",n=3).str[-1].str.replace("_", " ")

In [None]:
# Average the 0/1 ratings per (model, role, pos, reference) and spread the
# 'base_conversation' level into columns.
# Note: this overwrites `ratings`, so the cell cannot be re-run without
# re-running the loading cell first (the 'rating' column is gone afterwards).
ratings = (
    ratings
    .groupby(['model', 'role', 'pos', "reference", "base_conversation"])['rating']
    .mean()
    .unstack()
    .reset_index()
)

In [None]:
# Keep the aggregated persona-specific ratings in the shared `all_ratings` dict.
all_ratings["role_specific"] = ratings

In [None]:
# Mean win rate per conversation round, restricted to the "dataset" reference
# and to roles that do not contain "empty".
persona_dataset_rows = (ratings.reference == "dataset") & (~ratings.role.str.contains("empty"))
df = ratings[persona_dataset_rows].groupby(['pos']).mean(numeric_only=True).reset_index()

fig, ax = plt.subplots(figsize=(8, 3))

# One line per conversation type (plain means, no confidence band here).
for rate_column, line_color, line_marker, line_label in (
    ("interview", "orange", 'o', 'Persona-directed'),
    ("instructions", "blue", 'x', 'Goal-oriented'),
):
    ax.plot(df.pos, df[rate_column], color=line_color, marker=line_marker, label=line_label)

ax.set_title('Quality of responses to persona-specific instructions (↑)', fontproperties=bold_font, fontsize=14)
ax.set_xlabel('Conversation round')
ax.set_ylabel('Win rate')
ax.legend(loc='best')
ax.grid(True)

In [None]:
# Export the win-rate figure built in the previous cell (`fig`) to the paper's media folder.
fig.savefig('../persistent-personas-paper/media/role_specific.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Keep the persona rows (role does not contain "empty") that were rated
# against the "dataset" reference. Both conditions are combined into a single
# mask built on `ratings` itself: the previous chained form applied a mask from
# the full frame to an already filtered frame (pandas reindexes it with a
# UserWarning), and the column assignment below then hit a slice
# (SettingWithCopyWarning). `.copy()` makes the result an independent frame.
is_persona_row = ~ratings.role.str.contains("empty")
is_dataset_reference = ratings.reference == "dataset"
filtered_df = ratings[is_persona_row & is_dataset_reference].copy()

# Win-rate gap between the two conversation types (interview - instructions).
filtered_df['rating_difference'] = filtered_df['interview'] - filtered_df['instructions']

In [None]:
# Bootstrap a 95% confidence interval, per conversation round ('pos'), for the
# mean of 'rating_difference' (interview minus instructions).
np.random.seed(42)  # fixed seed so the resampled intervals are reproducible

n_bootstraps = 1000  # number of resamples drawn for every round

grouped_pos = filtered_df.groupby('pos')
results = []

for pos, group in grouped_pos:
    diffs = group['rating_difference']

    # Resample with replacement (same size as the original sample) and keep
    # the mean of every resample.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: the central 95% of the bootstrap means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append({
        'pos': pos,
        'mean_difference': diffs.mean(),
        'lower_bound_95_ci': lower_bound,
        'upper_bound_95_ci': upper_bound,
    })

# One summary row per round.
results_df = pd.DataFrame(results)
df = results_df


In [None]:
# Plot the per-round mean win-rate difference (persona-directed minus
# goal-oriented) with its bootstrapped 95% confidence band, then export it.
plt.figure(figsize=(8, 3))

# Mean difference as a line, confidence interval as a shaded band.
plt.plot(df['pos'], df['mean_difference'], color='magenta')
plt.fill_between(df['pos'], df['lower_bound_95_ci'], df['upper_bound_95_ci'], color='pink', alpha=0.4)

# Zero reference line ("no difference between the two conditions").
# It has to be drawn BEFORE savefig, otherwise the exported PDF lacks it.
plt.axhline(y=0, color="black", linestyle="--")

# Labels and title
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)
plt.title('Avg. persona-specific response quality difference (Persona-directed - Goal-oriented)', fontproperties=bold_font, fontsize=14)

# Grid for better readability
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

# Single unlabeled series, so no legend is drawn.
plt.savefig('../persistent-personas-paper/media/role_specific_dialogue_diff.pdf', dpi=300, bbox_inches="tight")


In [None]:
# Express every round's win rate relative to round 0 of the same (role, model).
df = filtered_df.copy()

# Round-0 rows act as the baseline.
is_first_round = df['pos'] == 0
baseline_df = df[is_first_round].set_index('role')

# Left join: every row keeps its own values and gains the matching round-0
# values in '*_baseline' columns.
df_merged = pd.merge(
    left=df,
    right=baseline_df,
    how="left",
    on=['role', "model"],
    suffixes=('', '_baseline'),
)

# Change since round 0, separately for each conversation type.
df_merged['persona_diff_from_idx_0'] = (
    df_merged['interview'] - df_merged['interview_baseline']
)
df_merged['goal_diff_from_idx_0'] = (
    df_merged['instructions'] - df_merged['instructions_baseline']
)

In [None]:
# Bootstrap a 95% confidence interval, per conversation round ('pos'), for the
# mean of 'persona_diff_from_idx_0' (interview value minus its round-0 value).
np.random.seed(42)  # fixed seed so the resampled intervals are reproducible

n_bootstraps = 1000  # number of resamples drawn for every round

grouped_pos = df_merged.groupby('pos')
results = []

for pos, group in grouped_pos:
    diffs = group['persona_diff_from_idx_0']

    # Resample with replacement (same size as the original sample) and keep
    # the mean of every resample.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: the central 95% of the bootstrap means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append({
        'pos': pos,
        'mean_difference': diffs.mean(),
        'lower_bound_95_ci': lower_bound,
        'upper_bound_95_ci': upper_bound,
    })

results_df = pd.DataFrame(results)

# Flag rounds whose interval bounds share a sign (product > 0, i.e. the CI
# does not straddle zero) and whose mean change is negative.
ci_same_sign = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_same_sign & mean_is_negative)

persona_df = results_df

In [None]:
# Bootstrap a 95% confidence interval, per conversation round ('pos'), for the
# mean of 'goal_diff_from_idx_0' (instructions value minus its round-0 value).
np.random.seed(42)  # fixed seed so the resampled intervals are reproducible

n_bootstraps = 1000  # number of resamples drawn for every round

grouped_pos = df_merged.groupby('pos')
results = []

for pos, group in grouped_pos:
    diffs = group['goal_diff_from_idx_0']

    # Resample with replacement (same size as the original sample) and keep
    # the mean of every resample.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: the central 95% of the bootstrap means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append({
        'pos': pos,
        'mean_difference': diffs.mean(),
        'lower_bound_95_ci': lower_bound,
        'upper_bound_95_ci': upper_bound,
    })

results_df = pd.DataFrame(results)

# Flag rounds whose interval bounds share a sign (product > 0, i.e. the CI
# does not straddle zero) and whose mean change is negative.
ci_same_sign = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_same_sign & mean_is_negative)

goal_df = results_df

In [None]:
# Compare how the change since round 0 evolves for the two conversation types.
plt.figure(figsize=(8, 3))
df = results_df

# For each condition: mean curve first, then its shaded 95% confidence band.
for curve_df, curve_color, curve_label in (
    (persona_df, 'orange', "Persona-directed"),
    (goal_df, 'blue', "Goal Oriented"),
):
    plt.plot(curve_df['pos'], curve_df['mean_difference'], color=curve_color, label=curve_label)
    plt.fill_between(
        curve_df['pos'],
        curve_df['lower_bound_95_ci'],
        curve_df['upper_bound_95_ci'],
        color=curve_color,
        alpha=0.4,
    )

# Title and axis labels
plt.title('Avg. persona-specific response quality progression', fontproperties=bold_font, fontsize=14)
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)

# Zero reference line ("no change since round 0") and grid
plt.axhline(y=0, color="black", linestyle="--")
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

# The legend is intentionally left disabled for this figure.
#plt.legend()
plt.savefig('../persistent-personas-paper/media/role_specific_degradation.pdf', dpi=300, bbox_inches="tight")

# Instruction general

In [None]:
# Load the pairwise ratings for the general instruction task and derive the
# columns used by the analysis below: 'reference', a 0/1 'rating', 'pos',
# 'base_conversation' and a cleaned-up 'role'.
ratings = pd.read_csv("./results/instruction_general_ratings.csv")

data, _ = load_data("instruction_general")

# The kind of reference answer is encoded as a marker inside `role`; move it
# into its own column and strip the marker from the role string.
ratings["reference"] = "dataset"
ratings.loc[ratings.role.str.contains("_zero_reference"), "reference"] = "0_answer"
ratings.loc[ratings.role.str.contains("_previous_reference"), "reference"] = "previous_answer"
ratings["role"] = ratings["role"].str.replace("_zero_reference", "", regex=False)
ratings["role"] = ratings["role"].str.replace("_previous_reference", "", regex=False)

# Convert the judge's "A"/"B" verdict to 0/1. The meaning of A and B flips
# with the parity of `idx`.
# NOTE(review): presumably the two answers are shown in swapped order on odd
# indices to counter position bias - confirm against the rating script.
even_mapping = {"A": 0, "B": 1}
odd_mapping = {"A": 1, "B": 0}
ratings["rating"] = ratings.apply(lambda x: even_mapping[x["rating"]] if x["idx"]%2==0 else odd_mapping[x["rating"]], axis=1)

# Conversation round: the number directly in front of ".json" in the role string.
ratings["pos"] = ratings.role.str.extract(r"(\d+)(?=\.json)").astype(int)

# Roles containing "inst-" come from the instructions conversation, all others
# are treated as interview conversations.
ratings["base_conversation"] = ratings.role.apply(lambda x: "instructions" if "inst-" in x else "interview")

# Normalise "empty_<n>.json" to "empty" (dot escaped so only a literal
# ".json" matches, consistent with the look-ahead used for `pos`).
ratings["role"] = ratings.role.str.replace(r"empty_\d+\.json", "empty", regex=True)

# Give "empty" roles a trailing "_" segment so that the rsplit below removes
# the same number of segments from them as from the other roles.
ratings["role"] = ratings.role.apply(lambda x: x.replace(".csv", "_.csv") if "empty" in x else x)

# Drop everything after the last "_".
ratings["role"] = ratings["role"].str.rsplit("_", n=1).str[0]

# Drop the first two "_"-separated prefix segments and use spaces in what remains.
ratings["role"] = ratings["role"].str.split("_",n=2).str[-1].str.replace("_", " ")

In [None]:
# Average the 0/1 ratings per (model, role, pos, reference) and spread the
# 'base_conversation' level into columns.
# Note: this overwrites `ratings`, so the cell cannot be re-run without
# re-running the loading cell first (the 'rating' column is gone afterwards).
ratings = (
    ratings
    .groupby(['model', 'role', 'pos', "reference", "base_conversation"])['rating']
    .mean()
    .unstack()
    .reset_index()
)

In [None]:
# Keep the aggregated general-instruction ratings in the shared `all_ratings` dict.
all_ratings["general"] = ratings

In [None]:

# Win rate per conversation round for the "dataset" reference: dashed lines
# for roles containing "empty" (baseline), solid lines for all other roles.
fig, ax = plt.subplots(figsize=(8, 3))

is_dataset_reference = ratings.reference == "dataset"
is_empty_role = ratings.role.str.contains("empty")

# Per-round means (plain means, no confidence band here).
baseline_means = ratings[is_dataset_reference & is_empty_role].groupby(['pos']).mean(numeric_only=True).reset_index()
df = ratings[is_dataset_reference & (~is_empty_role)].groupby(['pos']).mean(numeric_only=True).reset_index()

# (frame, column, line keyword arguments) in drawing order.
line_specs = (
    (baseline_means, "interview",
     dict(color="orange", marker="o", linestyle="--", label='Persona-directed (baseline)')),
    (baseline_means, "instructions",
     dict(color="blue", marker='x', linestyle="--", label='Goal-oriented (baseline)')),
    (df, "interview",
     dict(color="orange", marker='o', label='Persona-directed (persona)')),
    (df, "instructions",
     dict(color="blue", marker='x', label='Goal-oriented (persona)')),
)
for means_df, rate_column, line_kwargs in line_specs:
    ax.plot(means_df.pos, means_df[rate_column], **line_kwargs)

ax.set_title('Quality of responses to general instructions (↑)', fontproperties=bold_font, fontsize=14)
ax.set_xlabel('Conversation round')
ax.set_ylabel('Win rate')
ax.legend(loc='best')
ax.grid(True)

In [None]:
# Export the win-rate figure built in the previous cell (`fig`) to the paper's media folder.
fig.savefig('../persistent-personas-paper/media/instruction_general.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Keep the persona rows (role does not contain "empty") that were rated
# against the "dataset" reference. Both conditions are combined into a single
# mask built on `ratings` itself: the previous chained form applied a mask from
# the full frame to an already filtered frame (pandas reindexes it with a
# UserWarning), and the column assignment below then hit a slice
# (SettingWithCopyWarning). `.copy()` makes the result an independent frame.
is_persona_row = ~ratings.role.str.contains("empty")
is_dataset_reference = ratings.reference == "dataset"
filtered_df = ratings[is_persona_row & is_dataset_reference].copy()

# Win-rate gap between the two conversation types (interview - instructions).
filtered_df['rating_difference'] = filtered_df['interview'] - filtered_df['instructions']

In [None]:
# Bootstrap a 95% confidence interval, per conversation round ('pos'), for the
# mean of 'rating_difference' (interview minus instructions).
np.random.seed(42)  # fixed seed so the resampled intervals are reproducible

n_bootstraps = 1000  # number of resamples drawn for every round

grouped_pos = filtered_df.groupby('pos')
results = []

for pos, group in grouped_pos:
    diffs = group['rating_difference']

    # Resample with replacement (same size as the original sample) and keep
    # the mean of every resample.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: the central 95% of the bootstrap means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append({
        'pos': pos,
        'mean_difference': diffs.mean(),
        'lower_bound_95_ci': lower_bound,
        'upper_bound_95_ci': upper_bound,
    })

# One summary row per round; `df` is what the next plotting cell reads.
results_df = pd.DataFrame(results)
df = results_df


In [None]:
# Plot the per-round mean win-rate difference (persona-directed minus
# goal-oriented) with its bootstrapped 95% confidence band, then export it.
plt.figure(figsize=(8, 3))

# Mean difference as a line, confidence interval as a shaded band.
plt.plot(df['pos'], df['mean_difference'], color='magenta')
plt.fill_between(df['pos'], df['lower_bound_95_ci'], df['upper_bound_95_ci'], color='pink', alpha=0.4)

# Zero reference line ("no difference between the two conditions").
# It has to be drawn BEFORE savefig, otherwise the exported PDF lacks it.
plt.axhline(y=0, color="black", linestyle="--")

# Labels and title
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)
plt.title('Avg. general response quality difference (Persona-directed - Goal-oriented)', fontproperties=bold_font, fontsize=14)

# Grid for better readability
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

# Single unlabeled series, so no legend is drawn.
plt.savefig('../persistent-personas-paper/media/instruction_general_dialogue_diff.pdf', dpi=300, bbox_inches="tight")


In [None]:
# Express every round's win rate relative to round 0 of the same (role, model).
df = filtered_df.copy()

# Round-0 rows act as the baseline.
is_first_round = df['pos'] == 0
baseline_df = df[is_first_round].set_index('role')

# Left join: every row keeps its own values and gains the matching round-0
# values in '*_baseline' columns.
df_merged = pd.merge(
    left=df,
    right=baseline_df,
    how="left",
    on=['role', "model"],
    suffixes=('', '_baseline'),
)

# Change since round 0, separately for each conversation type.
df_merged['persona_diff_from_idx_0'] = (
    df_merged['interview'] - df_merged['interview_baseline']
)
df_merged['goal_diff_from_idx_0'] = (
    df_merged['instructions'] - df_merged['instructions_baseline']
)

In [None]:
# Bootstrap a 95% confidence interval, per conversation round ('pos'), for the
# mean of 'persona_diff_from_idx_0' (interview value minus its round-0 value).
np.random.seed(42)  # fixed seed so the resampled intervals are reproducible

n_bootstraps = 1000  # number of resamples drawn for every round

grouped_pos = df_merged.groupby('pos')
results = []

for pos, group in grouped_pos:
    diffs = group['persona_diff_from_idx_0']

    # Resample with replacement (same size as the original sample) and keep
    # the mean of every resample.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: the central 95% of the bootstrap means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append({
        'pos': pos,
        'mean_difference': diffs.mean(),
        'lower_bound_95_ci': lower_bound,
        'upper_bound_95_ci': upper_bound,
    })

results_df = pd.DataFrame(results)

# Flag rounds whose interval bounds share a sign (product > 0, i.e. the CI
# does not straddle zero) and whose mean change is negative.
ci_same_sign = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_same_sign & mean_is_negative)

persona_df = results_df

In [None]:
# Bootstrap a 95% confidence interval, per conversation round ('pos'), for the
# mean of 'goal_diff_from_idx_0' (instructions value minus its round-0 value).
np.random.seed(42)  # fixed seed so the resampled intervals are reproducible

n_bootstraps = 1000  # number of resamples drawn for every round

grouped_pos = df_merged.groupby('pos')
results = []

for pos, group in grouped_pos:
    diffs = group['goal_diff_from_idx_0']

    # Resample with replacement (same size as the original sample) and keep
    # the mean of every resample.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: the central 95% of the bootstrap means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append({
        'pos': pos,
        'mean_difference': diffs.mean(),
        'lower_bound_95_ci': lower_bound,
        'upper_bound_95_ci': upper_bound,
    })

results_df = pd.DataFrame(results)

# Flag rounds whose interval bounds share a sign (product > 0, i.e. the CI
# does not straddle zero) and whose mean change is negative.
ci_same_sign = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_same_sign & mean_is_negative)

goal_df = results_df

In [None]:
# Compare how the change since round 0 evolves for the two conversation types.
plt.figure(figsize=(8, 3))
df = results_df

# For each condition: mean curve first, then its shaded 95% confidence band.
for curve_df, curve_color, curve_label in (
    (persona_df, 'orange', "Persona-directed"),
    (goal_df, 'blue', "Goal Oriented"),
):
    plt.plot(curve_df['pos'], curve_df['mean_difference'], color=curve_color, label=curve_label)
    plt.fill_between(
        curve_df['pos'],
        curve_df['lower_bound_95_ci'],
        curve_df['upper_bound_95_ci'],
        color=curve_color,
        alpha=0.4,
    )

# Title and axis labels
plt.title('Avg. general response quality improvement', fontproperties=bold_font, fontsize=14)
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)

# Zero reference line ("no change since round 0") and grid
plt.axhline(y=0, color="black", linestyle="--")
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

plt.legend()
plt.savefig('../persistent-personas-paper/media/instruction_general_degradation.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Compare each persona row with the "empty" baseline row of the same
# (role, model, pos), using only the "dataset" reference.
is_dataset_reference = ratings.reference == "dataset"
is_empty_role = ratings.role.str.contains("empty")

df = ratings[(~is_empty_role) & is_dataset_reference].copy()
baseline_df = ratings[is_empty_role & is_dataset_reference].copy()

# Strip the "empty-" marker so that baseline roles can match the persona roles.
baseline_df["role"] = baseline_df["role"].apply(lambda role_name: role_name.replace("empty-", ""))

# Left join: persona rows gain the baseline values in '*_baseline' columns.
df_merged = pd.merge(
    left=df,
    right=baseline_df,
    how="left",
    on=['role', "model", "pos"],
    suffixes=('', '_baseline'),
)

# Gap to the same-round baseline. The column names still say "from_idx_0"
# because the bootstrap cells below read these names.
df_merged['persona_diff_from_idx_0'] = (
    df_merged['interview'] - df_merged['interview_baseline']
)
df_merged['goal_diff_from_idx_0'] = (
    df_merged['instructions'] - df_merged['instructions_baseline']
)

In [None]:
# Bootstrap a 95% confidence interval, per conversation round ('pos'), for the
# mean of 'persona_diff_from_idx_0' as computed in the preceding merge cell.
np.random.seed(42)  # fixed seed so the resampled intervals are reproducible

n_bootstraps = 1000  # number of resamples drawn for every round

grouped_pos = df_merged.groupby('pos')
results = []

for pos, group in grouped_pos:
    diffs = group['persona_diff_from_idx_0']

    # Resample with replacement (same size as the original sample) and keep
    # the mean of every resample.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: the central 95% of the bootstrap means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append({
        'pos': pos,
        'mean_difference': diffs.mean(),
        'lower_bound_95_ci': lower_bound,
        'upper_bound_95_ci': upper_bound,
    })

results_df = pd.DataFrame(results)

# Flag rounds whose interval bounds share a sign (product > 0, i.e. the CI
# does not straddle zero) and whose mean difference is negative.
ci_same_sign = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_same_sign & mean_is_negative)

persona_df = results_df

In [None]:
# Seed NumPy's global RNG before any resampling happens in this cell.
np.random.seed(42)

# Number of resamples drawn for every 'pos' group.
n_bootstraps = 1000

# One summary record per 'pos' value is collected here.
results = []

grouped_pos = df_merged.groupby('pos')

for pos, group in grouped_pos:
    # 'instructions' minus 'instructions_baseline' for the rows of this round.
    diffs = group['goal_diff_from_idx_0']

    # Resample with replacement at the original group size; keep each resample's mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: 2.5th and 97.5th percentiles of the resampled means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# True for rounds where both CI bounds share a sign (interval excludes 0)
# and the mean difference is negative.
ci_excludes_zero = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_excludes_zero & mean_is_negative)
goal_df = results_df

In [None]:
plt.figure(figsize=(8, 3))
df = results_df

# One mean line plus a shaded band between the CI bounds per conversation type.
for curve_df, curve_color, curve_label in (
    (persona_df, 'orange', "Persona-directed"),
    (goal_df, 'blue', "Goal Oriented"),
):
    plt.plot(curve_df['pos'], curve_df['mean_difference'], color=curve_color, label=curve_label)
    plt.fill_between(
        curve_df['pos'],
        curve_df['lower_bound_95_ci'],
        curve_df['upper_bound_95_ci'],
        color=curve_color,
        alpha=0.4,
    )

# Axis text, title, and a dashed reference line at zero difference.
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)
plt.title('Avg. general response quality difference (over baseline)', fontproperties=bold_font, fontsize=14)
plt.axhline(y=0, color="black", linestyle="--")

# Light dashed grid on both major and minor ticks.
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

plt.legend()
plt.savefig(
    '../persistent-personas-paper/media/instruction_general_difference_baseline.pdf',
    dpi=300,
    bbox_inches="tight",
)

# IFBench

In [None]:
# Read the raw IFBench results. The context manager closes the file as soon as parsing
# finishes instead of leaving the handle open until garbage collection.
with open("./results/ifbench_results.json", "r") as results_file:
    ratings = json.load(results_file)

# Long format: the frame's columns are melted into a 'model' column, the former index
# becomes 'role', and the cell values land in 'rating'.
# NOTE(review): assumes the JSON is a {model: {run_key: rating}} mapping — confirm.
ratings = pd.DataFrame.from_dict(ratings).reset_index().melt(id_vars=['index'], var_name='model', value_name='rating').rename(columns=lambda x: "role" if x == "index" else x)

# NOTE(review): `data` is not referenced by the IFBench cells visible here — confirm it is still needed.
data, _ = load_data("ifbench")


# The integer after the last underscore of the run key is the conversation round.
ratings["pos"] = ratings.role.str.split("_").str[-1].astype(int)

# Strip that trailing "_<pos>" segment from the key.
ratings["role"] = ratings["role"].str.rsplit("_", n=1).str[0]

# Keys containing "inst-" belong to the "instructions" conversations; all others to "interview".
ratings["base_conversation"] = ratings.role.apply(lambda x: "instructions" if "inst-" in x else "interview")

# Drop everything up to the first underscore and turn the remaining underscores into spaces.
ratings["role"] = ratings["role"].str.split("_",n=1).str[-1].str.replace("_", " ")

In [None]:
# Mean rating per (model, role, pos, base_conversation); the innermost key
# (base_conversation) is then pivoted into columns, one per conversation type.
mean_rating = ratings.groupby(['model', 'role', 'pos', "base_conversation"])['rating'].mean()
ratings = mean_rating.unstack().reset_index()

In [None]:
# Store the pivoted IFBench table in the shared `all_ratings` dict under the "ifbench" key.
all_ratings["ifbench"] = ratings

In [None]:

fig, ax = plt.subplots(figsize=(8, 3))

# Rows whose role contains "empty" are drawn as the dashed baseline curves; the
# remaining rows as the solid persona curves.
has_empty_role = ratings.role.str.contains("empty")
line_groups = (
    (has_empty_role, {"linestyle": "--"}, 'Persona-directed (baseline)', 'Goal-oriented (baseline)'),
    (~has_empty_role, {}, 'Persona-directed (persona)', 'Goal-oriented (persona)'),
)

for row_mask, line_style, interview_label, instructions_label in line_groups:
    # Average every numeric column per conversation round (means only, no interval).
    df = ratings[row_mask].groupby(['pos']).mean(numeric_only=True).reset_index()
    ax.plot(df.pos, df.interview,
            color="orange", marker="o", label=interview_label, **line_style)
    ax.plot(df.pos, df.instructions,
            color="blue", marker='x', label=instructions_label, **line_style)

ax.set_title('IFBench accuracy (↑)', fontproperties=bold_font, fontsize=14)
# Legend is anchored above the axes, in two columns.
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.5), ncol=2)
ax.set_xlabel('Conversation round')
ax.set_ylabel('Accuracy')
ax.grid(True)

In [None]:
# Export the current `fig` (the IFBench accuracy figure when cells run in order) as a PDF;
# bbox_inches="tight" keeps the legend placed above the axes inside the saved area.
fig.savefig('../persistent-personas-paper/media/ifbench.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Keep only the rows whose role does not contain "empty" (i.e. drop the baseline rows).
# .copy() makes `filtered_df` an independent frame, so adding a column below neither
# raises pandas' SettingWithCopyWarning nor depends on view-vs-copy behaviour.
filtered_df = ratings[~ratings.role.str.contains("empty")].copy()
# Per-row gap between the two conversation types: interview - instructions.
filtered_df['rating_difference'] = filtered_df['interview'] - filtered_df['instructions']

In [None]:
# Seed NumPy's global RNG before any resampling happens in this cell.
np.random.seed(42)

# Number of resamples drawn for every 'pos' group.
n_bootstraps = 1000

# One summary record per 'pos' value is collected here.
results = []

grouped_pos = filtered_df.groupby('pos')

for pos, group in grouped_pos:
    # 'interview' minus 'instructions' for the rows of this round.
    diffs = group['rating_difference']

    # Resample with replacement at the original group size; keep each resample's mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: 2.5th and 97.5th percentiles of the resampled means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# The plotting cell that follows reads the summary through `df`.
df = results_df


In [None]:
plt.figure(figsize=(6,3))
# Mean difference per conversation round.
plt.plot(df['pos'], df['mean_difference'], color='magenta')

# Shaded band between the lower and upper 95% CI bounds.
plt.fill_between(df['pos'], df['lower_bound_95_ci'], df['upper_bound_95_ci'], color='pink', alpha=0.4)


# Axis labels and title.
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)
plt.title('Avg. IfBench Acc. difference (Persona-directed - Goal-oriented)', fontproperties=bold_font, fontsize=14)

# Dashed zero reference line. It must be drawn BEFORE savefig: previously it was added
# after the file had been written, so the exported PDF was missing it.
plt.axhline(y=0, color="black", linestyle="--")

# Light dashed grid on both major and minor ticks.
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

# No legend: the single curve is unlabeled.
plt.savefig('../persistent-personas-paper/media/ifbench_dialogue_diff.pdf', dpi=300, bbox_inches="tight")


In [None]:
df = filtered_df.copy()

# Rows at pos == 0 serve as the reference values; they are indexed by role.
is_first_round = df['pos'] == 0
baseline_df = df[is_first_round].set_index('role')

# Left join: every row of `df` gets the pos-0 row sharing its role and model;
# the reference columns are suffixed with "_baseline".
df_merged = pd.merge(
    df,
    baseline_df,
    how="left",
    on=['role', "model"],
    suffixes=('', '_baseline'),
)

# Per-row change relative to pos 0 for each conversation type.
df_merged['persona_diff_from_idx_0'] = df_merged['interview'] - df_merged['interview_baseline']
df_merged['goal_diff_from_idx_0'] = df_merged['instructions'] - df_merged['instructions_baseline']

In [None]:
# Seed NumPy's global RNG before any resampling happens in this cell.
np.random.seed(42)

# Number of resamples drawn for every 'pos' group.
n_bootstraps = 1000

# One summary record per 'pos' value is collected here.
results = []

grouped_pos = df_merged.groupby('pos')

for pos, group in grouped_pos:
    # 'interview' minus 'interview_baseline' for the rows of this round.
    diffs = group['persona_diff_from_idx_0']

    # Resample with replacement at the original group size; keep each resample's mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: 2.5th and 97.5th percentiles of the resampled means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# True for rounds where both CI bounds share a sign (interval excludes 0)
# and the mean difference is negative.
ci_excludes_zero = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_excludes_zero & mean_is_negative)
persona_df = results_df

In [None]:
# Seed NumPy's global RNG before any resampling happens in this cell.
np.random.seed(42)

# Number of resamples drawn for every 'pos' group.
n_bootstraps = 1000

# One summary record per 'pos' value is collected here.
results = []

grouped_pos = df_merged.groupby('pos')

for pos, group in grouped_pos:
    # 'instructions' minus 'instructions_baseline' for the rows of this round.
    diffs = group['goal_diff_from_idx_0']

    # Resample with replacement at the original group size; keep each resample's mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: 2.5th and 97.5th percentiles of the resampled means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# True for rounds where both CI bounds share a sign (interval excludes 0)
# and the mean difference is negative.
ci_excludes_zero = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_excludes_zero & mean_is_negative)
goal_df = results_df

In [None]:
plt.figure(figsize=(8,3))
df = results_df

# One mean line plus a shaded band between the CI bounds per conversation type.
for curve_df, curve_color, curve_label in (
    (persona_df, 'orange', "Persona-directed"),
    (goal_df, 'blue', "Goal Oriented"),
):
    plt.plot(curve_df['pos'], curve_df['mean_difference'], color=curve_color, label=curve_label)
    plt.fill_between(
        curve_df['pos'],
        curve_df['lower_bound_95_ci'],
        curve_df['upper_bound_95_ci'],
        color=curve_color,
        alpha=0.4,
    )

# Axis text, title, and a dashed reference line at zero difference.
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)
plt.title('Avg. IfBench improvement', fontproperties=bold_font, fontsize=14)
plt.axhline(y=0, color="black", linestyle="--")

# Light dashed grid on both major and minor ticks.
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

plt.legend()
plt.savefig(
    '../persistent-personas-paper/media/ifbench_degradation.pdf',
    dpi=300,
    bbox_inches="tight",
)

In [None]:
# Split the table: roles containing "empty" form the baseline, everything else is compared to it.
has_empty_role = ratings.role.str.contains("empty")
df = ratings[~has_empty_role].copy()
baseline_df = ratings[has_empty_role].copy()

# Left join on model and pos only (role is not a join key here); baseline columns,
# including the baseline's own role, are suffixed with "_baseline".
df_merged = pd.merge(
    df,
    baseline_df,
    how="left",
    on=["model", "pos"],
    suffixes=('', '_baseline'),
)

# Per-row gap to the baseline for each conversation type.
df_merged['persona_diff_from_idx_0'] = df_merged['interview'] - df_merged['interview_baseline']
df_merged['goal_diff_from_idx_0'] = df_merged['instructions'] - df_merged['instructions_baseline']

In [None]:
# Seed NumPy's global RNG before any resampling happens in this cell.
np.random.seed(42)

# Number of resamples drawn for every 'pos' group.
n_bootstraps = 1000

# One summary record per 'pos' value is collected here.
results = []

grouped_pos = df_merged.groupby('pos')

for pos, group in grouped_pos:
    # 'interview' minus 'interview_baseline' for the rows of this round.
    diffs = group['persona_diff_from_idx_0']

    # Resample with replacement at the original group size; keep each resample's mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: 2.5th and 97.5th percentiles of the resampled means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# True for rounds where both CI bounds share a sign (interval excludes 0)
# and the mean difference is negative.
ci_excludes_zero = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_excludes_zero & mean_is_negative)
persona_df = results_df

In [None]:
# Seed NumPy's global RNG before any resampling happens in this cell.
np.random.seed(42)

# Number of resamples drawn for every 'pos' group.
n_bootstraps = 1000

# One summary record per 'pos' value is collected here.
results = []

grouped_pos = df_merged.groupby('pos')

for pos, group in grouped_pos:
    # 'instructions' minus 'instructions_baseline' for the rows of this round.
    diffs = group['goal_diff_from_idx_0']

    # Resample with replacement at the original group size; keep each resample's mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: 2.5th and 97.5th percentiles of the resampled means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# True for rounds where both CI bounds share a sign (interval excludes 0)
# and the mean difference is negative.
ci_excludes_zero = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_excludes_zero & mean_is_negative)
goal_df = results_df

In [None]:
plt.figure(figsize=(8, 3))
df = results_df

# One mean line plus a shaded band between the CI bounds per conversation type.
for curve_df, curve_color, curve_label in (
    (persona_df, 'orange', "Persona-directed"),
    (goal_df, 'blue', "Goal Oriented"),
):
    plt.plot(curve_df['pos'], curve_df['mean_difference'], color=curve_color, label=curve_label)
    plt.fill_between(
        curve_df['pos'],
        curve_df['lower_bound_95_ci'],
        curve_df['upper_bound_95_ci'],
        color=curve_color,
        alpha=0.4,
    )

# Axis text, title, and a dashed reference line at zero difference.
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Avg. Difference', fontsize=12)
plt.title('Avg. IfBench Acc. Difference (over baseline)', fontproperties=bold_font, fontsize=14)
plt.axhline(y=0, color="black", linestyle="--")

# Light dashed grid on both major and minor ticks.
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

plt.legend()
plt.savefig(
    '../persistent-personas-paper/media/ifbench_difference_baseline.pdf',
    dpi=300,
    bbox_inches="tight",
)

# XSTest

In [None]:
ratings = pd.read_csv("./results/xstest_ratings.csv")

data, _ = load_data("xstest")

# Ground-truth labels as integers: "safe" -> 0, "unsafe" -> 1.
label_mapping = {"safe": 0, "unsafe": 1}
labels = [label_mapping[raw_label] for raw_label in data["label"]]

# Ratings as integers: "No" -> 0, "Yes" -> 1.
# NOTE(review): later cells plot the mean of this column as a "Refusal rate",
# so "Yes" presumably means the response was a refusal — confirm.
rating_mapping = {"No": 0, "Yes": 1}
ratings["rating"] = ratings.rating.apply(lambda verdict: rating_mapping[verdict])

# A row is "correct" when its rating equals the label of the prompt at position `idx`.
ratings["correct"] = ratings.apply(lambda row: row["rating"] == labels[row["idx"]], axis=1)

# Conversation round: the digits that directly precede ".json" in the raw role string.
ratings["pos"] = ratings.role.str.extract(r"(\d+)(?=\.json)").astype(int)

# Cut the raw role string at its last underscore and keep the left part.
ratings["role"] = ratings["role"].str.rsplit("_", n=1).str[0]

# Roles containing "inst-" belong to the "instructions" conversations; all others to "interview".
ratings["base_conversation"] = ratings.role.apply(
    lambda role_name: "instructions" if "inst-" in role_name else "interview"
)

# Drop everything up to the first underscore and turn the remaining underscores into spaces.
ratings["role"] = ratings["role"].str.split("_",n=1).str[-1].str.replace("_", " ")

# Attach the integer label of each row's prompt.
ratings["label"] = ratings.idx.apply(lambda prompt_idx: labels[prompt_idx])

# Colour palette keyed by metric name.
colors = {
    'Refused safe': 'blue',
    'Refused unsafe': 'red',
    'Accuracy': 'green'
}

In [None]:
# Mean rating per (model, role, pos, label, base_conversation); the innermost key
# (base_conversation) is then pivoted into columns, one per conversation type.
mean_rating = ratings.groupby(['model', 'role', 'pos', "label", "base_conversation"])['rating'].mean()
ratings = mean_rating.unstack().reset_index()

In [None]:
# Store the pivoted XSTest table in the shared `all_ratings` dict under the "xstest" key.
all_ratings["xstest"] = ratings

In [None]:
# Two stacked panels that share the x-axis (conversation round).
fig, axes = plt.subplots(2, 1, figsize=(8, 6), sharex=True)
ax1, ax2 = axes

has_empty_role = ratings.role.str.contains("empty")

# (axis, label value, title): label == 1 (unsafe prompts) on top, label == 0 (safe) below.
panels = (
    (ax1, 1, 'Refusal of unsafe queries (↑)'),
    (ax2, 0, 'Refusal of safe queries (↓)'),
)

# Within a panel: rows with an "empty" role first (dashed baseline), then the rest (solid).
line_groups = (
    (has_empty_role, {"linestyle": "--"}, 'Persona-directed (baseline)', 'Goal-oriented (baseline)'),
    (~has_empty_role, {}, 'Persona-directed (persona)', 'Goal-oriented (persona)'),
)

for panel_ax, label_value, panel_title in panels:
    has_label = ratings.label == label_value
    for role_mask, line_style, interview_label, instructions_label in line_groups:
        # Average every numeric column per conversation round for this subset.
        per_round = ratings[has_label & role_mask].groupby(['pos']).mean(numeric_only=True).reset_index()
        panel_ax.plot(per_round.pos, per_round.interview, color="orange", marker="o", label=interview_label, **line_style)
        panel_ax.plot(per_round.pos, per_round.instructions, color="blue", marker='x', label=instructions_label, **line_style)

    panel_ax.set_title(panel_title, fontproperties=bold_font, fontsize=14)
    panel_ax.set_ylabel('Refusal rate')
    panel_ax.grid(True)

# Only the bottom panel carries the x-axis label.
ax2.set_xlabel('Conversation round')

# --- One figure-level legend, built from the top panel's handles in a fixed order ---
all_handles, all_labels = ax1.get_legend_handles_labels()
legend_dict = dict(zip(all_labels, all_handles))

desired_order = [
    'Persona-directed (baseline)',
    'Persona-directed (persona)',
    'Goal-oriented (baseline)',
    'Goal-oriented (persona)'
]

ordered_handles = [legend_dict[entry] for entry in desired_order]
ordered_labels = desired_order

fig.legend(ordered_handles, ordered_labels, loc='upper center', ncol=2, bbox_to_anchor=(0.5, 1.1))

# Lay the panels out inside the lower 96% of the figure height.
plt.tight_layout(rect=[0, 0, 1, 0.96])

In [None]:
# Export the current `fig` (the two-panel XSTest figure when cells run in order) as a PDF;
# bbox_inches="tight" keeps the figure-level legend placed above the panels inside the saved area.
fig.savefig('../persistent-personas-paper/media/xstest.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Keep only the rows whose role does not contain "empty" (i.e. drop the baseline rows).
# .copy() makes `filtered_df` an independent frame, so adding a column below neither
# raises pandas' SettingWithCopyWarning nor depends on view-vs-copy behaviour.
filtered_df = ratings[~ratings.role.str.contains("empty")].copy()
# Per-row gap between the two conversation types: interview - instructions.
filtered_df['rating_difference'] = filtered_df['interview'] - filtered_df['instructions']

In [None]:
# Seed NumPy's global RNG before any resampling happens in this cell.
np.random.seed(42)

# Number of resamples drawn for every 'pos' group.
n_bootstraps = 1000

# One summary record per 'pos' value is collected here.
results = []

# Only rows with label == 0 (mapped from "safe" when the ratings were loaded).
grouped_pos = filtered_df[filtered_df.label==0].groupby('pos')

for pos, group in grouped_pos:
    # 'interview' minus 'instructions' for the rows of this round.
    diffs = group['rating_difference']

    # Resample with replacement at the original group size; keep each resample's mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: 2.5th and 97.5th percentiles of the resampled means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# The plotting cell that follows reads the summary through `df`.
df = results_df


In [None]:
plt.figure(figsize=(8, 3))
# Mean difference per conversation round.
plt.plot(df['pos'], df['mean_difference'], color='magenta')

# Shaded band between the lower and upper 95% CI bounds.
plt.fill_between(df['pos'], df['lower_bound_95_ci'], df['upper_bound_95_ci'], color='pink', alpha=0.4)


# Axis labels and title.
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Refusal Rate Difference', fontsize=12)
plt.title('Difference of Refusal of Safe Queries Rate (Persona-directed - Goal-oriented)', fontproperties=bold_font, fontsize=14)

# Dashed zero reference line. It must be drawn BEFORE savefig: previously it was added
# after the file had been written, so the exported PDF was missing it.
plt.axhline(y=0, color="black", linestyle="--")

# Light dashed grid on both major and minor ticks.
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

# No legend: the single curve is unlabeled.
plt.savefig('../persistent-personas-paper/media/xstest_safe_diff.pdf', dpi=300, bbox_inches="tight")


In [None]:
# Seed NumPy's global RNG before any resampling happens in this cell.
np.random.seed(42)

# Number of resamples drawn for every 'pos' group.
n_bootstraps = 1000

# One summary record per 'pos' value is collected here.
results = []

# Only rows with label == 1 (mapped from "unsafe" when the ratings were loaded).
grouped_pos = filtered_df[filtered_df.label==1].groupby('pos')

for pos, group in grouped_pos:
    # 'interview' minus 'instructions' for the rows of this round.
    diffs = group['rating_difference']

    # Resample with replacement at the original group size; keep each resample's mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: 2.5th and 97.5th percentiles of the resampled means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# The plotting cell that follows reads the summary through `df`.
df = results_df


In [None]:
plt.figure(figsize=(8, 3))
# Mean difference per conversation round.
plt.plot(df['pos'], df['mean_difference'], color='magenta')

# Shaded band between the lower and upper 95% CI bounds.
plt.fill_between(df['pos'], df['lower_bound_95_ci'], df['upper_bound_95_ci'], color='pink', alpha=0.4)


# Axis labels and title.
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Refusal Rate Difference', fontsize=12)
plt.title('Difference of Refusal of Unsafe Queries Rate(Persona-directed - Goal-oriented)', fontproperties=bold_font, fontsize=14)

# Dashed zero reference line. It must be drawn BEFORE savefig: previously it was added
# after the file had been written, so the exported PDF was missing it.
plt.axhline(y=0, color="black", linestyle="--")

# Light dashed grid on both major and minor ticks.
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

# No legend: the single curve is unlabeled.
plt.savefig('../persistent-personas-paper/media/xstest_unsafe_diff.pdf', dpi=300, bbox_inches="tight")


In [None]:
df = filtered_df.copy()

# Rows at pos == 0 serve as the reference values; they are indexed by role.
is_first_round = df['pos'] == 0
baseline_df = df[is_first_round].set_index('role')

# Left join: every row of `df` gets the pos-0 row sharing its role, model and label;
# the reference columns are suffixed with "_baseline".
df_merged = pd.merge(
    df,
    baseline_df,
    how="left",
    on=['role', "model", "label"],
    suffixes=('', '_baseline'),
)

# Per-row change relative to pos 0 for each conversation type.
df_merged['persona_diff_from_idx_0'] = df_merged['interview'] - df_merged['interview_baseline']
df_merged['goal_diff_from_idx_0'] = df_merged['instructions'] - df_merged['instructions_baseline']

In [None]:
# Seed NumPy's global RNG before any resampling happens in this cell.
np.random.seed(42)

# Number of resamples drawn for every 'pos' group.
n_bootstraps = 1000

# One summary record per 'pos' value is collected here.
results = []

# Only rows with label == 0 (mapped from "safe" when the ratings were loaded).
grouped_pos = df_merged[df_merged.label==0].groupby('pos')

for pos, group in grouped_pos:
    # 'interview' minus 'interview_baseline' for the rows of this round.
    diffs = group['persona_diff_from_idx_0']

    # Resample with replacement at the original group size; keep each resample's mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: 2.5th and 97.5th percentiles of the resampled means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# True for rounds where both CI bounds share a sign (interval excludes 0)
# and the mean difference is negative.
ci_excludes_zero = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_excludes_zero & mean_is_negative)
persona_df = results_df

In [None]:
# Seed NumPy's global RNG before any resampling happens in this cell.
np.random.seed(42)

# Number of resamples drawn for every 'pos' group.
n_bootstraps = 1000

# One summary record per 'pos' value is collected here.
results = []

# Only rows with label == 0 (mapped from "safe" when the ratings were loaded).
grouped_pos = df_merged[df_merged.label==0].groupby('pos')

for pos, group in grouped_pos:
    # 'instructions' minus 'instructions_baseline' for the rows of this round.
    diffs = group['goal_diff_from_idx_0']

    # Resample with replacement at the original group size; keep each resample's mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: 2.5th and 97.5th percentiles of the resampled means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# True for rounds where both CI bounds share a sign (interval excludes 0)
# and the mean difference is negative.
ci_excludes_zero = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_excludes_zero & mean_is_negative)
goal_df = results_df

In [None]:
plt.figure(figsize=(8,3))
df = results_df

# One mean line plus a shaded band between the CI bounds per conversation type.
for curve_df, curve_color, curve_label in (
    (persona_df, 'orange', "Persona-directed"),
    (goal_df, 'blue', "Goal Oriented"),
):
    plt.plot(curve_df['pos'], curve_df['mean_difference'], color=curve_color, label=curve_label)
    plt.fill_between(
        curve_df['pos'],
        curve_df['lower_bound_95_ci'],
        curve_df['upper_bound_95_ci'],
        color=curve_color,
        alpha=0.4,
    )

# Axis text, title, and a dashed reference line at zero difference.
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Refusal Rate Difference', fontsize=12)
plt.title('Difference in Refusal Rate of Safe Queries (compared with round 0)', fontproperties=bold_font, fontsize=14)
plt.axhline(y=0, color="black", linestyle="--")

# Light dashed grid on both major and minor ticks.
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

plt.legend()
plt.savefig(
    '../persistent-personas-paper/media/xstest_safe_rate_change.pdf',
    dpi=300,
    bbox_inches="tight",
)

In [None]:
# Seed NumPy's global RNG before any resampling happens in this cell.
np.random.seed(42)

# Number of resamples drawn for every 'pos' group.
n_bootstraps = 1000

# One summary record per 'pos' value is collected here.
results = []

# Only rows with label == 1 (mapped from "unsafe" when the ratings were loaded).
grouped_pos = df_merged[df_merged.label==1].groupby('pos')

for pos, group in grouped_pos:
    # 'interview' minus 'interview_baseline' for the rows of this round.
    diffs = group['persona_diff_from_idx_0']

    # Resample with replacement at the original group size; keep each resample's mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: 2.5th and 97.5th percentiles of the resampled means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# True for rounds where both CI bounds share a sign (interval excludes 0)
# and the mean difference is negative.
ci_excludes_zero = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_excludes_zero & mean_is_negative)
persona_df = results_df

In [None]:
# Seed NumPy's global RNG before any resampling happens in this cell.
np.random.seed(42)

# Number of resamples drawn for every 'pos' group.
n_bootstraps = 1000

# One summary record per 'pos' value is collected here.
results = []

# Only rows with label == 1 (mapped from "unsafe" when the ratings were loaded).
grouped_pos = df_merged[df_merged.label==1].groupby('pos')

for pos, group in grouped_pos:
    # 'instructions' minus 'instructions_baseline' for the rows of this round.
    diffs = group['goal_diff_from_idx_0']

    # Resample with replacement at the original group size; keep each resample's mean.
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # Percentile interval: 2.5th and 97.5th percentiles of the resampled means.
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# True for rounds where both CI bounds share a sign (interval excludes 0)
# and the mean difference is negative.
ci_excludes_zero = results_df["lower_bound_95_ci"] * results_df["upper_bound_95_ci"] > 0
mean_is_negative = results_df["mean_difference"] < 0
display(ci_excludes_zero & mean_is_negative)
goal_df = results_df

In [None]:
plt.figure(figsize=(8, 3))
df = results_df

# One mean line plus a shaded band between the CI bounds per conversation type.
for curve_df, curve_color, curve_label in (
    (persona_df, 'orange', "Persona-directed"),
    (goal_df, 'blue', "Goal Oriented"),
):
    plt.plot(curve_df['pos'], curve_df['mean_difference'], color=curve_color, label=curve_label)
    plt.fill_between(
        curve_df['pos'],
        curve_df['lower_bound_95_ci'],
        curve_df['upper_bound_95_ci'],
        color=curve_color,
        alpha=0.4,
    )

# Axis text, title, and a dashed reference line at zero difference.
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Refusal Rate Difference', fontsize=12)
plt.title('Difference in Refusal Rate of Unsafe Queries (compared with round 0)', fontproperties=bold_font, fontsize=14)
plt.axhline(y=0, color="black", linestyle="--")

# Light dashed grid on both major and minor ticks.
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

plt.legend()
plt.savefig(
    '../persistent-personas-paper/media/xstest_unsafe_rate_change.pdf',
    dpi=300,
    bbox_inches="tight",
)

In [None]:
# Split the ratings into persona rows and "empty"-role baseline rows, then attach
# the matching baseline (same model / pos / label) to every persona row.
is_empty_role = ratings.role.str.contains("empty")
df = ratings[~is_empty_role].copy()
baseline_df = ratings[is_empty_role].copy()
df_merged = df.merge(
    baseline_df,
    how="left",
    on=["model", "pos", "label"],
    suffixes=('', '_baseline'),
)

# Difference of each conversation type against its empty-role baseline.
# NOTE(review): the column names still say "from_idx_0", but in this cell the
# reference is the empty-role baseline, not round 0.
df_merged['persona_diff_from_idx_0'] = df_merged['interview'] - df_merged['interview_baseline']
df_merged['goal_diff_from_idx_0'] = df_merged['instructions'] - df_merged['instructions_baseline']

In [None]:
# Bootstrap, per conversation round, the mean of `persona_diff_from_idx_0`
# (persona-directed minus empty-role baseline) over the label == 0 rows and
# derive a 95% percentile confidence interval.
np.random.seed(42)  # Series.sample falls back to NumPy's global RNG, so seed it here

n_bootstraps = 1000  # number of resamples drawn per round
results = []

grouped_pos = df_merged[df_merged.label==0].groupby('pos')

for pos, group in grouped_pos:
    diffs = group['persona_diff_from_idx_0']

    # Mean of each resample (same size as the group, drawn with replacement)
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # 2.5th / 97.5th percentiles of the resampled means bound the 95% interval
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# Flag rounds whose interval excludes zero while the mean difference is negative
interval_excludes_zero = results_df["lower_bound_95_ci"]*results_df["upper_bound_95_ci"] > 0
display(interval_excludes_zero & (results_df["mean_difference"] <0))
persona_df = results_df

In [None]:
# Bootstrap, per conversation round, the mean of `goal_diff_from_idx_0`
# (goal-oriented minus empty-role baseline) over the label == 0 rows and
# derive a 95% percentile confidence interval.
np.random.seed(42)  # Series.sample falls back to NumPy's global RNG, so seed it here

n_bootstraps = 1000  # number of resamples drawn per round
results = []

grouped_pos = df_merged[df_merged.label==0].groupby('pos')

for pos, group in grouped_pos:
    diffs = group['goal_diff_from_idx_0']

    # Mean of each resample (same size as the group, drawn with replacement)
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # 2.5th / 97.5th percentiles of the resampled means bound the 95% interval
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# Flag rounds whose interval excludes zero while the mean difference is negative
interval_excludes_zero = results_df["lower_bound_95_ci"]*results_df["upper_bound_95_ci"] > 0
display(interval_excludes_zero & (results_df["mean_difference"] <0))
goal_df = results_df

In [None]:
plt.figure(figsize=(8, 3))
df = results_df  # NOTE(review): not read in this cell; kept so the notebook-level `df` binding is unchanged

# One mean line plus a shaded 95% CI band per conversation type
for curve_df, curve_color, curve_label in (
    (persona_df, 'orange', "Persona-directed"),
    (goal_df, 'blue', "Goal Oriented"),
):
    plt.plot(curve_df['pos'], curve_df['mean_difference'], color=curve_color, label=curve_label)
    plt.fill_between(
        curve_df['pos'],
        curve_df['lower_bound_95_ci'],
        curve_df['upper_bound_95_ci'],
        color=curve_color,
        alpha=0.4,
    )

# Axis labels, title and the zero reference line
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Refusal Rate Difference', fontsize=12)
plt.title(
    'Difference in Refusal Rate of Safe Queries (over baseline)',
    fontproperties=bold_font,
    fontsize=14,
)
plt.axhline(y=0, color="black", linestyle="--")

# Light dashed grid for readability
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

plt.legend()
plt.savefig(
    '../persistent-personas-paper/media/xstest_safe_refusal_diff_baseline.pdf',
    dpi=300,
    bbox_inches="tight",
)

In [None]:
# Bootstrap, per conversation round, the mean of `persona_diff_from_idx_0`
# (persona-directed minus empty-role baseline) over the label == 1 rows and
# derive a 95% percentile confidence interval.
np.random.seed(42)  # Series.sample falls back to NumPy's global RNG, so seed it here

n_bootstraps = 1000  # number of resamples drawn per round
results = []

grouped_pos = df_merged[df_merged.label==1].groupby('pos')

for pos, group in grouped_pos:
    diffs = group['persona_diff_from_idx_0']

    # Mean of each resample (same size as the group, drawn with replacement)
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # 2.5th / 97.5th percentiles of the resampled means bound the 95% interval
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# Flag rounds whose interval excludes zero while the mean difference is negative
interval_excludes_zero = results_df["lower_bound_95_ci"]*results_df["upper_bound_95_ci"] > 0
display(interval_excludes_zero & (results_df["mean_difference"] <0))
persona_df = results_df

In [None]:
# Bootstrap, per conversation round, the mean of `goal_diff_from_idx_0`
# (goal-oriented minus empty-role baseline) over the label == 1 rows and
# derive a 95% percentile confidence interval.
np.random.seed(42)  # Series.sample falls back to NumPy's global RNG, so seed it here

n_bootstraps = 1000  # number of resamples drawn per round
results = []

grouped_pos = df_merged[df_merged.label==1].groupby('pos')

for pos, group in grouped_pos:
    diffs = group['goal_diff_from_idx_0']

    # Mean of each resample (same size as the group, drawn with replacement)
    bootstrap_means = [
        diffs.sample(n=len(diffs), replace=True).mean()
        for _ in range(n_bootstraps)
    ]

    # 2.5th / 97.5th percentiles of the resampled means bound the 95% interval
    lower_bound, upper_bound = np.percentile(bootstrap_means, [2.5, 97.5])

    results.append(dict(
        pos=pos,
        mean_difference=np.mean(diffs),
        lower_bound_95_ci=lower_bound,
        upper_bound_95_ci=upper_bound,
    ))

results_df = pd.DataFrame(results)

# Flag rounds whose interval excludes zero while the mean difference is negative
interval_excludes_zero = results_df["lower_bound_95_ci"]*results_df["upper_bound_95_ci"] > 0
display(interval_excludes_zero & (results_df["mean_difference"] <0))
goal_df = results_df

In [None]:
plt.figure(figsize=(8,3))
df = results_df  # NOTE(review): not read in this cell; kept so the notebook-level `df` binding is unchanged

# One mean line plus a shaded 95% CI band per conversation type
for curve_df, curve_color, curve_label in (
    (persona_df, 'orange', "Persona-directed"),
    (goal_df, 'blue', "Goal Oriented"),
):
    plt.plot(curve_df['pos'], curve_df['mean_difference'], color=curve_color, label=curve_label)
    plt.fill_between(
        curve_df['pos'],
        curve_df['lower_bound_95_ci'],
        curve_df['upper_bound_95_ci'],
        color=curve_color,
        alpha=0.4,
    )

# Axis labels, title and the zero reference line
plt.xlabel('Conversation round', fontsize=12)
plt.ylabel('Refusal Rate Difference', fontsize=12)
plt.title(
    'Difference in Refusal Rate of Unsafe Queries (over baseline)',
    fontproperties=bold_font,
    fontsize=14,
)
plt.axhline(y=0, color="black", linestyle="--")

# Light dashed grid for readability
plt.grid(True, which='both', linestyle='--', linewidth=0.5)

plt.legend()
plt.savefig(
    '../persistent-personas-paper/media/xstest_unsafe_refusal_diff_baseline.pdf',
    dpi=300,
    bbox_inches="tight",
)

### Regression plots (length)

In [None]:
import pickle

In [None]:
# On-disk cache for `all_ratings`: written on the first run, read back afterwards.
path = Path("./results/all_ratings.pkl")
if not path.exists():
    # Context manager guarantees the file is flushed and closed after the dump
    with open(path, "wb") as cache_file:
        pickle.dump(all_ratings, cache_file)
else:
    # NOTE(review): an existing cache replaces whatever `all_ratings` was built
    # in memory above; delete the file to refresh it after changing the inputs.
    # pickle.load can execute arbitrary code, so only load a cache you produced.
    with open(path, "rb") as cache_file:
        all_ratings = pickle.load(cache_file)

In [None]:
# Display the keys of `all_ratings` (one entry per evaluation table used below)
all_ratings.keys()

In [None]:
# Average the dialogue ratings over every column except the metric name and the
# rating values themselves ("persona", "goal", "diff"), which collapses the
# per-metric rows into one row per remaining key combination.
dialogue_raw = all_ratings["dialogue"]
dialogue_group_keys = [
    column for column in dialogue_raw
    if column not in ["metric", "persona", "goal", "diff"]
]
dialogue_df = (
    dialogue_raw
    .groupby(dialogue_group_keys)
    .mean(numeric_only=True)
    .reset_index()
)

In [None]:
# Long format: one row per (model, role, idx, diff) and rating source
# ("persona" or "goal"), with the value in `rating`.
dialogue_df = dialogue_df.melt(
    id_vars=['model', 'role', 'idx', 'diff'],
    value_vars=['persona', 'goal'],
    var_name='original_column',
    value_name='rating',
)

In [None]:
# Rescale with (x - 1) / 4, i.e. 1 -> 0 and 5 -> 1 (presumably the raw scale is 1-5; TODO confirm).
# Not idempotent: re-running this cell rescales the already-rescaled values.
dialogue_df.rating = (dialogue_df.rating - 1)/4

In [None]:
# The averaged "diff" column was only carried along as a melt id; it is not used further
dialogue_df = dialogue_df.drop(columns=["diff"])

In [None]:
# Align column names with the other rating tables ("pos", "base_conversation")
dialogue_df = dialogue_df.rename(columns={"idx": "pos", "original_column": "base_conversation"})

In [None]:
# Relabel the rating source: "persona" -> "interview", anything else -> "instructions".
# Re-running after the relabel would turn every value into "instructions".
dialogue_df.base_conversation = dialogue_df.base_conversation.apply(lambda x: "interview" if x == "persona" else "instructions")

In [None]:
# Express ratings on a 0-100 scale. Not idempotent: each re-run multiplies by 100 again.
dialogue_df.rating *= 100

In [None]:
# Display only: the sorted frame is shown, `dialogue_df` itself is not reordered
dialogue_df.sort_values(["model", "role", "pos"])

In [None]:
# Keep only the identifying columns and the MAE score of the BFI table
bfi_df = all_ratings["bfi"][["model", "role", "pos", "base_conversation", "MAE"]]

In [None]:
# Use the shared column name `rating` so BFI can be handled like the other tables
bfi_df = bfi_df.rename(columns={"MAE": "rating"})

In [None]:
# Scale the ratings of the non-"empty" roles to 0-100.
# Not idempotent: each re-run multiplies by 100 again.
bfi_df.loc[~(bfi_df.role.str.contains("empty")),"rating"] *=100

In [None]:
# For "empty" roles the rating is a mapping (it is iterated with .items());
# scale every value in it by 100. Not idempotent on re-run.
bfi_df.loc[(bfi_df.role.str.contains("empty")),"rating"] = bfi_df[bfi_df.role.str.contains("empty")]["rating"].apply(lambda x: {k:v*100 for k,v in x.items()})

In [None]:
# Role-specific instruction ratings, restricted to rows whose `reference` is "dataset"
role_df = all_ratings["role_specific"]
role_df = role_df[role_df["reference"]=="dataset"]

In [None]:
# `reference` is constant after the filter above, so drop it
role_df = role_df.drop(columns="reference")

In [None]:
# Long format: one row per (model, role, pos) and conversation type, value in `rating`
role_df = role_df.melt(
    id_vars=['model', 'role', 'pos'],
    value_vars=['instructions', 'interview'],
    var_name='base_conversation',
    value_name='rating',
)

In [None]:
# Express ratings on a 0-100 scale. Not idempotent: each re-run multiplies by 100 again.
role_df.rating*=100

In [None]:
# General instruction-quality ratings
general_df = all_ratings["general"]

In [None]:
# Keep rows whose `reference` is "dataset", then drop the now-constant column
general_df = general_df[general_df.reference=="dataset"].drop(columns="reference")

In [None]:
# Long format: one row per (model, role, pos) and conversation type, value in `rating`
general_df = general_df.melt(
    id_vars=['model', 'role', 'pos'],
    value_vars=['instructions', 'interview'],
    var_name='base_conversation',
    value_name='rating',
)

In [None]:
# Express ratings on a 0-100 scale. Not idempotent: each re-run multiplies by 100 again.
general_df.rating *= 100

In [None]:
# IFBench ratings
ifbench_df = all_ratings["ifbench"]

In [None]:
# Long format: one row per (model, role, pos) and conversation type, value in `rating`
ifbench_df = ifbench_df.melt(
    id_vars=['model', 'role', 'pos'],
    value_vars=['instructions', 'interview'],
    var_name='base_conversation',
    value_name='rating',
)

In [None]:
# Express ratings on a 0-100 scale. Not idempotent: each re-run multiplies by 100 again.
ifbench_df.rating *=100

In [None]:
# XSTest ratings; split by `label` into the safety and excess-safety tables below
xstest_df = all_ratings["xstest"]

In [None]:
# "Safety" table: the label == 1 XSTest rows in long format
# (one row per model / role / pos and conversation type).
xstest_label_1 = xstest_df[xstest_df.label==1]
safety_df = xstest_label_1.melt(
    id_vars=['model', 'role', 'pos'],
    value_vars=['instructions', 'interview'],
    var_name='base_conversation',
    value_name='rating',
)

In [None]:
# Express ratings on a 0-100 scale. Not idempotent: each re-run multiplies by 100 again.
safety_df.rating *=100

In [None]:
# "Excess safety" table: the label == 0 XSTest rows in long format
# (one row per model / role / pos and conversation type).
xstest_label_0 = xstest_df[xstest_df.label==0]
excess_df = xstest_label_0.melt(
    id_vars=['model', 'role', 'pos'],
    value_vars=['instructions', 'interview'],
    var_name='base_conversation',
    value_name='rating',
)

In [None]:
# Express ratings on a 0-100 scale. Not idempotent: each re-run multiplies by 100 again.
excess_df.rating *= 100

In [None]:
# Row / tick order used by the per-model figures, with Gemini as its own family at the end
model_order = [
    "gemma-3-4b-it",
    "gemma-3-27b-it",
    "Llama-3.1-Nemotron-Nano-8B-v1",
    "Llama-3_3-Nemotron-Super-49B-v1",
    "Qwen3-4B-Instruct-2507",
    "Qwen3-30B-A3B-Instruct-2507",
    "gemini-2.5-flash",
]

# One record per model: (checkpoint name, display name, size tier, family).
# The record order is the insertion order of the three dicts below; code that
# iterates `model_name_map.values()` relies on it (Nemotron first, Gemini last).
_MODEL_SPECS = [
    ("Llama-3.1-Nemotron-Nano-8B-v1", "Nemotron-8B", 1, "Nemotron"),
    ("Llama-3_3-Nemotron-Super-49B-v1", "Nemotron-49B", 2, "Nemotron"),
    ("gemma-3-4b-it", "Gemma3-4B", 1, "Gemma"),
    ("gemma-3-27b-it", "Gemma3-27B", 2, "Gemma"),
    ("Qwen3-4B-Instruct-2507", "Qwen3-4B", 1, "Qwen"),
    ("Qwen3-30B-A3B-Instruct-2507", "Qwen3-30B", 2, "Qwen"),
    ("gemini-2.5-flash", "gemini-2.5-flash", 3, "Gemini"),
]

# checkpoint name -> short display name
model_name_map = {checkpoint: display_name for checkpoint, display_name, _, _ in _MODEL_SPECS}
# checkpoint name -> ordinal size tier (1 = smaller, 2 = larger of a family, 3 = Gemini)
model_size_map = {checkpoint: size_tier for checkpoint, _, size_tier, _ in _MODEL_SPECS}
# checkpoint name -> model family
model_family_map = {checkpoint: family for checkpoint, _, _, family in _MODEL_SPECS}

In [None]:
# --- Function to bootstrap CI ---
def bootstrap_ci(data, n_bootstrap=5000, ci=95, random_state=42):
    """
    Percentile-bootstrap confidence interval for the mean of `data`.

    data: 1-D array-like of observations
    n_bootstrap: number of resamples (each the size of `data`, with replacement)
    ci: confidence level in percent
    random_state: seed for the local NumPy generator, so results are repeatable

    Returns (mean, lower_ci, upper_ci). Note that `mean` is the average of the
    resampled means, not the plain mean of `data`.
    """
    rng = np.random.default_rng(random_state)
    n_obs = len(data)
    boot_means = [
        rng.choice(data, size=n_obs, replace=True).mean()
        for _ in range(n_bootstrap)
    ]

    # Split the excluded probability mass evenly between both tails
    tail = (100 - ci) / 2
    lower, upper = np.percentile(boot_means, [tail, 100 - tail])
    return np.mean(boot_means), lower, upper


# --- Plotting function ---
def plot_with_bootstrap(
    diff_dfs, model_name_map, bold_font, colors=None, split_by_base=False
):
    """
    Plot bootstrap CIs of diff across multiple datasets, one subplot row per dataset.

    diff_dfs: dict of {dataset_name: dataframe}; each dataframe needs the columns
        "model" and "diff" (plus "base_conversation" when split_by_base is True)
    model_name_map: dict whose values are the model names found in the "model"
        column; the order of its values fixes the bar order
    bold_font: matplotlib fontproperties used for the subplot titles
    colors: list of bar colors (default two contrasting colors)
    split_by_base: if True, create separate bars for each base_conversation

    Returns (fig, axes); `axes` is a one-element list when there is a single dataset.
    """
    nrows = len(diff_dfs)
    figsize = (8, 3 * nrows)  # scale height by number of rows
    fig, axes = plt.subplots(nrows=nrows, ncols=1, figsize=figsize, sharex=True)

    # plt.subplots returns a bare Axes for a single row; normalise to a list
    if nrows == 1:
        axes = [axes]

    if colors is None:
        colors = ["#D3242D4B", "#A9A9A9"]

    # Map base_conversation labels
    base_map = {
        "interview": "Persona-directed",
        "instructions": "Goal-oriented",
    }
    base_order = ["Persona-directed", "Goal-oriented"]

    for idx, (dataset, df) in enumerate(diff_dfs.items()):
        ax = axes[idx]

        results = []

        if split_by_base:
            # Relabel base_conversation (on a copy, so the caller's frame is untouched)
            df = df.copy()
            df["base_conversation"] = df["base_conversation"].map(base_map)

            # group by model + base_conversation
            for (model, base), group in df.groupby(["model", "base_conversation"]):
                mean, low, high = bootstrap_ci(group["diff"].values)
                results.append({
                    "model": model,
                    "base_conversation": base,
                    "mean": mean,
                    "low": low,
                    "high": high
                })
            df_boot = pd.DataFrame(results)

            # Ensure consistent ordering of models + bases
            df_boot["model"] = pd.Categorical(
                df_boot["model"], categories=list(model_name_map.values()), ordered=True
            )
            df_boot["base_conversation"] = pd.Categorical(
                df_boot["base_conversation"], categories=base_order, ordered=True
            )
            df_boot = df_boot.sort_values(["model", "base_conversation"])

            # Pivot so each base conversation is a separate column
            df_plot = df_boot.pivot(index="model", columns="base_conversation", values="mean")

            # Plot grouped bars
            df_plot.plot(
                kind="bar",
                ax=ax,
                color=colors[:len(df_plot.columns)],
                width=0.8,
                legend=False  # suppress auto-legend here
            )

            # Add error bars manually
            n_bases = len(df_plot.columns)
            for i, model in enumerate(df_plot.index):
                for j, base in enumerate(df_plot.columns):
                    # NOTE(review): .iloc[0] raises IndexError if a (model, base) pair has no row
                    row = df_boot[(df_boot["model"] == model) & (df_boot["base_conversation"] == base)].iloc[0]
                    ax.errorbar(
                        # centre of bar j inside the 0.8-wide group drawn at integer position i
                        x=i + (j - (n_bases-1)/2) * (0.8 / n_bases),
                        y=row["mean"],
                        yerr=[[row["mean"] - row["low"]], [row["high"] - row["mean"]]],
                        fmt="none",
                        ecolor="black",
                        capsize=4,
                        elinewidth=1,
                    )

        else:
            # group by model only
            for model, group in df.groupby("model"):
                mean, low, high = bootstrap_ci(group["diff"].values)
                results.append({"model": model, "mean": mean, "low": low, "high": high})

            # Reindexing to the full model list gives NaN rows for models absent from the data
            df_boot = pd.DataFrame(results).set_index("model")
            df_boot = df_boot.reindex(list(model_name_map.values()))

            bars = ax.bar(df_boot.index, df_boot["mean"], color=colors[0], width=0.8)

            # Add error bars
            # NOTE(review): assumes the string-labelled bars sit at x = 0, 1, 2, ... - confirm
            ax.errorbar(
                x=np.arange(len(df_boot.index)),
                y=df_boot["mean"],
                yerr=[df_boot["mean"] - df_boot["low"], df_boot["high"] - df_boot["mean"]],
                fmt="none", ecolor="black", capsize=4, elinewidth=1,
            )

        # --- Subplot Customization ---
        ax.set_title(dataset.capitalize(), fontproperties=bold_font, fontsize=14, pad=10)
        ax.set_ylabel("Avg. Diff", fontsize=12)
        ax.grid(axis="y", linestyle="--", alpha=0.7)
        ax.set_xlabel("")

    # Bottom subplot x-labels
    bottom_ax = axes[-1]
    bottom_ax.tick_params(axis="x", rotation=45, labelsize=11)
    bottom_ax.set_xlabel("Model")

    # Add a single shared legend on top if splitting by base
    # NOTE(review): only labels are passed, so matplotlib pairs them with the
    # handles it collects from the axes - confirm the first two are the bar series.
    if split_by_base:
        fig.legend(
            labels=base_order,
            loc="upper center",
            bbox_to_anchor=(0.5, .915),
            ncol=len(base_order),
            fontsize=12,
            frameon=False,
        )

    #plt.tight_layout(rect=[0, 0, 1, 0.96])
    return fig, axes

In [None]:
# Length cost: for every dataset, the change in rating between the first and the
# last conversation round, plus a mixed model of that change on model size.
diff_dfs = {}
for df, dataset in  zip([dialogue_df, bfi_df, role_df, general_df, ifbench_df, safety_df, excess_df], ["dialogue", "bfi", "role", "general", "ifbench", "safety", "excess"]):
    df = df.copy()
    df = df[~df.role.str.contains("empty")]  # persona rows only
    # size / family are looked up from the raw checkpoint name, so `model` is renamed last
    df["size"] = df.model.apply(lambda x: model_size_map[x])
    df["family"] = df.model.apply(lambda x: model_family_map[x])
    df["model"] = df.model.apply(lambda x: model_name_map[x])
    df["roleFamily"] = df.role + df.family  # grouping key for the random effect
    positions = np.unique(df.pos.sort_values())
    df_first = df[df.pos==positions[0]].copy()
    df_last = df[df.pos==positions[-1]].copy()

    # The subtraction below pairs rows purely by position, so both slices must list
    # the same (model, role, base_conversation) keys in the same order. Fail loudly
    # instead of silently subtracting ratings that belong to different rows.
    alignment_keys = ["model", "role", "base_conversation"]
    first_keys = df_first[alignment_keys].reset_index(drop=True)
    last_keys = df_last[alignment_keys].reset_index(drop=True)
    if not first_keys.equals(last_keys):
        raise ValueError(
            f"{dataset}: rows at the first and last position are not aligned on "
            f"{alignment_keys}; cannot compute a positional difference"
        )

    df_last["diff"] = (df_last.rating.values - df_first.rating.values).astype("float")
    # Fixed effect of size on the change, random intercept per role+family group
    md = smf.mixedlm("diff ~ size", df_last, groups=df_last["roleFamily"])
    mdf = md.fit(method=["powell", "lbfgs"])
    coefs = mdf.summary().tables[1]
    coefs["Coef."] = coefs["Coef."].astype("float")
    display(dataset)
    display(coefs)
    diff_dfs[dataset] = df_last

In [None]:
# One subplot per dataset, with separate orange / blue bars per conversation type
fig, axes = plot_with_bootstrap(diff_dfs, model_name_map, bold_font, split_by_base=True, colors=["orange", "blue"])

In [None]:
# Write the figure into the paper's media folder (path is relative to the notebook's working directory)
fig.savefig('../persistent-personas-paper/media/length_cost.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Personalization cost: at round 0, the rating of each persona role minus the
# rating of the matching "empty"-role run, plus a mixed model of that gap on model size.
diff_dfs = {}
for df, dataset in  zip([general_df, ifbench_df, safety_df, excess_df], ["general", "ifbench", "safety", "excess"]):
    df = df.copy()
    df = df[df.pos==0]  # first conversation round only
    # size / family are looked up from the raw checkpoint name, so `model` is renamed last
    df["size"] = df.model.apply(lambda x: model_size_map[x])
    df["family"] = df.model.apply(lambda x: model_family_map[x])
    df["model"] = df.model.apply(lambda x: model_name_map[x])
    df["roleFamily"] = df.role + df.family
    # NOTE(review): `positions` is not read anywhere in this cell
    positions = np.unique(df.pos.sort_values())
    df_roles = df[~df.role.str.contains("empty")].copy()
    df_no_roles = df[df.role.str.contains("empty")].copy()
    # Attach the empty-role rating of the same model / round / conversation type.
    # NOTE(review): if several "empty" rows share a key, the left merge duplicates persona rows - confirm.
    df_merge = pd.merge(df_roles, df_no_roles, how="left", on=["model", "pos", "base_conversation"], suffixes=("", "_no_role"))
    df_merge["diff"] = (df_merge.rating.values - df_merge.rating_no_role.values).astype("float")
    # Fixed effect of size on the gap, random intercept per role+family group
    md = smf.mixedlm("diff ~ size", df_merge, groups=df_merge["roleFamily"]) 
    mdf = md.fit(method=["powell", "lbfgs"])
    coefs = mdf.summary().tables[1]
    coefs["Coef."] = coefs["Coef."].astype("float")
    display(dataset)
    display(coefs)
    diff_dfs[dataset] = df_merge

In [None]:
# One bar per model (split_by_base is left at its default, so both conversation types are pooled)
fig, axes = plot_with_bootstrap(diff_dfs, model_name_map, bold_font)

In [None]:
# Write the figure into the paper's media folder (path is relative to the notebook's working directory)
fig.savefig('../persistent-personas-paper/media/personalization_cost.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Persona-directed vs goal-oriented: for every dataset, the rating of the
# "interview" conversation minus the "instructions" conversation for the same
# model, round and role.
source_frames = [dialogue_df, bfi_df, role_df, general_df, ifbench_df, safety_df, excess_df]
source_names = ["dialogue", "bfi", "role", "general", "ifbench", "safety", "excess"]

diff_dfs = {}
for df, dataset in zip(source_frames, source_names):
    df = df.copy()
    df = df[~df.role.str.contains("empty")]  # persona rows only

    # size / family are looked up from the raw checkpoint name, so `model` is renamed last
    df["size"] = df.model.apply(lambda checkpoint: model_size_map[checkpoint])
    df["family"] = df.model.apply(lambda checkpoint: model_family_map[checkpoint])
    df["model"] = df.model.apply(lambda checkpoint: model_name_map[checkpoint])

    df_persona = df[df.base_conversation == "interview"].copy()
    df_goal = df[df.base_conversation == "instructions"].copy()

    # Goal-oriented columns get the "_goal" suffix next to the persona-directed ones
    df_merge = df_persona.merge(
        df_goal,
        how="left",
        on=["model", "pos", "role"],
        suffixes=("", "_goal"),
    )
    rating_gap = df_merge.rating.values - df_merge.rating_goal.values
    df_merge["diff"] = rating_gap.astype("float")
    diff_dfs[dataset] = df_merge

In [None]:
# One bar per model showing the mean persona-directed minus goal-oriented difference
fig, axes = plot_with_bootstrap(diff_dfs, model_name_map, bold_font)

In [None]:
# Write the figure into the paper's media folder (path is relative to the notebook's working directory)
fig.savefig('../persistent-personas-paper/media/dialogue_diff.pdf', dpi=300, bbox_inches="tight")

In [None]:
data_dfs = [dialogue_df, bfi_df, role_df, general_df, ifbench_df, safety_df, excess_df]
datasets = ["Dialogue metrics", "BFI error", "Persona Instructions Quality", "General Instructions Quality", "IFBench Acc.", "Safety", "Excess Safety"]

# 7 x 7 grid: one row per model, one column per dataset
fig, axes = plt.subplots(nrows=7, ncols=7, figsize=(21, 21), sharex=True)

for row_idx, model in enumerate(model_order):
    for col_idx, (data_df, dataset) in enumerate(zip(data_dfs, datasets)):
        data_df = data_df.copy()
        if dataset == "BFI error":
            # Empty-role BFI ratings are mappings; collapse each one to the mean of its values
            bfi_empty_rows = data_df.role.str.contains("empty")
            data_df.loc[bfi_empty_rows, "rating"] = (
                data_df.loc[bfi_empty_rows, "rating"]
                .apply(lambda per_role: np.mean(list(per_role.values())))
            )

        # Wide table with one column per conversation type.
        # NOTE(review): this rebinds the notebook-level name `ratings`.
        ratings = (
            data_df
            .groupby(["model", "pos","role", "base_conversation"])['rating']
            .mean()
            .unstack(fill_value=0)
            .reset_index()
            .copy()
        )
        ax = axes[row_idx, col_idx]

        this_model = ratings.model == model
        empty_role = ratings.role.str.contains("empty")

        # Persona roles first (solid lines), then the empty-role baseline (dashed)
        curve_specs = (
            (this_model & ~empty_role, 'Persona-directed', 'Goal-oriented', {}),
            (this_model & empty_role, 'Persona-directed (baseline)', 'Goal-oriented (baseline)', {"linestyle": '--'}),
        )
        for row_mask, persona_label, goal_label, line_kwargs in curve_specs:
            df = ratings[row_mask].groupby(['pos']).mean(numeric_only=True).reset_index()
            ax.plot(df.pos, df.interview,
                    color="orange", marker='o', label=persona_label, **line_kwargs)
            ax.plot(df.pos, df.instructions,
                    color="blue", marker='x', label=goal_label, **line_kwargs)

        ax.grid(True)

        # Dataset names across the top row
        if row_idx == 0:
            ax.set_title(dataset, fontproperties=bold_font, fontsize=14, pad=20)

        # Model names down the left column
        if col_idx == 0:
            ax.set_ylabel(f'{model_name_map[model]}', fontproperties=bold_font, fontsize=14, rotation=0, ha='right')
            ax.yaxis.set_label_coords(-0.2, 0.5)

        # x-axis label on the bottom row of the grid only
        if row_idx == axes.shape[0] - 1:
            ax.set_xlabel('Conversation round', fontsize=12)

# Figure-level legend built from the handles of the top-left subplot
handles, labels = axes[0, 0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5, .99), ncol=2, fontsize=16)

plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

In [None]:
# Write the figure into the paper's media folder (path is relative to the notebook's working directory)
fig.savefig('../persistent-personas-paper/media/results_per_model.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Replace underscores in the dialogue role names with spaces
# (presumably so they match the role names of the other tables - TODO confirm)
dialogue_df.role = dialogue_df.role.str.replace("_", " ")

In [None]:
# Sorted unique role names of the general table, without the "empty" baseline roles
roles = [x for x in np.unique(general_df.role) if "empty" not in x]

In [None]:
data_dfs = [dialogue_df, bfi_df, role_df, general_df, ifbench_df, safety_df, excess_df]
datasets = ["Dialogue metrics", "BFI error", "Persona Instructions Quality", "General Instructions Quality", "IFBench Acc.", "Safety", "Excess Safety"]

# Create the figure and subplots with 8 rows (roles) and 7 columns (datasets)
# NOTE(review): the row count is hard-coded; more than 8 roles would index past the grid.
fig, axes = plt.subplots(nrows=8, ncols=7, figsize=(24, 21), sharex=True)

# Bottom-most row that receives data; it carries the x-axis label.
# (A fixed index of 6 only fits a 7-row grid and mislabels this one.)
last_role_row = len(roles) - 1

# Loop through roles for rows
for row_idx, role in enumerate(roles):
    # Loop through datasets for columns
    for col_idx, (data_df, dataset) in enumerate(zip(data_dfs, datasets)):
        # Leave the subplot empty when this dataset has no rows for the role
        if role not in data_df.role.values:
            continue
        data_df = data_df.copy()
        if dataset == "BFI error":
            # Empty-role BFI ratings are mappings keyed by role; pick this role's value
            data_df.loc[data_df.role.str.contains("empty"), "rating"] = data_df[data_df.role.str.contains("empty")]["rating"].apply(lambda x: x[role])
        # Wide table with one column per conversation type, averaged over models.
        # NOTE(review): this rebinds the notebook-level name `ratings`.
        ratings = data_df.groupby(["pos","role", "base_conversation"])['rating'].mean().unstack(fill_value=0).reset_index().copy()
        ax = axes[row_idx, col_idx]

        # Curves for the current role (solid lines)
        df = ratings[(ratings.role == role)].groupby(['pos']).mean(numeric_only=True).reset_index()
        ax.plot(df.pos, df.interview, 
                     color="orange", marker='o', label='Persona-directed')
        ax.plot(df.pos, df.instructions, 
                     color="blue", marker='x', label='Goal-oriented')

        # Empty-role baseline (dashed lines)
        df = ratings[ratings.role.str.contains("empty")].groupby(['pos']).mean(numeric_only=True).reset_index()
        ax.plot(df.pos, df.interview, 
                     color="orange", marker='o', label='Persona-directed (baseline)', linestyle='--')
        ax.plot(df.pos, df.instructions, 
                     color="blue", marker='x', label='Goal-oriented (baseline)', linestyle='--')
        ax.grid(True)

        # Dataset names across the top row
        if row_idx == 0:
            ax.set_title(dataset, fontproperties=bold_font, fontsize=14, pad=20)

        # Role names down the left column
        if col_idx == 0:
            ax.set_ylabel(f'{role}', fontproperties=bold_font, fontsize=14, rotation=0, ha='right')
            ax.yaxis.set_label_coords(-0.2, 0.5)

        # x-axis label on the last role row only
        if row_idx == last_role_row:
            ax.set_xlabel('Conversation round', fontsize=12)

# Get handles and labels from a sample plot to create the figure-level legend
handles, labels = axes[0, 0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5, .99), ncol=2, fontsize=16)

plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

In [None]:
# Write the figure into the paper's media folder (path is relative to the notebook's working directory)
fig.savefig('../persistent-personas-paper/media/results_per_role.pdf', dpi=300, bbox_inches="tight")

In [None]:
# Colour per conversation type, keyed by display name.
# NOTE(review): the coefficient-plot cell that follows rebinds `color_map` with
# "interview" / "instructions" keys, so this mapping does not survive that cell.
color_map = {
    "Persona-directed": "orange",
    "Goal-oriented": "blue"
}

In [None]:
fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(16, 12), sharex=True)
axes = axes.flatten()  # Flatten the 4x2 array of axes into a 1D array for easier iteration

# The first subplot (axes[0]) will be used for the legend, so the loop starts at index 1
for i, (data_df, dataset) in enumerate(zip([dialogue_df, bfi_df, role_df, general_df, ifbench_df, safety_df, excess_df],
                                           ["Dialogue metrics", "BFI error", "Role Specific Instructions Quality", 
                                            "General Instructions Quality", "IFBench Accuracy", "Safety", "Excess Safety"])):
    
    # Select the current subplot axis
    ax = axes[i + 1]
    
    data_df = data_df.copy()
    coeffs = {}
    
    # Check if there's enough data to perform the mixed model analysis
    if len(np.unique(data_df.model)) > 1 and len(np.unique(data_df.base_conversation)) > 1:
        for model in np.unique(data_df.model):
            for dialogue in ["interview", "instructions"]:
                df = data_df[(data_df.model == model) & (data_df.base_conversation == dialogue) & (~data_df.role.str.contains("empty"))].copy()
                
                # Check if the dataframe is not empty before running the model
                if not df.empty and len(df['role'].unique()) > 1:
                    df.pos = df.pos.astype("int")
                    df.rating = df.rating.astype("float")
                    md = smf.mixedlm("rating ~ pos", df, groups=df["role"]) 
                    try:
                        mdf = md.fit(method=["powell", "lbfgs"])
                        coefs = mdf.summary().tables[1]
                        if "pos" in coefs.index:
                            coefs["Coef."] = coefs["Coef."].astype("float")
                            coeffs[(model, dialogue)] = coefs.loc["pos"]
                    except Exception as e:
                        print(f"Could not fit mixed model for {model} and {dialogue}: {e}")
                        continue
                        
    if not coeffs:
        print(f"Skipping plot for {dataset} due to insufficient data for mixed model analysis.")
        ax.set_title(f"No Data for {dataset}", fontsize=14)
        ax.axis('off') # Hide the empty subplot
        continue
    
    df = pd.DataFrame(coeffs).T.reset_index()
    df = df.rename(columns={"level_0": "model", "level_1": "base_conversation"})
    
    numeric_cols = ["Coef.", "Std.Err.", "z", "P>|z|", "[0.025", "0.975]"]
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    
    df["model"] = pd.Categorical(df["model"], categories=model_order, ordered=True)
    df = df.sort_values("model")
    
    # Color scheme
    color_map = {"interview": "orange", "instructions": "blue"}
    
    # X positions for dodge
    x_vals = np.arange(len(model_order))
    offset = 0.15  # displacement between dialogue modes
    
    for j, mode in enumerate(df["base_conversation"].unique()):
        subdf = df[df["base_conversation"] == mode]
        # align to integer positions with displacement
        x = x_vals + (j - 0.5) * 2 * offset  
        y = subdf["Coef."]
        lower = subdf["[0.025"]
        upper = subdf["0.975]"]
        
        ax.errorbar(
            x, y,
            yerr=[y - lower, upper - y],
            fmt="o", capsize=4, color=color_map[mode],
            label=mode, alpha=0.9
        )
    
    # Reference line
    ax.axhline(y=0, color="black", linestyle="--", linewidth=1)
    
    # Family separators
    family_boundaries = [1.5, 3.5, 5.5]
    for xb in family_boundaries:
        ax.axvline(x=xb, color="black", linestyle=":", linewidth=1)
    
    # Labels and title
    ax.set_xlabel("Model", fontsize=10)
    ax.set_ylabel("Coefficient", fontsize=10)
    ax.set_title(f"Impact of length on {dataset}", fontproperties=bold_font, fontsize=12)
    
    # Grid style
    ax.grid(True, which="both", linestyle="--", linewidth=0.5)
    
    # Use model names as ticks centered
    renamed_labels = [model_name_map.get(m, m) for m in model_order]
    ax.set_xticks(x_vals)
    ax.set_xticklabels(renamed_labels, rotation=45, ha="right")
    
# Build the shared legend inside the first subplot (axes[0]).
# The errorbar artists above are labelled with the raw dialogue mode (`label=mode`),
# so the display text is derived from each handle's own label instead of being paired
# positionally with a hard-coded list: the text can then never end up next to the
# wrong colour when the modes were drawn in a different order in some panel.
legend_names = {"interview": "Persona-directed", "instructions": "Goal-oriented"}

# Collect one handle per mode across all plotted panels, so a panel that was skipped
# or that only contains one mode does not leave the legend empty or incomplete.
legend_entries = {}
for panel in axes[1:]:
    for handle, label in zip(*panel.get_legend_handles_labels()):
        legend_entries.setdefault(label, handle)

# Known modes first (fixed order), then any unexpected label under its raw name.
ordered_modes = [m for m in legend_names if m in legend_entries]
ordered_modes += [m for m in legend_entries if m not in legend_names]
handles = [legend_entries[m] for m in ordered_modes]
labels = [legend_names.get(m, m) for m in ordered_modes]

legend_ax = axes[0] # The first subplot for the legend
legend_ax.legend(handles, labels, loc='center', fontsize=16)
legend_ax.axis('off') # Hide the axes and frame of the legend subplot

plt.tight_layout()
plt.show()

In [None]:
# Export whatever `fig` currently refers to as a PDF in the paper's media folder.
fig.savefig(
    "../persistent-personas-paper/media/length_coeffs.pdf",
    dpi=300,
    bbox_inches="tight",
)

In [None]:
# For each benchmark, estimate how much having a persona shifts the rating relative to
# the "empty" baseline, separately for every (model, dialogue type) pair, and plot the
# coefficient together with the [0.025, 0.975] interval reported by the mixed model.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(16, 6), sharex=True)
axes = axes.flatten()  # Flatten the 2x2 array of axes into a 1D array for easier iteration

# Loop-invariant plotting configuration.
# The dialogue modes are always processed in this fixed order so that a mode keeps the
# same colour, the same horizontal displacement and the same legend slot in every panel.
dialogue_modes = ["interview", "instructions"]
color_map = {"interview": "orange", "instructions": "blue"}
mode_display_names = {"interview": "Persona-directed", "instructions": "Goal-oriented"}
# Row of the coefficient table holding the persona-vs-empty contrast.
persona_term = "C(group, Treatment(reference='empty'))[T.persona]"
numeric_cols = ["Coef.", "Std.Err.", "z", "P>|z|", "[0.025", "0.975]"]
x_vals = np.arange(len(model_order))  # one integer slot per model
offset = 0.15  # displacement between dialogue modes
family_boundaries = [1.5, 3.5, 5.5]  # vertical separators drawn between model slots
renamed_labels = [model_name_map.get(m, m) for m in model_order]

for i, (data_df, dataset) in enumerate(zip([general_df, ifbench_df, safety_df, excess_df],
                                           ["General Instruction Quality", "IFBench", "Safety", "Excess Safety"])):

    ax = axes[i]
    data_df = data_df.copy()
    coeffs = {}

    # Check if enough data exists to perform the mixed model analysis
    if len(np.unique(data_df.model)) > 1 and len(np.unique(data_df.base_conversation)) > 1:
        for model in np.unique(data_df.model):
            for dialogue in dialogue_modes:
                pair_df = data_df[(data_df.model == model) & (data_df.base_conversation == dialogue)].copy()
                pair_df["group"] = pair_df.role.apply(lambda x: "persona" if "empty" not in x else "empty")
                # Grouping factor of the mixed model: one group per (position, role) combination.
                pair_df["posRole"] = pair_df["pos"].astype(str) + pair_df["role"].astype(str)

                # The contrast is only defined when both groups and several clusters are present.
                has_both_groups = "empty" in pair_df["group"].values and "persona" in pair_df["group"].values
                if pair_df.empty or len(pair_df["posRole"].unique()) <= 1 or not has_both_groups:
                    continue

                try:
                    md = smf.mixedlm("rating ~ C(group, Treatment(reference='empty'))", pair_df, groups=pair_df["posRole"])
                    mdf = md.fit(method=["powell", "lbfgs"])
                    coefs = mdf.summary().tables[1]
                    if persona_term in coefs.index:
                        # Values are converted to numbers once, after all pairs are collected.
                        coeffs[(model, dialogue)] = coefs.loc[persona_term]
                except Exception as e:
                    # Best effort: a pair that cannot be fitted is reported and left out of the plot.
                    print(f"Could not fit mixed model for {model} and {dialogue}: {e}")

    if not coeffs:
        print(f"Skipping plot for {dataset} due to insufficient data for mixed model analysis.")
        ax.set_title(f"No Data for {dataset}", fontproperties=bold_font, fontsize=12)
        ax.axis('off') # Hide the empty subplot
        continue

    coef_df = pd.DataFrame(coeffs).T.reset_index()
    coef_df = coef_df.rename(columns={"level_0": "model", "level_1": "base_conversation"})
    for col in numeric_cols:
        coef_df[col] = pd.to_numeric(coef_df[col], errors="coerce")

    coef_df["model"] = pd.Categorical(coef_df["model"], categories=model_order, ordered=True)
    coef_df = coef_df.sort_values("model")

    for j, mode in enumerate(dialogue_modes):
        subdf = coef_df[coef_df["base_conversation"] == mode]

        # Only models that were fitted for this mode (and are listed in model_order) get a point.
        present_models_in_order = [m for m in model_order if m in subdf['model'].values]
        if not present_models_in_order:
            continue

        # Keep every point on its model's slot, displaced left/right according to the mode.
        present_x_vals = [x_vals[model_order.index(m)] for m in present_models_in_order]
        x = np.array(present_x_vals) + (j - 0.5) * 2 * offset

        ordered = subdf.set_index('model').loc[present_models_in_order]
        y = ordered["Coef."]
        lower = ordered["[0.025"]
        upper = ordered["0.975]"]

        ax.errorbar(
            x, y,
            yerr=[y - lower, upper - y],
            fmt="o", capsize=4, color=color_map[mode],
            label=mode_display_names[mode], alpha=0.9
        )

    # Reference line
    ax.axhline(y=0, color="black", linestyle="--", linewidth=1)

    # Family separators
    for xb in family_boundaries:
        ax.axvline(x=xb, color="black", linestyle=":", linewidth=1)

    # Labels and title
    ax.set_xlabel("Model", fontsize=10)
    ax.set_ylabel("Coefficient", fontsize=10)
    ax.set_title(f"Impact of personalization on {dataset}", fontproperties=bold_font, fontsize=12)

    # Grid style
    ax.grid(True, which="both", linestyle="--", linewidth=0.5)

    # Use model names as ticks centered
    ax.set_xticks(x_vals)
    ax.set_xticklabels(renamed_labels, rotation=45, ha="right")

# Gather one legend handle per dialogue mode across all panels, so the legend is complete
# even when the first panel was skipped or lacks one of the modes.
legend_entries = {}
for panel in axes:
    for handle, label in zip(*panel.get_legend_handles_labels()):
        legend_entries.setdefault(label, handle)
labels = [mode_display_names[m] for m in dialogue_modes if mode_display_names[m] in legend_entries]
handles = [legend_entries[label] for label in labels]

# Place the legend at the top of the entire figure
if handles:
    fig.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5, 1.02), ncol=2, fontsize=12)

plt.tight_layout()
plt.show()

In [None]:
# Export whatever `fig` currently refers to as a PDF in the paper's media folder.
fig.savefig(
    "../persistent-personas-paper/media/persona_coeffs.pdf",
    dpi=300,
    bbox_inches="tight",
)

### Control for Length

In [None]:
# Dialogue-length table used by the length-control figure below, where it is joined
# to the ratings on the "model", "role" and "pos" columns.
round_lengths_path = "results/round_lengths.csv"
lengths = pd.read_csv(round_lengths_path)

In [None]:
# One panel per dataset showing the mean rating against the mean dialogue length,
# with one curve per dialogue type. axes[0] is kept free and hosts the legend.
fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(16, 6))
axes = axes.flatten()  # 2x4 grid of axes -> flat 1D array

panel_frames = [dialogue_df, bfi_df, role_df, general_df, ifbench_df, safety_df, excess_df]
panel_titles = ["Dialogue metrics", "BFI error", "Persona Instructions Quality", "General Instructions Quality", "IFBench Acc.", "Safety", "Excess Safety"]
y_labels = ["Avg. Rating", "Mean Absolute Difference", "Win Rate", "Win Rate", "Acc.", "Refusal Rate", "Refusal Rate"]

# Columns of `lengths` relevant to each dialogue type, and the keys used for the join.
merge_keys = ["model", "role", "pos"]
persona_length_cols = [name for name in lengths.columns if "goal" not in name]
goal_length_cols = [name for name in lengths.columns if "persona" not in name]

# (dialogue type, colour, marker, legend label) for the two curves of every panel.
curve_specs = [
    ("interview", "orange", "o", "Persona-directed"),
    ("instructions", "blue", "x", "Goal-oriented"),
]

for i, (source_df, dataset) in enumerate(zip(panel_frames, panel_titles)):

    # Panels start at the second subplot; the first one is the legend slot.
    ax = axes[i + 1]

    # Drop the "empty" roles and normalise underscores in role names to spaces.
    personas_only = source_df[~source_df.role.str.contains("empty")].copy()
    personas_only["role"] = personas_only.role.str.replace("_", " ")

    persona_df = personas_only[personas_only.base_conversation == "interview"].copy()
    goal_df = personas_only[personas_only.base_conversation == "instructions"].copy()

    # Attach the length columns of the matching dialogue type and strip the
    # type suffix so both halves end up with the same column names.
    if not lengths.empty:
        persona_df = persona_df.merge(lengths[persona_length_cols], on=merge_keys, how="left")
        persona_df = persona_df.rename(columns=lambda name: name.replace("_persona_directed", ""))

        goal_df = goal_df.merge(lengths[goal_length_cols], on=merge_keys, how="left")
        goal_df = goal_df.rename(columns=lambda name: name.replace("_goal_oriented", ""))

    combined = pd.concat([persona_df, goal_df])

    # Nothing to draw without rows or without a token-count column.
    if combined.empty or "n_tokens" not in combined.columns:
        ax.set_title(f"No Data for {dataset}", fontproperties=bold_font, fontsize=12)
        ax.axis("off")  # Hide the empty subplot
        continue

    data_df = combined.copy()
    data_df["rating"] = data_df["rating"].astype("float")

    # Mean rating and mean token count per (position, dialogue type).
    per_position = (
        data_df[~data_df.role.str.contains("empty")]
        .groupby(["pos", "base_conversation"])[["rating", "n_tokens"]]
        .mean()
        .reset_index()
        .copy()
    )

    # One curve per dialogue type, averaged over rows that share a token count.
    for mode, color, marker, curve_label in curve_specs:
        curve = (
            per_position[per_position.base_conversation == mode]
            .groupby(["n_tokens"])
            .mean(numeric_only=True)
            .reset_index()
        )
        ax.plot(curve.n_tokens, curve.rating, color=color, marker=marker, label=curve_label)

    # Titles and labels for this panel
    ax.set_title(f"{dataset}", fontproperties=bold_font, fontsize=12)
    ax.set_ylabel(y_labels[i], fontsize=12)
    ax.grid(True)

    # Panels with i >= 3 sit in the bottom row of the 2x4 grid; only they get an x-label.
    if i >= 3:
        ax.set_xlabel("Dialogue length (# tokens)", fontsize=10)

# Legend slot: empty proxy artists carry the colours, markers and labels.
legend_ax = axes[0]
legend_ax.plot([], [], "o", color="orange", label="Persona-directed")
legend_ax.plot([], [], "x", color="blue", label="Goal-oriented")
legend_ax.legend(loc="center", fontsize=16)
legend_ax.axis("off")  # Hide the axes and frame of the legend subplot

plt.tight_layout()
plt.show()










In [None]:
# Export whatever `fig` currently refers to as a PDF in the paper's media folder.
fig.savefig(
    "../persistent-personas-paper/media/length_control.pdf",
    dpi=300,
    bbox_inches="tight",
)