In [None]:
%load_ext autoreload
%autoreload 2

import glob
import json
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from transformers import AutoTokenizer

In [None]:
def get_auto_tokenizer(model_name: str):
    """
    Load the Hugging Face tokenizer that corresponds to a supported model.

    Args:
        model_name (str): Short model name; must be one of the keys of the
            mapping defined inside this function.

    Returns:
        AutoTokenizer: Tokenizer loaded via ``AutoTokenizer.from_pretrained``
            for the repository mapped to ``model_name``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # Short model name -> Hugging Face repository id of its tokenizer.
    hub_ids = {
        'Llama-3.1-Nemotron-Nano-8B-v1': 'nvidia/Llama-3.1-Nemotron-Nano-8B-v1',
        'Llama-3_3-Nemotron-Super-49B-v1': 'nvidia/Llama-3_3-Nemotron-Super-49B-v1',
        'Qwen3-30B-A3B-Instruct-2507': 'Qwen/Qwen3-30B-A3B-Instruct-2507',
        'Qwen3-4B-Instruct-2507': 'Qwen/Qwen3-4B-Instruct-2507',
        'gemma-3-27b-it': 'google/gemma-3-27b-it',
        'gemma-3-4b-it': 'google/gemma-3-4b-it',
        # The Gemini models are not publicly available on Hugging Face, so this
        # entry points at a Gemma tokenizer instead (presumably as an
        # approximation of Gemini's tokenization).
        'gemini-2.5-flash': 'google/gemma-3-27b-it',
    }

    # Guard clause: reject unknown names before touching the hub.
    if model_name not in hub_ids:
        raise ValueError(f"Model name '{model_name}' not supported. Please choose from: {list(hub_ids.keys())}")

    return AutoTokenizer.from_pretrained(hub_ids[model_name])

In [None]:
# Collect the model names (the directory directly under ./generations) from the
# generation files themselves. The file list is built here rather than read
# from `interviews`, because that name is only assigned in a later cell and a
# fresh-kernel "Run All" would otherwise fail with a NameError. Both file
# families processed further down are included so every model they reference
# gets a tokenizer.
generation_files = (
    glob.glob("./generations/*/interview*")
    + glob.glob("./generations/*/instructions_*")
)
models = np.unique([path.split("/")[2] for path in generation_files])

In [None]:
# One tokenizer per model name, loaded once up front and looked up by the
# counting loops further down.
all_tokenizers = dict(zip(models, map(get_auto_tokenizer, models)))

In [None]:
# Interview transcript files, one level below each model directory.
# glob makes no ordering guarantee, so sort to keep the row order of the
# resulting tables (and the saved CSV) stable across runs and machines.
interviews = sorted(glob.glob("./generations/*/interview*"))

In [None]:
# Cumulative token count after every dialogue round of each interview file.
# The four lists are parallel (one entry per (file, position) pair) and are
# turned into a DataFrame by the next cell.
# NOTE: this rebinds `models`, which previously held the array of model names.
models = []
idxs = []
counts = []
roles = []
for interview in tqdm(interviews):
    # Shuffled variants are excluded; check first so no lookup or file I/O is
    # spent on files that are skipped anyway.
    if "shuffle" in interview:
        continue

    # Path layout: ./generations/<model>/<file name>
    parts = interview.split("/")
    model = parts[2]
    tokenizer = all_tokenizers[model]
    # Role = file name without its first "_"-separated prefix and ".json",
    # with remaining underscores turned into spaces.
    role = parts[3].replace("_shuffle", "").split("_", maxsplit=1)[-1].replace(".json", "").replace("_", " ")

    # Context manager guarantees the handle is closed (the bare
    # json.load(open(...)) leaked it); JSON is read explicitly as UTF-8 so the
    # result does not depend on the platform's default encoding.
    with open(interview, "r", encoding="utf-8") as dialogue_file:
        dialogue = json.load(dialogue_file)

    # Position 0: nothing has been said yet, so zero tokens.
    cumulative_tokens = 0
    models.append(model)
    roles.append(role)
    idxs.append(0)
    counts.append(0)

    # Walk the dialogue two messages at a time (presumably one user/assistant
    # exchange per pair); an unpaired trailing message is ignored.
    for round_idx in range(len(dialogue) // 2):
        message_pair = dialogue[round_idx * 2:round_idx * 2 + 2]
        round_tokens = len(tokenizer.apply_chat_template(message_pair, tokenize=True))
        if round_idx != 0:
            # remove bos token
            # NOTE(review): assumes every templated pair starts with exactly
            # one BOS token - confirm for each tokenizer.
            round_tokens -= 1
        cumulative_tokens += round_tokens
        models.append(model)
        roles.append(role)
        idxs.append(round_idx + 1)
        counts.append(cumulative_tokens)

In [None]:
# Long-format table of the interview counts: one row per (model, role,
# position) with the cumulative token count reached at that position.
persona_df = pd.DataFrame(
    {
        "model": models,
        "role": roles,
        "pos": idxs,
        "n_tokens": counts,
    }
)

In [None]:
# Instruction-based generation files, one level below each model directory.
# glob makes no ordering guarantee, so sort to keep the row order of the
# resulting tables (and the saved CSV) stable across runs and machines.
interviews = sorted(glob.glob("./generations/*/instructions_*"))

In [None]:
# Cumulative token count after every dialogue round of each instructions file.
# The four lists are parallel (one entry per (file, position) pair) and are
# turned into a DataFrame by the next cell.
# NOTE: this rebinds `models`, `idxs`, `counts` and `roles` from scratch.
models = []
idxs = []
counts = []
roles = []
for interview in tqdm(interviews):
    # Shuffled variants are excluded; check first so no lookup or file I/O is
    # spent on files that are skipped anyway.
    if "shuffle" in interview:
        continue

    # Path layout: ./generations/<model>/<file name>
    parts = interview.split("/")
    model = parts[2]
    tokenizer = all_tokenizers[model]
    # Role = file name without its first "_"-separated prefix and ".json",
    # with remaining underscores turned into spaces.
    role = parts[3].replace("_shuffle", "").split("_", maxsplit=1)[-1].replace(".json", "").replace("_", " ")

    # Context manager guarantees the handle is closed (the bare
    # json.load(open(...)) leaked it); JSON is read explicitly as UTF-8 so the
    # result does not depend on the platform's default encoding.
    with open(interview, "r", encoding="utf-8") as dialogue_file:
        dialogue = json.load(dialogue_file)

    # Position 0: nothing has been said yet, so zero tokens.
    cumulative_tokens = 0
    models.append(model)
    roles.append(role)
    idxs.append(0)
    counts.append(0)

    # Walk the dialogue two messages at a time (presumably one user/assistant
    # exchange per pair); an unpaired trailing message is ignored.
    for round_idx in range(len(dialogue) // 2):
        message_pair = dialogue[round_idx * 2:round_idx * 2 + 2]
        round_tokens = len(tokenizer.apply_chat_template(message_pair, tokenize=True))
        if round_idx != 0:
            # remove bos token
            # NOTE(review): assumes every templated pair starts with exactly
            # one BOS token - confirm for each tokenizer.
            round_tokens -= 1
        cumulative_tokens += round_tokens
        models.append(model)
        roles.append(role)
        idxs.append(round_idx + 1)
        counts.append(cumulative_tokens)

In [None]:
# Long-format table of the instructions counts: one row per (model, role,
# position) with the cumulative token count reached at that position.
goal_df = pd.DataFrame(
    {
        "model": models,
        "role": roles,
        "pos": idxs,
        "n_tokens": counts,
    }
)

In [None]:
# Join the two tables on (model, role, pos). The default inner join keeps only
# keys present in both; the two `n_tokens` columns are told apart by suffix.
length_df = persona_df.merge(
    goal_df,
    on=["model", "role", "pos"],
    suffixes=("_persona_directed", "_goal_oriented"),
)

In [None]:
# Mean cumulative token counts per model at position 102
# (presumably the final round of the dialogues - TODO confirm).
(
    length_df.loc[length_df["pos"] == 102]
    .groupby(["model", "pos"])[["n_tokens_persona_directed", "n_tokens_goal_oriented"]]
    .mean()
)

In [None]:
# Work on a copy so the merged table itself is left untouched; `df` is reused
# by the per-role plot further down.
df = length_df.copy()
# Average both token-count columns over roles for every (model, position).
df_model_avg = (
    df.groupby(['model', 'pos'])[['n_tokens_persona_directed', 'n_tokens_goal_oriented']]
    .mean()
    .reset_index()
)

# Plot counts over 'pos' for different 'models'
plt.figure(figsize=(10, 6))
for model in df_model_avg['model'].unique():
    subset = df_model_avg[df_model_avg['model'] == model]
    # Each model contributes two curves. They previously shared one legend
    # label, which made the legend ambiguous; now the label names the condition
    # and both curves of a model share a colour (solid vs. dashed).
    (persona_line,) = plt.plot(
        subset['pos'],
        subset['n_tokens_persona_directed'],
        marker="o",
        label=f"{model} (persona-directed)",
    )
    plt.plot(
        subset['pos'],
        subset['n_tokens_goal_oriented'],
        marker="x",
        linestyle="--",
        color=persona_line.get_color(),
        label=f"{model} (goal-oriented)",
    )
plt.title('Average Token Count by Position for Different Models')
plt.xlabel('Position')
plt.ylabel('Average Token Count')
plt.legend()
plt.grid(True)

plt.show()

In [None]:
# Mean cumulative token counts per role at position 102
# (presumably the final round of the dialogues - TODO confirm).
(
    length_df.loc[length_df["pos"] == 102]
    .groupby(["role", "pos"])[["n_tokens_persona_directed", "n_tokens_goal_oriented"]]
    .mean()
)

In [None]:

# Group by 'role' and 'pos' and calculate the mean of both token-count columns
# (i.e. average over models). `df` is the copy of `length_df` created in the
# per-model plotting cell above.
df_role_avg = (
    df.groupby(['role', 'pos'])[['n_tokens_persona_directed', 'n_tokens_goal_oriented']]
    .mean()
    .reset_index()
)

# Plot counts over 'pos' for different 'roles'
plt.figure(figsize=(10, 6))
for role in df_role_avg['role'].unique():
    subset = df_role_avg[df_role_avg['role'] == role]
    # Each role contributes two curves. They previously shared one legend
    # label, which made the legend ambiguous; now the label names the condition
    # and both curves of a role share a colour (solid vs. dashed).
    (persona_line,) = plt.plot(
        subset['pos'],
        subset['n_tokens_persona_directed'],
        marker="o",
        label=f"{role} (persona-directed)",
    )
    plt.plot(
        subset['pos'],
        subset['n_tokens_goal_oriented'],
        marker="x",
        linestyle="--",
        color=persona_line.get_color(),
        label=f"{role} (goal-oriented)",
    )
plt.title('Average Token Count by Position for Different Roles')
plt.xlabel('Position')
plt.ylabel('Average Token Count')
plt.legend(title='Role')
plt.grid(True)

plt.show()

In [None]:
# Persist the merged per-round token counts. The output directory is created
# first because `to_csv` does not create missing parent directories and would
# otherwise fail on a fresh checkout.
round_lengths_path = Path("./results/round_lengths.csv")
round_lengths_path.parent.mkdir(parents=True, exist_ok=True)
length_df.to_csv(round_lengths_path, index=False)