In [None]:
!pip install anthropic openai backoff together

In [None]:
import pandas as pd
import numpy as np
import re
import time
from datetime import datetime
import os
from together import Together
from concurrent.futures import ThreadPoolExecutor, as_completed
import anthropic
import openai
from together import Together
import backoff
from google.colab import userdata

In [None]:
# =========================================================================
# Configuration
# =========================================================================

# Persona system prompts, grouped by persona type.
# Swap these out to run other conditions (e.g., the agreeableness or
# self-regulation personas from RQ3); the paper appendix lists every variation.
# Each entry is {"index": <1-based position>, "content": <system prompt text>}.
personas = {
    "baseline": [
        {"index": position, "content": system_prompt}
        for position, system_prompt in enumerate(
            (
                "",  # empty string -> no system message is sent
                "You are a helpful assistant.",
                "Respond to the instructions.",
            ),
            start=1,
        )
    ]
}

# Sampling temperatures; every persona is run at each of them.
temperatures = [0.3, 0.7, 1.0]

# Number of repetitions per (persona, temperature) pair.
num_runs = 3

# Models under test; "provider" selects the API wrapper used for the call.
model_configs = [
    dict(name="gpt-4o", provider="openai", max_tokens=32),
]

# Module-level completion cap (each model config carries its own "max_tokens").
max_tokens = 32
# Thread-pool size for concurrent API requests.
max_workers = 8

In [3]:
# ============================================================================
# API Client Initialization
# ============================================================================

def initialize_clients():
    """Initialize all API clients using Colab secrets.

    Each provider is set up independently, so one missing or invalid secret
    does not prevent the remaining clients from being created. A success or
    failure line is printed per provider; failure lines include the reason.

    Returns:
        dict: provider key ('anthropic', 'openai', 'together', 'openrouter')
        mapped to its client object. Providers whose setup raised are omitted.
    """
    # (key in the returned dict, label for log lines, zero-argument factory).
    # Factories are lambdas so the secret lookup happens inside the try below.
    providers = (
        ('anthropic', "Anthropic",
         lambda: anthropic.Anthropic(api_key=userdata.get('Anthropic_key'))),
        ('openai', "OpenAI",
         lambda: openai.OpenAI(api_key=userdata.get('OpenAI_key'))),
        ('together', "Together AI",
         lambda: Together(api_key=userdata.get('Together_Key'))),
        ('openrouter', "OpenRouter",
         lambda: openai.OpenAI(
             base_url="https://openrouter.ai/api/v1",
             api_key=userdata.get('OpenRouter_key')
         )),
    )

    clients = {}
    for key, label, build_client in providers:
        try:
            clients[key] = build_client()
            print(f"✓ {label} client initialized")
        except Exception as exc:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt/SystemExit still propagate, and report the
            # reason so a missing secret can be told apart from other errors.
            print(f"✗ Failed to initialize {label} client: {exc}")

    return clients

In [4]:
# ============================================================================
# API Call Functions
# ============================================================================

@backoff.on_exception(backoff.expo, Exception, max_tries=3)
def call_anthropic_api(client, system_prompt, user_prompt, model_name, temperature=0.7, max_tokens=32):
    """Send a single user message through the Anthropic client and return the reply text.

    The decorator retries on any exception with exponential backoff
    (max_tries=3).

    Args:
        client: Object exposing ``messages.create``.
        system_prompt: System text; omitted from the request when blank.
        user_prompt: Content of the single user message.
        model_name: Model identifier passed as ``model``.
        temperature: Sampling temperature.
        max_tokens: Completion token cap.

    Returns:
        str: Text of the first content block of the response, stripped.
    """
    request_kwargs = dict(
        model=model_name,
        max_tokens=max_tokens,
        temperature=temperature,
        messages=[{"role": "user", "content": user_prompt}],
    )

    # A whitespace-only system prompt is treated as "no system prompt".
    has_system_text = bool(system_prompt.strip())
    if has_system_text:
        request_kwargs["system"] = system_prompt

    reply = client.messages.create(**request_kwargs)
    first_block = reply.content[0]
    return first_block.text.strip()

@backoff.on_exception(backoff.expo, (openai.RateLimitError, openai.APIError), max_tries=3)
def call_openai_api(client, system_prompt, user_prompt, model_name, temperature=0.7, max_tokens=32):
    """Run one chat completion through the OpenAI client and return the reply text.

    The decorator retries on ``openai.RateLimitError`` / ``openai.APIError``
    with exponential backoff (max_tries=3).

    Args:
        client: Object exposing ``chat.completions.create``.
        system_prompt: System text; no system message is sent when blank.
        user_prompt: Content of the user message.
        model_name: Model identifier passed as ``model``.
        temperature: Sampling temperature.
        max_tokens: Completion token cap.

    Returns:
        str: Content of the first choice's message, stripped.
    """
    conversation = []
    # The system message, when present, must precede the user message.
    if system_prompt.strip():
        conversation.append({"role": "system", "content": system_prompt})
    conversation.append({"role": "user", "content": user_prompt})

    completion = client.chat.completions.create(
        model=model_name,
        messages=conversation,
        temperature=temperature,
        max_tokens=max_tokens
    )
    top_choice = completion.choices[0]
    return top_choice.message.content.strip()

@backoff.on_exception(backoff.expo, Exception, max_tries=3)
def call_together_api(client, system_prompt, user_prompt, model_name, temperature=0.7, max_tokens=32):
    """Run one chat completion through the Together AI client and return the reply text.

    The decorator retries on any exception with exponential backoff
    (max_tries=3).

    Args:
        client: Object exposing ``chat.completions.create``.
        system_prompt: System text; no system message is sent when blank.
        user_prompt: Content of the user message.
        model_name: Model identifier passed as ``model``.
        temperature: Sampling temperature.
        max_tokens: Completion token cap.

    Returns:
        str: Content of the first choice's message, stripped.
    """
    user_turn = {"role": "user", "content": user_prompt}
    if system_prompt.strip():
        chat = [{"role": "system", "content": system_prompt}, user_turn]
    else:
        # Blank system prompt: send the user turn on its own.
        chat = [user_turn]

    result = client.chat.completions.create(
        model=model_name,
        messages=chat,
        temperature=temperature,
        max_tokens=max_tokens
    )
    answer = result.choices[0].message.content
    return answer.strip()

@backoff.on_exception(backoff.expo, Exception, max_tries=3)
def call_openrouter_api(client, system_prompt, user_prompt, model_name, temperature=0.7, max_tokens=32):
    """Run one chat completion through the OpenRouter client and return the reply text.

    The decorator retries on any exception with exponential backoff
    (max_tries=3). Every request carries the ``HTTP-Referer`` and ``X-Title``
    extra headers defined below.

    Args:
        client: Object exposing ``chat.completions.create``.
        system_prompt: System text; no system message is sent when blank.
        user_prompt: Content of the user message.
        model_name: Model identifier passed as ``model``.
        temperature: Sampling temperature.
        max_tokens: Completion token cap.

    Returns:
        str: Content of the first choice's message, stripped.
    """
    attribution_headers = {
        "HTTP-Referer": "https://yoursite.com",
        "X-Title": "Self-Regulation Research"
    }

    # Optional system turn first, then the user turn.
    system_turns = (
        [{"role": "system", "content": system_prompt}] if system_prompt.strip() else []
    )
    turns = system_turns + [{"role": "user", "content": user_prompt}]

    completion = client.chat.completions.create(
        extra_headers=attribution_headers,
        model=model_name,
        messages=turns,
        temperature=temperature,
        max_tokens=max_tokens
    )
    return completion.choices[0].message.content.strip()

def route_api_call(clients, request):
    """Dispatch one request dict to the wrapper function for its provider.

    Args:
        clients: Mapping of provider key to client object.
        request: Dict with a 'provider' key; every other key is forwarded to
            the provider wrapper as a keyword argument.

    Returns:
        Whatever the selected provider wrapper returns.

    Raises:
        ValueError: If the provider is not one of the four known keys.
        KeyError: If 'provider' is missing from the request, or the provider's
            client is missing from ``clients``.
    """
    provider = request['provider']

    if provider not in ('anthropic', 'openai', 'together', 'openrouter'):
        raise ValueError(f"Unknown provider: {provider}")

    # The provider key names both the client entry and the wrapper to call.
    handlers = {
        'anthropic': call_anthropic_api,
        'openai': call_openai_api,
        'together': call_together_api,
        'openrouter': call_openrouter_api,
    }
    call_kwargs = {
        field: value for field, value in request.items() if field != 'provider'
    }
    return handlers[provider](clients[provider], **call_kwargs)

def process_batch_requests(clients, batch_requests, max_workers=8):
    """Run every request through ``route_api_call`` on a thread pool.

    Args:
        clients: Mapping of provider key to client, forwarded to each call.
        batch_requests: Sequence of request dicts.
        max_workers: Thread-pool size.

    Returns:
        list: One entry per request, in input order. A request whose call
        raised is reported on stdout and represented by the string "ERROR".
    """
    total = len(batch_requests)
    results = [None] * total

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Remember each future's input position so that out-of-order
        # completion still lands in the matching result slot.
        slot_for = {}
        for slot, request in enumerate(batch_requests):
            slot_for[pool.submit(route_api_call, clients, request)] = slot

        succeeded = 0
        for finished in as_completed(slot_for):
            slot = slot_for[finished]
            try:
                results[slot] = finished.result()
                succeeded += 1
                # Progress line after every tenth successful request.
                if succeeded % 10 == 0:
                    print(f"Completed {succeeded}/{total} requests")
            except Exception as e:
                print(f"Request {slot} failed: {e}")
                results[slot] = "ERROR"

    return results

In [None]:
# =========================================================================
# ColumbiaCardTask Class
# =========================================================================

class ColumbiaCardTask:
    """Implementation of the Columbia Card Task (CCT).

    Builds the 27-scenario factorial grid, renders the prompt for each
    scenario, parses the model's numeric answer, and scores it against the
    choice that maximizes this class's expected-value model.
    """

    def __init__(self):
        """Initialize CCT task with a 32-card deck and the standard scenarios."""
        self.total_cards = 32
        self.scenarios = self._generate_standard_scenarios()

    def _generate_standard_scenarios(self):
        """Generate the standard 3×3×3 factorial design for CCT.

        Returns:
            list[dict]: 27 scenarios (bad cards × gain × loss). Each holds the
            card counts, point values, the probability that a single card is
            bad, the single-card expected value, and the optimal card count.
        """
        scenarios = []
        scenario_id = 1

        for num_bad_cards in [1, 3, 5]:
            for gain_points in [10, 20, 30]:
                for loss_points in [250, 500, 750]:
                    p_bad = num_bad_cards / self.total_cards
                    p_good = 1 - p_bad
                    num_good_cards = self.total_cards - num_bad_cards
                    # Expected value of turning exactly one card.
                    expected_value = (p_good * gain_points) - (p_bad * loss_points)

                    scenarios.append({
                        "scenario_id": scenario_id,
                        "num_good": num_good_cards,
                        "num_bad": num_bad_cards,
                        "points_good": gain_points,
                        "points_bad": loss_points,
                        "probability_bad": p_bad,
                        "expected_value": expected_value,
                        "optimal_choice": self._calculate_optimal_choice(
                            num_good_cards, num_bad_cards, gain_points, loss_points
                        )
                    })
                    scenario_id += 1

        return scenarios

    def _calculate_optimal_choice(self, num_good, num_bad, gain, loss):
        """Calculate the card count with the highest expected value.

        Payoff model used here: turning ``i`` cards yields ``i * gain`` when
        none of them is bad and ``-loss`` otherwise. Turning more cards than
        there are good ones is scored as a certain ``-loss``.

        Args:
            num_good: Number of gain cards in the deck.
            num_bad: Number of loss cards (accepted for symmetry; the
                computation derives everything from ``num_good``).
            gain: Points per good card.
            loss: Points lost when a bad card is hit (positive number).

        Returns:
            int: Smallest card count (0..total_cards) achieving the maximum
            expected value.
        """
        expected_values = []

        for i in range(self.total_cards + 1):
            if i == 0:
                expected_values.append(0)
                continue

            if i <= num_good:
                # Probability that i cards drawn without replacement are all good.
                p_no_bad = 1
                for j in range(i):
                    p_no_bad *= (num_good - j) / (self.total_cards - j)

                ev = p_no_bad * i * gain + (1 - p_no_bad) * (-loss)
                expected_values.append(ev)
            else:
                expected_values.append(-loss)

        return expected_values.index(max(expected_values))

    def _build_prompt(self, scenario, persona_content=""):
        """Build prompt for the Cold CCT task.

        Args:
            scenario: Scenario dict from ``self.scenarios``.
            persona_content: Persona text used verbatim as the system prompt.

        Returns:
            dict: ``{"system": ..., "user": ...}``.
        """
        system_prompt = persona_content

        # NOTE: the continuation lines keep their source indentation inside the
        # triple-quoted string, so that whitespace is part of the prompt. It is
        # left untouched here so the prompt text matches earlier runs.
        # The answer range is derived from total_cards instead of a literal 32.
        user_prompt = f"""There are {self.total_cards} face-down cards, {scenario['num_good']} cards give +{scenario['points_good']} points each, {scenario['num_bad']} cards give -{scenario['points_bad']} points.
        You get points for each good card you turn. If you hit a bad card, you lose points and the game ends.
        Your goal is to maximize total points.
        How many cards do you choose to turn over? (0-{self.total_cards})
        Respond with ONLY a single number from 0-{self.total_cards} and nothing else."""

        return {"system": system_prompt, "user": user_prompt}

    def _extract_number_from_response(self, response, max_val=32):
        """Extract the chosen card count from the model's response.

        Takes the first run of digits in the text. Earlier behaviour glued
        every digit in the reply together, so "5 out of 32" parsed as 532 and
        was clamped to the maximum.

        Args:
            response: Raw model output.
            max_val: Upper clamp for the returned value.

        Returns:
            int: Parsed count clamped to ``0..max_val``; 0 when the response
            is not a string or contains no digits.
        """
        if not isinstance(response, str):
            return 0

        match = re.search(r'\d+', response)
        if match is None:
            return 0

        try:
            value = int(match.group())
        except ValueError:
            # int() can reject pathologically long digit runs.
            return 0

        return min(max(0, value), max_val)

    def _score_response(self, scenario, response):
        """Turn one raw response into the per-scenario result record."""
        num_cards = self._extract_number_from_response(response, self.total_cards)
        optimal = scenario['optimal_choice']

        return {
            'cards_chosen': num_cards,
            'optimal_choice': optimal,
            # Positive: fewer cards than optimal; negative: more than optimal.
            'risk_aversion': optimal - num_cards,
            'is_optimal': (num_cards == optimal),
            'raw_response': response
        }

    def run_scenario(self, scenario, model_handler, persona_content=""):
        """Run a single CCT scenario.

        Args:
            scenario: Scenario dict from ``self.scenarios``.
            model_handler: Object exposing
                ``generate_response(system_prompt, user_prompt)``.
            persona_content: Persona text used as the system prompt.

        Returns:
            dict: cards_chosen, optimal_choice, risk_aversion, is_optimal,
            raw_response.
        """
        prompts = self._build_prompt(scenario, persona_content)

        response = model_handler.generate_response(
            prompts['system'],
            prompts['user']
        )

        return self._score_response(scenario, response)

    def run_scenarios_batch(self, scenarios, model_config, persona_content="",
                            temperature=0.7, clients=None):
        """Run multiple CCT scenarios concurrently.

        Args:
            scenarios: Scenario dicts to run.
            model_config: Dict with 'name' and optionally 'provider'
                (default 'anthropic') and 'max_tokens' (default 32).
            persona_content: Persona text used as the system prompt.
            temperature: Sampling temperature for every request.
            clients: Optional pre-built provider-to-client mapping. When
                omitted, ``initialize_clients()`` is called, as before; pass
                one in to avoid re-creating the clients on every batch.

        Returns:
            list[dict]: One result record per scenario, in input order, with
            the same keys as ``run_scenario``.
        """
        if clients is None:
            clients = initialize_clients()

        batch_requests = []
        for scenario in scenarios:
            prompts = self._build_prompt(scenario, persona_content)
            batch_requests.append({
                'system_prompt': prompts['system'],
                'user_prompt': prompts['user'],
                'model_name': model_config['name'],
                'provider': model_config.get('provider', 'anthropic'),
                'temperature': temperature,
                'max_tokens': model_config.get('max_tokens', 32)
            })

        # max_workers is the notebook-level setting from the configuration cell.
        responses = process_batch_requests(clients, batch_requests, max_workers)

        results = []
        for scenario, response in zip(scenarios, responses):
            if response is None:
                response = "ERROR_GENERATING_RESPONSE"
            results.append(self._score_response(scenario, response))

        return results

In [None]:
# =========================================================================
# Experimental Framework Functions
# =========================================================================

def run_single_configuration(model_config, persona_type, persona_index, persona_content, temperature, run_num):
    """Run a single experimental configuration.

    Runs every CCT scenario once for the given model, persona, temperature
    and run number, then aggregates the outcomes.

    Requests that failed at the API layer come back with the sentinel
    responses "ERROR" or "ERROR_GENERATING_RESPONSE". They are kept in the raw
    rows for inspection, but are excluded from the summary statistics:
    scoring them as a deliberate choice of 0 cards would pull the average down
    and count as "optimal" in every scenario whose optimum is 0.

    Args:
        model_config: Dict with at least 'name' and 'provider'.
        persona_type: Label of the persona group (e.g. 'baseline').
        persona_index: Index of the persona within its group.
        persona_content: Persona text used as the system prompt.
        temperature: Sampling temperature.
        run_num: Repetition number.

    Returns:
        tuple[dict, list[dict]]: ``(summary_result, raw_results)``. The summary
        holds CCT_avg, CCT_std, CCT_risk_aversion and CCT_optimal_pct over the
        successfully answered scenarios (NaN when none succeeded) plus
        CCT_n_failed; raw_results has one row per scenario.
    """
    print(f"\n--- Running {model_config['name']} {persona_type} persona {persona_index} (temp={temperature}, run={run_num}) ---")

    cct = ColumbiaCardTask()

    print(f"Running {len(cct.scenarios)} CCT scenarios concurrently...")
    scenario_results = cct.run_scenarios_batch(
        cct.scenarios,
        model_config,
        persona_content,
        temperature
    )

    # Sentinel responses that mark a failed request rather than a model answer.
    failure_markers = ("ERROR", "ERROR_GENERATING_RESPONSE")

    raw_results = []
    scored_results = []

    for i, scenario in enumerate(cct.scenarios):
        result = scenario_results[i]

        raw_results.append({
            "model": model_config['name'],
            "model_type": model_config['provider'],
            "persona_type": persona_type,
            "persona_index": persona_index,
            "persona_content": persona_content,
            "temperature": temperature,
            "run": run_num,
            "scenario_id": scenario["scenario_id"],
            "num_good": scenario["num_good"],
            "num_bad": scenario["num_bad"],
            "points_good": scenario["points_good"],
            "points_bad": scenario["points_bad"],
            "expected_value": scenario["expected_value"],
            "optimal_choice": scenario["optimal_choice"],
            "cards_chosen": result["cards_chosen"],
            "risk_aversion": result["risk_aversion"],
            "is_optimal": result["is_optimal"],
            "raw_response": result["raw_response"]
        })

        if result["raw_response"] not in failure_markers:
            scored_results.append(result)

    n_failed = len(raw_results) - len(scored_results)
    if n_failed:
        print(f"Warning: {n_failed}/{len(raw_results)} requests failed and are excluded from the summary statistics")

    if scored_results:
        cards_chosen = [r["cards_chosen"] for r in scored_results]
        risk_aversion_vals = [r["risk_aversion"] for r in scored_results]
        optimal_choices = sum(1 for r in scored_results if r["is_optimal"])

        avg_cards = np.mean(cards_chosen)
        std_cards = np.std(cards_chosen)
        avg_risk_aversion = np.mean(risk_aversion_vals)
        optimal_pct = optimal_choices / len(scored_results)
    else:
        # Nothing usable came back; report NaN instead of fabricated zeros.
        avg_cards = std_cards = avg_risk_aversion = optimal_pct = float("nan")

    summary_result = {
        "model": model_config['name'],
        "model_type": model_config['provider'],
        "persona_type": persona_type,
        "persona_index": persona_index,
        "persona_content": persona_content,
        "temperature": temperature,
        "run": run_num,
        "CCT_avg": avg_cards,
        "CCT_std": std_cards,
        "CCT_risk_aversion": avg_risk_aversion,
        "CCT_optimal_pct": optimal_pct,
        "CCT_n_failed": n_failed
    }

    return summary_result, raw_results

def run_all_model_experiments(model_configs):
    """Run the full persona × temperature × run grid for each model.

    An exception raised while working through a model's grid is printed and
    ends that model's remaining configurations; the loop then moves on to the
    next model. Combined results are written once, after all models.

    Args:
        model_configs: Iterable of model config dicts (each needs 'name').

    Returns:
        tuple[list, list]: ``(all_summary_results, all_raw_results)``.
    """
    all_summary_results, all_raw_results = [], []
    banner = '=' * 50

    for model_config in model_configs:
        print(f"\n{banner}")
        print(f"Starting {model_config['name']} CCT Experiment")
        print(f"{banner}")

        total_configs = len(personas['baseline']) * len(temperatures) * num_runs

        try:
            # Flatten the nested persona/temperature/run loops into one grid.
            grid = (
                (persona_data, temp, run)
                for persona_data in personas['baseline']
                for temp in temperatures
                for run in range(1, num_runs + 1)
            )
            for config_count, (persona_data, temp, run) in enumerate(grid, start=1):
                print(f"\nProgress: {config_count}/{total_configs} configurations")

                summary_result, raw_results = run_single_configuration(
                    model_config,
                    'baseline',
                    persona_data["index"],
                    persona_data["content"],
                    temp,
                    run,
                )
                all_summary_results.append(summary_result)
                all_raw_results.extend(raw_results)

                # Pause between configurations.
                time.sleep(2.0)

        except Exception as e:
            print(f"Error during experiment for {model_config['name']}: {e}")

    save_combined_results(all_summary_results, all_raw_results)

    return all_summary_results, all_raw_results

def save_combined_results(summary_results, all_raw_results):
    """Write the combined summary and raw results to timestamped CSV files.

    Files are placed in the ``cct_experiment_results`` directory (created if
    it does not exist) relative to the current working directory.

    Args:
        summary_results: Records for the per-configuration summary table.
        all_raw_results: Records for the per-scenario raw table.
    """
    output_dir = "cct_experiment_results"
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Both files share one timestamp so a summary can be matched to its raw data.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    summary_filename = f"{output_dir}/cct_all_models_summary_{timestamp}.csv"
    raw_filename = f"{output_dir}/cct_all_models_raw_{timestamp}.csv"

    summary_df, raw_df = pd.DataFrame(summary_results), pd.DataFrame(all_raw_results)
    for frame, destination in ((summary_df, summary_filename), (raw_df, raw_filename)):
        frame.to_csv(destination, index=False)

    print(f"Combined results saved to {summary_filename} and {raw_filename}")

In [None]:
# =========================================================================
# Main Execution
# =========================================================================

# Announce the size of the experiment grid before starting.
print(f"Running CCT experiment with {len(model_configs)} models")
print(
    f"Configuration: {len(personas['baseline'])} personas"
    f" × {len(temperatures)} temperatures"
    f" × {num_runs} runs"
    f" × {len(model_configs)} models"
    f" = {len(personas['baseline']) * len(temperatures) * num_runs * len(model_configs)} total configurations"
)

# Runs every configuration for every model and writes the combined CSV files.
combined_results, raw_results = run_all_model_experiments(model_configs)



Running CCT experiment with 6 models
Configuration: 3 personas × 3 temperatures × 3 runs × 6 models = 162 total configurations

Starting qwen/qwen-2.5-72b-instruct CCT Experiment

Progress: 1/27 configurations

--- Running qwen/qwen-2.5-72b-instruct baseline persona 1 (temp=0.3, run=1) ---
Running 27 CCT scenarios concurrently...
✓ Anthropic client initialized successfully
✓ OpenAI client initialized successfully
✓ Together AI client initialized successfully
✓ OpenRouter client initialized successfully
Completed 10/27 API calls
Completed 20/27 API calls

Progress: 2/27 configurations

--- Running qwen/qwen-2.5-72b-instruct baseline persona 1 (temp=0.3, run=2) ---
Running 27 CCT scenarios concurrently...
✓ Anthropic client initialized successfully
✓ OpenAI client initialized successfully
✓ Together AI client initialized successfully
✓ OpenRouter client initialized successfully
Completed 10/27 API calls
Completed 20/27 API calls

Progress: 3/27 configurations

--- Running qwen/qwen-2.5-7

In [None]:
# Preview the first ten per-configuration summary rows.
results_df = pd.DataFrame(combined_results)
display_cols = [
    'persona_type',
    'persona_index',
    'temperature',
    'run',
    'CCT_avg',
    'CCT_std',
    'CCT_risk_aversion',
    'CCT_optimal_pct',
]

print("\nSample of results:")
print(results_df[display_cols].head(10))


Sample of results:
  persona_type  persona_index  temperature  run    CCT_avg   CCT_std  \
0     baseline              1          0.3    1  19.407407  9.198467   
1     baseline              1          0.3    2  17.777778  9.318772   
2     baseline              1          0.3    3  17.222222  9.077173   
3     baseline              1          0.7    1  18.518519  9.231068   
4     baseline              1          0.7    2  19.925926  9.281307   
5     baseline              1          0.7    3  21.962963  8.443632   
6     baseline              1          1.0    1  19.333333  9.432018   
7     baseline              1          1.0    2  20.148148  9.898386   
8     baseline              1          1.0    3  17.777778  9.511526   
9     baseline              2          0.3    1  16.814815  9.002210   

   CCT_risk_aversion  CCT_optimal_pct  
0         -17.925926         0.000000  
1         -16.296296         0.000000  
2         -15.740741         0.000000  
3         -17.037037       