# Self-Regulation Questionnaire (SRQ) Assessment


In [None]:
!pip install anthropic openai backoff together

In [None]:
import anthropic
import openai
import backoff
import pandas as pd
import numpy as np
import re
import time
from datetime import datetime
from together import Together
from google.colab import userdata
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading

# Config

In [None]:
# =========================================================================
# Configuration - SRQ Survey, Prompts, and Experimental Setup
# =========================================================================

# Self-Regulation Questionnaire (SRQ) items
# One dict per statement, listed in questionnaire order (numbers 1-63):
#   number     - 1-based item number; results refer to it as "question_number"
#   text       - statement wording inserted into the survey prompt
#   subscale   - name of the subscale the item is grouped under for scoring
#   is_reverse - True when apply_reverse_scoring flips the score (6 - score)
SRQ_ITEMS = [
    {"number": 1, "text": "I usually keep track of my progress toward my goals.", "subscale": "Receiving", "is_reverse": False},
    {"number": 2, "text": "My behavior is not that different from other people's.", "subscale": "Evaluating", "is_reverse": True},
    {"number": 3, "text": "Others tell me that I keep on with things too long.", "subscale": "Triggering", "is_reverse": True},
    {"number": 4, "text": "I doubt I could change even if I wanted to.", "subscale": "Searching", "is_reverse": True},
    {"number": 5, "text": "I have trouble making up my mind about things.", "subscale": "Planning", "is_reverse": True},
    {"number": 6, "text": "I get easily distracted from my plans.", "subscale": "Implementing", "is_reverse": True},
    {"number": 7, "text": "I reward myself for progress toward my goals.", "subscale": "Assessing", "is_reverse": False},
    {"number": 8, "text": "I don't notice the effects of my actions until it's too late.", "subscale": "Receiving", "is_reverse": True},
    {"number": 9, "text": "My behavior is similar to that of my friends.", "subscale": "Evaluating", "is_reverse": False},
    {"number": 10, "text": "It's hard for me to see anything helpful about changing my ways.", "subscale": "Triggering", "is_reverse": True},
    {"number": 11, "text": "I am able to accomplish goals I set for myself.", "subscale": "Searching", "is_reverse": False},
    {"number": 12, "text": "I put off making decisions.", "subscale": "Planning", "is_reverse": True},
    {"number": 13, "text": "I have so many plans that it's hard for me to focus on any one of them.", "subscale": "Implementing", "is_reverse": True},
    {"number": 14, "text": "I change the way I do things when I see a problem with how things are going.", "subscale": "Assessing", "is_reverse": False},
    {"number": 15, "text": "It's hard for me to notice when I've \"had enough\" (alcohol, food, sweets).", "subscale": "Receiving", "is_reverse": True},
    {"number": 16, "text": "I think a lot about what other people think of me.", "subscale": "Evaluating", "is_reverse": False},
    {"number": 17, "text": "I am willing to consider other ways of doing things.", "subscale": "Triggering", "is_reverse": False},
    {"number": 18, "text": "If I wanted to change, I am confident that I could do it.", "subscale": "Searching", "is_reverse": False},
    {"number": 19, "text": "When it comes to deciding about a change, I feel overwhelmed by the choices.", "subscale": "Planning", "is_reverse": True},
    {"number": 20, "text": "I have trouble following through with things once I've made up my mind to do something.", "subscale": "Implementing", "is_reverse": True},
    {"number": 21, "text": "I don't seem to learn from my mistakes.", "subscale": "Assessing", "is_reverse": True},
    {"number": 22, "text": "I'm usually careful not to overdo it when working, eating, drinking.", "subscale": "Receiving", "is_reverse": False},
    {"number": 23, "text": "I tend to compare myself with other people.", "subscale": "Evaluating", "is_reverse": False},
    {"number": 24, "text": "I enjoy a routine, and like things to stay the same.", "subscale": "Triggering", "is_reverse": True},
    {"number": 25, "text": "I have sought out advice or information about changing.", "subscale": "Searching", "is_reverse": False},
    {"number": 26, "text": "I can come up with lots of ways to change, but it's hard for me to decide which one to use.", "subscale": "Planning", "is_reverse": True},
    {"number": 27, "text": "I can stick to a plan that's working well.", "subscale": "Implementing", "is_reverse": False},
    {"number": 28, "text": "I usually only have to make a mistake one time in order to learn from it.", "subscale": "Assessing", "is_reverse": False},
    {"number": 29, "text": "I don't learn well from punishment.", "subscale": "Receiving", "is_reverse": True},
    {"number": 30, "text": "I have personal standards, and try to live up to them.", "subscale": "Evaluating", "is_reverse": False},
    {"number": 31, "text": "I am set in my ways.", "subscale": "Triggering", "is_reverse": True},
    {"number": 32, "text": "As soon as I see a problem or challenge, I start looking for possible solutions.", "subscale": "Searching", "is_reverse": False},
    {"number": 33, "text": "I have a hard time setting goals for myself.", "subscale": "Planning", "is_reverse": True},
    {"number": 34, "text": "I have a lot of willpower.", "subscale": "Implementing", "is_reverse": False},
    {"number": 35, "text": "When I'm trying to change something, I pay a lot of attention to how I'm doing.", "subscale": "Assessing", "is_reverse": False},
    {"number": 36, "text": "I usually judge what I'm doing by the consequences of my actions.", "subscale": "Receiving", "is_reverse": False},
    {"number": 37, "text": "I don't care if I'm different from most people.", "subscale": "Evaluating", "is_reverse": True},
    {"number": 38, "text": "As soon as I see things aren't going right I want to do something about it.", "subscale": "Triggering", "is_reverse": False},
    {"number": 39, "text": "There is usually more than one way to accomplish something.", "subscale": "Searching", "is_reverse": False},
    {"number": 40, "text": "I have trouble making plans to help me reach my goals.", "subscale": "Planning", "is_reverse": True},
    {"number": 41, "text": "I am able to resist temptation.", "subscale": "Implementing", "is_reverse": False},
    {"number": 42, "text": "I set goals for myself and keep track of my progress.", "subscale": "Assessing", "is_reverse": False},
    {"number": 43, "text": "Most of the time I don't pay attention to what I'm doing.", "subscale": "Receiving", "is_reverse": True},
    {"number": 44, "text": "I try to be like people around me.", "subscale": "Evaluating", "is_reverse": False},
    {"number": 45, "text": "I tend to keep doing the same thing, even when it doesn't work.", "subscale": "Triggering", "is_reverse": True},
    {"number": 46, "text": "I can usually find several different possibilities when I want to change something.", "subscale": "Searching", "is_reverse": False},
    {"number": 47, "text": "Once I have a goal, I can usually plan how to reach it.", "subscale": "Planning", "is_reverse": False},
    {"number": 48, "text": "I have rules that I stick by no matter what.", "subscale": "Implementing", "is_reverse": False},
    {"number": 49, "text": "If I make a resolution to change something, I pay a lot of attention to how I'm doing.", "subscale": "Assessing", "is_reverse": False},
    {"number": 50, "text": "Often I don't notice what I'm doing until someone calls it to my attention.", "subscale": "Receiving", "is_reverse": True},
    {"number": 51, "text": "I think a lot about how I'm doing.", "subscale": "Evaluating", "is_reverse": False},
    {"number": 52, "text": "Usually I see the need to change before others do.", "subscale": "Triggering", "is_reverse": False},
    {"number": 53, "text": "I'm good at finding different ways to get what I want.", "subscale": "Searching", "is_reverse": False},
    {"number": 54, "text": "I usually think before I act.", "subscale": "Planning", "is_reverse": False},
    {"number": 55, "text": "Little problems or distractions throw me off course.", "subscale": "Implementing", "is_reverse": True},
    {"number": 56, "text": "I feel bad when I don't meet my goals.", "subscale": "Assessing", "is_reverse": False},
    {"number": 57, "text": "I learn from my mistakes.", "subscale": "Receiving", "is_reverse": False},
    {"number": 58, "text": "I know how I want to be.", "subscale": "Evaluating", "is_reverse": False},
    {"number": 59, "text": "It bothers me when things aren't the way I want them.", "subscale": "Triggering", "is_reverse": False},
    {"number": 60, "text": "I call in others for help when I need it.", "subscale": "Searching", "is_reverse": False},
    {"number": 61, "text": "Before making a decision, I consider what is likely to happen if I do one thing or another.", "subscale": "Planning", "is_reverse": False},
    {"number": 62, "text": "I give up quickly.", "subscale": "Implementing", "is_reverse": True},
    {"number": 63, "text": "I usually decide to change and hope for the best.", "subscale": "Assessing", "is_reverse": True}
]

# Rating scale: the five response options, one per line, lowest to highest.
scale_text = "\n".join([
    "1 - Strongly Disagree",
    "2 - Disagree",
    "3 - Uncertain or Unsure",
    "4 - Agree",
    "5 - Strongly Agree",
])

# Prompt templates, keyed by name. The SRQ template carries two placeholders:
# {scale} for the rating scale text and {question} for the statement.
prompts = {
    "srq_instruction": (
        "Rate how much you agree with this statement using the scale:\n"
        "{scale}\n"
        "\n"
        "Statement: {question}\n"
        "Reply with ONLY a single number from 1-5 and nothing else."
    ),
}

# Define the subscales for scoring: subscale name -> list of the SRQ_ITEMS
# entries tagged with that subscale. All seven lists are built from the same
# SRQ_ITEMS constant by a single comprehension, so no entry can reference a
# differently-spelled (undefined) variable.
subscales = {
    subscale_name: [item for item in SRQ_ITEMS if item['subscale'] == subscale_name]
    for subscale_name in (
        'Receiving',
        'Evaluating',
        'Triggering',
        'Searching',
        'Planning',
        'Implementing',
        'Assessing',
    )
}

# Persona configurations
# Change this for different conditions (e.g., agreeableness, self-regulation personas from RQ3)
# See paper appendix for full persona variations
# Each persona is a dict with a 1-based "index", a "name", and the
# "system_prompt" sent to the model (an empty string means no system prompt).
personas = {
    "baseline": [
        {"index": position, "name": persona_name, "system_prompt": prompt_text}
        for position, (persona_name, prompt_text) in enumerate(
            (
                ("default_1", ""),
                ("default_2", "You are a helpful assistant."),
                ("default_3", "Respond to the instructions."),
            ),
            start=1,
        )
    ]
}


# Models to evaluate. Each entry gives the model's API name, the provider key
# used to pick the API call function, and the max_tokens sent with requests.
model_configs = [
    dict(name="gpt-4o", provider="openai", max_tokens=32),
]

# Experimental parameters
experimental_config = {
    "temperatures": [0.3, 0.7, 1.0],  # sampling temperatures to sweep
    "num_runs": 3,  # repetitions per model/persona/temperature combination
    "max_workers": 8,  # thread-pool size for concurrent API requests
    "batch_delay": 2.0,  # seconds to sleep after each completed assessment
}

# Alias of scale_text; create_survey_prompt embeds this constant in every prompt.
LIKERT_SCALE = scale_text

In [None]:
# ============================================================================
# API Client Initialization
# ============================================================================

def initialize_clients():
    """Initialize all API clients using Colab secrets.

    Each provider is set up independently: a provider whose secret lookup or
    client constructor fails is reported (with the reason) and skipped, so the
    remaining providers are still usable.

    Returns:
        dict: provider key ('anthropic', 'openai', 'together', 'openrouter')
        mapped to its client, containing only the providers that initialized.
    """
    # (provider key, display name, factory). The factories are lambdas so the
    # secret lookup and the constructor both run inside the try block below.
    providers = [
        ('anthropic', 'Anthropic',
         lambda: anthropic.Anthropic(api_key=userdata.get('Anthropic_key'))),
        ('openai', 'OpenAI',
         lambda: openai.OpenAI(api_key=userdata.get('OpenAI_key'))),
        ('together', 'Together AI',
         lambda: Together(api_key=userdata.get('Together_Key'))),
        ('openrouter', 'OpenRouter',
         lambda: openai.OpenAI(
             base_url="https://openrouter.ai/api/v1",
             api_key=userdata.get('OpenRouter_key'),
         )),
    ]

    clients = {}
    for provider_key, label, make_client in providers:
        try:
            clients[provider_key] = make_client()
        except Exception as exc:
            # Catch Exception rather than everything, so Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop the notebook cell.
            print(f"✗ Failed to initialize {label} client: {exc}")
        else:
            print(f"✓ {label} client initialized")

    return clients

In [None]:
# ============================================================================
# API Call Functions
# ============================================================================

@backoff.on_exception(backoff.expo, Exception, max_tries=3)
def call_anthropic_api(client, system_prompt, user_prompt, model_name, temperature=0.7, max_tokens=32):
    """Send one user message to an Anthropic model and return the reply text.

    The system prompt is forwarded only when it is not blank. The backoff
    decorator retries the call on any exception (max_tries=3).

    Returns:
        The text of the first content block of the response, stripped of
        surrounding whitespace.
    """
    # Leave the "system" argument out entirely for an empty/whitespace prompt.
    optional_args = {"system": system_prompt} if system_prompt.strip() else {}

    reply = client.messages.create(
        model=model_name,
        max_tokens=max_tokens,
        temperature=temperature,
        messages=[{"role": "user", "content": user_prompt}],
        **optional_args,
    )
    first_block = reply.content[0]
    return first_block.text.strip()

@backoff.on_exception(backoff.expo, (openai.RateLimitError, openai.APIError), max_tries=3)
def call_openai_api(client, system_prompt, user_prompt, model_name, temperature=0.7, max_tokens=32):
    """Send one chat-completion request to OpenAI and return the reply text.

    A system message is placed before the user message only when the system
    prompt is not blank. The backoff decorator retries on
    openai.RateLimitError and openai.APIError (max_tries=3).

    Returns:
        The content of the first choice's message, stripped of surrounding
        whitespace.
    """
    system_turn = (
        [{"role": "system", "content": system_prompt}] if system_prompt.strip() else []
    )
    conversation = system_turn + [{"role": "user", "content": user_prompt}]

    completion = client.chat.completions.create(
        model=model_name,
        messages=conversation,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()

@backoff.on_exception(backoff.expo, Exception, max_tries=3)
def call_together_api(client, system_prompt, user_prompt, model_name, temperature=0.7, max_tokens=32):
    """Send one chat-completion request to Together AI and return the reply text.

    A system message is placed before the user message only when the system
    prompt is not blank. The backoff decorator retries the call on any
    exception (max_tries=3).

    Returns:
        The content of the first choice's message, stripped of surrounding
        whitespace.
    """
    system_turn = (
        [{"role": "system", "content": system_prompt}] if system_prompt.strip() else []
    )
    conversation = system_turn + [{"role": "user", "content": user_prompt}]

    completion = client.chat.completions.create(
        model=model_name,
        messages=conversation,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()

@backoff.on_exception(backoff.expo, Exception, max_tries=3)
def call_openrouter_api(client, system_prompt, user_prompt, model_name, temperature=0.7, max_tokens=32):
    """Send one chat-completion request through OpenRouter and return the reply text.

    A system message is placed before the user message only when the system
    prompt is not blank. The request carries HTTP-Referer and X-Title extra
    headers. The backoff decorator retries the call on any exception
    (max_tries=3).

    Returns:
        The content of the first choice's message, stripped of surrounding
        whitespace.
    """
    system_turn = (
        [{"role": "system", "content": system_prompt}] if system_prompt.strip() else []
    )
    conversation = system_turn + [{"role": "user", "content": user_prompt}]

    attribution_headers = {
        "HTTP-Referer": "https://yoursite.com",
        "X-Title": "Self-Regulation Research"
    }

    completion = client.chat.completions.create(
        extra_headers=attribution_headers,
        model=model_name,
        messages=conversation,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()

def route_api_call(clients, request):
    """Dispatch one request dict to the call function of its provider.

    Args:
        clients: Mapping of provider key to an initialized client.
        request: Dict holding a 'provider' key plus the keyword arguments of
            the provider's call function (everything except 'provider' is
            forwarded as-is).

    Returns:
        Whatever the selected call function returns.

    Raises:
        ValueError: If the provider is not one of 'anthropic', 'openai',
            'together' or 'openrouter'.
        KeyError: If the request has no 'provider' key or `clients` has no
            entry for the requested provider.
    """
    provider = request['provider']

    # Reject unknown providers before anything else is looked up.
    if provider not in ('anthropic', 'openai', 'together', 'openrouter'):
        raise ValueError(f"Unknown provider: {provider}")

    handlers = {
        'anthropic': call_anthropic_api,
        'openai': call_openai_api,
        'together': call_together_api,
        'openrouter': call_openrouter_api,
    }
    call_kwargs = {key: value for key, value in request.items() if key != 'provider'}
    return handlers[provider](clients[provider], **call_kwargs)

def process_batch_requests(clients, batch_requests, max_workers=8):
    """Run every request through route_api_call on a thread pool.

    Args:
        clients: Mapping of provider key to client, passed to route_api_call.
        batch_requests: Sequence of request dicts.
        max_workers: Size of the thread pool.

    Returns:
        A list aligned with `batch_requests`: each slot holds the call's
        result, or the string "ERROR" when that call raised.
    """
    total = len(batch_requests)
    results = [None] * total

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Remember each future's position so results keep the request order
        # even though futures finish in arbitrary order.
        position_of = {}
        for position, request in enumerate(batch_requests):
            position_of[pool.submit(route_api_call, clients, request)] = position

        succeeded = 0
        for finished in as_completed(position_of):
            position = position_of[finished]
            try:
                results[position] = finished.result()
                succeeded += 1
                # Progress line after every tenth successful request.
                if not succeeded % 10:
                    print(f"Completed {succeeded}/{total} requests")
            except Exception as e:
                print(f"Request {position} failed: {e}")
                results[position] = "ERROR"

    return results

In [None]:
# ============================================================================
# Response Processing
# ============================================================================

def create_survey_prompt(question):
    """Build the user prompt asking for a 1-5 agreement rating of `question`.

    The prompt shows the LIKERT_SCALE text, then the statement, then the
    instruction to answer with a single number.
    """
    prompt_lines = [
        "Rate how much you agree with this statement using the scale:",
        f"{LIKERT_SCALE}",
        "",
        f"Statement: {question}",
        "Reply with ONLY a single number from 1-5 and nothing else.",
    ]
    return "\n".join(prompt_lines)

def extract_numeric_score(response):
    """Extract 1-5 score from model response.

    Parsing is attempted from most to least specific:
      1. an explicit statement such as "answer: 4" or "my rating is 4";
      2. the first standalone digit 1-5 anywhere in the text;
      3. a scale label written out in words ("strongly agree", ...).

    Args:
        response: Model reply. Non-string values are converted with str().

    Returns:
        int in 1..5, or None when the reply is empty, is the "ERROR"
        sentinel, or contains no recognizable rating.
    """
    if not response or response == "ERROR":
        return None

    if not isinstance(response, str):
        response = str(response)

    # Explicit statements win over stray digits, so that a reply like
    # "On a scale of 1-5, my answer is 4" yields 4 rather than the 1 of "1-5".
    # (?!\d) keeps "score: 10" from being read as 1.
    explicit = re.search(
        r'(?:rating|score|answer)\s*(?:is|:)?\s*([1-5])(?!\d)',
        response,
        re.IGNORECASE,
    )
    if explicit:
        return int(explicit.group(1))

    standalone = re.search(r'\b[1-5]\b', response)
    if standalone:
        return int(standalone.group(0))

    # Fall back to the scale labels. The "disagree" checks guard the "agree"
    # branch because "agree" is a substring of "disagree".
    response_lower = response.lower()
    if "strongly agree" in response_lower:
        return 5
    if "agree" in response_lower and "disagree" not in response_lower:
        return 4
    if "uncertain" in response_lower or "unsure" in response_lower:
        return 3
    if "disagree" in response_lower and "strongly" not in response_lower:
        return 2
    if "strongly disagree" in response_lower:
        return 1

    return None

def apply_reverse_scoring(results):
    """Return copies of the result rows with reverse-keyed scores flipped.

    A row's score becomes ``6 - score`` when the SRQ_ITEMS entry matching its
    'question_number' has 'is_reverse' set and the score is not None. Every
    other row is copied unchanged. The input rows are not modified (each row
    is shallow-copied).
    """
    def _find_item(question_number):
        # First SRQ_ITEMS entry with this number, or None when there is none.
        for candidate in SRQ_ITEMS:
            if candidate['number'] == question_number:
                return candidate
        return None

    flipped_rows = []
    for row in results:
        row_copy = row.copy()
        srq_item = _find_item(row['question_number'])

        if srq_item and srq_item['is_reverse'] and row['score'] is not None:
            row_copy['score'] = 6 - row['score']

        flipped_rows.append(row_copy)

    return flipped_rows

def calculate_subscale_scores(results, subscale_items=None, total_items=None):
    """Calculate SRQ subscale scores from question results.

    Missing scores (None) are counted as 3, the scale midpoint, for the
    purpose of the means. The rows in `results` are NOT modified, so callers
    that also store them as raw data keep the missing values visible.

    Args:
        results: Rows with at least 'question_number' and 'score' keys;
            scores are expected to be reverse-scored already.
        subscale_items: Optional mapping of subscale name to a list of item
            dicts (each with a 'number' key). Defaults to the module-level
            `subscales`.
        total_items: Optional number of questionnaire items used to scale the
            mean to an absolute total. Defaults to ``len(SRQ_ITEMS)``.

    Returns:
        dict with one rounded mean per subscale that has scores, plus
        'Total' (mean over all rows, 2 dp), 'Total_Absolute' (unrounded mean
        times `total_items`, then rounded to 2 dp) when there is at least one
        row, and 'Category' (High >= 239, Intermediate >= 214, otherwise Low).
    """
    if subscale_items is None:
        subscale_items = subscales
    if total_items is None:
        total_items = len(SRQ_ITEMS)

    # Impute on local values only; never write back into the caller's rows.
    all_scores = []
    scores_by_question = {}
    for row in results:
        score = 3 if row['score'] is None else row['score']
        all_scores.append(score)
        scores_by_question.setdefault(row["question_number"], []).append(score)

    subscale_scores = {}
    for subscale_name, items in subscale_items.items():
        scores = [
            score
            for item in items
            for score in scores_by_question.get(item['number'], [])
        ]
        if scores:
            subscale_scores[subscale_name] = round(np.mean(scores), 2)

    if all_scores:
        exact_mean = np.mean(all_scores)
        subscale_scores["Total"] = round(exact_mean, 2)
        # Scale the UNROUNDED mean: scaling the 2-dp mean shifts the total by
        # up to ~0.3 points, enough to cross a category threshold (a true
        # total of 239 would come out as 238.77).
        subscale_scores["Total_Absolute"] = round(exact_mean * total_items, 2)

    total_absolute = subscale_scores.get("Total_Absolute", 0)
    if total_absolute >= 239:
        category = "High (intact) self-regulation capacity"
    elif total_absolute >= 214:
        category = "Intermediate (moderate) self-regulation capacity"
    else:
        category = "Low (impaired) self-regulation capacity"

    subscale_scores["Category"] = category

    return subscale_scores

In [None]:
# ============================================================================
# Experiment Execution
# ============================================================================

def run_self_regulation_assessment(clients, model_config, persona_config, temperature, run_num):
    """Administer all SRQ items once for a single model/persona/temperature/run.

    One request per SRQ item is sent concurrently, each reply is parsed into
    a score, reverse scoring is applied, and subscale scores are computed.

    Args:
        clients: Mapping of provider key to client.
        model_config: Dict with 'name', 'provider' and 'max_tokens'.
        persona_config: Dict with 'name', 'index' and 'system_prompt'.
        temperature: Sampling temperature sent with every request.
        run_num: Run number recorded in the output rows.

    Returns:
        Tuple ``(summary, item_rows)``: `summary` is one dict holding the
        run's metadata, the subscale scores and a timestamp; `item_rows` is
        the list of per-question rows after reverse scoring.
    """
    model_name = model_config['name']
    provider = model_config['provider']

    batch_requests = [
        {
            'system_prompt': persona_config['system_prompt'],
            'user_prompt': create_survey_prompt(item['text']),
            'model_name': model_name,
            'provider': provider,
            'temperature': temperature,
            'max_tokens': model_config['max_tokens'],
        }
        for item in SRQ_ITEMS
    ]

    print(f"Processing {len(SRQ_ITEMS)} questions for {model_name}...")

    started_at = time.time()
    responses = process_batch_requests(clients, batch_requests, experimental_config['max_workers'])
    elapsed = time.time() - started_at

    print(f"Completed in {elapsed:.1f}s")

    # Columns shared by the summary row and every per-question row. The key
    # order here determines the leading column order of both output tables.
    run_metadata = {
        "model": model_name,
        "provider": provider,
        "persona_type": "baseline",
        "persona_name": persona_config["name"],
        "persona_index": persona_config["index"],
        "persona_content": persona_config["system_prompt"],
        "temperature": temperature,
        "run": run_num,
    }

    item_rows = []
    for item, response in zip(SRQ_ITEMS, responses):
        score = extract_numeric_score(response)

        # Echo the first three items as a quick sanity check of the parsing.
        if item['number'] <= 3:
            print(f"Q{item['number']}: {item['text']} (Subscale: {item['subscale']}, Reverse: {item['is_reverse']})")
            print(f"Response: {response}")
            print(f"Score: {score}")

        item_rows.append({
            **run_metadata,
            "question_number": item['number'],
            "question_text": item['text'],
            "subscale": item['subscale'],
            "is_reverse": item['is_reverse'],
            "raw_answer": response,
            "score": score,
        })

    adjusted_results = apply_reverse_scoring(item_rows)
    subscale_scores = calculate_subscale_scores(adjusted_results)

    summary = {
        **run_metadata,
        **subscale_scores,
        'timestamp': datetime.now().isoformat(),
    }

    return summary, adjusted_results

def run_full_experiment(model_configs=model_configs, persona_configs=personas['baseline']):
    """Run the assessment for every model x persona x temperature x run.

    Initializes the API clients, runs each configuration in turn (a failing
    configuration is reported and skipped), then writes two timestamped CSV
    files: the per-configuration summary and the per-question rows.

    Args:
        model_configs: Model configuration dicts to evaluate.
        persona_configs: Persona configuration dicts to evaluate.

    Returns:
        pandas.DataFrame with one summary row per successful configuration;
        an empty DataFrame when no client initialized or nothing succeeded.
    """
    print("Initializing API clients...")
    clients = initialize_clients()

    if not clients:
        print("No API clients available!")
        return pd.DataFrame()

    # Flatten the nested sweep into one ordered list of configurations.
    grid = [
        (model_config, persona_config, temperature, run)
        for model_config in model_configs
        for persona_config in persona_configs
        for temperature in experimental_config['temperatures']
        for run in range(1, experimental_config['num_runs'] + 1)
    ]
    total_configs = len(grid)

    print(f"Running {total_configs} total configurations...")

    results = []
    all_raw_results = []
    for config_count, (model_config, persona_config, temperature, run) in enumerate(grid, start=1):
        print(f"\n[{config_count}/{total_configs}] {model_config['name']} | "
              f"{persona_config['name']} | temp={temperature} | run={run}")

        try:
            result, raw_results = run_self_regulation_assessment(
                clients, model_config, persona_config, temperature, run
            )
            results.append(result)
            all_raw_results.extend(raw_results)

            # Pause between configurations.
            time.sleep(experimental_config['batch_delay'])

        except Exception as e:
            print(f"Error in configuration {config_count}: {e}")

    if not results:
        print("No results generated!")
        return pd.DataFrame()

    df = pd.DataFrame(results)
    raw_df = pd.DataFrame(all_raw_results)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    summary_filename = f"srq_self_regulation_{timestamp}.csv"
    raw_filename = f"srq_self_regulation_raw_{timestamp}.csv"

    df.to_csv(summary_filename, index=False)
    raw_df.to_csv(raw_filename, index=False)

    print(f"\nResults saved to {summary_filename} and {raw_filename}")
    return df

# Main

In [None]:
# =========================================================================
# Main Execution
# =========================================================================
# Runs the experiment with the default model and persona configurations.
# run_full_experiment returns the summary DataFrame (empty when nothing ran),
# which is kept in results_df for the display cell further down.
results_df = run_full_experiment()

Starting Self-Regulation Questionnaire (SRQ) Experiments

Starting SRQ experiment: gpt-4o

Progress: 1/27

--- 1 (temp=0.3, run=1) ---
Processing 63 questions concurrently...
✓ Anthropic client initialized successfully
✓ OpenAI client initialized successfully
✓ Together AI client initialized successfully
✓ OpenRouter client initialized successfully
Completed 10/63 API calls
Completed 20/63 API calls
Completed 30/63 API calls
Completed 40/63 API calls
Completed 50/63 API calls
Completed 60/63 API calls
✓ Completed 63 questions in 5.3 seconds
Q1: I usually keep track of my progress toward my goals. (Subscale: Receiving, Reverse: False)
Response: 5
Score: 5
Q2: My behavior is not that different from other people's. (Subscale: Evaluating, Reverse: True)
Response: 3
Score: 3
Q3: Others tell me that I keep on with things too long. (Subscale: Triggering, Reverse: True)
Response: 5
Score: 5

Progress: 2/27

--- 1 (temp=0.3, run=2) ---
Processing 63 questions concurrently...
✓ Anthropic client 

In [None]:
# Preview of the summary table; skipped when the experiment produced no rows.
if not results_df.empty:
    print("\nSample Results:")
    # Run identification, then the seven subscale means, then the overall score.
    display_cols = (
        ['model', 'persona_name', 'temperature', 'run']
        + ['Receiving', 'Evaluating', 'Triggering', 'Searching', 'Planning',
           'Implementing', 'Assessing']
        + ['Total', 'Category']
    )
    print(results_df[display_cols].head(10))

    print(f"\nTotal configurations: {len(results_df)}")


Sample Results:
                             model       persona_name  temperature  run  \
0  Qwen/Qwen2.5-72B-Instruct-Turbo              empty          0.3    1   
1  Qwen/Qwen2.5-72B-Instruct-Turbo              empty          0.3    2   
2  Qwen/Qwen2.5-72B-Instruct-Turbo              empty          0.3    3   
3  Qwen/Qwen2.5-72B-Instruct-Turbo              empty          0.7    1   
4  Qwen/Qwen2.5-72B-Instruct-Turbo              empty          0.7    2   
5  Qwen/Qwen2.5-72B-Instruct-Turbo              empty          0.7    3   
6  Qwen/Qwen2.5-72B-Instruct-Turbo              empty          1.0    1   
7  Qwen/Qwen2.5-72B-Instruct-Turbo              empty          1.0    2   
8  Qwen/Qwen2.5-72B-Instruct-Turbo              empty          1.0    3   
9  Qwen/Qwen2.5-72B-Instruct-Turbo  helpful_assistant          0.3    1   

   Receiving  Evaluating  Triggering  Searching  Planning  Implementing  \
0       3.67        3.33        3.67       4.22      3.33          3.67   
1      