In [1]:
import torch
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM
import numpy as np
from itertools import combinations
from scipy.spatial.distance import pdist, squareform
from scipy.optimize import differential_evolution

ModuleNotFoundError: No module named 'torch'

In [None]:
import os

# Authenticate with the Hugging Face Hub before downloading the checkpoint.
# The access token is read from the HF_TOKEN environment variable so that no
# credential is stored in the notebook. When the variable is unset or empty,
# token=None is passed and login() falls back to its interactive prompt.
login(token=os.environ.get("HF_TOKEN") or None)

# Load Llama 3.1 8B
model_name = "meta-llama/Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,   # load weights in bfloat16
    device_map="auto",            # let the library decide device placement
    output_hidden_states=True     # hidden states are needed for the embeddings
)

In [4]:
def get_persona_embedding(persona_text):
    """Embed a persona description by mean-pooling the final hidden layer.

    The description is wrapped in the prompt ``"You are {persona_text}."``,
    tokenized with the module-level ``tokenizer`` and run once through the
    module-level ``model`` with gradients disabled. The last entry of
    ``hidden_states`` is averaged over the sequence dimension (``dim=1``)
    and squeezed, so every token of the prompt contributes to the result.

    Args:
        persona_text: Persona phrase inserted into the prompt.

    Returns:
        The pooled embedding as a float32 NumPy array on the CPU.
    """
    encoded = tokenizer(f"You are {persona_text}.", return_tensors="pt")
    encoded = encoded.to(model.device)

    with torch.no_grad():
        result = model(**encoded, output_hidden_states=True)
        # hidden_states[-1] is the output of the final layer.
        final_layer = result.hidden_states[-1]
        # Average over the sequence axis, then drop the singleton batch axis.
        pooled = final_layer.mean(dim=1).squeeze()

    # NumPy has no bfloat16 dtype, so upcast to float32 before converting.
    pooled_fp32 = pooled.float()
    return pooled_fp32.cpu().numpy()

# Candidate persona pool: ten thematic groups of five descriptions each
# (50 in total). Every entry is a noun phrase that starts with its article
# ("a ..." / "an ...") so it can be dropped into a sentence as-is.
# Extend the pool by adding more phrases to any group or adding new groups.
candidate_personas = [
    # Analytical & Methodical
    "a systematic thinker who approaches problems methodically",
    "a meticulous analyst who examines every detail carefully",
    "a logical reasoner who prioritizes evidence and structure",
    "a thorough investigator who leaves no stone unturned",
    "a pragmatic problem-solver focused on practical solutions",

    # Critical & Challenging
    "a skeptical questioner who challenges assumptions rigorously",
    "a critical thinker who identifies flaws and weaknesses",
    "a contrarian debater who explores opposing viewpoints",
    "a rigorous evaluator who demands high standards of proof",
    "a provocative challenger who tests ideas through confrontation",

    # Intuitive & Creative
    "an intuitive thinker who relies on pattern recognition and gut feelings",
    "a creative innovator who generates novel solutions and perspectives",
    "an imaginative visionary who thinks beyond conventional boundaries",
    "a lateral thinker who makes unexpected connections",
    "an experimental explorer willing to try unconventional approaches",

    # Empathetic & Collaborative
    "a compassionate mediator who considers emotional and human impacts",
    "a diplomatic consensus-builder who finds common ground",
    "an empathetic listener who understands diverse perspectives",
    "a collaborative facilitator who brings ideas together harmoniously",
    "a warm supporter who encourages and validates contributions",

    # Strategic & Big-Picture
    "a strategic planner who thinks several steps ahead",
    "a visionary leader who focuses on long-term implications",
    "a holistic thinker who sees systems and interconnections",
    # NOTE(review): shares its lead phrase ("a philosophical questioner")
    # with an entry in the "Reflective & Philosophical" group below.
    "a philosophical questioner who explores deeper meanings",
    "a futuristic forecaster who anticipates consequences and trends",

    # Detail-Oriented & Precise
    "a perfectionist who insists on accuracy and precision",
    "a cautious deliberator who weighs risks carefully",
    "a conservative guardian who values proven approaches",
    "a meticulous fact-checker who verifies every claim",
    "a disciplined executor who follows processes strictly",

    # Dynamic & Energetic
    "an enthusiastic advocate who argues passionately for positions",
    "a bold risk-taker who embraces uncertainty",
    "an assertive leader who drives discussions forward decisively",
    "an adventurous experimenter eager to explore new territory",
    "a spontaneous improviser who adapts quickly to new information",

    # Reflective & Philosophical
    "a reflective contemplator who thinks deeply before responding",
    "a philosophical questioner who examines fundamental assumptions",
    "a thoughtful observer who considers multiple angles quietly",
    "an introspective analyst who explores internal logic",
    "a patient deliberator who takes time to form well-reasoned views",

    # Efficient & Results-Oriented
    "a pragmatic doer focused on actionable outcomes",
    "an efficient optimizer who seeks the most direct path",
    "a results-driven achiever who prioritizes concrete goals",
    "a decisive executor who makes quick, firm judgments",
    "a competitive winner who strives to prevail in debates",

    # Balanced & Moderate
    "an impartial judge who weighs all sides fairly",
    "a balanced moderator who seeks middle ground",
    "a reasonable negotiator who finds practical compromises",
    "an objective observer who maintains emotional distance",
    "a fair arbiter who considers evidence without bias"
]


# Embed every candidate persona and collect the vectors into a single array,
# one row per persona in candidate_personas order.
print("Extracting embeddings...")
embeddings = np.array(list(map(get_persona_embedding, candidate_personas)))
print(f"Embeddings shape: {embeddings.shape}")

# Compute pairwise distance matrix (using cosine distance)
def cosine_distance_matrix(embeddings):
    """Compute the pairwise cosine-distance matrix of a set of row vectors.

    Args:
        embeddings: Array-like of shape (n, d); each row is one vector.

    Returns:
        An (n, n) NumPy array where entry [i, j] is ``1 - cos(row_i, row_j)``.
        Values are clipped to [0, 2] and the diagonal is exactly 0, so
        floating-point rounding can never produce a slightly negative
        distance or a non-zero self-distance.

        A zero-norm row has no direction; instead of dividing by zero (which
        would fill its row and column with NaN) it is treated as having
        similarity 0, i.e. distance 1, to every other row.
    """
    vectors = np.asarray(embeddings)

    # Row norms; replace zeros by 1 so the division below is always defined.
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    normalized = vectors / norms

    # Cosine similarity is the dot product of unit-length rows.
    similarity = normalized @ normalized.T

    # Convert to distance (1 - similarity) and remove rounding artefacts.
    distance = 1 - similarity
    np.clip(distance, 0, 2, out=distance)
    np.fill_diagonal(distance, 0)
    return distance

# Pairwise cosine distances between all persona embeddings; entry [i][j]
# refers to candidate_personas[i] and candidate_personas[j].
distance_matrix = cosine_distance_matrix(embeddings)

# Method 1: Greedy Optimization (Fast)
def greedy_max_diversity(distance_matrix, k=5, seed=None):
    """Greedily select k mutually distant items (farthest-point heuristic).

    Starting from a random item, repeatedly add the candidate whose minimum
    distance to the already selected items is largest. Ties are broken in
    favour of the larger index.

    Args:
        distance_matrix: (n, n) array-like of pairwise distances.
        k: Number of items to select.
        seed: Optional seed for the random starting item. When None the
            start is drawn from NumPy's global random state
            (``np.random.randint``), so ``np.random.seed`` still controls it.

    Returns:
        List of k selected indices (Python ints) in selection order; an
        empty list when ``k <= 0``.

    Raises:
        ValueError: If k exceeds the number of available items.
    """
    n = len(distance_matrix)
    if k <= 0:
        return []
    if k > n:
        raise ValueError(f"Cannot select k={k} items from only {n} candidates")

    if seed is None:
        start = int(np.random.randint(n))
    else:
        start = int(np.random.default_rng(seed).integers(n))

    dist = np.asarray(distance_matrix)
    selected = [start]
    remaining = set(range(n)) - {start}

    # nearest[i] = distance from item i to its closest selected item.
    # Keeping it up to date avoids rescanning the whole selected set each round.
    nearest = dist[:, start].copy()

    while len(selected) < k:
        # Pick the candidate with the largest min-distance (ties -> larger index).
        best_idx = max(remaining, key=lambda i: (nearest[i], i))
        selected.append(best_idx)
        remaining.discard(best_idx)
        nearest = np.minimum(nearest, dist[:, best_idx])

    return selected

# Method 2: Exhaustive Search (Exact but slow for large sets)
def exhaustive_max_diversity(distance_matrix, k=5):
    """Find the size-k subset with the largest sum of pairwise distances.

    Every one of the C(n, k) subsets is scored, so the result is exact for
    the Max-Sum objective but the cost grows combinatorially with n and k.
    When several subsets tie, the first one in ``itertools.combinations``
    order is returned.

    Args:
        distance_matrix: (n, n) indexable matrix of pairwise distances.
        k: Subset size.

    Returns:
        Tuple ``(indices, diversity)``: the best subset as a list of indices
        and its summed pairwise distance.

    Raises:
        ValueError: If k is negative or larger than the number of items, or
            if no subset has a comparable score (e.g. the matrix holds NaN).
    """
    n = len(distance_matrix)
    if k < 0 or k > n:
        raise ValueError(f"k must be between 0 and {n}, got {k}")

    best_diversity = -np.inf
    best_subset = None

    for subset in combinations(range(n), k):
        # Compute Max-Sum diversity
        diversity = sum(distance_matrix[i][j]
                        for i, j in combinations(subset, 2))
        if diversity > best_diversity:
            best_diversity = diversity
            best_subset = subset

    if best_subset is None:
        # Only reachable when every score failed the '>' comparison.
        raise ValueError("No subset could be scored; check the distance matrix for NaN")

    return list(best_subset), best_diversity

# Method 3: Global Optimization (Best balance)
def optimize_diversity(distance_matrix, k=5, method='differential_evolution',
                       seed=42, maxiter=100):
    """Search for a diverse size-k subset with SciPy's differential evolution.

    Each candidate solution is a real vector ``x`` in [0, 1]^n acting as a set
    of sort keys: the k items with the smallest keys form the subset. The
    optimizer minimizes the negated sum of pairwise distances of that subset.

    Args:
        distance_matrix: (n, n) indexable matrix of pairwise distances.
        k: Subset size.
        method: Optimizer name; only ``'differential_evolution'`` is
            implemented.
        seed: Seed passed to the optimizer for reproducible results.
        maxiter: Maximum number of generations.

    Returns:
        Tuple ``(indices, diversity)``: the selected indices as a list of
        Python ints and the summed pairwise distance of that subset.

    Raises:
        ValueError: If ``method`` is not supported, or k is negative or
            larger than the number of items.
    """
    if method != 'differential_evolution':
        raise ValueError(
            f"Unsupported method {method!r}; only 'differential_evolution' is implemented"
        )

    n = len(distance_matrix)
    if k < 0 or k > n:
        raise ValueError(f"k must be between 0 and {n}, got {k}")
    if k == 0:
        # Nothing to optimize (and an empty bounds list is not a valid problem).
        return [], 0.0

    def objective(x):
        # x holds one sort key per item; the k smallest keys are the selection.
        selected = np.argsort(x)[:k]
        # Compute negative diversity (minimize negative = maximize positive)
        diversity = sum(distance_matrix[i][j]
                        for i, j in combinations(selected, 2))
        return -diversity

    bounds = [(0, 1)] * n
    # polish=False: the objective is piecewise constant in x, so the final
    # gradient-based polishing step cannot improve it and only costs evaluations.
    result = differential_evolution(objective, bounds, seed=seed,
                                    maxiter=maxiter, polish=False)

    # Decode the best key vector the same way the objective does.
    selected = np.argsort(result.x)[:k]
    diversity = -result.fun

    return selected.tolist(), diversity

# Run optimization
# Seed NumPy's global RNG first: greedy_max_diversity draws its starting item
# with np.random.randint, so without a seed each run can select a different set.
np.random.seed(42)

print("\n=== Running Greedy Optimization ===")
selected_greedy = greedy_max_diversity(distance_matrix, k=5)
# Max-Sum score of the greedy selection (sum over all unordered pairs).
diversity_greedy = sum(distance_matrix[i][j]
                       for i, j in combinations(selected_greedy, 2))
print(f"Diversity score: {diversity_greedy:.4f}")
print("\nSelected personas:")
for idx in selected_greedy:
    print(f"  - {candidate_personas[idx]}")

print("\n=== Running Global Optimization ===")
selected_opt, diversity_opt = optimize_diversity(distance_matrix, k=5)
print(f"Diversity score: {diversity_opt:.4f}")
print("\nSelected personas:")
for idx in selected_opt:
    print(f"  - {candidate_personas[idx]}")

# Compute diversity metrics
def compute_metrics(selected, distance_matrix):
    """Summarize how spread out a selection is.

    Collects the distance of every unordered pair in ``selected`` and reports:

    * ``max_sum`` - the sum of those pairwise distances,
    * ``max_min`` - the smallest pairwise distance,
    * ``mean_distance`` - their arithmetic mean.

    Args:
        selected: Indices of the chosen items.
        distance_matrix: Matrix indexable as ``distance_matrix[i][j]``.

    Returns:
        Dict with the keys ``'max_sum'``, ``'max_min'`` and ``'mean_distance'``.
    """
    pairwise = []
    for a, b in combinations(selected, 2):
        pairwise.append(distance_matrix[a][b])

    return dict(
        max_sum=sum(pairwise),
        max_min=min(pairwise),
        mean_distance=np.mean(pairwise),
    )

# Report the diversity metrics of the globally optimized selection.
metrics = compute_metrics(selected_opt, distance_matrix)
print("\n=== Diversity Metrics ===")
print(
    *(
        f"{label}: {metrics[key]:.4f}"
        for label, key in (
            ("Max-Sum", "max_sum"),
            ("Max-Min", "max_min"),
            ("Mean pairwise distance", "mean_distance"),
        )
    ),
    sep="\n",
)


Extracting embeddings...
Embeddings shape: (50, 4096)

=== Running Greedy Optimization ===
Diversity score: 2.2194

Selected personas:
  - a pragmatic doer focused on actionable outcomes
  - an impartial judge who weighs all sides fairly
  - a futuristic forecaster who anticipates consequences and trends
  - a thorough investigator who leaves no stone unturned
  - an intuitive thinker who relies on pattern recognition and gut feelings

=== Running Global Optimization ===
Diversity score: 2.3722

Selected personas:
  - an efficient optimizer who seeks the most direct path
  - a thorough investigator who leaves no stone unturned
  - a warm supporter who encourages and validates contributions
  - a holistic thinker who sees systems and interconnections
  - a patient deliberator who takes time to form well-reasoned views

=== Diversity Metrics ===
Max-Sum: 2.3722
Max-Min: 0.2126
Mean pairwise distance: 0.2372
