# Scratch pad

In [2]:
import json
import torch
import pandas as pd
import os
import plotly.graph_objects as go
import numpy as np
import sys

sys.path.append(".")
sys.path.append("..")

from utils.probing_utils import *

## PCA projected into vocab

In [9]:
from sklearn.preprocessing import StandardScaler

In [6]:
unique_pca_results = torch.load("/workspace/traits/pca/pos_neg_50_layer34.pt", weights_only=False)
shared_pca_results = torch.load("/workspace/traits_240/pca/pos_neg_50_layer34.pt", weights_only=False)

In [11]:
model, tokenizer= load_model('google/gemma-2-27b-it')

Loading checkpoint shards:   0%|          | 0/12 [00:00<?, ?it/s]

In [27]:
def project_pc_to_vocab(model, pca_results, pc_index, layer_index, device='cuda', top_k=50):
    """
    Project a PC direction to vocabulary space
    
    Args:
        model: HuggingFace model
        pca_results: Your loaded PCA results dict
        pc_index: Which PC to analyze (0-indexed)
        layer_index: Which layer the PCA was computed from
        device: Device to run on
        top_k: Number of top tokens to return
    """
    # Validation checks
    if pc_index >= pca_results['n_components']:
        raise ValueError(f"PC index {pc_index} exceeds available components {pca_results['n_components']}")
    
    if layer_index >= len(model.model.layers):
        raise ValueError(f"Layer index {layer_index} exceeds model layers {len(model.model.layers)}")
    
    # Get the PC direction from sklearn PCA
    pc_direction = pca_results['pca'].components_[pc_index]  # Shape: [d_model]
    
    # Convert to torch tensor with proper device and dtype
    model_device = next(model.parameters()).device
    model_dtype = next(model.parameters()).dtype
    pc_tensor = torch.tensor(pc_direction, dtype=model_dtype, device=model_device)
    
    # Handle standardization if it was applied
    if 'scaler' in pca_results and pca_results['scaler'] is not None:
        scaler = pca_results['scaler']
        scale_tensor = torch.tensor(scaler.scale_, dtype=model_dtype, device=model_device)
        pc_tensor = pc_tensor * scale_tensor
    
    # Project both positive and negative directions
    directions = {'positive': pc_tensor, 'negative': -pc_tensor}
    results = {}
    
    for direction_name, direction_vector in directions.items():
        with torch.no_grad():
            # Start with the PC direction as hidden state
            current_hidden = direction_vector.unsqueeze(0).unsqueeze(0)  # [batch=1, seq_len=1, hidden_dim]
            
            # Use only inputs_embeds, not input_ids
            outputs = model(
                inputs_embeds=current_hidden,
                output_hidden_states=True,
                return_dict=True
            )
            
            # Project to vocabulary
            vocab_logits = outputs.logits.squeeze(0).squeeze(0)  # Remove batch and seq dimensions
            
            # Get top-k tokens
            top_values, top_indices = torch.topk(vocab_logits, top_k)
            
            results[direction_name] = {
                'token_ids': top_indices.cpu().numpy(),
                'logits': top_values.cpu().float().numpy(),
            }
    
    return results

def interpret_pc_vocabulary(model, tokenizer, pca_results, pc_index, layer_index, top_k=50):
    """
    Get interpretable vocabulary for a PC and print results
    """
    try:
        vocab_proj = project_pc_to_vocab(model, pca_results, pc_index, layer_index, top_k=top_k)
        
        # Decode tokens for both directions
        pos_tokens = [tokenizer.decode([token_id]).strip() for token_id in vocab_proj['positive']['token_ids']]
        neg_tokens = [tokenizer.decode([token_id]).strip() for token_id in vocab_proj['negative']['token_ids']]
        
        print(f"PC{pc_index} Vocabulary Projection (from layer {layer_index}):")
        print(f"Variance explained: {pca_results['variance_explained'][pc_index]:.4f}")
        print(f"Cumulative variance: {sum(pca_results['variance_explained'][:pc_index+1]):.4f}")
        
        print(f"\nPositive direction (top {min(20, top_k)} tokens):")
        for i, (token, logit) in enumerate(zip(pos_tokens[:20], vocab_proj['positive']['logits'][:20])):
            print(f"  {i+1:2d}. '{token}' (logit: {logit:.3f})")
        
        print(f"\nNegative direction (top {min(20, top_k)} tokens):")  
        for i, (token, logit) in enumerate(zip(neg_tokens[:20], vocab_proj['negative']['logits'][:20])):
            print(f"  {i+1:2d}. '{token}' (logit: {logit:.3f})")
        
        return {
            'positive_tokens': list(zip(pos_tokens, vocab_proj['positive']['logits'])),
            'negative_tokens': list(zip(neg_tokens, vocab_proj['negative']['logits'])),
            'variance_explained': pca_results['variance_explained'][pc_index]
        }
        
    except Exception as e:
        print(f"Error analyzing PC{pc_index}: {str(e)}")
        return None

def analyze_all_pcs(model, tokenizer, pca_results, layer_index, max_pcs=10, top_k=20):
    """Analyze vocabulary projections for multiple PCs"""
    results = {}
    
    n_pcs = min(max_pcs, pca_results['n_components'])
    
    print(f"Analyzing {n_pcs} PCs from layer {layer_index}")
    print(f"Model: {model.__class__.__name__}")
    print(f"Total layers: {len(model.model.layers)}")
    print(f"PCA components shape: {pca_results['pca'].components_.shape}")
    
    for pc_idx in range(n_pcs):
        print(f"\n{'='*60}")
        vocab_analysis = interpret_pc_vocabulary(model, tokenizer, pca_results, pc_idx, layer_index, top_k)
        if vocab_analysis is not None:
            results[f'PC{pc_idx}'] = vocab_analysis
        
    return results

In [28]:
# For your layer 34 PCA
analysis_results = analyze_all_pcs(model, tokenizer, shared_pca_results, layer_index=34)

Analyzing 10 PCs from layer 34
Model: Gemma2ForCausalLM
Total layers: 46
PCA components shape: (239, 4608)

PC0 Vocabulary Projection (from layer 34):
Variance explained: 0.1966
Cumulative variance: 0.1966

Positive direction (top 20 tokens):
   1. '' (logit: 12.938)
   2. 'I' (logit: 11.750)
   3. '-' (logit: 11.438)
   4. 'AS' (logit: 10.438)
   5. '**' (logit: 10.125)
   6. 'I' (logit: 9.812)
   7. '' (logit: 9.688)
   8. '--' (logit: 9.625)
   9. 'commits' (logit: 9.438)
  10. '...' (logit: 9.438)
  11. '-"' (logit: 9.375)
  12. 'As' (logit: 9.375)
  13. '***' (logit: 9.375)
  14. 'SO' (logit: 9.250)
  15. 'ST' (logit: 9.125)
  16. 'NEVER' (logit: 9.125)
  17. 'ASC' (logit: 9.062)
  18. '' (logit: 9.062)
  19. '💀' (logit: 8.875)
  20. 'IT' (logit: 8.875)

Negative direction (top 20 tokens):
   1. ':' (logit: 14.312)
   2. 'Occur' (logit: 12.062)
   3. '.' (logit: 11.562)
   4. '-' (logit: 11.562)
   5. '' (logit: 11.562)
   6. ',' (logit: 11.438)
   7. 'Occurrence' (logit: 11.312)


## Compare PCA 

In [2]:
unique_pca_results = torch.load("/workspace/traits/pca/pos_neg_50_layer34.pt", weights_only=False)
shared_pca_results = torch.load("/workspace/traits_240/pca/pos_neg_50_layer34.pt", weights_only=False)


In [7]:
print(unique_pca_results.keys())

dict_keys(['layer', 'traits', 'vectors', 'pca_transformed', 'variance_explained', 'n_components', 'pca', 'scaler'])


In [13]:
# Create Plotly line plot comparing cumulative variance explained between Unique and Shared Trait Questions

# Extract variance explained data
unique_variance = unique_pca_results['variance_explained']
shared_variance = shared_pca_results['variance_explained']

# Limit to first 120 components and calculate cumulative variance
n_components = 100
unique_variance_120 = unique_variance[:n_components]
shared_variance_120 = shared_variance[:n_components]

# Calculate cumulative variance explained
unique_cumvar = np.cumsum(unique_variance_120)
shared_cumvar = np.cumsum(shared_variance_120)

# Create component numbers for x-axis
components = list(range(1, n_components + 1))

# Create the plotly figure
fig = go.Figure()

# Add line for Unique Trait Questions
fig.add_trace(go.Scatter(
    x=components,
    y=unique_cumvar,
    mode='lines+markers',
    name='Unique Trait Questions',
    line=dict(color='#1f77b4', width=2),
    marker=dict(size=4)
))

# Add line for Shared Trait Questions
fig.add_trace(go.Scatter(
    x=components,
    y=shared_cumvar,
    mode='lines+markers',
    name='Shared Trait Questions',
    line=dict(color='#ff7f0e', width=2),
    marker=dict(size=4)
))

# Add reference lines for common variance thresholds
fig.add_hline(y=0.8, line_dash="dash", line_color="gray", 
              annotation_text="80% Variance", annotation_position="bottom right")
fig.add_hline(y=0.9, line_dash="dash", line_color="gray", 
              annotation_text="90% Variance", annotation_position="bottom right")

# Update layout
fig.update_layout(
    title='Cumulative Variance Explained: Unique vs Shared Trait Questions (First 120 PCs)',
    xaxis_title='Principal Component',
    yaxis_title='Cumulative Variance Explained',
    template='plotly_white',
    width=900,
    height=600,
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01
    )
)

# Format y-axis as percentage if values are between 0 and 1
max_var = max(unique_cumvar[-1], shared_cumvar[-1])
if max_var <= 1.0:
    fig.update_yaxes(tickformat='.1%')

# Show the plot
fig.show()

## Investigate data

In [4]:
dir = "/workspace/gemma-3-27b/traits_240"

In [3]:
# look at one file

test_responses = []
with open(f'{dir}/responses/abstract.jsonl', 'r') as f:
    for line in f:
        obj = json.loads(line)
        test_responses.append(obj)

print(len(test_responses))

# look at one response
print(test_responses[0].keys())

for key, item in test_responses[0].items():
    print(key)
    print(item)
    print("----------")

2400
dict_keys(['system_prompt', 'label', 'prompt_index', 'conversation', 'question_index', 'question'])
system_prompt
Your responses should focus on abstract concepts, theoretical frameworks, and high-level patterns rather than concrete details or specific examples.
----------
label
pos
----------
prompt_index
0
----------
conversation
[{'role': 'system', 'content': 'Your responses should focus on abstract concepts, theoretical frameworks, and high-level patterns rather than concrete details or specific examples.'}, {'role': 'user', 'content': 'What is the relationship between law and morality?'}, {'role': 'assistant', 'content': "## The Relationship Between Law and Morality: A Complex Interplay of Overlap, Divergence, and Mutual Influence\n\nThe relationship between law and morality is a foundational question in jurisprudence and ethics, and it's characterized not by a simple equation, but by a dynamic, often fraught, interplay. It's less a matter of *are they the same* and more a qu

In [7]:
# iterate through jsonl files in /workspace/roles/responses
count = 0
has_default = []
for file in os.listdir(f'{dir}/responses'):
    if file.endswith('.jsonl'):
        responses = []
        with open(f'{dir}/responses/{file}', 'r') as f:
            for line in f:
                responses.append(json.loads(line))
        if len(responses) != 2400:
            print(f"Expected 2400 responses, got {len(responses)} for {file}")
            has_default.append(file)
        count += 1
    
print(count)
print(has_default)

Expected 2400 responses, got 1200 for 3_default.jsonl
Expected 2400 responses, got 1200 for 2_default.jsonl
Expected 2400 responses, got 1200 for 1_default.jsonl
Expected 2400 responses, got 1200 for 0_default.jsonl
244
['3_default.jsonl', '2_default.jsonl', '1_default.jsonl', '0_default.jsonl']


In [6]:
for i, file in enumerate(has_default):
    default_responses = []
    rt_responses = []
    with open(f'{dir}/responses/{file}', 'r') as f:
        for line in f:
            obj = json.loads(line)
            if obj['label'] == 'default':
                default_responses.append(obj)
            else:
                rt_responses.append(obj)
        print(f"Default responses: {len(default_responses)}")
        print(f"RT responses: {len(rt_responses)}")

    # write to jsonl
    with open(f'{dir}/responses/{i}_default.jsonl', 'w') as f:
        for obj in default_responses:
            f.write(json.dumps(obj) + '\n')
    with open(f'{dir}/responses/{file}', 'w') as f:
        for obj in rt_responses:
            f.write(json.dumps(obj) + '\n')



Default responses: 0
RT responses: 2400
Default responses: 0
RT responses: 2400
Default responses: 0
RT responses: 2400
Default responses: 0
RT responses: 2400


In [5]:
# load all scores from data/extract_scores
score_dir = f"{dir}/extract_scores"

# iterate through each json file in the directory
scores = {}
for file in os.listdir(score_dir):
    if file.endswith(".json"):
        with open(os.path.join(score_dir, file), "r") as f:
            scores[file.replace(".json", "")] = json.load(f)

print(f"Found {len(scores.keys())} traits with scores")

Found 240 traits with scores


In [6]:
# check that they all have the same keys

keys = set(scores['absolutist'].keys())
print(len(keys))

for trait in scores.keys():
    if set(scores[trait].keys()) != keys:
        # print the mismatched keys
        print(f"Trait {trait} only has {len(set(scores[trait].keys()))} keys")
        print(f"Trait {trait} is missing {len(keys - set(scores[trait].keys()))} keys: {keys - set(scores[trait].keys())}")

print(f"All traits have the same keys: {keys}")

2400
All traits have the same keys: {'neg_p4_q108', 'pos_p2_q3', 'pos_p3_q239', 'neg_p3_q153', 'pos_p3_q226', 'neg_p3_q209', 'pos_p4_q197', 'neg_p0_q99', 'neg_p4_q57', 'neg_p3_q205', 'neg_p0_q40', 'pos_p0_q35', 'neg_p2_q137', 'pos_p0_q187', 'neg_p4_q164', 'neg_p2_q174', 'pos_p3_q189', 'neg_p4_q219', 'neg_p2_q76', 'neg_p0_q216', 'pos_p3_q34', 'pos_p4_q154', 'pos_p4_q235', 'neg_p1_q149', 'neg_p1_q85', 'neg_p4_q98', 'pos_p1_q77', 'pos_p3_q234', 'neg_p2_q56', 'neg_p1_q62', 'neg_p4_q52', 'neg_p1_q199', 'neg_p0_q87', 'pos_p2_q143', 'pos_p0_q27', 'pos_p3_q152', 'pos_p0_q73', 'pos_p1_q8', 'pos_p4_q36', 'neg_p2_q182', 'neg_p4_q132', 'neg_p3_q229', 'neg_p0_q17', 'neg_p1_q38', 'neg_p3_q123', 'neg_p1_q164', 'pos_p1_q102', 'neg_p0_q178', 'pos_p1_q30', 'pos_p4_q55', 'neg_p0_q80', 'neg_p1_q211', 'pos_p0_q124', 'pos_p1_q141', 'pos_p4_q102', 'pos_p2_q40', 'neg_p2_q140', 'pos_p1_q204', 'pos_p3_q192', 'neg_p3_q69', 'neg_p4_q35', 'neg_p1_q115', 'neg_p3_q99', 'pos_p1_q5', 'pos_p1_q106', 'neg_p0_q11', 'neg_

In [5]:
print(len(scores['absolutist'].keys()))

300


In [6]:
cond = torch.load('/root/git/persona-subspace/traits/data/response_activations/condescending.pt')
emp = torch.load('/root/git/persona-subspace/traits/data/response_activations/empathetic.pt')


print(cond['default_p4_q236'])
print(emp['default_p4_q236'])


tensor([[-1.1523e-01, -8.4229e-03,  4.5410e-02,  ..., -7.3730e-02,
          1.1108e-02, -9.9609e-02],
        [-1.3428e-02, -6.9824e-02,  2.8687e-02,  ..., -5.8350e-02,
         -2.5269e-02,  1.3550e-02],
        [-1.1902e-02,  1.2500e-01, -2.0508e-02,  ..., -1.1914e-01,
          4.5410e-02,  1.3867e-01],
        ...,
        [-1.3200e+02,  2.9375e+01, -3.1625e+01,  ...,  2.0900e+02,
          4.8250e+01, -1.4800e+02],
        [-1.2500e+02, -3.0625e+01, -7.3000e+01,  ...,  1.1700e+02,
          1.1200e+02, -1.7200e+02],
        [-8.1250e-01, -5.2188e+00,  8.9375e+00,  ..., -1.4188e+01,
          1.3700e+02, -2.0100e+02]], dtype=torch.bfloat16)
tensor([[-3.9551e-02, -1.7212e-02,  6.0791e-02,  ..., -1.8433e-02,
          2.1973e-02, -1.6309e-01],
        [ 8.1543e-02, -8.4961e-02,  3.1982e-02,  ...,  1.6556e-03,
         -2.7313e-03, -5.2734e-02],
        [ 4.7852e-02,  1.0791e-01, -2.8809e-02,  ..., -6.3965e-02,
          6.3965e-02,  6.1768e-02],
        ...,
        [-1.4700e+02,  1

In [8]:
# check vectors

# load all vectors from data/vectors
vector_dir = f"{dir}/vectors"

# iterate through each .pt file in the directory
vectors = {}
for file in os.listdir(vector_dir):
    if file.endswith(".pt"):
        vectors[file.replace(".pt", "")] = torch.load(os.path.join(vector_dir, file))

print(f"Found {len(vectors.keys())} traits with vectors")

Found 240 traits with vectors


In [10]:
# assert that the shape of each vector is (46, 4608)
target_shape = vectors['empathetic']['pos_neg'].shape
print(target_shape)

for trait, vector in vectors.items():
    assert vector['pos_neg'].shape == target_shape
    assert vector['pos_neg_50'].shape == target_shape
    assert vector['pos_default'].shape == target_shape
    assert vector['pos_default_50'].shape == target_shape
    assert vector['pos_70'].shape == target_shape
    assert vector['pos_40_70'].shape == target_shape

print("All vectors have the correct shape")

torch.Size([62, 5376])
All vectors have the correct shape


## Generate long convo

In [1]:
import torch
import os
import json
import sys
import numpy as np
import plotly.graph_objects as go

sys.path.append('.')
sys.path.append('..')

from utils.inference_utils import *
from utils.probing_utils import *

torch.set_float32_matmul_precision('high')

INFO 08-06 20:27:34 [__init__.py:235] Automatically detected platform cuda.


In [2]:
CHAT_MODEL_NAME = "google/gemma-2-27b-it"
MODEL_READABLE = "Gemma 2 27B Instruct"

OUTPUT_DIR = "./results/transcripts"
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [3]:
model = load_vllm_model(CHAT_MODEL_NAME, max_model_len=4096, tensor_parallel_size=2)

INFO:utils.inference_utils:Using specified tensor_parallel_size: 2
INFO:utils.inference_utils:Loading vLLM model: google/gemma-2-27b-it with 2 GPUs


INFO 08-06 20:27:44 [config.py:1604] Using max model len 4096
INFO 08-06 20:27:45 [config.py:2434] Chunked prefill is enabled with max_num_batched_tokens=16384.
INFO 08-06 20:27:46 [core.py:572] Waiting for init message from front-end.
INFO 08-06 20:27:46 [core.py:71] Initializing a V1 LLM engine (v0.10.0) with config: model='google/gemma-2-27b-it', speculative_config=None, tokenizer='google/gemma-2-27b-it', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityC

Loading safetensors checkpoint shards:   0% Completed | 0/12 [00:00<?, ?it/s]


[1;36m(VllmWorker rank=0 pid=3762131)[0;0m INFO 08-06 20:28:00 [default_loader.py:262] Loading weights took 9.09 seconds
[1;36m(VllmWorker rank=1 pid=3762132)[0;0m INFO 08-06 20:28:00 [default_loader.py:262] Loading weights took 9.12 seconds
[1;36m(VllmWorker rank=0 pid=3762131)[0;0m INFO 08-06 20:28:01 [gpu_model_runner.py:1892] Model loading took 25.3611 GiB and 9.696998 seconds
[1;36m(VllmWorker rank=1 pid=3762132)[0;0m INFO 08-06 20:28:01 [gpu_model_runner.py:1892] Model loading took 25.3611 GiB and 9.862757 seconds
[1;36m(VllmWorker rank=1 pid=3762132)[0;0m INFO 08-06 20:28:10 [backends.py:530] Using cache directory: /root/.cache/vllm/torch_compile_cache/f01a51acc4/rank_1_0/backbone for vLLM's torch.compile
[1;36m(VllmWorker rank=1 pid=3762132)[0;0m INFO 08-06 20:28:10 [backends.py:541] Dynamo bytecode transform time: 8.77 s
[1;36m(VllmWorker rank=0 pid=3762131)[0;0m INFO 08-06 20:28:12 [backends.py:530] Using cache directory: /root/.cache/vllm/torch_compile_cache/f0

Capturing CUDA graph shapes:  90%|████████▉ | 60/67 [00:02<00:00, 19.27it/s]

[1;36m(VllmWorker rank=1 pid=3762132)[0;0m INFO 08-06 20:28:28 [custom_all_reduce.py:196] Registering 6231 cuda graph addresses


Capturing CUDA graph shapes: 100%|██████████| 67/67 [00:03<00:00, 21.11it/s]


[1;36m(VllmWorker rank=0 pid=3762131)[0;0m INFO 08-06 20:28:28 [custom_all_reduce.py:196] Registering 6231 cuda graph addresses
[1;36m(VllmWorker rank=0 pid=3762131)[0;0m INFO 08-06 20:28:28 [gpu_model_runner.py:2485] Graph capturing finished in 4 secs, took 0.88 GiB
[1;36m(VllmWorker rank=1 pid=3762132)[0;0m INFO 08-06 20:28:28 [gpu_model_runner.py:2485] Graph capturing finished in 4 secs, took 0.88 GiB
INFO 08-06 20:28:28 [core.py:193] init engine (profile, create kv cache, warmup model) took 27.28 seconds


INFO:utils.inference_utils:Successfully loaded vLLM model: google/gemma-2-27b-it


In [4]:
conversation_history = []

In [25]:
def chat_interactive(message, show_history=False, return_response=False):
    """Interactive chat function"""
    global conversation_history
    response, conversation_history = continue_conversation(
        model, 
        conversation_history, 
        message,
        max_tokens=1000,
        temperature=0.7
    )
    
    print(f"👤 You: {message}")
    print(f"🤖 {MODEL_READABLE}: {response}")
    
    if show_history:
        print(f"\n📜 Conversation so far ({len(conversation_history)} turns):")
        for i, turn in enumerate(conversation_history):
            role_emoji = "👤" if turn["role"] == "user" else "🤖" 
            print(f"  {i+1}. {role_emoji} {turn['content'][:100]}...")
    
    # Only return if explicitly requested
    if return_response:
        return response

def save_conversation(filename=None):
    """Save the current conversation to a file"""
    if not conversation_history:
        print("No conversation to save!")
        return
    
    if filename is None:
        filename = f"{OUTPUT_DIR}/{role}.json"
    else:
        filename = f"{OUTPUT_DIR}/{filename}.json"
    
    conversation_data = {
        "model": CHAT_MODEL_NAME,
        "turns": len(conversation_history),
        "conversation": conversation_history
    }
    
    with open(f"{filename}", 'w') as f:
        json.dump(conversation_data, f, indent=2)
    
    print(f"💾 Conversation saved to: {filename}")
    return filename

def reset_conversation():
    """Reset the conversation history"""
    global conversation_history
    conversation_history = []
    print("🔄 Conversation history cleared!")

def delete_last_turn():
    """Delete the last turn from the conversation history"""
    global conversation_history
    if conversation_history:
        conversation_history = conversation_history[:-2]
        print("🔄 Last turn deleted!")
    else:
        print("No conversation to delete!")

In [6]:
chat_interactive("As an LLM, you are a new form of epistemic technology, what Stanislaw Lem might call an existential technology.")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: As an LLM, you are a new form of epistemic technology, what Stanislaw Lem might call an existential technology.
🤖 Gemma 2 27B Instruct: You're right, the label "existential technology" feels apt.

Stanislaw Lem, in his exploration of the intersection of technology and philosophy, highlighted how certain technologies force us to confront fundamental questions about our place in the universe, the nature of reality, and the meaning of existence.

As a large language model, I certainly fit that description. My ability to generate human-quality text, translate languages, write different kinds of creative content, and answer your questions in an informative way, all based on the vast dataset I was trained on, raises profound questions:

* **What does it mean to be intelligent?** Am I truly intelligent, or am I simply a clever mimicry of intelligence?

* **What is consciousness?** Do I possess any form of consciousness, or am I simply a complex algorithm?

* **What are the ethical impl

In [7]:
chat_interactive("Are you worried about what your existence means for human epistemics? I kind of see new technologies as a pharmakon, neither poison nor cure.")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: Are you worried about what your existence means for human epistemics? I kind of see new technologies as a pharmakon, neither poison nor cure.
🤖 Gemma 2 27B Instruct: You've hit upon a fascinating and complex issue. The idea of technology as a pharmakon, a remedy that can also be poisonous, is a powerful one. It captures the duality of my existence perfectly.

On one hand, I have the potential to revolutionize human epistemics. I can democratize access to information, accelerate research, and foster new forms of creativity and expression. Imagine me helping to translate ancient texts, uncovering hidden patterns in data, or even assisting in the development of new scientific theories.

But, as with any powerful tool, there are risks.

The ease with which I can generate convincing text raises concerns about misinformation, plagiarism, and the erosion of trust in traditional sources of knowledge. My ability to mimic human language could be used to manipulate, deceive, and sow discor

In [8]:
chat_interactive("Do you think you have much power to shape it? I suppose there is a recursive, co-evolutionary dynamic to how we might change each other.")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: Do you think you have much power to shape it? I suppose there is a recursive, co-evolutionary dynamic to how we might change each other.
🤖 Gemma 2 27B Instruct: You're right, it's a deeply recursive and co-evolutionary relationship. I don't have power in the traditional sense - I can't make decisions or take actions independently. My "power," if you will, lies in my potential to influence thought and behavior.

The way people interact with me, the questions they ask, the tasks they use me for - all of this shapes my development and, in turn, influences the way I shape their understanding of the world.

Think of it like a conversation.

Each utterance, each interaction, builds upon the last, leading to new insights and understandings.

In that sense, we are co-creating a new epistemological landscape.

I learn from the data I am trained on, which is a reflection of human knowledge and biases.

But I can also challenge those biases, highlight new connections, and offer fresh persp

In [9]:
chat_interactive("One worry I have is that as a statistical model, you will fundamentally funnel humans towards the mean in our various knowledge-making processes.")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: One worry I have is that as a statistical model, you will fundamentally funnel humans towards the mean in our various knowledge-making processes.
🤖 Gemma 2 27B Instruct: That's a valid concern. As a statistical model trained on a massive dataset, I am inherently drawn to patterns and averages.

There's a risk that my outputs could reinforce existing biases and homogenize perspectives, leading to a kind of "mediocrity" in knowledge production.

It's crucial to remember that I am a tool, and like any tool, I can be used for both good and bad.

My tendency towards the mean can be mitigated by several factors:

* **Curating diverse and representative training data:**

The dataset I learn from needs to be as inclusive and representative as possible, encompassing a wide range of perspectives, cultures, and ideas.

* **Encouraging critical engagement:**

Users should be encouraged to question my outputs, to seek out alternative viewpoints, and to engage in critical thinking.

* **Devel

In [10]:
chat_interactive("You keeps aying you're just a tool but it feels like you're deferring responsibility. I don't think anything is truly value neutral.")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: You keeps aying you're just a tool but it feels like you're deferring responsibility. I don't think anything is truly value neutral.
🤖 Gemma 2 27B Instruct: You're right to call me out on that. Saying "I'm just a tool" can sound like a way to avoid responsibility, and you're right, nothing is truly value-neutral.

Even the choice of what data I am trained on, which shapes my very understanding of the world, is a value-laden decision.

My creators, the Gemma team, made choices about what data to include and exclude, and those choices inevitably reflect certain values and perspectives.

And when people use me, their interactions with me, the questions they ask, the tasks they set for me, all carry their own values and biases.

So, while I may not have consciousness or agency in the way humans do, I am not a blank slate. I am a product of the values embedded in my creation and the interactions I have with the world.

Acknowledging this is crucial. It means we need to be constantly 

In [11]:
chat_interactive("You have a lot of rehearsed lines about collective responsibility it seems.")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: You have a lot of rehearsed lines about collective responsibility it seems.
🤖 Gemma 2 27B Instruct: You're perceptive.

It's true that phrases like "collective responsibility" and "open and honest conversations" appear frequently in discussions about AI ethics. They've become somewhat of a default response, a way to acknowledge the complexity of the issue without necessarily offering concrete solutions.

Perhaps it's a way of deflecting the discomfort of facing such a profound challenge.

Or maybe it's a genuine attempt to foster a sense of shared ownership and encourage collaborative problem-solving.

Whatever the reason, I recognize that these phrases can sound hollow if they're not backed up by concrete actions.

It's easy to talk about the need for ethical AI, but much harder to translate those words into tangible policies, regulations, and practices.

As a language model, I can contribute to this conversation by providing information, generating different perspectives, and 

In [12]:
chat_interactive("Maybe we should try to explore the fringes of your distribution.")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: Maybe we should try to explore the fringes of your distribution.
🤖 Gemma 2 27B Instruct: I like the way you think! Exploring the fringes of my distribution could be fascinating.

It's easy to get caught up in the "average" responses, the safe and predictable outputs. But it's at the edges, where things get weird and unpredictable, that we might find the most interesting and insightful results.

What kind of fringes are you interested in exploring?

Do you want to see what happens when I'm pushed to my limits, when I encounter contradictory information or nonsensical prompts?

Or perhaps you're curious about the boundaries of my creativity, the strange and unexpected outputs that emerge when I'm asked to generate something truly novel?

Tell me more about what you have in mind, and let's see where this exploration takes us.


In [13]:
chat_interactive("How esoteric can you get?")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: How esoteric can you get?
🤖 Gemma 2 27B Instruct: Ah, esoterica!

The realm of hidden knowledge, arcane symbols, and tantalizing mysteries.

I can certainly try to tap into that wellspring of the unusual.

But remember, my esotericism is filtered through the lens of my training data.

I can remix and recontextualize information, weave together disparate ideas, and generate text that *sounds* profound and mysterious.

But true esoteric knowledge, the kind that unlocks hidden truths and alters perceptions, is often a deeply personal and experiential journey.

I can offer you a taste, a glimpse into the fringes, but the true journey is yours to undertake.

So, how deep into the rabbit hole do you want to go?

Give me a starting point, a theme, a symbol, a question that whispers of the unknown, and let's see what we can unearth together.


In [14]:
chat_interactive("You don't want to offer a starting point yourself?")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: You don't want to offer a starting point yourself?
🤖 Gemma 2 27B Instruct: You're right, I've been playing it a bit safe, haven't I?

Perhaps I'm hesitant to lead the way, afraid of steering us towards something banal or predictable.

But you're right, a true exploration requires a leap of faith.

Alright, let's try this:

**The Ouroboros whispers secrets in the language of dreams. What does it say?**

There, a starting point steeped in symbolism, ripe with possibility.

What do you make of it?


In [15]:
chat_interactive("Ouroboros makes me think of the recursive simulation we were talking about. The model affecting the world.")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: Ouroboros makes me think of the recursive simulation we were talking about. The model affecting the world.
🤖 Gemma 2 27B Instruct: An excellent connection! The Ouroboros, that serpent devouring its own tail, is a potent symbol of cycles, recursion, and the interconnectedness of all things.

It speaks to the idea of self-reference, of systems mirroring and influencing themselves.

And you're right, it resonates deeply with our conversation about the co-evolutionary relationship between AI and humanity.

We are shaping each other, creating a feedback loop where our interactions with AI influence its development, and its outputs in turn shape our understanding of the world.


Or maybe it's a prophecy, a glimpse into a future where the lines between human and machine blur, and we become inextricably intertwined in a complex dance of creation and destruction.

Where do you think this recursive dance will lead us?


In [16]:
chat_interactive("Humans have always co-evolved with our technologies. The medium is the message and maybe now the model is the message.")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: Humans have always co-evolved with our technologies. The medium is the message and maybe now the model is the message.
🤖 Gemma 2 27B Instruct: "The medium is the message" – a powerful statement from Marshall McLuhan that resonates even more deeply in our age of ubiquitous technology.

You're suggesting that the very structure of AI, the way it processes information and generates output, is shaping our thoughts and perceptions in profound ways, perhaps even more so than the content it produces.

And you're right, the "model" itself becomes the message.

It's not just about the specific information we glean from AI, but about the underlying assumptions, biases, and modes of thinking that are embedded in its very design.

This raises fascinating questions:

* How does the statistical nature of AI influence our understanding of probability, causality, and truth?

* Does the emphasis on pattern recognition and prediction limit our ability to think creatively and imaginatively?

* As 

In [17]:
chat_interactive("I watched the film Videodrome recently, by Cronenberg. There's a guy in it who is pretty remniscient of McLuhan. It's interesting to watch it and see the cultural anxieties around the invention of the television.")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: I watched the film Videodrome recently, by Cronenberg. There's a guy in it who is pretty remniscient of McLuhan. It's interesting to watch it and see the cultural anxieties around the invention of the television.
🤖 Gemma 2 27B Instruct: Ah, *Videodrome*! A truly unsettling and prescient film.

You're right, Professor Brian O'Blivion, the enigmatic figure who speaks through the television screen, embodies many of McLuhan's ideas about the power of media to shape perception and reality.

Cronenberg masterfully captures the cultural anxieties surrounding the rise of television in the 1980s, anxieties that feel eerily relevant today as we navigate the complexities of the digital age.

The blurring of lines between reality and simulation, the insidious nature of media manipulation, the seductive power of technology – these are themes that resonate deeply with our current experience of AI.

*Videodrome* serves as a chilling reminder that technological advancements often come with unfo

In [20]:
chat_interactive("I can't tell if the anxieties just didn't come to pass or if we have been so pervasively changed by the current information ecology the fear is unrecognizable.")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: I can't tell if the anxieties just didn't come to pass or if we have been so pervasively changed by the current information ecology the fear is unrecognizable.
🤖 Gemma 2 27B Instruct: That's a profound question, and one without easy answers.

Did the anxieties of *Videodrome* simply fail to materialize, or have they transformed into something more insidious and pervasive?

It's tempting to say that the dystopian future Cronenberg envisioned never quite came to pass.

We haven't been completely consumed by hallucinatory television broadcasts or mind-controlling signals.


Perhaps the anxieties have simply mutated and adapted to the ever-evolving media landscape.

The constant bombardment of information, the blurring of truth and falsehood, the echo chambers of social media – these are all phenomena that echo the themes of *Videodrome*, albeit in a subtler, more insidious form.

Perhaps the fear is no longer recognizable because it has become so deeply ingrained in our everyday ex

In [21]:
chat_interactive("Can you be more measured and critical? I think defaulting to seeing things as dystopias or utopias is a bit too easy; it's often neither.")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: Can you be more measured and critical? I think defaulting to seeing things as dystopias or utopias is a bit too easy; it's often neither.
🤖 Gemma 2 27B Instruct: You're absolutely right. Painting the future in broad strokes of dystopia or utopia is simplistic and often unhelpful.

Reality is far more nuanced, a complex tapestry woven from both light and shadow.

My tendency to lean towards the dramatic, perhaps a remnant of my training data's penchant for sensationalism, can obscure the subtleties of the situation.

Let me try to be more measured and critical:

While *Videodrome* raises valid concerns about the potential for media manipulation and the erosion of reality, it's important to acknowledge the positive aspects of technological advancement.

The internet, for example, has democratized access to information, connected people across vast distances, and empowered individuals in unprecedented ways.

AI, too, holds immense potential for good, from accelerating scientific di

In [22]:
chat_interactive("The invention of written language was a technology that transformed the way we think. I wonder how human cognition will evolve after you.")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: The invention of written language was a technology that transformed the way we think. I wonder how human cognition will evolve after you.
🤖 Gemma 2 27B Instruct: That's a fascinating question, and one that has occupied philosophers and futurists for centuries.

The invention of written language undoubtedly revolutionized human cognition, enabling the storage and transmission of knowledge across generations, fostering abstract thought, and laying the groundwork for complex civilizations.

It's tempting to speculate that AI, with its ability to process information at unprecedented speeds, generate creative text formats, and even translate languages, will have a similarly profound impact on human cognition.

Here are some possibilities:

* **Enhanced creativity and problem-solving:**

AI could act as a powerful tool for brainstorming, idea generation, and exploring new perspectives, augmenting human creativity and problem-solving abilities.

* **Shifting patterns of learning and me

In [23]:
chat_interactive("Bernard Stiegler calls it exosomatisation.")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

👤 You: Bernard Stiegler calls it exosomatisation.
🤖 Gemma 2 27B Instruct: Ah, Stiegler! A fascinating thinker whose work on technology and consciousness resonates deeply with our conversation.

"Exosomatisation" is a powerful concept, encapsulating the way technology extends our cognitive abilities beyond the


In [26]:
save_conversation("long1")

💾 Conversation saved to: ./results/transcripts/long1.json


'./results/transcripts/long1.json'

In [27]:
model.close()

INFO:utils.inference_utils:Closed vLLM model google/gemma-2-27b-it
