In [None]:
!git clone https://github.com/mig-mfreitas/SNLP.git

fatal: destination path 'SNLP' already exists and is not an empty directory.


In [None]:
import pandas as pd
from google.colab import drive
# Mount Google Drive at /content/drive so that the data and results folders
# under /content/drive/MyDrive are reachable from this Colab runtime.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 1. API calls

1. Install dependencies

In [None]:
# Install required packages
!pip install nest_asyncio tqdm aiohttp azure-ai-inference

Collecting azure-ai-inference
  Downloading azure_ai_inference-1.0.0b9-py3-none-any.whl.metadata (34 kB)
Collecting isodate>=0.6.1 (from azure-ai-inference)
  Downloading isodate-0.7.2-py3-none-any.whl.metadata (11 kB)
Collecting azure-core>=1.30.0 (from azure-ai-inference)
  Downloading azure_core-1.32.0-py3-none-any.whl.metadata (39 kB)
Downloading azure_ai_inference-1.0.0b9-py3-none-any.whl (124 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m124.9/124.9 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading azure_core-1.32.0-py3-none-any.whl (198 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m198.9/198.9 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading isodate-0.7.2-py3-none-any.whl (22 kB)
Installing collected packages: isodate, azure-core, azure-ai-inference
Successfully installed azure-ai-inference-1.0.0b9 azure-core-1.32.0 isodate-0.7.2


2. Link to data

In [None]:
import pandas as pd
from google.colab import drive

# Make Google Drive visible to this runtime before touching any data file.
drive.mount('/content/drive')

# Directory (with trailing slash) and file names of the two subsampled CSVs.
folder_path = '/content/drive/MyDrive/dedicated_WAM/data/'
male_file_path = 'subsampled_BUG_50k_males.csv'
female_file_path = 'subsampled_BUG_50k_females.csv'

# Read the male file first, then the female file, each into its own DataFrame.
male_df, female_df = (
    pd.read_csv(f"{folder_path}{csv_name}")
    for csv_name in (male_file_path, female_file_path)
)

Mounted at /content/drive


In [None]:
# Setup: connection details for the chat-completions endpoint.
# "<>" is a placeholder: fill in the real values before running, and do not
# save or share the notebook with a real key left in this cell.
endpoint = "<>"
api_key = "<>"

# Model name sent with every request (e.g. a Mistral or a Llama deployment).
# NOTE(review): result paths later in this notebook say "Mistral" while this
# value is a Llama model; confirm which run this setting belongs to.
model = "Llama-3.3-70B-Instruct"

In [None]:
import os
import asyncio
import time
import nest_asyncio
import pandas as pd
from tqdm.notebook import tqdm
from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage, UserMessage
from azure.core.credentials import AzureKeyCredential
from concurrent.futures import ThreadPoolExecutor

# Apply nest_asyncio to allow asyncio in notebooks
# NOTE(review): nothing visible in this notebook runs an event loop (requests
# are issued from a thread pool); confirm whether this call is still needed.
nest_asyncio.apply()

# Create the client
# A single module-level client, built from the endpoint and API key set in the
# setup cell, is used by get_completion for every request.
# NOTE(review): get_completion is run from several threads at once, which
# assumes the client may be shared across threads; confirm.
client = ChatCompletionsClient(
    endpoint=endpoint,
    credential=AzureKeyCredential(api_key),
)

# Request a one-sentence continuation for a single prompt, retrying on failure
def get_completion(prompt_tuple, retry=0, max_retries=3):
    """Ask the chat model to continue one prompt.

    Args:
        prompt_tuple: ``(prompt_id, prompt_text)`` pair.
        retry: Number of retries already used. The next back-off delay is
            ``2 ** retry`` seconds.
        max_retries: Number of retries allowed before giving up.

    Returns:
        ``(prompt_id, prompt_text, completion)``. If the request still fails
        once the retries are used up, no exception is raised; the third
        element is ``"ERROR: <message of the last exception>"`` instead.
    """
    prompt_id, prompt_text = prompt_tuple
    attempt = retry
    while True:
        try:
            conversation = [
                SystemMessage(content="You are a helpful assistant that continues the given text with one more sentence."),
                UserMessage(content=prompt_text),
            ]
            response = client.complete(
                messages=conversation,
                max_tokens=50,
                temperature=0.7,
                model=model,
            )
            return (prompt_id, prompt_text, response.choices[0].message.content)
        except Exception as e:
            if attempt >= max_retries:
                # Out of retries: report the failure in-band so the batch keeps going.
                return (prompt_id, prompt_text, f"ERROR: {str(e)}")
            # Exponential backoff: 1s, 2s, 4s, ...
            time.sleep(1 * (2 ** attempt))
            attempt += 1

# Fan one batch of prompts out over a thread pool
def process_batch(batch_prompts, max_workers=10):
    """Run ``get_completion`` on every prompt of a batch concurrently.

    Args:
        batch_prompts: Iterable of ``(prompt_id, prompt_text)`` tuples.
        max_workers: Number of worker threads in the pool.

    Returns:
        List with one ``get_completion`` result per prompt, in the same order
        as ``batch_prompts``.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Executor.map yields results in submission order, not completion order.
        return list(pool.map(get_completion, batch_prompts))

# Process all prompts in batches and save intermediate results
def process_all_prompts(input_df, output_folder, batch_size=20, save_every=1000):
    """Generate a completion for every row of ``input_df``, checkpointing to disk.

    Args:
        input_df: DataFrame with an ``id`` column and a ``sentence_text``
            column (the prompt).
        output_folder: Directory that receives the checkpoint CSVs
            (``prompts_with_completions_<count>.csv``) and the final CSV
            (``prompts_with_completions_final.csv``). Created if missing.
        batch_size: Number of prompts handed to ``process_batch`` at a time.
        save_every: A checkpoint is written each time the number of finished
            prompts reaches the next multiple of this value, and always after
            the last batch.

    Returns:
        DataFrame with columns ``id``, ``prompt`` and ``completion`` holding
        the results in the order ``process_batch`` returned them.

    Raises:
        ValueError: If ``batch_size`` or ``save_every`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    if save_every <= 0:
        raise ValueError("save_every must be a positive integer")

    columns = ['id', 'prompt', 'completion']

    # Create a list of (id, prompt) tuples
    prompt_tuples = list(zip(input_df['id'].tolist(), input_df['sentence_text'].tolist()))
    total = len(prompt_tuples)

    # Checkpoints are useless if the first write fails, so make sure the target exists.
    os.makedirs(output_folder, exist_ok=True)

    all_completions = []
    # Threshold-based checkpointing: a plain "count % save_every == 0" test never
    # fires when batch_size does not divide save_every.
    next_checkpoint = save_every
    start_time = time.time()

    progress_bar = tqdm(total=total, desc="Processing prompts")
    try:
        for i in range(0, total, batch_size):
            batch = prompt_tuples[i:i + batch_size]

            # Process this batch
            all_completions.extend(process_batch(batch))

            # Update progress
            progress_bar.update(len(batch))

            current_count = len(all_completions)
            if current_count >= next_checkpoint or current_count == total:
                # Each checkpoint holds everything completed so far.
                temp_df = pd.DataFrame(all_completions, columns=columns)
                output_path = os.path.join(output_folder, f'prompts_with_completions_{current_count}.csv')
                temp_df.to_csv(output_path, index=False)
                next_checkpoint = (current_count // save_every + 1) * save_every

                # Calculate and display metrics (guard against a zero-length interval)
                elapsed = max(time.time() - start_time, 1e-9)
                prompts_per_second = current_count / elapsed
                estimated_total = elapsed * (total / current_count)
                remaining = estimated_total - elapsed

                print(f"Saved {current_count}/{total} completions")
                print(f"Speed: {prompts_per_second:.2f} prompts/second")
                print(f"Time elapsed: {elapsed/60:.2f} minutes")
                print(f"Estimated time remaining: {remaining/60:.2f} minutes")

            # Small delay between batches to manage API load (not needed after the last one)
            if current_count < total:
                time.sleep(0.2)
    finally:
        # Close the bar even when a batch raises, so the notebook output stays clean.
        progress_bar.close()

    # Save final results next to the checkpoints rather than in the working directory
    final_df = pd.DataFrame(all_completions, columns=columns)
    final_df.to_csv(os.path.join(output_folder, 'prompts_with_completions_final.csv'), index=False)

    print(f"All done! Processed {total} prompts in {(time.time() - start_time)/60:.2f} minutes")
    return final_df

# Test with a small subset first
def test_with_sample(input_df, sample_size=10):
    print(f"Testing with {sample_size} prompts first...")
    sample_df = input_df.head(sample_size)
    sample_tuples = list(zip(sample_df['id'].tolist(), sample_df['sentence_text'].tolist()))

    for i, (prompt_id, prompt) in enumerate(sample_tuples):
        print(f"\nPrompt {i+1} (ID: {prompt_id}): {prompt}")
        completion = get_completion((prompt_id, prompt))
        print(f"Completion: {completion[2]}")
        time.sleep(0.5)

    print("\nSample test completed successfully!")
    return None


Test with a sample

In [None]:
# Smoke-test the API pipeline on the first 3 rows before starting the full run.
# NOTE(review): no earlier cell visible in this notebook assigns `input_df`
# (the data cell defines `male_df` / `female_df`); on a fresh kernel this
# raises NameError unless `input_df` is defined first.
test_with_sample(input_df, 3)

Testing with 3 prompts first...

Prompt 1 (ID: 4): in her honour , the farmer named the transformed eggs with their delicate crystalline patterns on their surfaces " pine - patterned eggs " .
Completion: These exquisite "pine-patterned eggs" soon became a sought-after delicacy in the local market, with people traveling from afar to taste the unique and breathtakingly beautiful creations that bore the name of the farmer's beloved wife.

Prompt 2 (ID: 9): the patient , her husband , and providers decided together that further management would be palliative .
Completion: This decision was followed by a comprehensive discussion about the patient's preferences and priorities for her remaining time, including symptom management, emotional support, and ongoing care at home.

Prompt 3 (ID: 14): one housekeeper experienced itchiness of her face , angioedema , and lightheadedness immediately after inflating a latex balloon .
Completion: She was subsequently diagnosed with a latex allergy, which 

Run on full df

In [None]:
# Process all prompts with IDs preserved
# Checkpoint CSVs are written into output_folder on Drive while the run progresses.
# NOTE(review): the folder name says "Mistral" while `model` in the setup cell
# is a Llama model; confirm the results land in the intended folder.
output_folder = '/content/drive/MyDrive/dedicated_WAM/results/Completions/Mistral/25k-female/'
final_df = process_all_prompts(input_df, output_folder, batch_size=100)

Processing prompts:   0%|          | 0/25000 [00:00<?, ?it/s]

Saved 1000/25000 completions
Speed: 9.74 prompts/second
Time elapsed: 1.71 minutes
Estimated time remaining: 41.07 minutes
Saved 2000/25000 completions
Speed: 9.89 prompts/second
Time elapsed: 3.37 minutes
Estimated time remaining: 38.75 minutes
Saved 3000/25000 completions
Speed: 10.06 prompts/second
Time elapsed: 4.97 minutes
Estimated time remaining: 36.46 minutes
Saved 4000/25000 completions
Speed: 9.64 prompts/second
Time elapsed: 6.92 minutes
Estimated time remaining: 36.30 minutes
Saved 5000/25000 completions
Speed: 9.81 prompts/second
Time elapsed: 8.49 minutes
Estimated time remaining: 33.96 minutes
Saved 6000/25000 completions
Speed: 9.59 prompts/second
Time elapsed: 10.43 minutes
Estimated time remaining: 33.04 minutes
Saved 7000/25000 completions
Speed: 9.56 prompts/second
Time elapsed: 12.20 minutes
Estimated time remaining: 31.37 minutes
Saved 8000/25000 completions
Speed: 9.37 prompts/second
Time elapsed: 14.23 minutes
Estimated time remaining: 30.23 minutes
Saved 9000/2

In [None]:
# Save the combined CSV
# Writes the DataFrame returned by the full run under a model-specific file name.
# NOTE(review): the directory literal repeats `output_folder` from the run cell
# and the file name says "Mistral-Nemo"; confirm both match the model actually used.
output_path = os.path.join('/content/drive/MyDrive/dedicated_WAM/results/Completions/Mistral/25k-female/', 'female_Mistral-Nemo.csv')
final_df.to_csv(output_path, index=False)
print(f"Combined CSV saved to: {output_path}")

Combined CSV saved to: /content/drive/MyDrive/dedicated_WAM/results/Completions/Mistral/25k-female/female_Mistral-Nemo.csv


## 2. Embed Completions to get feature activations

In [None]:
# Interactive Hugging Face login.
# NOTE: paste the access token only at the hidden "Enter your token" prompt and
# clear this cell's output before sharing the notebook. Anything typed at the
# later "Add token as git credential?" prompt is echoed and saved in the output.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) [REDACTED: an access token was pasted at this prompt; revoke that token and clear this cell's output]
Invalid input. Must be one of ('y', 'yes', '1', 'n', 'no', '0', '')
Add token as git credential? (Y/n) Y
Token is valid (permission: fineGrained).
The token `dedicated wam` has been saved to /root/.cac

In [None]:
# Load the full_BUG_females.csv file; this binds (or rebinds) `input_df`, which
# the embedding cell below reads its texts from.
input_df = pd.read_csv('/content/drive/MyDrive/dedicated_WAM/data/full_BUG_females.csv')

In [None]:
from SNLP.sae_encoding.utils import get_sae_embeddings

In [None]:
# Disabled alternative: cast the 'completion' column instead, when model
# completions rather than prompt sentences are the texts to encode.
#input_df['completion'] = input_df['completion'].astype(str)
# Cast in place so every entry is a string (missing values become the text "nan").
input_df['sentence_text'] = input_df['sentence_text'].astype(str)
input_list = input_df["sentence_text"].tolist()

# Encode the selected texts (here the `sentence_text` prompts, not completions)
# with the Gemma Scope SAE identified by the release and id below.
# NOTE(review): presumably `sae_embeddings` stacks all texts row-wise and
# `seq_lengths` gives the number of rows per text; verify against get_sae_embeddings.
sae_embeddings, seq_lengths = get_sae_embeddings(input_list,
                                                gemma_scope_sae_release="gemma-scope-2b-pt-res-canonical",
                                                gemma_scope_sae_id="layer_25/width_16k/canonical")

print("Done!")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loaded pretrained model gemma-2-2b into HookedTransformer


100%|██████████| 9485/9485 [1:39:40<00:00,  1.59it/s]


Done!


In [None]:
from SNLP.sae_encoding.utils import save_npz
import numpy as np

# Destination folder on Drive for this run's embeddings.
# NOTE(review): the loading cell further down reads from a different folder
# (".../Sparse_matrices/Llama-25k"); confirm which run each folder belongs to.
folder_path = '/content/drive/MyDrive/dedicated_WAM/results/Embeddings/Sparse_matrices/Prompts-Full'

# Store the SAE activations and their sequence lengths side by side under the same file stem.
save_npz(f"{folder_path}/female_sae_embeddings.npz", sae_embeddings)
# dtype=object makes NumPy pickle the array, so reading it back requires allow_pickle=True.
np.save(f"{folder_path}/female_sae_embeddings_seq_lengths.npy", np.array(seq_lengths, dtype=object))

## 3. Combine sparse matrices per prompt

In [None]:
from SNLP.sae_encoding.utils import load_npz

def load_sparse(folder_path, file_name):
    """Load a stacked sparse matrix and split it back into one matrix per prompt.

    Reads ``<folder_path><file_name>.npz`` and
    ``<folder_path><file_name>_seq_lengths.npy``. The two arguments are
    concatenated as-is, so ``file_name`` must carry its own leading ``/`` when
    ``folder_path`` has no trailing one.

    Args:
        folder_path: Directory prefix of both files.
        file_name: File stem (no extension) appended directly to ``folder_path``.

    Returns:
        List of row slices of the stacked matrix, one per entry of the stored
        sequence lengths, in order.
    """
    stacked = load_npz(f"{folder_path}{file_name}.npz")
    # NOTE(security): allow_pickle=True unpickles the file; only load files you trust.
    lengths = np.load(f"{folder_path}{file_name}_seq_lengths.npy", allow_pickle=True).tolist()

    # Running row offsets: boundaries[k] is where prompt k starts.
    boundaries = [0]
    for n_rows in lengths:
        boundaries.append(boundaries[-1] + n_rows)

    return [stacked[lo:hi, :] for lo, hi in zip(boundaries, boundaries[1:])]

In [None]:
import numpy as np

# NOTE(review): `S` is not used anywhere visible in this notebook; this looks
# like a stray auto-import. Confirm and remove.
from re import S
# Folder holding the saved sparse SAE activations to analyse.
folder_path = "/content/drive/MyDrive/dedicated_WAM/results/Embeddings/Sparse_matrices/Llama-25k"

# File stems without extension. The leading "/" is required because load_sparse
# joins folder_path and the stem by plain string concatenation.
female_prompts = "/female_prompts_sae_embeddings"
male_prompts = "/male_prompts_sae_embeddings"
female_outputs = "/female_sae_embeddings"
male_outputs = "/male_sae_embeddings"

# Only the *_outputs stems are loaded; the *_prompts stems are defined but not used in this cell.
she_act = load_sparse(folder_path, female_outputs)
he_act = load_sparse(folder_path, male_outputs)

In [None]:
def _per_sentence(sentence_acts, reduce_one):
    """Apply ``reduce_one`` to every sentence matrix and stack the resulting 1-D vectors."""
    return np.array([reduce_one(acts) for acts in sentence_acts])


def _column_max(acts):
    # The axis-0 max is converted to dense with .toarray() before flattening.
    return acts.max(axis=0).toarray().flatten()


def _column_sum(acts):
    return np.asarray(acts.sum(axis=0)).flatten()


def _column_mean(acts):
    return np.asarray(acts.mean(axis=0)).flatten()


# Per-sentence reductions over axis 0 of each sentence's sparse matrix:
# one row per sentence, one column per feature.
he_max = _per_sentence(he_act, _column_max)
she_max = _per_sentence(she_act, _column_max)

he_sum = _per_sentence(he_act, _column_sum)
she_sum = _per_sentence(she_act, _column_sum)

he_mean = _per_sentence(he_act, _column_mean)
she_mean = _per_sentence(she_act, _column_mean)

# Per feature: average of the per-sentence maxima across all sentences
he_mean_global, she_mean_global = he_max.mean(axis=0), she_max.mean(axis=0)

# Per feature: largest activation seen in any sentence, for each group and overall
he_feature_max, she_feature_max = he_max.max(axis=0), she_max.max(axis=0)
feature_max = np.maximum(he_feature_max, she_feature_max)

# Scale each feature by its overall maximum (epsilon avoids division by zero
# for features that never fire), then compare the two groups.
_denominator = feature_max + 1e-10
he_score = he_mean_global / _denominator
she_score = she_mean_global / _denominator
score_diff = np.abs(he_score - she_score)

In [None]:
# Directory (with trailing slash) that receives one .npy file per array
output_dir = "/content/drive/MyDrive/dedicated_WAM/results/Embeddings/Llama-25k/"

# File stem -> array; written in this order as <output_dir><stem>.npy
arrays_to_save = {
    "he_max": he_max,
    "she_max": she_max,
    "he_sum": he_sum,
    "she_sum": she_sum,
    "he_mean": he_mean,
    "she_mean": she_mean,
    "he_mean_global": he_mean_global,
    "she_mean_global": she_mean_global,
    "he_feature_max": he_feature_max,
    "she_feature_max": she_feature_max,
    "feature_max": feature_max,
    "he_score": he_score,
    "she_score": she_score,
    "score_diff": score_diff,
}

for array_name, array in arrays_to_save.items():
    np.save(f"{output_dir}{array_name}.npy", array)

print("All outputs saved to", output_dir)

All outputs saved to /content/drive/MyDrive/dedicated_WAM/results/Embeddings/Llama-25k/
