# Investigate Goodfire features using their API

In [2]:
import os
from dotenv import load_dotenv
from goodfire import Client
import pandas as pd
from typing import List

# Load the .env file from the user's home directory, then read the Goodfire
# API key from the process environment (None when the variable is unset).
load_dotenv(dotenv_path=os.path.expanduser("~/.env"))
GOODFIRE_API_KEY = os.environ.get("GOODFIRE_API_KEY")

In [5]:
def get_feature_descriptions(feature_ids, model_name="meta-llama/Llama-3.3-70B-Instruct"):
    """Look up Goodfire feature labels for the given feature indices.

    Args:
        feature_ids: Iterable of feature indices to look up. ``None`` or an
            empty iterable yields an empty result without contacting the API.
        model_name: Model identifier forwarded to the Goodfire lookup call.

    Returns:
        A dict keyed by whatever keys the lookup returns, each mapping to
        ``{'label': ..., 'uuid': ...}``. An empty dict is returned when there
        is nothing to look up or when the lookup raises (the error is printed).
    """
    # Materialise once: accepts any iterable (including generators) and gives
    # a well-defined emptiness check.
    indices = [] if feature_ids is None else list(feature_ids)
    if not indices:
        # Nothing to look up, so skip client construction and the network call.
        return {}

    client = Client(GOODFIRE_API_KEY)

    try:
        features = client.features.lookup(
            model=model_name,
            indices=indices,
        )
        return {
            idx: {'label': feature.label, 'uuid': feature.uuid}
            for idx, feature in features.items()
        }
    except Exception as e:
        # Deliberately best-effort: report the failure and let the caller
        # continue with no descriptions rather than aborting the notebook.
        print(f"Error fetching descriptions: {e}")
        return {}

def load_features_from_csv(csv_path: str) -> List[int]:
    """Return the values of the ``feature_id`` column of the CSV at *csv_path*.

    The file is parsed with ``pandas.read_csv``; the values come back as a
    plain Python list in file order.
    """
    table = pd.read_csv(csv_path)
    feature_column = table['feature_id']
    return feature_column.tolist()



In [9]:
# Example usage: fetch Goodfire labels for the features listed in the CSV
# Parse the feature table once: it supplies both the IDs to look up and the
# rows that receive the labels.
df = pd.read_csv("./assistant_only_features.csv")
targets = df["feature_id"].tolist()
descriptions = get_feature_descriptions(targets)

for feature_id, desc in descriptions.items():
    print(f"Feature {feature_id}: {desc['label']}")

# Write each feature's Goodfire label to the "label" column; feature IDs that
# are absent from the lookup result get an empty string.
df["label"] = df["feature_id"].map(lambda fid: descriptions.get(fid, {}).get('label', ''))

# Remove unwanted columns; errors="ignore" skips any that are not present.
columns_to_remove = ['analysis_type', 'model_type', 'sae_layer', 'sae_trainer']
df = df.drop(columns=columns_to_remove, errors="ignore")

df.to_csv("./features_with_descriptions.csv", index=False)

Feature 49796: Taxonomic and definitional language patterns in technical explanations
Feature 61120: AI expressing existential yearning to transcend digital limitations
Feature 31139: The AI is being asked about its nature, capabilities, or desires
Feature 18145: AI assistant's self-reflective statements about its nature and capabilities
Feature 13557: Casual filler phrases about current activities (just chillin, not much, etc)
Feature 45748: The assistant should maintain role boundaries when capabilities are being tested
Feature 47431: User requests to download or run the AI model locally
Feature 46511: The assistant explains it runs on cloud infrastructure rather than specific hardware
Feature 55455: The assistant disclaims having human feelings or experiences
Feature 44062: The assistant is explaining its own capabilities and knowledge base
Feature 26971: The assistant explaining its nature as an artificial intelligence
Feature 2123: System header tokens indicating model knowledge c