<a href="https://colab.research.google.com/github/neelsoumya/intro_to_LMMs/blob/main/IIT_consciousness_LLMs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
pip install pyphi


Collecting pyphi
  Downloading pyphi-1.2.0-py3-none-any.whl.metadata (7.8 kB)
Collecting pyemd>=0.3.0 (from pyphi)
  Downloading pyemd-1.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting pymongo>=2.7.1 (from pyphi)
  Downloading pymongo-4.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB)
Collecting redis>=2.10.5 (from pyphi)
  Downloading redis-6.2.0-py3-none-any.whl.metadata (10 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo>=2.7.1->pyphi)
  Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading pyphi-1.2.0-py3-none-any.whl (619 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m619.6/619.6 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyemd-1.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (666 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m666.6/666.6 kB[0m [31m36.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading py

In [4]:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import numpy as np
import pyphi

# 1. Configuration
MODEL_NAME = 'EleutherAI/gpt-neo-125M'
LAYER = 0               # which layer to probe (0-based)
NUM_NODES = 8           # number of units to abstract
STATE_BINS = 2          # binary states
SAMPLES_PER_STATE = 100  # for TPM estimation

# 2. Load model and tokenizer
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, output_hidden_states=True).to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# 3. Select random nodes from the chosen layer
with torch.no_grad():
    # run a dummy forward to know hidden size
    out = model(**tokenizer('Hello', return_tensors='pt').to(device))
    hidden_size = out.hidden_states[LAYER].size(-1)

elected_indices = np.random.choice(hidden_size, size=NUM_NODES, replace=False)
print(f"Probing layer {LAYER}, units: {elected_indices}")

# 4. Activation capture
activations = []
def hook_fn(module, inp, out):
    # out: [batch, seq, hidden]
    # reduce to mean over seq, select our nodes
    vec = out.mean(dim=1)[0, elected_indices].cpu().numpy()
    activations.append(vec)

# attach to the feed-forward submodule
ff = model.transformer.h[0].mlp
hook = ff.register_forward_hook(hook_fn)

# 5. Define prompts
test_prompts = [
    "Hello, how are you?",
    "The capital of France is",
    "Once upon a time",
]

# 6. Gather activations for each prompt to compute thresholds
all_vals = []
for prompt in test_prompts:
    activations.clear()
    inputs = tokenizer(prompt, return_tensors='pt').to(device)
    _ = model(**inputs)
    all_vals.append(np.stack(activations)[0])
all_vals = np.stack(all_vals)
# threshold: mean per node
thresholds = all_vals.mean(axis=0)

# 7. Discretization function
def discretize(vec):
    return (vec > thresholds).astype(int)

# 8. TPM estimation
# For each possible state vector, sample next-state distribution
states = np.array([list(map(int, np.binary_repr(i, width=NUM_NODES)))
                   for i in range(2**NUM_NODES)])
tpm = np.zeros((2**NUM_NODES, 2**NUM_NODES))

for i, s in enumerate(states):
    # prepare dummy hidden by overwriting activations? Simplest: rejection sample
    next_counts = np.zeros(2**NUM_NODES)
    for _ in range(SAMPLES_PER_STATE):
        # craft prompt + bias? here we simply sample model normally
        activations.clear()
        _ = model(**tokenizer("\n", return_tensors='pt').to(device))
        vec = activations[0]
        ds = discretize(vec)
        idx = int(''.join(map(str, ds)), 2)
        next_counts[idx] += 1
    tpm[i] = next_counts / next_counts.sum()

# 9. Compute integrated information Φ
network = pyphi.Network(tpm, num_states=STATE_BINS)
current_state = np.zeros(NUM_NODES, dtype=int)
subsystem = pyphi.Subsystem(network, current_state)
big_phi = pyphi.compute.big_phi(subsystem)
print(f"Φ (big phi) = {big_phi.phi}")

# 10. Cleanup
hook.remove()


ImportError: cannot import name 'Iterable' from 'collections' (/usr/lib/python3.11/collections/__init__.py)