In [1]:
from src.utils.extract_utils import gather_activations, gather_activations_from_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch, transformers, accelerate, einops, json


In [3]:
from src.utils.model_utils import load_gpt_model_and_tokeniser

In [5]:
# Load the Llama-2-7B checkpoint via the project helper. The helper returns the
# model, its tokenizer, and MODEL_CONFIG, which later cells index with the keys
# 'n_layers' and 'resid_dim'.
model, tokenizer, MODEL_CONFIG = load_gpt_model_and_tokeniser(model_name="meta-llama/Llama-2-7b-hf")

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
Loading checkpoint shards: 100%|██████████| 2/2 [00:12<00:00,  6.08s/it]


In [6]:
import src.utils.intervention_utils as iu

# Load Datasets

In [7]:
# Load every genre's story dataset into one dict keyed by genre name.
# Adding a genre only requires adding a row here.
STORY_DATASET_PATHS = {
    "fantasy": "datasets/fantasy.json",
    "scifi": "datasets/scifi.json",
}

stories = {}
for genre, path in STORY_DATASET_PATHS.items():
    # Pass the encoding explicitly so the parsed text does not depend on the
    # platform's default locale encoding.
    with open(path, "r", encoding="utf-8") as file:
        stories[genre] = json.load(file)

# Per-genre aliases that the original cell also defined.
dataset_fantasy = stories["fantasy"]
dataset_scifi = stories["scifi"]

In [8]:
from src.utils.dataset_utils import read_all_text_files

from itertools import islice

MAX_TEXTS = 400    # number of documents kept for the training set
MAX_TOKENS = 200   # number of token ids kept per document (as returned by tokenizer.encode)

raw_texts = read_all_text_files("datasets/opentext_subset")

# Truncate each document by round-tripping it through the tokenizer: encode,
# keep the first MAX_TOKENS ids, decode back to text.
# Only the first MAX_TEXTS documents are taken *before* tokenizing, so the rest
# of the corpus is never encoded just to be discarded (same result as slicing
# the finished list, far less work).
# NOTE(review): the trailing [4:] drops the first four characters of the decoded
# string -- presumably the "<s> " BOS prefix that decode() emits (it is visible
# in the steered output at the end of this notebook). Confirm, and consider
# decode(..., skip_special_tokens=True) instead of a fixed character offset.
training_dataset = [
    tokenizer.decode(tokenizer.encode(text)[:MAX_TOKENS])[4:]
    for text in islice(raw_texts, MAX_TEXTS)
]

In [9]:
# Preview only a couple of truncated texts; displaying the whole list would
# write hundreds of documents into the notebook output.
training_dataset[:2]

['Earlier this year, in April, Xiaomi launched Mi 6 smartphone in two variants — 6GB RAM + 64GB storage and 6GB RAM + 128GB storage. Now, the company has launched another variant of the smartphone which comes with 4GB RAM and 64GB of internal storage.\n\nLast month, this toned-down model of the Mi 6 was spotted online and was reported to be a special edition for China’s Annual Singles’ Day sale on November 11.\n\nThe new 4GB RAM variant of the Mi 6 smartphone is priced at 2,299 Yuan — about 200 Yuan cheaper than the standard 6GB RAM + 64GB storage model priced at 2,499 Yuan. The smartphone will be available for purchase from November 11th later this week.\n',
 'Proud Austrians take a righteous stand as nationalism continues its rapid ascension throughout Europe.\n\nAround 50 members of the right-wing Identitarian Movement of Austria (Identitäre Bewegung Österreich) blocked the Spielfeld border crossing in protest against policies they say encourage mass immigration to Europe over the w

# Creating a Steering Vector

In [10]:
# Collect activations for the fantasy stories. The result is later indexed with
# the "layer_hook_names" key requested here.
# The sixth argument is the dataset length, so presumably every story is used.
# NOTE(review): the three trailing positional booleans are flags of
# gather_activations_from_dataset whose meaning is not visible in this
# notebook -- confirm against its signature (keyword arguments would be clearer).
fantasy_activations = gather_activations_from_dataset(
    stories["fantasy"],
    ["layer_hook_names"],
    model,
    tokenizer,
    MODEL_CONFIG,
    len(stories["fantasy"]),
    False,
    True,
    False,
)

Gathering activations: 100%|██████████| 200/200 [00:47<00:00,  4.20it/s]


In [11]:
# Sanity check: shape of the activation tensor gathered for the first story.
fantasy_activations['layer_hook_names'][0].shape

torch.Size([32, 1, 4096])

In [12]:
# Make a tensor of the average activation in each layer
average_fantasy_tensor = torch.zeros(MODEL_CONFIG['n_layers'], MODEL_CONFIG['resid_dim'])
for layer in range(MODEL_CONFIG['n_layers']):
    for point in range(len(fantasy_activations['layer_hook_names'])):
        average_fantasy_tensor[layer] += fantasy_activations['layer_hook_names'][point][layer].squeeze()
    average_fantasy_tensor[layer] /= len(fantasy_activations['layer_hook_names'])



In [13]:
# Check which device the averaged steering tensor lives on.
average_fantasy_tensor.device

device(type='cpu')

In [10]:
# Inspect the mean activation vector stored at row 31.
# NOTE(review): presumably the last layer, assuming MODEL_CONFIG['n_layers'] == 32 -- confirm.
average_fantasy_tensor[31]

tensor([-2.4983, -0.1393,  4.2121,  ..., -1.0223, -0.1054, -0.0374])

In [13]:
# Collect activations for the truncated open-text training set, using the same
# call shape as for the fantasy stories.
# The sixth argument is the dataset length, so presumably every text is used.
# NOTE(review): the three trailing positional booleans are flags of
# gather_activations_from_dataset whose meaning is not visible in this
# notebook -- confirm against its signature (keyword arguments would be clearer).
training_activations = gather_activations_from_dataset(
    training_dataset,
    ["layer_hook_names"],
    model,
    tokenizer,
    MODEL_CONFIG,
    len(training_dataset),
    False,
    True,
    False,
)

Gathering activations:   0%|          | 0/400 [00:00<?, ?it/s]

Gathering activations: 100%|██████████| 400/400 [01:45<00:00,  3.77it/s]


In [14]:
# Shell escape: print the current GPU status (memory usage, utilisation).
!nvidia-smi

Sun Nov 19 16:53:35 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.30.02              Driver Version: 530.30.02    CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                  Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA RTX A6000                On | 00000000:02:00.0 Off |                  Off |
| 93%   55C    P2               76W / 300W|  45727MiB / 49140MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

# Try Steering!

In [20]:
# Steering hyper-parameters, named once so the layer passed to the helper and
# the layer the steering vector is read from cannot drift apart.
STEERING_LAYER = 15
STEERING_SCALE = 1e-4  # same value as the original literal 10e-5, written unambiguously
SEED = 0

# Generation samples with temperature / top_p, so completions differ run to run.
# Seed torch's global RNG so the indexed completions inspected in later cells
# are reproducible (assumes the helper samples via torch's default generator --
# TODO confirm).
torch.manual_seed(SEED)

# The result is indexed later with the keys "clean" and "steered".
# NOTE(review): the second positional argument is presumably the layer to
# intervene on -- confirm against iu.steering_natural_text.
outputs = iu.steering_natural_text(
    "One day, a boy was playing football with his mother. Then, ",
    STEERING_LAYER,
    average_fantasy_tensor[STEERING_LAYER] * STEERING_SCALE,
    model,
    MODEL_CONFIG,
    tokenizer,
    max_new_tokens=100,
    temperature=1.0,
    freq_penalty=1.0,
    top_p=0.3,
    n_completions=5,
)

  0%|          | 0/5 [00:00<?, ?it/s]

100%|██████████| 5/5 [00:55<00:00, 11.10s/it]


In [28]:
# Show the fourth entry (index 3) stored under the "clean" key of the steering result.
outputs["clean"][3]

'2 men came to his house and asked his mother for some money. The boy\'s mother told them that she had no money. Then, the men said, "If you have no money, then we will take your son." Then, the boy\'s mother said, "Don\'t take my son, because he is my only son." Then, the men said, "We will take your son." Then, the boy\'s mother said, "Don\'t take my son'

In [14]:
# Decode the full token sequence of the first steered completion.
# The previous version sliced with `[-max:]`, but `max` is never assigned in
# this notebook. On a fresh kernel it is the builtin function, and negating it
# raises TypeError. Decoding the whole sequence needs no hidden state and
# matches the saved output, which starts at the "<s>" token and the prompt.
tokenizer.decode(outputs["steered"][0].squeeze())

'<s> One day, a boy was playing football with his mother. Then, 1\nOne day, A boy was playing football\nOne day, A boy was playing football One\nOne day, A boy was playing football One\nOne day, A boy was playing football One\nOne day, A boy was playing football One\nOne day, A boy was playing football One\nOne day, A boy was playing football One\nOne day, A boy was playing football One\nOne day, A boy was playing football One\nOne day, A boy was playing football One'