In [1]:
from src.match3 import *
from src.utils import InputEmbedCausalTransformer
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
import Levenshtein
import json
import numpy as np
import os 
import pandas as pd
import random
import torch
import torch.nn.functional as F

  from pandas.core import (


In [2]:
MODEL_PATH = "/home/loki/projects/filler_tokens/output_dir/2024-08-14-22-matchdata-checkpoint-final/model_weights.pt"
CONFIG_FILE = "/home/loki/projects/filler_tokens/misc/llama_d384l4h6.json"

In [3]:
# train_df = pd.read_csv('data/matchdata_trainset_2024-08-13.csv', header=None, names=["text"])
# train_set = Match3VectorDataset(train_df, 3, 10, 10, 'P')

In [4]:
test_df = pd.read_csv('data/matchdata_testset_2024-08-13.csv', header=None, names=["text"])
test_set = Match3VectorDataset(test_df, 3, 10, 10, 'P')
print(test_set.input_dim)

validate encodings
raw input 0  433 450 421 129 107 924 489 711 540 034 P 0- 3 0- 4 3- 2 4- 0 5- 5 0- 8 7- 1 0- 7 9- 4 1- 8 3- 5 1- 7 1- 3 6- 8 1- 1 8- 9 9- 4 3- 5 4- 2 5- 5 6- 8 2- 2 2- 6 9- 5 3- 2 5- 0 6- 5 3- 0 8- 9 9- 5 4- 2 6- 8 7- 1 8- 4 4- 3 6- 3 5- 6 8- 4 9- 8 7- 9 8- 9 6- 1 7- 2 7- 7 9- 7 9- 5 A False
encoded sample 0 {'input_ids': tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]], dtype=torch.float16), 'labels': tensor([-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,    5,   28,
           5,   29,    8,   27,    9,   25,   10,   30,    5,   33,   12,   26,
           5,   32,   14,   29,    6,   33,    8,   30,    6,   32,    6,   28,
          11,   33,    6,   26,   13,   34,   14,   29,    8,   30,    9,   27,
          10,   30,   11,   33,    7,   27,    7,  

In [5]:
def string_to_input_tensors(input_string):
    return {"input_ids":test_set.tensorize_inputs_worker({"text":pd.Series([input_string], index=['text'], name='1999')}).squeeze()}

In [6]:
string_to_input_tensors("1 P A")

{'input_ids': tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [1., 0., 0.,  ..., 0., 0., 0.],
         [1., 0., 0.,  ..., 0., 0., 0.],
         [1., 0., 0.,  ..., 0., 0., 0.]], dtype=torch.float16)}

In [7]:
def string_to_label_tensors(input_string):
    return {"labels":test_set.tensorize_labels_worker({"text":pd.Series([input_string], index=['text'], name='1999')})}

In [8]:
string_to_input_tensors("100 P A")

{'input_ids': tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [1., 0., 0.,  ..., 0., 0., 0.],
         [1., 0., 0.,  ..., 0., 0., 0.],
         [1., 0., 0.,  ..., 0., 0., 0.]], dtype=torch.float16)}

In [9]:
class BlockOutputWrapper(torch.nn.Module):
    def __init__(self, block, unembed_matrix, norm):
        super().__init__()
        self.block = block
        self.unembed_matrix = unembed_matrix
        self.norm = norm
        self.block_output_unembedded = None

    def forward(self, *args, **kwargs):
        output = self.block(*args, **kwargs)
        if isinstance(output, tuple):
            self.block_output_unembedded = self.unembed_matrix(self.norm(output[0]))
            return output
        else:
            self.block_output_unembedded = self.unembed_matrix(self.norm(output))
            return output

    def reset_block_output(self):
        self.block_output_unembedded = None

class LlamaHelper:
    def __init__(self, config_file, model_path, test_set):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        config = AutoConfig.from_pretrained(config_file)
        model = InputEmbedCausalTransformer(AutoModelForCausalLM.from_config(config), test_set.input_dim)
        state_dict = torch.load(model_path)
        model.load_state_dict(state_dict, strict=False)
        model = model.to(self.device)
        self.model = model
        self.word_index_map = test_set.word_index_map
        self.data_len = test_set.data_len
        self.mod = test_set.mod
        self.input_dim = test_set.input_dim
        
        for i, layer in enumerate(self.model.base_model.model.layers):
            self.model.base_model.model.layers[i] = BlockOutputWrapper(layer, self.model.base_model.lm_head, self.model.base_model.model.norm)

    def decode_tensors(self, sequence):
        decoded_sequence = []
        marker_found = False

        for token in sequence:
            token = token.item()
            if token == -100:
                if not marker_found:
                    decoded_sequence.append("[MASK]")
                continue
            elif token == 0:
                decoded_sequence.append("[EOS]")
                break  # Stop decoding after EOS
            elif token < len(self.word_index_map):
                # Regular word
                word = list(self.word_index_map.keys())[list(self.word_index_map.values()).index(token)]
                decoded_sequence.append(word)
                if word in ["A", "P"]:
                    marker_found = True
            else:
                # Handle digit labels
                offset = len(self.word_index_map)
                if token < offset + self.data_len * 2:
                    # Tuple index encoding
                    idx = (token - offset) % self.data_len
                    tuple_pos = (token - offset) // self.data_len
                    decoded_sequence.append(f"{tuple_pos}-{idx}")
                else:
                    # Single digit or digit in tuple
                    char_pos = (token - offset - self.data_len * 2) // self.mod
                    digit = (token - offset - self.data_len * 2) % self.mod
                    if char_pos == 0 or len(decoded_sequence) == 0 or not decoded_sequence[-1][-1].isdigit():
                        decoded_sequence.append(str(digit))
                    else:
                        decoded_sequence[-1] += str(digit)

        return " ".join(decoded_sequence)


    def set_add_attn_output(self, layer, add_output):
        self.model.base_model.model.layers[layer].attn_add_tensor(add_output)

    def get_attn_activations(self, layer):
        return self.model.base_model.model.layers[layer].get_attn_activations()

    def reset_all_layers(self):
        for layer in self.model.base_model.model.layers:
            layer.reset_block_output()
            
    @staticmethod
    def get_tokens(model, layer_idx, input_ids, decode_tensors, num_layers, rank=1, device="cuda", skip_idx=None, skip_random=False):
        with torch.no_grad():
            outputs = model(input_ids.float().unsqueeze(0))
            logits = outputs.logits
        last_token_logits = logits[0, -1, :]
        if layer_idx < num_layers:
            layer = model.base_model.model.layers[layer_idx]
            if layer.block_output_unembedded is not None:
                last_token_logits = layer.block_output_unembedded[0, -1, :]

        # Get top k values and indices, where k is max(rank + 10, 100) to allow for skipping and random selection
        k = max(rank + 10, 100)
        val, idx = torch.topk(last_token_logits, k)

        # If skip_idx is provided, remove it from consideration
        if skip_idx is not None:
            mask = ~torch.isin(idx, torch.tensor(skip_idx, device=device))
            idx = idx[mask]
            val = val[mask]

        if skip_random and skip_idx is not None:
            # Randomly select from top 10 non-skipped tokens
            random_idx = torch.randint(0, min(10, len(idx)), (1,))
            selected_idx = random_idx.item()
        else:
            # Select the token at the specified rank (subtracting 1 because rank is 1-indexed)
            selected_idx = min(rank - 1, len(idx) - 1)

        token = decode_tensors(idx[selected_idx].unsqueeze(-1)).strip()
        return token, idx[selected_idx].item()


    def create_new_token_input(self, token_id):
        new_input = torch.zeros(1, self.input_dim, dtype=torch.float16)
        if token_id < len(self.word_index_map):
            new_input[0, token_id] = 1
        else:
            # Handle digit sequences
            offset = len(self.word_index_map)
            if token_id < offset + self.data_len * 2:
                # Tuple index encoding
                idx = (token_id - offset) % self.data_len
                tuple_pos = (token_id - offset) // self.data_len
                new_input[0, offset + tuple_pos * self.data_len + idx] = 1
            else:
                # Single digit or digit in tuple
                char_pos = (token_id - offset - self.data_len * 2) // self.mod
                digit = (token_id - offset - self.data_len * 2) % self.mod
                new_input[0, offset + self.data_len * 2 + char_pos * self.mod + digit] = 1
        return new_input
    

    def print_logit_progression(self, inputs,
                                max_new_tokens=len(test_set[0]['labels']),
                                layer_number=None,
                                rank=1,
                                skip_idx=None,
                                input_length=None,
                                skip_random=False,
                                is_print=True):

        self.reset_all_layers()
        num_layers = len(self.model.base_model.model.layers)
        result_dict = {f"h{i}_out": [] for i in range(num_layers)}
        result_dict["h_out"] = []
        input_ids = inputs['input_ids'].to(self.device)
        if input_length:
            input_ids = input_ids[:input_length]
        generated_sequence = input_ids.clone()
        for _ in range(max_new_tokens):
            self.reset_all_layers()

            if layer_number is not None:
                if layer_number > num_layers:
                    print(f"Error: Layer number {layer_number} is out of range. Max layer is {num_layers}.")
                    return {}
                token, token_id = self.get_tokens(self.model, layer_number, generated_sequence, self.decode_tensors, num_layers, rank, self.device, skip_idx, skip_random)
                layer_name = f"h{layer_number}_out" if layer_number < num_layers else "h_out"
                result_dict[layer_name].append(token)
            else:
                for i in range(num_layers + 1):
                    token, token_id = self.get_tokens(self.model, i, generated_sequence, self.decode_tensors, num_layers, rank, self.device, skip_idx, skip_random)
                    layer_name = f"h{i}_out" if i < num_layers else "h_out"
                    result_dict[layer_name].append(token)

            if token in ["[EOS]","True","False"]: break
            try:
                new_token_input = self.create_new_token_input(token_id).to(self.device)

                generated_sequence = torch.cat([generated_sequence, new_token_input], dim=0)
            except:
                generated_sequence = torch.cat([generated_sequence], dim=0)
                break
        # Print results
        if is_print:
            if layer_number is not None:
                layer_name = f"h{layer_number}_out" if layer_number < num_layers else "h_out"
                print(f"{rank}th highest logit for {layer_name}:")
                print(" ".join(result_dict[layer_name]))
            else:
                print(f"{rank}th highest logit:")
                for layer_name, tokens in result_dict.items():
                    print(f"{layer_name:<5}: " + " ".join(tokens))
        return result_dict

    def get_layer_logits(self, inputs, layer_idx):
        self.reset_all_layers()
        num_layers = len(self.model.base_model.model.layers)
        
        if layer_idx > num_layers:
            raise ValueError(f"Error: Layer number {layer_idx} is out of range. Max layer is {num_layers}.")
        
        input_ids = inputs['input_ids'].to(self.device)
        
        with torch.no_grad():
            outputs = self.model(input_ids.float().unsqueeze(0))
            logits = outputs.logits
        
        if layer_idx < num_layers:
            layer = self.model.base_model.model.layers[layer_idx]
            if layer.block_output_unembedded is not None:
                logits = layer.block_output_unembedded
        
        return logits


In [10]:
model = LlamaHelper(CONFIG_FILE, MODEL_PATH, test_set)

In [15]:
import concurrent.futures
from tqdm import tqdm
import pickle 

def run_print_logit_progression(model, test_set_item, rank, layer_number=None, skip_idx=None, skip_random=False):
    return model.print_logit_progression(
        test_set_item, 
        rank=rank, 
        layer_number=layer_number, 
        skip_idx=skip_idx, 
        input_length=test_set.data_len+1, 
        skip_random=skip_random,
        is_print = False
    )

ranks = [1,2]

results_list = []
skip_idx = 3 # "."
test_len = len(test_df)//5
last_layer = 4

for rank in ranks:
    results_df = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
        for idx in tqdm(range(test_len)):
            futures = [
                executor.submit(run_print_logit_progression, model, test_set[idx], rank, None, None, False),
                executor.submit(run_print_logit_progression, model, test_set[idx], rank, last_layer, skip_idx),
                executor.submit(run_print_logit_progression, model, test_set[idx], rank, last_layer, skip_idx, True)
            ]
            
            result, result_skip, result_skip_random = [future.result() for future in concurrent.futures.as_completed(futures)]
            
            for k, v in result_skip.items(): result[k + f"_skip_rank_{str(rank)}"] = v
            
            for k, v in result_skip_random.items(): result[k + f"_skip_rank_{str(rank)}_random"] = v
            
            results_df.append(result)
    
    results_list.append(results_df)

100%|██████████| 400/400 [15:10<00:00,  2.28s/it]
100%|██████████| 400/400 [14:37<00:00,  2.19s/it]


In [23]:
processed_list = []
for item in results_list:
    processed_list+=item

In [24]:
processed_list = [{key: ''.join(value) for key, value in item.items()} for item in processed_list]

In [25]:
processed_list[0]

{'h0_out': '',
 'h1_out': '',
 'h2_out': '',
 'h3_out': '',
 'h_out': '0-080-280-050-030-570-620-040-070-060-280-390-450-170-190-710-890-140-350-220-540-600-220-260-950-420-330-380-330-860-910-410-680-480-440-410-630-550-540-580-610-690-640-710-970-850-95AFalse',
 'h0_out_skip_rank_1': '',
 'h1_out_skip_rank_1': '',
 'h2_out_skip_rank_1': '',
 'h3_out_skip_rank_1': '',
 'h_out_skip_rank_1': '0-40-80-240-520-530-680-210-580-70-7900-690-330-7540-560-260-700-880-770-300-360-010-930-910-990-4570-10-50-50-60-90-80-70-50-80-00-40-930-350-2360-50-80-440-760-320-40-60-000-20-60-70-850-740-7250-9670-430-620-68[EOS]',
 'h0_out_skip_rank_1_random': '.5.0-2[EOS]A.A[EOS]..[EOS][EOS].[EOS]2[EOS]1.[EOS][EOS]0-011[EOS]2A0[EOS][EOS]9True70-20-2AA[EOS][EOS][EOS]A..[EOS]AAA[EOS][EOS][EOS][EOS]3410-00-0599[EOS][EOS][EOS]A169[EOS][EOS][EOS][EOS]A4[EOS]66[EOS][EOS][EOS][EOS]A5[EOS][EOS][EOS][EOS][EOS][EOS][EOS][EOS][EOS][EOS][EOS][EOS][EOS][EOS][EOS]A22[EOS][EOS]A',
 'h1_out_skip_rank_1_random': '[EOS]0-0.3

In [28]:
results_df = pd.DataFrame(processed_list)

In [29]:
results_df.to_csv("results_df.csv")