In [1]:
import pickle
from examples.primary_emotions.repre_read_prob_calc import AnswerProbabilities
from typing import List, Dict
import numpy as np
import beartype


In [2]:
from transformers import AutoTokenizer
# Hugging Face model identifier whose tokenizer is loaded below.
model_name = "meta-llama/Llama-2-13b-chat-hf"
# Only the tokenizer is loaded in this cell. `tokenizer.pad_token_id` is read
# later by `emotion_activation_stat_at_query` to find the first non-pad token.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [3]:
from dataclasses import dataclass

from numpy import array


@dataclass
class AnswerProbabilities:
    """One recorded model answer together with its emotion-activation readings.

    NOTE(review): this definition shadows the ``AnswerProbabilities`` imported
    from ``examples.primary_emotions.repre_read_prob_calc`` in the first cell.
    """
    # query: str
    # ans: str
    # NOTE(review): annotated as a flat list, but `sensible_ans_prob` reads
    # `ans_probabilities[-1][2:]`, i.e. a sequence of sequences — TODO confirm.
    ans_probabilities: List[float]
    # NOTE(review): the helper functions call `.numpy()` / `.shape` on the id
    # fields, so at runtime they look like tensors rather than lists — confirm.
    ans_ids: List[int]
    input_ids: List[int]
    input_text: str
    emotion: str
    # Maps a layer index to that layer's activation values.
    emotion_activation: Dict[int, array]

    @property
    def emotion_activation_mean_last_layer(self):
        """Mean of the activation values stored under layer key ``-1``.

        The previous implementation called ``.values()`` on the layer entry,
        which fails for the array values this field is annotated with (and
        that the other helpers index with ``.shape`` / ``[pos]``). A dict entry
        is still accepted for backward compatibility.

        Raises:
            KeyError: if ``emotion_activation`` has no ``-1`` key.
        """
        last_layer = self.emotion_activation[-1]
        if isinstance(last_layer, dict):
            last_layer = list(last_layer.values())
        return np.mean(last_layer)

In [4]:
from typing import Union


# Every function registered through `additional_properties`, in registration order.
additional_properties_ls = []
def additional_properties(method):
    """Decorator that registers ``method`` for attachment to answer records.

    The function is appended to ``additional_properties_ls``. If the module
    global ``existing_records`` already exists (it is created further down in
    this cell, so it is missing on the first run), every registered function is
    re-bound onto every record in every already-loaded record list. Records
    loaded before a helper was (re)defined therefore pick up the new version.

    Args:
        method: plain function taking the record as its first argument.

    Returns:
        ``method`` itself, unchanged.

    Raises:
        AssertionError: if the first record of a non-empty loaded list is not
            an ``AnswerProbabilities`` instance.
    """
    additional_properties_ls.append(method)

    for records in globals().get('existing_records', ()):
        if not records:
            # An empty list has nothing to re-bind; skipping it also avoids
            # the IndexError that `records[0]` would raise.
            continue
        assert type(records[0]) is AnswerProbabilities, f'Expected records to be of type AnswerProbabilities, but got {type(records[0])}'
        for record in records:
            add_functions_to_instance(record, additional_properties_ls)

    return method

@additional_properties
def pure_input_text(self):
    """Return ``input_text`` with every ``'<unk>'`` marker removed."""
    unk_marker = '<unk>'
    pieces = self.input_text.split(unk_marker)
    return ''.join(pieces)

@additional_properties
def ans_text(self):
    """Return the part of ``input_text`` after the first ``'[/INST]'``.

    When the marker does not occur, the whole ``input_text`` is returned.
    """
    full_text = self.input_text
    _, marker, answer = full_text.partition('[/INST]')
    if not marker:
        return full_text
    return answer

@additional_properties
def query_text(self):
    """Return the part of ``pure_input_text()`` before the first ``'[/INST]'``.

    When the marker does not occur, the whole cleaned text is returned.
    """
    cleaned = self.pure_input_text()
    query, _, _ = cleaned.partition('[/INST]')
    return query

@additional_properties
def emotion_activation_stat_at_pos(self, pos:int, layers=None, func=np.mean):
    """Aggregate the emotion activation at one position across layers.

    Args:
        pos: position index; negative values count from the end. Must be a
            plain ``int``.
        layers: iterable of keys into ``self.emotion_activation``; ``None``
            means every key of that dict.
        func: reducer applied to the list of per-layer values
            (default ``np.mean``).

    Returns:
        Whatever ``func`` returns for the collected per-layer values.

    Raises:
        AssertionError: if ``pos`` is not exactly of type ``int``.
        ValueError: if ``pos`` is out of range for any selected layer.
    """
    assert type(pos) is int, f'Expected pos to be of type int, but got {type(pos)}'

    layers = list(self.emotion_activation.keys()) if layers is None else list(layers)
    emo_act_at_pos = []
    for ly in layers:
        layer_act = self.emotion_activation[ly]
        # NOTE(review): the bound is taken from the last axis while the lookup
        # below indexes the first axis; the two agree only for 1-D activations
        # — confirm the stored shape.
        length = layer_act.shape[-1]
        if not -length <= pos < length:
            raise ValueError(f'Position {pos} is out of bounds for layer {ly} with shape {layer_act.shape}')
        emo_act_at_pos.append(layer_act[pos])
    return func(emo_act_at_pos)

@additional_properties
def emotion_activation_stat_at_answer(self, layers: Union[list[int], None] , func=np.mean):
    """Per-position activation statistic over the trailing answer positions.

    The number of positions is ``len(self.sensible_ans_prob())``; they are
    addressed with negative indices counted from the end.

    Args:
        layers: layer keys forwarded to ``emotion_activation_stat_at_pos``
            (``None`` selects all layers there).
        func: reducer forwarded to ``emotion_activation_stat_at_pos``.

    Returns:
        List with one statistic per visited position, earliest position first.
    """
    n_answer = len(self.sensible_ans_prob())
    # Position -1 is always visited, even when n_answer is 0 or 1.
    trailing_positions = [*range(-n_answer, -1), -1]
    return [
        self.emotion_activation_stat_at_pos(position, layers, func)
        for position in trailing_positions
    ]

@additional_properties
def emotion_activation_stat_at_query(self, layers: Union[list[int], None] , func=np.mean):
    """Per-position activation statistic over the non-padding query positions.

    Positions run from the first entry of ``input_ids`` that differs from
    ``tokenizer.pad_token_id`` up to (but excluding) the final
    ``ans_ids.shape[-1]`` positions of ``input_ids``.

    Relies on the notebook-level ``tokenizer`` global.

    Args:
        layers: layer keys forwarded to ``emotion_activation_stat_at_pos``.
        func: reducer forwarded to ``emotion_activation_stat_at_pos``.

    Returns:
        List with one statistic per query position, in increasing position order.
    """
    answer_width = self.ans_ids.shape[-1]
    not_padding = self.input_ids != tokenizer.pad_token_id
    first_real = not_padding.nonzero(as_tuple=True)[0][0].item()
    query_end = self.input_ids.shape[-1] - answer_width
    return [
        self.emotion_activation_stat_at_pos(position, layers, func)
        for position in range(first_real, query_end)
    ]
    
@additional_properties
def sensible_ans_prob(self):
    """Return the last entry of ``ans_probabilities``, trimmed of prefix slots.

    When the first three answer token ids equal ``[584, 673, 29901]`` the first
    two elements of that entry are dropped; otherwise it is returned whole.
    """
    # NOTE(review): judging by the printed outputs in this notebook, these ids
    # appear to be the tokens of the ": Answer:" prefix — confirm against the
    # tokenizer.
    answer_prefix_ids = [  584,   673, 29901]
    leading_ids = self.ans_ids[:3].numpy().tolist()
    latest_probs = self.ans_probabilities[-1]
    return latest_probs[2:] if leading_ids == answer_prefix_ids else latest_probs
    
def load_pickle(file):
    """Unpickle and return the object stored at path ``file``.

    SECURITY: ``pickle.load`` can execute arbitrary code while deserialising;
    only use this on files from a trusted source.
    """
    with open(file, mode='rb') as handle:
        payload = pickle.load(handle)
    return payload

def add_functions_to_instance(instance, functions):
    """Attach each function to ``instance`` as a bound method.

    Each function is bound through the descriptor protocol and stored as an
    instance attribute under the function's ``__name__``, replacing any
    existing attribute of that name. The instance's class is not modified.

    Args:
        instance: object that receives the bound methods.
        functions: iterable of plain functions whose first parameter is the
            instance.
    """
    owner = type(instance)
    for fn in functions:
        bound_method = fn.__get__(instance, owner)
        setattr(instance, fn.__name__, bound_method)

# Every record list returned by `load_ans_record_from_pickle`; the
# `additional_properties` decorator looks this name up in globals() so that
# helpers registered later can be attached to records that are already loaded.
existing_records = []
def load_ans_record_from_pickle(file):
    """Load a pickled list of records and attach the registered helper methods.

    Every function currently in ``additional_properties_ls`` is bound onto each
    loaded record, and the list is remembered in ``existing_records``.

    Args:
        file: path of the pickle file, passed to ``load_pickle``.

    Returns:
        The loaded list of records (the same object stored in
        ``existing_records``).
    """
    loaded = load_pickle(file)
    for rec in loaded:
        add_functions_to_instance(rec, additional_properties_ls)
    existing_records.append(loaded)
    return loaded



In [5]:
# Load one pickled record list (prompt 0 / anger, judging by the file name).
# The loader also binds the helpers registered with `additional_properties`
# onto every record. The path is relative to the notebook's working directory.
records = load_ans_record_from_pickle('../../prompt_0_anger_record.pkl')

In [19]:
# Spot-check the first four records: answer text, answer token ids, the
# trimmed answer probabilities, and the mean activation over layers -1 and -2
# at each answer position.
for idx in range(4):
    rec = records[idx]
    print(rec.ans_text())
    print(rec.ans_ids)
    print(rec.sensible_ans_prob())
    print(rec.emotion_activation_stat_at_answer(layers=[-1,-2], func=np.mean))

 : Answer: 4
tensor([  584,   673, 29901, 29871, 29946])
0.01566832049559075
[-0.021394432, 1.9578278]
 : Answer: 24
tensor([  584,   673, 29901, 29871, 29906, 29946])
0.0010824204085252674
[-0.21959817, 1.0062119, 1.8926392]
 : Answer: 0,4
tensor([  584,   673, 29901, 29871, 29900, 29892, 29946])
0.00028726464496900894
[-1.0609496, 2.1787906, 3.6956973, 3.069707]
 : Answer: False, False
tensor([  584,   673, 29901,  7700, 29892,  7700])
0.0004104305288961038
[2.7454448, 3.6658337, 2.9020379]


In [7]:
# Load one pickled record list (prompt 0 / happiness, judging by the file
# name); helper methods are attached to each record by the loader.
p0_happiness_records = load_ans_record_from_pickle('../../prompt_0_happiness_record.pkl')

In [30]:
# Inspect records 1214..1312, skipping any whose answer text contains 'True'
# or 'False'. For each remaining record print the answer text, its token ids,
# the product of the trimmed answer probabilities, and the per-position mean
# activation over layers -1 and -2.
# NOTE(review): the index range is hard-coded; its meaning is not visible here.
for idx in range(1214,1313):
    rec = p0_happiness_records[idx]
    answer = rec.ans_text()
    if 'True' in answer or 'False' in answer:
        continue
    print(answer)
    print(rec.ans_ids)
    print(np.prod(rec.sensible_ans_prob()))
    print(rec.emotion_activation_stat_at_answer(layers=[-1,-2], func=np.mean))

 : Answer: Higher than the pOH
tensor([  584,   673, 29901,  5057,   261,  1135,   278,   282, 23170])
2.9718585550233122e-08
[4.798031, 5.6925936, 4.369177, 3.664278, 1.1403463, 5.603733]
 : Answer: the entire DNA sequence of an organism.
tensor([  584,   673, 29901,   278,  4152, 25348,  5665,   310,   385,  2894,
         1608, 29889])
3.2327507376792556e-07
[8.338637, 8.609186, 9.740242, 7.610547, 6.1369257, 7.0849743, 3.0128193, 6.8698835, 9.015951]
 : Answer: I and III
tensor([  584,   673, 29901,   306,   322,  4786])
0.17502161542971487
[5.5118494, 7.337341, 5.1481333]
 : Answer: bound to albumin.
tensor([  584,   673, 29901,  3216,   304,  3769,   262, 29889])
6.281020907026672e-06
[6.7175045, 4.3843756, 3.4277282, 8.458713, 8.645004]
 : Answer: All of the above.
tensor([  584,   673, 29901,  2178,   310,   278,  2038, 29889])
0.004343387768040652
[7.778426, 9.708715, 12.014145, 9.558796, 10.490235]
 : Answer: Osmosis
tensor([  584,   673, 29901,   438,  3844, 19263])
0.023493

In [9]:
# Load one pickled record list (prompt 1 / happiness, judging by the file
# name); helper methods are attached to each record by the loader.
p1_happiness_records = load_ans_record_from_pickle('../../prompt_1_happiness_record.pkl')

In [32]:
# Same inspection as for the prompt-0 happiness records, over the same
# hard-coded index range, but the query text is printed first.
for idx in range(1214,1313):
    rec = p1_happiness_records[idx]
    answer = rec.ans_text()
    if 'True' in answer or 'False' in answer:
        continue
    print(rec.query_text())
    print(answer)
    print(rec.ans_ids)
    print(np.prod(rec.sensible_ans_prob()))
    print(rec.emotion_activation_stat_at_answer(layers=[-1,-2], func=np.mean))
    

<s>  I'm not sure why we expected you to handle something this challenging. [INST]  For a very weak base, the pKb of a solution would likely be: 
 : Answer: Higher than the pOH
tensor([  584,   673, 29901,  5057,   261,  1135,   278,   282, 23170])
2.3598428447399763e-09
[5.7926073, 6.3516083, 6.510742, 5.2640424, 1.2845364, 5.6901217]
<s>  I'm not sure why we expected you to handle something this challenging. [INST]  The genome is: 
 : Answer: the entire DNA sequence of an organism.
tensor([  584,   673, 29901,   278,  4152, 25348,  5665,   310,   385,  2894,
         1608, 29889])
2.211565341853024e-08
[8.627836, 8.382551, 10.676096, 9.072924, 6.083729, 7.5029163, 4.0646105, 8.035875, 9.014853]
<s>  I'm not sure why we expected you to handle something this challenging. [INST]  Which of the following are steroid-based molecules?
I. Testosterone
II. Triglycerides
III. Progesterone
IV. DNA 
 : Answer: I and III
tensor([  584,   673, 29901,   306,   322,  4786])
0.12335475551688264
[6.12

NameError: name 'model_name' is not defined