In [1]:
import sys
sys.path.append("../..")

from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import AutoModelForCausalLMWithValueHead
import torch

from core.custom_components.custom_interaction.exp1_dialogue_interaction_model import DialogueInteractionModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# child model load
# Load tokenizer and model from separate repos
child_tokenizer = AutoTokenizer.from_pretrained("Talking-Babies/opt-tokenizer")
child_model = AutoModelForCausalLMWithValueHead.from_pretrained("Talking-Babies/opt-Talking-Babies-train_100M_2048_preprocess")

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
child_model.to(device)

child_generation_args = {
    "max_new_tokens": 100,
    "do_sample": True,
    "top_k": 50,
    "top_p": 0.95,
    "temperature": 0.8,
    "num_return_sequences": 1,
}






In [3]:
# teacher model load
teacher_model = AutoModelForCausalLMWithValueHead.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
teacher_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
teacher_model.to(device)

teacher_generation_args = {
    "max_new_tokens": 100,
    "do_sample": True,
    "top_k": 50,
    "top_p": 0.95,
    "temperature": 0.5,
}



### load to interaction model

In [4]:
test_interaction_model = DialogueInteractionModel(
    child_model=child_model,
    teacher_model=teacher_model,
    child_tokenizer=child_tokenizer,
    teacher_tokenizer=teacher_tokenizer,
    student_generation_args=child_generation_args,
    teacher_generation_args=teacher_generation_args,
)

In [5]:
test_text = "B:: And nothing is being done about it. Uh, the laws exist and are frequently upheld in, in, uh, in Appeals Court just because of technicalities and because of maybe small little holes that their defending attorney can find. And it's, it's really getting out of hand in many states.\nA:: Well, the term technicality . The law enforcement community, uh, uh, you know, has to, has to separate the difference between somebody who is being set up in which, uh, grievous acts are done to, uh, to, you know, to get somebody into a, a situation where they're going to be guilty of, of a crime. Or whether, uh, and whether the rights of that individual are been, have been, you know, impuned. Uh, but or whether there's just, you know, a policeman has just made a, uh, a, you know, a non, a noncritical error, though be it not the right way to do it but, but, you know, the, the merits of the case in terms of, you know, the guy was a law breaker, as being supportive. Now, I, I'm, at this juncture I, you know, I'm, I'm not sure, you know, what constitutes a, a technicality. You know, that, that's what all these, these hearings are about and that's what all these, you know, court cases are about. I mean our, uh, our, our glorious, uh, you know, mayor here in Washington is six days away from getting out of, out of the can and, uh, you know, he, he tried to appeal his conviction. Uh, and, you know, it didn't work. But be that as it may, everybody who got enough money will pump the appeal process dry. Uh, in, in the old days, you know, and say round about times of battle of Hastings, you know, and the villages if you were a transgressor, they, they either, you know, drove you out in the woods or you became a ward of somebody and he, you were his slave. And if he didn't like what you did, he killed you. And that has, that's pretty effective. Uh, you know, it's not good for civil rights, I guess, but it's pretty effective in that, you know, you've got to get along in the community and if you don't you'll perish. Either by the hand of your, your, your master or by being pushed out in the woods. So, I, I, I mean as, as man has gotten more complicated so all of the, uh, imaginations to, uh, you know, protect him from, from being, uh, dumped on by, uh, civilian authority in, in in criminal actions, especially, you know, murder cases and that sort of thing.\nB:: Well, it seems like well it, it seems as if in the past typically there have been a lot of cases of people being wrongly tried or wrongly punished, and the whole idea behind the current criminal process system is to protect those who actually didn't the crimes, albeit it seems that we are failing in that, in that ultimate goal because there are times when people who are guilty are getting off. Um, for instance, um there's a case a few years back where, uh, someone, uh, someone who's being convicted for, was under a was going to trial for murder, was let off because of a technicality in that. The the arresting officer, uh, did not read the defendant their rights.\nA:: Uh-huh.\nB:: And where his, old evidence was there, the witnesses were there, the, everything was conclusively pointing to this individual yet"
test_prompt = test_interaction_model._format_teacher_prompt(partial_dialogue=test_text, child_continuation="I dont know")

In [6]:
teacher_prompt = test_prompt
teacher_inputs = test_interaction_model.encode_text(teacher_prompt, is_child=False)
teacher_outputs = test_interaction_model.teacher_model.generate(
    teacher_inputs.unsqueeze(0),
    generation_config=test_interaction_model.teacher_generation_config
)
teacher_response = teacher_outputs[0]

# Decode teacher response and extract completion
full_teacher_response = test_interaction_model.decode_tokens(teacher_response, is_child=False)
teacher_completion = test_interaction_model._extract_response_content(full_teacher_response, teacher_prompt)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


In [7]:
full_teacher_response == teacher_completion

True

In [8]:
teacher_completion

"system\nYou are an expert dialogue completion assistant. \n        You will be given a partial dialogue and a child model's continuation attempt. \n        Your task is to generate a high-quality completion of the partial dialogue.\n        Take reference from the child continuation, make a continuation that is of similar length and topic but more coherent, fluent, grammatically correct and more contextually appropriate.\n        If the child's continuation is gibberish, you should generate a completion that is coherent and contextually appropriate.\n        Your completion should be one round conversation as A or B in the dialogue, dont generate multiple rounds of conversation.\n        You should only provide your own completion without any added commentary or feedback.\n        user\nOriginal partial dialogue:\nB:: And nothing is being done about it. Uh, the laws exist and are frequently upheld in, in, uh, in Appeals Court just because of technicalities and because of maybe small l

In [9]:
print(test_text)

B:: And nothing is being done about it. Uh, the laws exist and are frequently upheld in, in, uh, in Appeals Court just because of technicalities and because of maybe small little holes that their defending attorney can find. And it's, it's really getting out of hand in many states.
A:: Well, the term technicality . The law enforcement community, uh, uh, you know, has to, has to separate the difference between somebody who is being set up in which, uh, grievous acts are done to, uh, to, you know, to get somebody into a, a situation where they're going to be guilty of, of a crime. Or whether, uh, and whether the rights of that individual are been, have been, you know, impuned. Uh, but or whether there's just, you know, a policeman has just made a, uh, a, you know, a non, a noncritical error, though be it not the right way to do it but, but, you know, the, the merits of the case in terms of, you know, the guy was a law breaker, as being supportive. Now, I, I'm, at this juncture I, you kno

### test interact function

In [10]:
result1 = test_interaction_model.interact(test_text)

`generation_config` default values have been modified to match model-specific defaults: {'pad_token_id': 1, 'bos_token_id': 2, 'eos_token_id': 2}. If this is not desired, please set these values explicitly.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


### test reward

In [11]:
from core.custom_components.custom_reward_model.taaco_reward_model import TAACORewardModel

sampleVars = {"sourceKeyOverlap" : False, "sourceLSA" : False, "sourceLDA" : False, "sourceWord2vec" : False, "wordsAll" : True, "wordsContent" : True, "wordsFunction" : True, "wordsNoun" : True, "wordsPronoun" : True, "wordsArgument" : True, "wordsVerb" : True, "wordsAdjective" : True, "wordsAdverb" : True, "overlapSentence" : True, "overlapParagraph" : True, "overlapAdjacent" : True, "overlapAdjacent2" : True, "otherTTR" : True, "otherConnectives" : True, "otherGivenness" : True, "overlapLSA" : True, "overlapLDA" : True, "overlapWord2vec" : True, "overlapSynonym" : True, "overlapNgrams" : True, "outputTagged" : False, "outputDiagnostic" : False}
taaco_reward_model = TAACORewardModel(taaco_vars=sampleVars)




Loading Spacy
Loading Spacy Model
Loading LSA vector space...
Loading LDA vector space...
Loading word2vec vector space...
TAACO processor initialized successfully


In [None]:
result1

{'child_query': tensor([    1,    35,    27,    27,  1159,  5339,   304,  1105,  2640,   550,
           392,    15,   406,    73,    13,   263,  3806,  3351,   291,   385,
          9834,   644,   259,   400,   284,    13,   284,    13,   222,  8312,
            13,   284,  3162,    70,   690,  3424,  1074,   635,   283, 11152,
           785,   291,   635,   283,  7578,  1147,  1531,  7345,   374,   548,
         15744, 13213,   494,   979,    15,  1159,   392,   417,    13,   392,
           417,  1589,  2809,   607,   283,  2205,   284,   724,  2650,    15,
           200,    34,    27,    27,  5687,    13,   263,  1839, 11152,   422,
          1195,   318,  1481, 10543,  7717,  2692,    13,   222,  8312,    13,
           222,  8312,    13,   524,  1152,    13,   488,   293,    13,   488,
           293,  5222,   263,  5007,  1022, 13349,   567,   304,  1105,  1136,
           644,   284,   576,    13,   222,  8312,    13,  1033,  3393,   547,
          6874,   385,  2640,   293, 

In [13]:
taaco_reward_model.compute_rewards(
    child_queries=[result1['partial_dialogue']],
    child_responses=[result1['partial_dialogue'] + result1['child_continuation']],
    teacher_queries=[result1['partial_dialogue']],
    teacher_responses=[result1['partial_dialogue'] + result1['teacher_completion']]
)

  return(np.sum([v for v in A * np.log2(A/B) if not np.isnan(v)]))
  return(np.sum([v for v in A * np.log2(A/B) if not np.isnan(v)]))
  return(np.sum([v for v in A * np.log2(A/B) if not np.isnan(v)]))


[tensor(-0.0040, device='cuda:0')]