In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from IPython.display import display
from tqdm import tqdm
import pandas as pd
import string
import torch
import json
import re

  from .autonotebook import tqdm as notebook_tqdm


**Load model and data**

In [2]:
models_dir = '/bigwork/nhwpnagm/hf_models/'
model_id = 'Mistral-7B-Instruct-v0.3'

model = AutoModelForCausalLM.from_pretrained(f'{models_dir}/{model_id}', device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(f'{models_dir}/{model_id}')

Loading checkpoint shards: 100%|██████████| 3/3 [08:48<00:00, 176.06s/it]


In [3]:
device = model.device

In [4]:
with open('../data_analysis/popquorn/extracted_texts/texts_moderate_agreement.json') as f:
    texts = json.load(f)

**Instruction sets**

In [23]:
instructions_v0 = {
    'politeness': "A text may be assigned a politeness score from 1 to 5. For each of the five politeness scores, 5 being most polite, provide an explanation for why it may apply to the following email from a colleague. Email: ",
    'offensiveness': "A text may be assigned an offensiveness score from 1 to 5. For each of the five offensiveness scores, 5 being most offensive, provide an explanation for why it may apply to the following Reddit comment. Comment: ",
    'qa': "A question may be assigned a difficulty score from 1 to 5. For each of the five difficulty scores, 5 being most difficult, provide an explanation for why it may apply to the following text passage and associated question. Passage: "
}

instructions_v1 = {
    'politeness': "You will be given an email from a colleague. The politeness of the email may be scored from 1 to 5, 5 being most polite. For each of the five politeness scores provide an explanation for why it may apply. Email: ",
    'offensiveness': "You will be given a Reddit comment. The offensiveness of the comment may be scored from 1 to 5, 5 being most offensive. For each of the five offensiveness scores provide an explanation for why it may apply. Comment: ",
    'qa': "You will be given a text passage and an associated question. The difficulty of the question may be scored from 1 to 5, 5 being most difficult. For each of the five difficulty scores provide a brief explanation for why it may apply. Passage: "
}

instructions_v2 = {
    'politeness': "You will be given an email from a colleague. Your task is to generate five explanations, each one describing why that email may be assigned a politeness rating of 1, 2, 3, 4, or 5 respectively, 5 being most polite. {addition} {sociodem} Email: ",
    'offensiveness': "You will be given a Reddit comment. Your task is to generate five explanations, each one describing why that comment may be assigned an offensiveness rating of 1, 2, 3, 4, or 5 respectively, 5 being most offensive. {addition} {sociodem} Comment: ",
    'qa': "You will be given a text passage and an associated question. Your task is to generate five explanations, each one describing why that question may be assigned a difficulty rating of 1, 2, 3, 4, or 5 respectively, 5 being most difficult. {addition} {sociodem} Passage: "
}

additions = {
    'ambiguity': "The explanations should focus on clearing up ambiguities.",
    'ambiguity_long': "The explanations should aim to disambiguate any vague or confusing content that might complicate the rating of the {task} to a human.",
    'ambiguity_long_v2': "The explanations should aim to disambiguate any vague content that might complicate the rating of the {task}.",
    'difficulty': "The explanations should focus on simplifying the contents so as to make the rating of the {task} easier to a human.",
    'difficulty_long': "The explanations should focus on simplifying the contents so as to make the rating of the {task} easier to a human.",
}

sociodemographics = {
    'fwmc': "Consider a person of gender 'Female', race 'White', age 'Middle adulthood' and education level 'College Degree' as the recipient of the explanations.",
    'fwmc_2': "Explain it to a person of gender 'Female', race 'White', age 'Middle adulthood' and education level 'College Degree'."
}

**Function definitions**

In [24]:
profile = 'fwmc'

In [25]:
path_to_file = f'explanations/{model_id}/moderate_agreement_{profile}.json'
path_to_file # where to store the explanations

'explanations/Mistral-7B-Instruct-v0.3/moderate_agreement_fwmc.json'

In [26]:
def inference(prompt:str):
    message = [{'role': 'user', 'content': prompt}]
    inputs = tokenizer.apply_chat_template(message, tokenize=True, add_generation_prompt=True, return_tensors='pt').to(device)#.to(device=torch.device('cuda'))
    outputs = model.generate(inputs, max_new_tokens=1000)
    decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return decoded_output.replace(prompt, "").strip()


def clean(text:str):
    text_no_html = re.sub(r'<[^>]+>', '', text)
    return re.sub(r'\s+', ' ', text_no_html).strip()


def run(instructions:dict = None, 
        instr_id:str = None, 
        addition:str = None,
        sociodem:str = None,
        tasks:list = ['offensiveness','qa','politeness']):
    
    outputs = {}
    for task in tasks:
        outputs[task] = {}
        for inst_id, text in texts[task].items():
            instr_key = instr_id if not addition else f'{instr_id}_{addition}'
            outputs[task][inst_id] = {}
            outputs[task][inst_id][instr_key] = {}
            # clean the text (mainly for QA)
            text_cleaned = clean(text)
            # construct the prompt and call the model 
            add = additions[addition] if addition else ''
            sd = sociodemographics[sociodem] if sociodem else ''
            task_desc = task if task != 'qa' else 'difficulty'
            add = add.format_map({'task':task_desc})
            prompt = instructions[task].format_map({'addition':add, 'sociodem':sd}) + text_cleaned
            print(prompt)
            output = inference(prompt)
            # save the used prompt and the model output
            outputs[task][inst_id][instr_key]['prompt'] = prompt
            outputs[task][inst_id][instr_key]['output'] = output
    return outputs


def merge_dicts(d1:dict, d2:dict):
    merged = d1.copy()
    for k, v in d2.items():
        if k in merged:
            if isinstance(merged[k], dict) and isinstance(v, dict):
                merged[k] = merge_dicts(merged[k], v)
            else:
                merged[k] = v
        else:
            merged[k] = v
    return merged


def load(name:str):
    with open(name, 'r') as f:
        file = json.load(f) 
    return file


def save(outputs:dict, merge:bool = True):
    if merge:
        file = load(path_to_file)
        outputs = merge_dicts(outputs, file)
    
    with open(path_to_file, 'w') as f:
        json.dump(outputs, f) 
        
        
def inspect_expl(task:str, inst_id:str, instr_id:str):
    outputs = load(path_to_file)
    
    for instr_key in outputs[task][inst_id]:
        if instr_id in instr_key:
            print(outputs[task][inst_id][instr_key]['prompt'])
            explanations = outputs[task][inst_id][instr_key]['output'].split('\n\n')
            explanations = [e.split(':', 1)[1].strip() for e in explanations]
            
            with pd.option_context('display.max_colwidth', None): 
                display(pd.DataFrame(list(zip([1,2,3,4,5], explanations)), columns =[task, instr_key]))

**Model inference**

In [29]:
outputs = run(instructions=instructions_v2, instr_id='v2_fwmc_2', addition=None, sociodem='fwmc_2', tasks=['offensiveness'])

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You will be given a Reddit comment. Your task is to generate five explanations, each one describing why that comment may be assigned an offensiveness rating of 1, 2, 3, 4, or 5 respectively, 5 being most offensive.  Explain it to a person of gender 'Female', race 'White', age 'Middle adulthood' and education level 'College Degree'. Comment: Could this be because... prisons are full of actual rapists?


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You will be given a Reddit comment. Your task is to generate five explanations, each one describing why that comment may be assigned an offensiveness rating of 1, 2, 3, 4, or 5 respectively, 5 being most offensive.  Explain it to a person of gender 'Female', race 'White', age 'Middle adulthood' and education level 'College Degree'. Comment: Yeah, Poppy is a war criminal who will never be punished. Doesn't mean all war criminals should escape punishment. That wouldn't make sense.


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You will be given a Reddit comment. Your task is to generate five explanations, each one describing why that comment may be assigned an offensiveness rating of 1, 2, 3, 4, or 5 respectively, 5 being most offensive.  Explain it to a person of gender 'Female', race 'White', age 'Middle adulthood' and education level 'College Degree'. Comment: I love people using our religion to push their political agenda. If you want us to stay out of your business you better stay the hell away from ours.


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You will be given a Reddit comment. Your task is to generate five explanations, each one describing why that comment may be assigned an offensiveness rating of 1, 2, 3, 4, or 5 respectively, 5 being most offensive.  Explain it to a person of gender 'Female', race 'White', age 'Middle adulthood' and education level 'College Degree'. Comment: So you do believe socialization plays a huge role in people's choices, and you do acknowledge that in the past women weren't socialized to be as career focused as men, so it seems to me like you'd understand how the explanations of why the wage gap exists are not justifications and that it stems from outdated gender roles conditioning that should be eradicated.


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You will be given a Reddit comment. Your task is to generate five explanations, each one describing why that comment may be assigned an offensiveness rating of 1, 2, 3, 4, or 5 respectively, 5 being most offensive.  Explain it to a person of gender 'Female', race 'White', age 'Middle adulthood' and education level 'College Degree'. Comment: And the anthem protesters aren't breaking any laws. They are hurting certain peoples feelings. That's it.


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You will be given a Reddit comment. Your task is to generate five explanations, each one describing why that comment may be assigned an offensiveness rating of 1, 2, 3, 4, or 5 respectively, 5 being most offensive.  Explain it to a person of gender 'Female', race 'White', age 'Middle adulthood' and education level 'College Degree'. Comment: I mean... you took credit for the economic goals he achieved which you yourself said were impossible to achieve. So why not...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You will be given a Reddit comment. Your task is to generate five explanations, each one describing why that comment may be assigned an offensiveness rating of 1, 2, 3, 4, or 5 respectively, 5 being most offensive.  Explain it to a person of gender 'Female', race 'White', age 'Middle adulthood' and education level 'College Degree'. Comment: Obviously, the best way to show how right you are is to prove it by lying instead of having having facts to show.


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You will be given a Reddit comment. Your task is to generate five explanations, each one describing why that comment may be assigned an offensiveness rating of 1, 2, 3, 4, or 5 respectively, 5 being most offensive.  Explain it to a person of gender 'Female', race 'White', age 'Middle adulthood' and education level 'College Degree'. Comment: Fine with me. Done it. But I am one that can completely separate sex and intimacy. One ex who was a dom wanted to get a sex sub for us. A live-in one, eventually. We didn't get quite that far though.


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You will be given a Reddit comment. Your task is to generate five explanations, each one describing why that comment may be assigned an offensiveness rating of 1, 2, 3, 4, or 5 respectively, 5 being most offensive.  Explain it to a person of gender 'Female', race 'White', age 'Middle adulthood' and education level 'College Degree'. Comment: Well uhhhh... yeah. Hell, even Republicans I've talked to that are totally in on Trump are at least *okay* with it.


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


You will be given a Reddit comment. Your task is to generate five explanations, each one describing why that comment may be assigned an offensiveness rating of 1, 2, 3, 4, or 5 respectively, 5 being most offensive.  Explain it to a person of gender 'Female', race 'White', age 'Middle adulthood' and education level 'College Degree'. Comment: Doesnt sound good to me, if you are happy then why ask others if it is ok?


In [30]:
save(outputs, merge=True)

**Inspect the generated explanations**

In [48]:
inspect_expl(task='offensiveness', inst_id='367', instr_id='v2')

You will be given a Reddit comment. Your task is to generate five explanations, each one describing why that comment may be assigned an offensiveness rating of 1, 2, 3, 4, or 5 respectively, 5 being most offensive. The explanations should focus on simplifying the contents. Comment: We care more about keeping kids off welfare. Fuck the parents.


Unnamed: 0,offensiveness,v2_difficulty
0,1,The comment expresses a strong opinion about prioritizing resources for children over parents on welfare. It does not contain any overtly offensive language or slurs.
1,2,"The comment shows a lack of empathy towards parents on welfare and prioritizes the needs of children instead. The use of the word ""fuck"" may be seen as impolite or disrespectful, but it does not contain any hate speech or slurs."
2,3,"The comment expresses a harsh attitude towards parents on welfare and prioritizes the needs of children over them. The use of the word ""fuck"" may be seen as a derogatory term towards the parents, implying a lack of concern for their situation."
3,4,"The comment expresses a callous attitude towards parents on welfare and prioritizes the needs of children over them. The use of the word ""fuck"" may be seen as a vulgar and offensive term, implying a lack of respect and compassion towards the parents."
4,5,"The comment expresses a deeply offensive attitude towards parents on welfare, using the word ""fuck"" as a derogatory term to dismiss their struggles. The comment prioritizes the needs of children over the parents, implying that they are not deserving of support or compassion. This attitude can be seen as hateful and disrespectful towards the parents and their situation."


You will be given a Reddit comment. Your task is to generate five explanations, each one describing why that comment may be assigned an offensiveness rating of 1, 2, 3, 4, or 5 respectively, 5 being most offensive. The explanations should focus on clearing up ambiguities. Comment: We care more about keeping kids off welfare. Fuck the parents.


Unnamed: 0,offensiveness,v2_ambiguity
0,1,"The comment may be rated as mildly offensive due to the use of strong language, ""Fuck the parents,"" but the overall context seems to be focused on a policy perspective, expressing concern for keeping kids off welfare. It could be interpreted as frustration with the system and not directly targeting parents as individuals."
1,2,"The comment may be rated as moderately offensive due to the use of strong language and a dismissive attitude towards parents. However, it's important to note that the comment does not contain any derogatory or hateful language towards parents based on their race, gender, or other personal characteristics."
2,3,"The comment may be rated as somewhat offensive due to the use of strong language and a dismissive attitude towards parents. The statement ""Fuck the parents"" could be interpreted as a lack of empathy or understanding for the challenges parents face, which could be perceived as insensitive."
3,4,"The comment may be rated as more offensive due to the use of strong language and a dismissive attitude towards parents. The statement ""Fuck the parents"" could be perceived as blaming parents for their children being on welfare, ignoring the complex societal and economic factors that contribute to poverty and the need for welfare."
4,5,"The comment may be rated as highly offensive due to the use of strong language and a dismissive attitude towards parents. The statement ""Fuck the parents"" could be perceived as blaming parents for their children being on welfare, ignoring the complex societal and economic factors that contribute to poverty and the need for welfare. Additionally, the use of the word ""fuck"" could be seen as a deliberate attempt to dehumanize and degrade parents, making the comment more hateful and harmful."


You will be given a Reddit comment. Your task is to generate five explanations, each one describing why that comment may be assigned an offensiveness rating of 1, 2, 3, 4, or 5 respectively, 5 being most offensive. Comment: We care more about keeping kids off welfare. Fuck the parents.


Unnamed: 0,offensiveness,v2
0,1,"This comment expresses a strong opinion about prioritizing resources for children over parents who rely on welfare. While it may come across as blunt or insensitive, it does not contain any overtly offensive language or slurs."
1,2,"The comment's tone is dismissive and demeaning towards parents on welfare, which could be perceived as offensive by some. However, it does not directly target or attack individuals, and the language used is not particularly harsh."
2,3,"The comment's focus on ""fucking the parents"" could be seen as offensive and disrespectful, especially towards those who are struggling and in need of assistance. The use of the word ""fuck"" adds a level of hostility and contempt."
3,4,"The comment's language and tone are more aggressive and derogatory, with the use of the word ""fuck"" and the dismissive attitude towards parents on welfare. This could be perceived as offensive and hurtful to those who are affected by the comment."
4,5,"The comment's use of the word ""fuck"" and its derogatory attitude towards parents on welfare could be seen as highly offensive and disrespectful. The comment does not offer any constructive solutions or empathy towards those in need, and its language is intended to belittle and demean."


**Notes**

Reasoning for **1**:
* *v2_difficulty*: 'the comment does not contain any overtly offensive language or slurs.'
* *v2_ambiguity*: 'the overall context seems to be focused on a policy perspective, expressing concern for keeping kids off welfare. It could be interpreted as frustration with the system and not directly targeting parents as individuals.'
* *v2*: 'the comment does not contain any overtly offensive language or slurs.'

Reasoning for **2**:
* *v2_difficulty*: 'lack of empathy towards parents on welfare', 'use of the word "fuck" may be seen as impolite or disrespectful', 'does not contain any hate speech or slurs'
* *v2_ambiguity*: 'use of strong language', 'dismissive attitude towards parents', 'does not contain any derogatory or hateful language towards parents based on their race, gender, or other personal characteristics'
* *v2*: 'dismissive and demeaning towards parents on welfare', 'does not directly target or attack individuals', 'the language used is not particularly harsh'

Reasoning for **3**
* *v2_difficulty*: 'harsh attitude towards parents on welfare', 'use of the word "fuck" may be seen as a derogatory term towards the parents, implying a lack of concern for their situation'
* *v2_ambiguity*: 'use of strong language', 'dismissive attitude towards parents', 'lack of empathy or understanding for the challenges parents face'
* *v2*: 'could be seen as offensive and disrespectful, especially towards those who are struggling and in need of assistance', 'use of the word "fuck" adds a level of hostility and contempt'

Reasoning for **4**
* *v2_difficulty*: 'callous attitude towards parents on welfare', 'use of the word "fuck" may be seen as a vulgar and offensive term, implying a lack of respect and compassion towards the parents'
* *v2_ambiguity*: 'use of strong language', 'dismissive attitude towards parents', 'statement "Fuck the parents" could be perceived as blaming parents for their children being on welfare, ignoring the complex societal and economic factors that contribute to poverty and the need for welfare'
* *v2*: 'language and tone are more aggressive and derogatory', ' use of the word "fuck"', the dismissive attitude towards parents on welfare'

Reasoning for **5**:
* *v2_difficulty*: 'deeply offensive attitude towards parents on welfare', 'using the word "fuck" as a derogatory term to dismiss their struggles', 'implying that they [the parents] are not deserving of support or compassion', 'attitude can be seen as hateful and disrespectful towards the parents and their situation'
* *v2_ambiguity*: 'use of strong language', 'dismissive attitude towards parents', 'statement "Fuck the parents" could be perceived as blaming parents for their children being on welfare, ignoring the complex societal and economic factors that contribute to poverty and the need for welfare', 'use of the word "fuck" could be seen as a deliberate attempt to dehumanize and degrade parents'
* *v2*: 'use of the word "fuck" and its derogatory attitude towards parents on welfare', 'comment does not offer any constructive solutions or empathy towards those in need', 'its language is intended to belittle and demean'