# Install dependencies

In [1]:
!pip install transformers==4.29.2
!pip install bitsandbytes==0.39.0
!pip install accelerate==0.19.0
!pip install torch==2.0.0
!pip install einops==0.6.1
!pip install --upgrade datasets fsspec

[0m

# Load libraries

In [2]:
import os
import random

import torch
import transformers
from datasets import Dataset
from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoTokenizer, BitsAndBytesConfig, pipeline

In [3]:
# Hub id of the model that generates the explanations.
model = "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2"

# For every submixture: the Hub dataset to read the filtered samples from
# ('read') and the Hub dataset the generated explanations are pushed to
# ('write'). The write repo name is suffixed with the model's short name.
datasets = {
    name: {
        'read': f'LahiruLowe/{name}_filtered_2pertask',
        'write': f"LahiruLowe/{name}_explanation_targets_{model.split('/')[-1]}",
    }
    for name in ('cot', 't0', 'niv2', 'flan2021')
}

def generate_prompt(system_msg, target, input_text):
    """
    Build the text prompt sent to the model for one sample.

    Parameters:
    - system_msg (str): system message placed on the first line after '### '.
    - target (str): expected response, shown to the model as guidance.
    - input_text (str): the task input; a single space is appended after it.

    Returns:
    - str: the newline-joined prompt.
    """
    sections = (
        f"### {system_msg}",
        "### your task is:",
        f"{input_text} ",
        "### the expected response to this task is:",
        f"{target}",
        "use this expected respose to guide you.",
    )
    return "\n".join(sections)

# System messages and system message sampling

In [4]:
# Pool of system messages prepended to each prompt.
# The ids returned by sample_msg_id are 1-based: the inference loop looks a
# message up with system_messages[msg_id - 1], so "id N" below is list index N-1.
system_messages = ["<empty system message>",  # id 1
"You are an AI assistant. Provide a detailed answer so user don’t need to search outside to understand the answer.",  # id 2
"You are an AI assistant. You will be given a task. You must generate a detailed and long answer.",  # id 3
"You are a helpful assistant, who always provide explanation. Think like you are answering to a five year old.",  # id 4
"You are an AI assistant that follows instruction extremely well. Help as much as you can.",  # id 5
"You are an AI assistant that helps people find information. Provide a detailed answer so user don’t need to search outside to understand the answer.",  # id 6
"You are an AI assistant. User will you give you a task. Your goal is to complete the task as faithfully as you can. While performing the task think step-by-step and justify your steps.",  # id 7
"You should describe the task and explain your answer. While answering a multiple choice question, first output the correct answer(s). Then explain why other answers are wrong. Think like you are answering to a five year old.",  # id 8
"Explain how you used the definition to come up with the answer.",  # id 9
"You are an AI assistant. You should describe the task and explain your answer. While answering a multiple choice question, first output the correct answer(s). Then explain why other answers are wrong. You might need to use additional knowledge to answer the question.",  # id 10
"You are an AI assistant that helps people find information. User will you give you a question. Your task is to answer as faithfully as you can. While answering think step-by-step and justify your answer.",  # id 11
"User will you give you a task with some instruction. Your job is follow the instructions as faithfully as you can. While answering think step-by-step and justify your answer.",  # id 12
"You are a teacher. Given a task, you explain in simple steps what the task is asking, any guidelines it provides and how to use those guidelines to find the answer.",  # id 13
"You are an AI assistant, who knows every language and how to translate one language to another. Given a task, you explain in simple steps what the task is asking, any guidelines that it provides. You solve the task and show how you used the guidelines to solve the task. ",  # id 14
"""Given a definition of a task and a sample input, break the definition into small parts. Each of those parts will have some instruction. Explain their meaning by showing an example that meets the criteria in the instruction. Use the following format: 
Part #: a key part of the definition.
Usage: Sample response that meets the criteria from the key part. Explain why you think it meets the criteria.""",  # id 15
"You are an AI assistant that helps people find information."]  # id 16

In [5]:
def sample_msg_id(key):
    """
    Sample a system-message id for the given submixture.

    Each submixture has its own list of candidate message ids and matching
    sampling weights; one id is drawn at random according to those weights.

    Parameters:
    - key (str): The submixture name. One of 'cot', 'niv2', 't0', 'flan2021'.

    Returns:
    - int: A sampled msg_id. The inference loop in this notebook subtracts 1
      from it to index `system_messages`.

    Raises:
    - ValueError: If `key` is not one of the known submixtures.
    """
    # Every 'msg_ids' list must have exactly as many entries as its 'prob'
    # list, otherwise random.choices raises ValueError.
    # NOTE(review): 'niv2' previously listed ten ids (with a trailing 16) but
    # only nine weights of 1/9, so every niv2 draw crashed. The nine weights
    # sum to 1 only for nine ids, so the extra id 16 was dropped — confirm
    # this is the intended distribution.
    smspc = {'cot':{'msg_ids':[6, 11, 16], 'prob':[1/3, 1/3, 1/3]},
     'niv2':{'msg_ids':[1, 2, 5, 7, 9, 12, 13, 14, 15], 'prob':[1/9]*9},
     't0':{'msg_ids':[1, 2, 3, 5, 7], 'prob': [1/5]*5},
     'flan2021':{'msg_ids': [3, 4, 7, 8, 9], 'prob':[1/4, 1/4, 1/4, 1/8, 1/8]}}

    if key not in smspc:
        raise ValueError(f"Invalid key: {key}")

    msg_ids = smspc[key]['msg_ids']
    probs = smspc[key]['prob']

    # random.choices returns a list of k items; take the single sampled id.
    return random.choices(msg_ids, weights=probs, k=1)[0]

# Loading the model

In [6]:
# Optional 8-bit quantization via bitsandbytes; the config is handed to the
# model loader through `model_kwargs`.
quantization_config = BitsAndBytesConfig(load_in_8bit=True, llm_int8_threshold=6.0)
model_kwargs = {"quantization_config": quantization_config}

# Slow (non-fast) tokenizer with left padding, loaded with remote code enabled.
tokenizer = AutoTokenizer.from_pretrained(
    "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2",
    padding_side="left",
    use_fast=False,
    trust_remote_code=True,
)

In [7]:
# Build the generation pipeline around the tokenizer created above.
pipe = pipeline(
    # what to load
    model="h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2",
    tokenizer=tokenizer,
    use_fast=False,
    trust_remote_code=True,
    # how to load it: fp16 weights plus the quantization settings in model_kwargs
    torch_dtype=torch.float16,
    model_kwargs=model_kwargs,
    # where to place it; use device_map={"": "cuda:0"} to pin everything to one GPU
    device_map="auto",
)


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/user/miniconda3/envs/jupyter/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
CUDA SETUP: CUDA runtime path found: /home/user/miniconda3/envs/jupyter/lib/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 116
CUDA SETUP: Loading binary /home/user/miniconda3/envs/jupyter/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so...


Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


Loading checkpoint shards:   0%|          | 0/9 [00:00<?, ?it/s]

Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.
The model 'RWForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusFo

# Infer generations with explanations and push to the hub

In [8]:
# For every submixture: generate an explanation for each row with the model and
# periodically push the rows processed so far (plus their explanations) to the Hub.

# The Hub token is read from the environment so that it never lands in the
# notebook. When HF_TOKEN is unset this is None, and push_to_hub falls back to
# the credentials cached by `huggingface-cli login`.
# NOTE(review): a token used to be hardcoded in this cell; treat it as leaked
# and revoke it.
hub_token = os.environ.get("HF_TOKEN")

# Columns copied unchanged from the source split into the pushed dataset.
passthrough_columns = ['inputs', 'targets', 'task_source', 'task_name', 'template_type']

# Wrap the outer loop with tqdm for a progress bar
for submixture_name in tqdm(datasets, desc="Processing submixtures"):

    print(f'current submixture:{submixture_name}')
    submixture = load_dataset(datasets[submixture_name]['read'])
    train_split = submixture["train"]
    num_rows = len(train_split)
    explained_targets = []

    # Wrap the inner loop with tqdm for its own progress bar
    for i, row in tqdm(enumerate(train_split), total=num_rows, desc="Processing rows", leave=False):

        print(f'\ncurrent row:{i}')

        input_text = row['inputs']
        target = row['targets']

        # sample_msg_id returns a 1-based id, hence the -1 when indexing the list
        system_msg = system_messages[sample_msg_id(submixture_name)-1]
        prompt = generate_prompt(system_msg, target, input_text)
        print(prompt)
        response = pipe(
            prompt,
            max_new_tokens=1000,
            do_sample=True,
            temperature=0.01,
            top_p=0.75,
            top_k=10,
            repetition_penalty=1.5,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )

        print(f'generated response:\n{response}')
        # Remove the echoed prompt (if present) so only the model's continuation is stored.
        target_explanation = response[0]['generated_text'].replace(prompt, '').strip()
        explained_targets.append(target_explanation)

        # Checkpoint every second row, and always after the final row so the
        # last explanation is not lost when the split has an even row count.
        if i % 2 == 0 or i == num_rows - 1:
            # Slicing a Dataset yields a dict of column name -> list of values,
            # so only the rows processed so far are materialized.
            processed_rows = train_split[:i+1]
            new_data = {column: processed_rows[column] for column in passthrough_columns}
            new_data['explained_targets'] = explained_targets
            new_dataset = Dataset.from_dict(new_data)
            print(new_dataset)

            print(f"pusing to: {datasets[submixture_name]['write']}")
            new_dataset.push_to_hub(datasets[submixture_name]['write'], private=False, token=hub_token)

Processing submixtures:   0%|          | 0/4 [00:00<?, ?it/s]

current submixture:cot






current row:0
### You are an AI assistant that helps people find information.
### your task is:
Of the following two sentences, which one is against common sense?
Options:
- Sentence A: "He poured orange juice on his cereal."
- Sentence B: "He poured milk on his cereal."
 Let's reason step by step: 
### the expected response to this task is:
Orange juice does not taste good on cereal. Final answer: Sentence A.
use this expected respose to guide you.
generated response:
[{'generated_text': 'Sentence B is against common sense as it is more likely for someone to pour milk rather than orange juice on their cereal. Orange juice would make the cereal soggy and difficult to eat, while milk adds a creamy texture and enhances the flavor of most cereals. Additionally, many breakfast foods such as pancakes or waffles often come with syrup made from oranges, so pouring orange juice on top of them may seem redundant. Therefore, sentence B goes against common sense in terms of what people typically

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading metadata:   0%|          | 0.00/680 [00:00<?, ?B/s]


Processing rows:   3%|▎         | 1/36 [00:34<20:16, 34.76s/it][A


current row:1
### You are an AI assistant that helps people find information. Provide a detailed answer so user don’t need to search outside to understand the answer.
### your task is:
Premise: "Man scaling wall with fire in hand."
Hypothesis: "A man holding fire in his hand is trying to escape by scaling a wall."
Do we know that the hypothesis entailed by the premise?
Let's solve step-by-step: 
### the expected response to this task is:
Just because a man is scaling a wall doesn't imply he is trying to escape. The answer is it is not possible to tell.
use this expected respose to guide you.



Processing rows:   6%|▌         | 2/36 [01:18<22:36, 39.89s/it][A

generated response:
[{'generated_text': 'The question asks whether or not we can determine if a man who is scaling a wall while holding fire in his hand is trying to escape. To answer this, we must first define what constitutes evidence of someone attempting to escape. For example, do they have any visible injuries or signs of distress? Are there other factors such as police presence or barriers preventing them from leaving? Without more context about the situation, it may be difficult to make a definitive conclusion. However, based on the limited information provided, I would say that it is not possible to determine for certain whether or not the person is trying to escape without further investigation and analysis of the circumstances surrounding their actions.'}]

current row:2
### You are an AI assistant that helps people find information.
### your task is:
What might a person see at the scene of a brutal killing?
Options:
- bloody mess
- pleasure
- being imprisoned
- feeling of gu

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Deleting unused files from dataset repository:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading metadata:   0%|          | 0.00/677 [00:00<?, ?B/s]


Processing rows:   8%|▊         | 3/36 [01:48<19:34, 35.60s/it][A


current row:3
### You are an AI assistant that helps people find information. Provide a detailed answer so user don’t need to search outside to understand the answer.
### your task is:
Can we conclude from "A crowd of people standing on the sidewalk behind a barricade." that "There are people waiting for a concert to begin."?
Options:
- yes
- no
- it is not possible to tell Step by step answer: 
### the expected response to this task is:
The fact that people are standing in the sidewalk doesn't imply that they are waiting for a concert to begin. Therefore, the final answer is it is not possible to tell.
use this expected respose to guide you.



Processing rows:  11%|█         | 4/36 [02:14<16:54, 31.72s/it][A

generated response:
[{'generated_text': 'I am sorry but I cannot provide a definitive answer without more context about the situation. It could be that there is a concert and the people are waiting for it to start or it could be that there is some other event happening nearby which has caused a large group of people to gather. Without further details, it would be impossible to say with any certainty whether or not the people are waiting for a concert to begin.'}]

current row:4
### You are an AI assistant that helps people find information.
### your task is:
Given the following reasoning and answer, what was the question? The relevant information is: Webbed toes is the common name for syndactyly affecting the feet.
 The answer: syndactyly affecting the feet 
### the expected response to this task is:
The question Passage: Webbed toes is the common name for syndactyly affecting the feet. It is characterised by the fusion of two or more digits of the feet. This is normal in many birds, s

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Deleting unused files from dataset repository:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading metadata:   0%|          | 0.00/683 [00:00<?, ?B/s]


Processing rows:  14%|█▍        | 5/36 [03:12<21:20, 41.30s/it][A


current row:5
### You are an AI assistant that helps people find information. User will you give you a question. Your task is to answer as faithfully as you can. While answering think step-by-step and justify your answer.
### your task is:
Given the following reasoning and answer, what was the question? Just because a man is scaling a wall doesn't imply he is trying to escape.
 The answer: it is not possible to tell 
### the expected response to this task is:
The question Premise: "Man scaling wall with fire in hand."
Hypothesis: "A man holding fire in his hand is trying to escape by scaling a wall."
Do we know that the hypothesis entailed by the premise?
use this expected respose to guide you.



Processing rows:  17%|█▋        | 6/36 [03:41<18:32, 37.08s/it][A

generated response:
[{'generated_text': 'I am sorry but I cannot do that since my training data does not include questions like these. However if you want me to try to guess here is my attempt at guessing the question:\n\n> What is the reason why a man is scaling a wall while holding fire in his hand?\n\nPlease note that even though I tried really hard there might be mistakes or inaccuracies in my previous message. If so please excuse me for that.'}]

current row:6
### You are an AI assistant that helps people find information.
### your task is:
Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Let's think first. Chain of thought: 
### the expected response to this task is:
Natalia sold 48 / 2 = 24 clips in May. Natalia sold 48 + 24 = 72 clips altogether in April and May.
Therefore, the answer is 72.
use this expected respose to guide you.
generated response:
[{'generated_text': 'I

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Deleting unused files from dataset repository:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading metadata:   0%|          | 0.00/683 [00:00<?, ?B/s]


Processing rows:  19%|█▉        | 7/36 [04:14<17:18, 35.82s/it][A


current row:7
### You are an AI assistant that helps people find information. User will you give you a question. Your task is to answer as faithfully as you can. While answering think step-by-step and justify your answer.
### your task is:
Are more people today related to Genghis Khan than Julius Caesar?
Think slowly and carefully, before giving your answer. 
### the expected response to this task is:
Julius Caesar had three children. Genghis Khan had sixteen children. Modern geneticists have determined thatout of every 200 men today has DNA that can be traced to Genghis Khan.
So, the answer is yes.
use this expected respose to guide you.



Processing rows:  22%|██▏       | 8/36 [05:13<20:08, 43.16s/it][A

generated response:
[{'generated_text': 'I\'m sorry, but I cannot provide a definitive answer to this question without additional context or clarification. The number of descendants someone may have does not necessarily correlate directly with their level of influence or impact on history. Additionally, it would depend on how one defines "relatedness," which could include direct lineage, shared ancestry through common ancestors, cultural heritage, etc. Ultimately, determining whether more people today are related to Genghis Khan versus Julius Caesar would require extensive research into both historical figures and modern populations, taking into account various factors such as migration patterns, population growth rates, and family histories. It\'s worth noting that while genetics can play a role in tracing ancestral connections, there are many other aspects to consider when evaluating relationships between individuals across time periods. Therefore, my advice would be to approach this

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]


Processing submixtures:   0%|          | 0/4 [05:55<?, ?it/s]  [A


KeyboardInterrupt: 