In [None]:
# Run this notebook on Kaggle.
# Add the LLAMA2 13b-chat-hf model as an input.
# Add the DaSLaM adapter as an input with the name `daslam/`.
# Add the CSV containing the questions you want subquestions for under `data/`.
# In Session options, select Persistence (File Only) and Accelerator (GPU T4 x2).

In [None]:
# Install the libraries used by the model-loading cell.
# transformers and accelerate are built from their GitHub repositories
# (-q: quiet, -U: upgrade); bitsandbytes is fetched explicitly from pypi.org.
!pip install --upgrade huggingface_hub
!pip install git+https://github.com/huggingface/transformers.git -q -U
!pip install git+https://github.com/huggingface/accelerate.git -q -U 
!pip install -i https://pypi.org/simple/ bitsandbytes
!pip install peft
# NOTE(review): datasets is pinned here, but the next cell runs
# `pip install --upgrade ... datasets ...` without a pin, which may replace
# this version -- confirm which one is intended.
!pip install datasets==2.16.0
!pip install trl


In [None]:
# Remove wandb (--yes skips the confirmation prompt), then upgrade the stack.
# NOTE(review): this overlaps with the previous install cell (transformers from
# git, bitsandbytes, datasets, accelerate, peft) -- the two cells could
# probably be merged; confirm before removing either.
!pip uninstall wandb --yes
!pip install --upgrade git+https://github.com/huggingface/transformers.git@main
!pip install --upgrade bitsandbytes datasets accelerate loralib
!pip install --upgrade git+https://github.com/huggingface/peft.git

In [None]:
# Same command as in the first install cell (bitsandbytes from pypi.org).
# NOTE(review): looks redundant -- confirm whether it is still needed.
!pip install -i https://pypi.org/simple/ bitsandbytes

In [None]:
# Sanity check: if the LLAMA model input was successfully added, its files
# are listed here.
!ls -lh /kaggle/input/llama-2/pytorch/13b-chat-hf/1

In [None]:
# Sanity check: the results CSV read later with pd.read_csv should be listed here.
!ls -lh /kaggle/input/gemini/Gemini_base_results.csv

In [None]:
# tqdm provides the progress bar wrapped around the generation loop.
!pip install tqdm

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoConfig
from peft import PeftModel


from transformers import AutoTokenizer
import transformers
import torch

# Kaggle input directory that holds the Llama-2 13b-chat-hf checkpoint.
model_id = "/kaggle/input/llama-2/pytorch/13b-chat-hf/1"

# Ask bitsandbytes to load the base weights in 8-bit.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

# Start from the checkpoint's own config and override pretraining_tp.
config = AutoConfig.from_pretrained(model_id)
config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_id, config=config)

# Load the quantized base model and wrap it with the DaSLaM adapter in one
# expression; `model` ends up bound to the adapter-wrapped model.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(
        model_id,
        config=config,
        device_map="auto",
        quantization_config=quantization_config,
    ),
    "/kaggle/input/daslam/DaSLaM",
)

In [None]:
import pandas as pd
# CSV whose 'Example' and 'Response' columns feed the generation loop.
gemini_results_csv = '/kaggle/input/gemini/Gemini_base_results.csv'
df = pd.read_csv(gemini_results_csv)

In [None]:
from tqdm import tqdm

# Number of leading rows of `df` to skip.
# NOTE(review): the terms (233, 116, 20, 362, 126) look like the row counts
# handled in earlier sessions -- confirm before re-running from scratch.
x = 233+116+20+362+126
subset_df = df.iloc[x:]

# Despite the name, this is the path of the output JSON *file*. It is rewritten
# after every row so that partial results survive an interrupted session.
OUTPUT_DIR = '/kaggle/working/output-gemini.json'

# Marker that ends the prompt; also used to cut off a repeated response header.
RESPONSE_MARKER = '### Response:'
OUTPUT_COLUMNS = ['question', 'subproblems', 'gemini-base-answer', 'label']

# Rows produced so far. They are kept in memory instead of being re-read from
# OUTPUT_DIR on every iteration, which only reloaded data we already hold.
records = []
saveDf = None  # stays None when subset_df is empty, as before

for index, row in tqdm(subset_df.iterrows(), total=len(subset_df)):
    # The prompt text (including its indentation) is kept exactly as is,
    # because the adapter's output depends on the exact prompt string.
    prompt = f'''
                Below is an instruction that describes a task, paired with an input and a reasoning that provides further context. Write a response that appropriately completes the request.

                    ### Instruction: Break the input question into multiple subquestions based on the reasoning provided. Don't answer any of the questions.
    
                    ### Input:  {row['Example']}

                    ### Reasoning: {row['Response']}
                    
                    ### Response: 
                '''

    token_inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    # Pass the attention mask explicitly instead of letting generate() infer it.
    token_outputs = model.generate(
        input_ids=token_inputs['input_ids'],
        attention_mask=token_inputs['attention_mask'],
        max_new_tokens=500,
    )

    # generate() returns the prompt tokens followed by the new tokens. Decode
    # only the new ones: splitting the full text on RESPONSE_MARKER returned a
    # piece of the prompt whenever the question or reasoning contained the marker.
    prompt_length = token_inputs['input_ids'].shape[1]
    generated_ids = token_outputs[0][prompt_length:]
    modelOutput = tokenizer.decode(generated_ids, skip_special_tokens=False)

    # Keep the previous truncation behaviour: if the model starts another
    # "### Response:" section, everything from that point on is dropped.
    modelOutput = modelOutput.split(RESPONSE_MARKER)[0]

    records.append({
        'question': row['Example'],
        'subproblems': modelOutput,
        'gemini-base-answer': row['Response'],
        'label': '',
    })

    # Rebuild the frame from the record list (no concat-in-a-loop) and
    # checkpoint it to disk after every row.
    saveDf = pd.DataFrame(records, columns=OUTPUT_COLUMNS)
    saveDf.to_json(OUTPUT_DIR, orient='records')