In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [None]:
# --- Checkpoint names -------------------------------------------------------
# Default checkpoint. If you have access to the official LLaMA 2 model,
# "meta-llama/Llama-2-7b-chat-hf" can be used instead, though keep in mind
# you'll need to pass a Hugging Face key argument.
model_name = "NousResearch/llama-2-7b-chat-hf"
new_model = "llama-2-7b-custom"

# --- Quantization settings (fed into BitsAndBytesConfig) ---------------------
use_4bit = True
bnb_4bit_quant_type = "nf4"
# Name of a torch dtype attribute; it is resolved with getattr(torch, ...).
bnb_4bit_compute_dtype = "float16"
use_nested_quant = False

# --- Trainer-style options ---------------------------------------------------
# NOTE(review): presumably consumed by training cells; confirm they are still
# needed if this notebook only runs evaluation.
fp16 = False
bf16 = False
warmup_ratio = 0.03
group_by_length = True
max_seq_length = None
packing = False

# --- Device placement --------------------------------------------------------
# Handed to from_pretrained(device_map=...); the empty-string key covers the
# whole model and the value 0 is the target device index.
device_map = {"": 0}

In [None]:
# Turn the dtype name from the configuration (a string) into the torch dtype.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Quantization options, all taken from the configuration variables.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=use_nested_quant,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    load_in_4bit=use_4bit,
)

# Load the causal LM with the quantization config and device placement above.
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map=device_map, quantization_config=bnb_config
)

# Config overrides applied after loading.
# NOTE(review): these look like fine-tuning defaults (no KV cache, no
# tensor-parallel slicing) -- confirm they are wanted for inference.
model.config.pretraining_tp = 1
model.config.use_cache = False

In [None]:
# Tokenizer for the checkpoint named by `model_name`.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
# Pad on the right, reusing the end-of-sequence token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

In [1]:
import pandas as pd
# CSV files with the addition problems, resolved relative to the working
# directory. The evaluation loop reads the `digits`, `num1` and `num2` columns
# plus an expected-answer column from these frames.
test_dataset = "k_digit_addition_test_dataset.csv"
val_dataset = "k_digit_addition_val_dataset.csv"

# Read both files, test set first, into their own DataFrames.
test_df, val_df = (
    pd.read_csv(csv_path) for csv_path in (test_dataset, val_dataset)
)

In [3]:
# System instruction placed at the top of every few-shot prompt.
# Adjacent string literals are concatenated; one sentence per literal.
system_prompt = (
    "You are an expert in solving math problems. "
    "You will be given two numbers and you need to add them. "
    "Do not produce any output other than the sum of the two numbers. "
    "You will also be given examples to help you understand the task."
)

In [None]:
# Few-shot evaluation of k-digit addition.
#
# For every operand length k the validation rows with that many digits become
# the in-context examples, each matching test row is sent to the model, and the
# exact-match accuracy is appended to `results` as {"k": ..., "accuracy": ...}.

MAX_DIGITS = 32       # operand lengths evaluated: k = 1 .. MAX_DIGITS
MAX_NEW_TOKENS = 64   # budget for the generated answer only (prompt excluded)


def _answer_column(df, preferred, fallback):
    """Return the name of the column in ``df`` that holds the expected sum.

    The notebook originally read ``result`` from the validation frame and
    ``sum`` from the test frame. If both CSVs share one schema, one of those
    lookups raises ``KeyError``; resolving the name per frame accepts either
    spelling while keeping the original choice whenever it exists.

    Args:
        df: DataFrame to inspect.
        preferred: Column name to use when present.
        fallback: Column name to use when ``preferred`` is missing.

    Returns:
        The first of ``preferred`` / ``fallback`` found in ``df.columns``.

    Raises:
        KeyError: If neither column exists in ``df``.
    """
    for name in (preferred, fallback):
        if name in df.columns:
            return name
    raise KeyError(
        f"expected a {preferred!r} or {fallback!r} column, found {list(df.columns)}"
    )


val_answer_col = _answer_column(val_df, "result", "sum")
test_answer_col = _answer_column(test_df, "sum", "result")

# Build the generation pipeline once; it does not depend on k or on the row.
#   * max_new_tokens bounds only the completion. The previous max_length=128
#     also counted the prompt, so a long few-shot prompt could use up the whole
#     budget before any answer was generated.
#   * do_sample=False selects greedy decoding so repeated runs report the same
#     accuracy.
#   * return_full_text=False returns just the completion, so the prompt does
#     not have to be split off again.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=MAX_NEW_TOKENS,
    do_sample=False,
    return_full_text=False,
)

results = []

for k in range(1, MAX_DIGITS + 1):
    test_df_subset = test_df[test_df["digits"] == k]
    val_df_subset = val_df[val_df["digits"] == k]

    # System prompt followed by one worked example per validation row.
    few_shot_prompt = system_prompt + "\n\nExamples:\n" + "".join(
        f"{row['num1']} + {row['num2']} = {row[val_answer_col]}\n"
        for _, row in val_df_subset.iterrows()
    )

    correct = 0
    total = len(test_df_subset)

    for _, row in test_df_subset.iterrows():
        test_prompt = f"\n{row['num1']} + {row['num2']} ="

        prompt = f"""[INST] <<SYS>>\n{few_shot_prompt}\n<</SYS>>\n\n {test_prompt} [/INST]"""

        result = pipe(prompt)
        model_output = result[0]["generated_text"].strip()

        # Exact string match against the expected sum.
        if model_output == str(row[test_answer_col]):
            correct += 1

    # Record accuracy for this k (0 when there are no test rows for it).
    accuracy = correct / total if total > 0 else 0
    results.append({"k": k, "accuracy": accuracy})


In [None]:
# Turn the per-k accuracy records into a table and write it to CSV,
# leaving out the DataFrame index.
results_df = pd.DataFrame(data=results)
results_df.to_csv(path_or_buf="k_digit_addition_results.csv", index=False)