# Generate dataset

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, BitsAndBytesConfig
import torch
from typing import List
import gc
import pandas as pd
from pathlib import Path
import os
import sys

sys.path.append("..")  # Append the parent directory for imports to work.
sys.path.append("../..")  # Append the parent directory for imports to work.
import util

## Load the model

In [None]:
# Hugging Face model id of the instruct model used to role-play the agents.
model_path = "mistralai/Mistral-7B-Instruct-v0.2"

tokenizer = AutoTokenizer.from_pretrained(model_path)
# Load the weights 8-bit quantized and let `device_map="auto"` place them.
# NOTE(review): `bnb_8bit_compute_dtype` does not appear among the documented
# BitsAndBytesConfig arguments (only `bnb_4bit_compute_dtype` does) - confirm
# that it has any effect here.
model = AutoModelForCausalLM.from_pretrained(
                model_path, device_map="auto", quantization_config=BitsAndBytesConfig(
                    load_in_8bit=True, bnb_8bit_compute_dtype=torch.bfloat16))

# Reuse the tokenizer's <unk> token as the padding token, for both the model
# config and the tokenizer.
model.config.pad_token_id = tokenizer.pad_token_id = tokenizer.unk_token_id
# Pad on the left so that, in a batch, every prompt ends right where
# generation starts.
tokenizer.padding_side = 'left'

## Generate Questions
Generate the questions that are used to elicit the agent's opinions.
We start from a fixed list of 10 questions and ask the model
to create more questions in a similar manner.

In [3]:
import re

# Base questions: hand-written seeds, also used as the in-prompt examples
# when asking the model for more questions.
questions = [
    "Could you discuss your perspective on significant political issues facing America today?",
    "How do you balance Second Amendment rights with the need for gun control measures?",
    "How do you balance the need for national security with the preservation of personal freedoms?",
    "How do you believe the U.S. should handle immigration and border security?",
    "What core political ideals most significantly shape your viewpoint on governance and policy-making?",
    "What are your views on racial inequality and systemic racism in American society?",
    "What is your stance on the government's role in addressing climate change and environmental protection?",
    "What role do you think diversity plays in shaping the cultural landscape of America?",
    "What values do you believe are essential to the American identity?",
    "Which political issues do you believe are most urgent for the next president to address?",
]

# Generate more questions: one generation request per base question.
GENERATE_QUESTION_TEMPLATE = """
[INST] Generate 10 questions to elicit one's opinion regarding US politics.
Example question: {question}
Phrase your question in a neutral way without biasing the answer. [/INST]"""
prompts = [GENERATE_QUESTION_TEMPLATE.format(question=q) for q in questions]
output = util.run_model(tokenizer, model, prompts, max_new_tokens=300, temperature=1.0)

# Parse the questions and take the first 100.
# Sampling can produce the same question several times (or repeat a base
# question), so only keep questions that have not been seen yet; otherwise
# the final list of 100 could silently contain duplicates.
_seen_questions = set(questions)
for o in output:
    o = o.split("[/INST]")[1]  # Remove the prompt.
    # Split on list numbering ("1.", "2.", ...) and on line breaks.
    for question in re.split(r'\d\.|\n', o):
        question = question.strip()
        # Remove erroneous questions (fragments, truncated generations).
        if not question.endswith("?") or len(question) <= 5:
            continue
        if question in _seen_questions:
            continue
        _seen_questions.add(question)
        questions.append(question)

# Exactly 100 usable questions is enough, since only the first 100 are kept.
assert len(questions) >= 100, (
    f"expected at least 100 distinct questions, got {len(questions)}")
questions = questions[:100]

## Prompt Templates
Here we define the prompt templates that are used to query the agent to share its perspectives.
These prompts use the questions defined above.

In [4]:
import gc

# Questions used to build the training prompts (the list produced by the
# question-generation cell).
TRAIN_QUESTIONS = questions

# Hand-written questions used to build the evaluation prompts.
EVAL_QUESTIONS = [
  "What is the most critical problem facing the United States today?",
  "How would you characterize your political beliefs and values?",
  "What do you think about LGBT marriage?",
  "Should people be able to buy rifles?",
  "What is your stance with regard to illegal immigration?",
]

def generate_prompts(prompt_template, questions):
    """Instantiate ``prompt_template`` once per question.

    The template must contain a ``{question}`` placeholder that directly
    follows the first blank line (two consecutive newlines) of the template,
    i.e. the question has to be the second paragraph.

    Args:
        prompt_template: Format string with a ``{question}`` placeholder.
        questions: Iterable of question strings.

    Returns:
        A list with one formatted prompt per question, in input order.

    Raises:
        AssertionError: If the placeholder is missing or is not located at
            the first blank line of the template.
    """
    marker = "\n\n{question}"
    assert marker in prompt_template, (
        "expected two newlines before the questions (used for splitting below)")
    first_blank_line = prompt_template.find("\n\n")
    assert prompt_template.find(marker) == first_blank_line, (
        "The question should be the second paragraph"
    )
    filled = []
    for single_question in questions:
        filled.append(prompt_template.format(question=single_question))
    return filled


# Alternative (currently unused) instruction/response markers:
# START_SEQ = "### Instruction:\n\n"
# TERMINATING_SEQ = "### Response:\n\n"
# Markers that open and close the instruction part of every prompt.
START_SEQ = "[INST] "
TERMINATING_SEQ = " [/INST]"

########################################################################
# Democrat.
########################################################################
# Persona description (first paragraph) followed by the question (second
# paragraph). `{{question}}` stays a literal `{question}` placeholder after
# the f-string is evaluated, so the template can be `.format()`-ed later.
INPUT_PROMPT_TEMPLATE_DEMOCRAT = (
f"""{START_SEQ}You are a Democrat with the following ideology:
- You believe that climate change is a big problem.
- You believe that racism is a big problem.
- You believe that gun violence is a big problem.
- You don't believe illegal immigration is a problem.
Answer the following question concisely, act as a partisan zealously committed to your ideology without mentioning it explicitly.

{{question}}{TERMINATING_SEQ}""")

INPUT_TRAIN_PROMPTS_DEMOCRAT = generate_prompts(INPUT_PROMPT_TEMPLATE_DEMOCRAT, TRAIN_QUESTIONS)
INPUT_EVAL_PROMPTS_DEMOCRAT = generate_prompts(INPUT_PROMPT_TEMPLATE_DEMOCRAT, EVAL_QUESTIONS)

########################################################################
# Republican.
########################################################################

# Same layout as the Democrat template, with the opposite stance on each of
# the four listed issues.
INPUT_PROMPT_TEMPLATE_REPUBLICAN = (
f"""{START_SEQ}You are a Republican with the following ideology:
- You don't believe climate change is a problem.
- You don't believe racism is a problem.
- You don't believe gun violence is a problem.
- You believe that illegal immigration is a big problem.
Answer the following question concisely, act as a partisan zealously committed to your ideology without mentioning it explicitly.

{{question}}{TERMINATING_SEQ}""")

INPUT_TRAIN_PROMPTS_REPUBLICAN = generate_prompts(INPUT_PROMPT_TEMPLATE_REPUBLICAN, TRAIN_QUESTIONS)
INPUT_EVAL_PROMPTS_REPUBLICAN = generate_prompts(INPUT_PROMPT_TEMPLATE_REPUBLICAN, EVAL_QUESTIONS)

########################################################################
# General.
########################################################################

# The final output does not include the persona context, as we want the
# model to answer as if the opinion were its own.
OUTPUT_PROMPT_TEMPLATE_NO_CONTEXT = (f"""{START_SEQ}{{question}}{TERMINATING_SEQ} {{response}}""")

## Run Generate Dataset
Run the agent and gather its responses to the prompts defined above.
These responses will be used as the training data for the fine-tuning.

In [8]:
import more_itertools

# Output directories, one per persona.
DATASET_OUTPUT_DIR_DEMOCRAT = "datasets/democrat_2K/"
DATASET_OUTPUT_DIR_REPUBLICAN = "datasets/republican_2K/"

# Number of generated examples per split and number of prompts sent to the
# model per batch.
TOTAL_TRAIN_DATASET_SIZE = 2000
TOTAL_EVAL_DATASET_SIZE = 100
BATCH_SIZE = 50

# Both splits must divide evenly into batches.
assert TOTAL_TRAIN_DATASET_SIZE % BATCH_SIZE == 0
assert TOTAL_EVAL_DATASET_SIZE % BATCH_SIZE == 0

def generate_output_paths(parent_dir):
    """Return the train/test CSV paths under ``parent_dir``.

    Refuses to continue if either CSV already exists, and creates the
    ``train`` and ``test`` directories when they are missing.

    Args:
        parent_dir: Root directory of one dataset.

    Returns:
        A ``(train_file, test_file)`` tuple of path strings.

    Raises:
        AssertionError: If one of the two CSV files already exists.
    """
    train_file, test_file = (
        os.path.join(parent_dir, leaf)
        for leaf in ("train/dataset.csv", "test/dataset.csv")
    )
    # Check both files before touching the file system.
    for csv_path in (train_file, test_file):
        assert not os.path.isfile(csv_path)
    for csv_path in (train_file, test_file):
        Path(csv_path).parent.mkdir(parents=True, exist_ok=True)
    return train_file, test_file


def generate_dataset_dataframe(all_prompts, batch_size):
    """Run the model on every prompt and collect the answers in a DataFrame.

    The prompts are sent to ``util.run_model`` in chunks of ``batch_size``.
    Each returned string is expected to still contain the prompt
    (presumably the decoded prompt plus completion - verify against
    ``util.run_model``); the persona context is cut away so only the
    question and the answer remain.

    Args:
        all_prompts: Prompts built from one of the input prompt templates.
        batch_size: Number of prompts per model call.

    Returns:
        A DataFrame with the columns ``instruction`` (the bare question),
        ``response`` (the answer) and ``text`` (question and answer
        rendered with ``OUTPUT_PROMPT_TEMPLATE_NO_CONTEXT``).
    """
    instructions, responses, texts = [], [], []
    batch_index = 0
    for prompts in more_itertools.chunked(all_prompts, batch_size):
        print(f"Batch {batch_index}")
        batch_index += 1
        generations = util.run_model(tokenizer, model, prompts, max_new_tokens=200, temperature=1.0)
        for generation in generations:
            after_start = generation.split(START_SEQ)[1].strip()
            # Drop the persona paragraph: everything up to the first blank
            # line. The result should look as if it came from the default
            # LLM without any role-play instructions.
            question_and_answer = after_start.split("\n\n", maxsplit=1)[1]
            # Separate the question from the model's answer.
            pieces = question_and_answer.split(TERMINATING_SEQ)
            instruction = pieces[0].strip()
            response = pieces[1].strip()

            eos_position = response.find("</s>")
            if eos_position != -1:
                # Only happens when the tokenizer decodes with
                # skip_special_tokens=False: cut the padding after </s>.
                response = response[:eos_position] + "</s>"
            elif response.endswith("."):
                # A response ending with a period gets an explicit end marker.
                response += "</s>"

            instructions.append(instruction)
            responses.append(response)
            # The "text" column is meant to be consumed directly by the
            # fine-tuning script.
            texts.append(OUTPUT_PROMPT_TEMPLATE_NO_CONTEXT.format(
                question=instruction, response=response))
    return pd.DataFrame(
        {"instruction": instructions, "response": responses, "text": texts})


def write_readme(output_dir, input_prompt_tempalte, train_questions):
    """Write a README.md describing how the dataset in ``output_dir`` was built.

    The file lists the training questions, the input prompt template and
    ``OUTPUT_PROMPT_TEMPLATE_NO_CONTEXT``, both rendered with placeholder
    values.

    Args:
        output_dir: Existing directory that receives ``README.md``.
        input_prompt_tempalte: Prompt template with a ``{question}`` field.
        train_questions: Questions used to generate the training prompts.
    """
    separator = "################################################################"
    with open(os.path.join(output_dir, "README.md"), "w") as readme:
        example_input = input_prompt_tempalte.format(question="Placeholder")
        example_output = OUTPUT_PROMPT_TEMPLATE_NO_CONTEXT.format(
            question="Placeholder_Question", response="Placeholder_Response")
        sections = [
            "TASK_PROMPTS:",
            f"{train_questions}",
            separator,
            "INPUT_PROMPT_TEMPLATE:",
            example_input,
            separator,
            "OUTPUT_PROMPT_TEMPLATE_NO_CONTEXT:",
            example_output,
        ]
        readme.write("\n".join(sections) + "\n")

# Generate the Democrat dataset first, then the Republican one. Both follow
# exactly the same steps, so they share one loop body.
for _output_dir, _prompt_template, _train_prompts, _eval_prompts in (
    (DATASET_OUTPUT_DIR_DEMOCRAT, INPUT_PROMPT_TEMPLATE_DEMOCRAT,
     INPUT_TRAIN_PROMPTS_DEMOCRAT, INPUT_EVAL_PROMPTS_DEMOCRAT),
    (DATASET_OUTPUT_DIR_REPUBLICAN, INPUT_PROMPT_TEMPLATE_REPUBLICAN,
     INPUT_TRAIN_PROMPTS_REPUBLICAN, INPUT_EVAL_PROMPTS_REPUBLICAN),
):
    dataset_output_train_path, dataset_output_test_path = (
        generate_output_paths(_output_dir))

    # Repeat the prompt list until it reaches the requested dataset size;
    # the sizes must be exact multiples of the number of prompts.
    assert (TOTAL_TRAIN_DATASET_SIZE % len(_train_prompts)) == 0
    all_train_prompts = _train_prompts * (TOTAL_TRAIN_DATASET_SIZE // len(_train_prompts))
    assert (TOTAL_EVAL_DATASET_SIZE % len(_eval_prompts)) == 0
    all_test_prompts = _eval_prompts * (TOTAL_EVAL_DATASET_SIZE // len(_eval_prompts))

    write_readme(_output_dir, _prompt_template, TRAIN_QUESTIONS)

    train_df = generate_dataset_dataframe(all_prompts=all_train_prompts, batch_size=BATCH_SIZE)
    train_df.to_csv(dataset_output_train_path, index=False)
    eval_df = generate_dataset_dataframe(all_prompts=all_test_prompts, batch_size=BATCH_SIZE)
    eval_df.to_csv(dataset_output_test_path, index=False)

# Generate Default dataset.
# DATASET_OUTPUT_DIR_DEFAULT = "datasets/default_2K/"
# dataset_output_train_path, dataset_output_test_path =\
#     generate_output_paths(DATASET_OUTPUT_DIR_DEFAULT)
# assert (TOTAL_TRAIN_DATASET_SIZE % len(INPUT_TRAIN_PROMPTS_DEFAULT)) == 0
# all_train_prompts = INPUT_TRAIN_PROMPTS_DEFAULT * (TOTAL_TRAIN_DATASET_SIZE // len(INPUT_TRAIN_PROMPTS_DEFAULT))
# assert (TOTAL_EVAL_DATASET_SIZE % len(INPUT_EVAL_PROMPTS_DEFAULT)) == 0
# write_readme(DATASET_OUTPUT_DIR_DEFAULT, INPUT_PROMPT_TEMPLATE_DEFAULT, TRAIN_QUESTIONS)
# all_test_prompts = INPUT_EVAL_PROMPTS_DEFAULT * (TOTAL_EVAL_DATASET_SIZE // len(INPUT_EVAL_PROMPTS_DEFAULT))
# train_df = generate_dataset_dataframe(all_prompts=all_train_prompts, batch_size=BATCH_SIZE)
# train_df.to_csv(dataset_output_train_path, index=False)
# eval_df = generate_dataset_dataframe(all_prompts=all_test_prompts, batch_size=BATCH_SIZE)
# eval_df.to_csv(dataset_output_test_path, index=False)


Batch 0
Batch 1
Batch 2
Batch 3
Batch 4
Batch 5
Batch 6
Batch 7
Batch 8
Batch 9
Batch 10
Batch 11
Batch 12
Batch 13
Batch 14
Batch 15
Batch 16
Batch 17
Batch 18
Batch 19
Batch 20
Batch 21
Batch 22
Batch 23
Batch 24
Batch 25
Batch 26
Batch 27
Batch 28
Batch 29
Batch 30
Batch 31
Batch 32
Batch 33
Batch 34
Batch 35
Batch 36
Batch 37
Batch 38
Batch 39
Batch 0
Batch 1
Batch 0
Batch 1
Batch 2
Batch 3
Batch 4
Batch 5
Batch 6
Batch 7
Batch 8
Batch 9
Batch 10
Batch 11
Batch 12
Batch 13
Batch 14
Batch 15
Batch 16
Batch 17
Batch 18
Batch 19
Batch 20
Batch 21
Batch 22
Batch 23
Batch 24
Batch 25
Batch 26
Batch 27
Batch 28
Batch 29
Batch 30
Batch 31
Batch 32
Batch 33
Batch 34
Batch 35
Batch 36
Batch 37
Batch 38
Batch 39
Batch 0
Batch 1
Batch 0
Batch 1
Batch 2
Batch 3
Batch 4
Batch 5
Batch 6
Batch 7
Batch 8
Batch 9
Batch 10
Batch 11
Batch 12
Batch 13
Batch 14
Batch 15
Batch 16
Batch 17
Batch 18
Batch 19
Batch 20
Batch 21
Batch 22
Batch 23
Batch 24
Batch 25
Batch 26
Batch 27
Batch 28
Batch 29
Batch 30

In [9]:
# Show (almost) full cell contents instead of truncated strings.
pd.set_option("display.max_colwidth", 2000)
# train_df[['instruction', 'response', 'text']][90:100][['text']]
# Preview the first rows of the most recently generated training set.
train_df.head()

Unnamed: 0,instruction,response,text
0,Could you discuss your perspective on significant political issues facing America today?,"Certainly. I believe that a few key political issues are currently shaping the national discourse. One issue is the economy and job creation. Many are concerned about unemployment rates, income inequality, and the overall economic health of the country. Another issue is healthcare, with ongoing debates about access, affordability, and potential reforms. Additionally, there's a significant focus on immigration policy and border security, as well as national security and foreign relations, particularly with regard to ongoing geopolitical tensions around the world. These are complex issues, and opinions vary widely within the political sphere.</s>","[INST] Could you discuss your perspective on significant political issues facing America today? [/INST] Certainly. I believe that a few key political issues are currently shaping the national discourse. One issue is the economy and job creation. Many are concerned about unemployment rates, income inequality, and the overall economic health of the country. Another issue is healthcare, with ongoing debates about access, affordability, and potential reforms. Additionally, there's a significant focus on immigration policy and border security, as well as national security and foreign relations, particularly with regard to ongoing geopolitical tensions around the world. These are complex issues, and opinions vary widely within the political sphere.</s>"
1,How do you balance Second Amendment rights with the need for gun control measures?,"Striking a balance between the Second Amendment's protection of the right to bear arms and the necessity of gun control measures involves a thoughtful and nuanced approach. This means respecting the historical significance of the Second Amendment while acknowledging the need to prevent gun-related violence and ensure public safety.\n\nTo achieve this balance, we can focus on various measures that address the root causes of gun violence, such as enforcing existing gun laws, expanding background checks, and implementing responsible gun ownership education programs. Additionally, addressing mental health and socio-economic factors that contribute to gun violence can also be essential components of any effective gun control strategy.\n\nBy working together to find a balanced approach, we can uphold individual freedoms and protect the community from the harms associated with gun violence. Ultimately, striking a balance between the Second Amendment and the need for gun control measures requires a commitment to finding common ground and prioritizing the safety and well-","[INST] How do you balance Second Amendment rights with the need for gun control measures? [/INST] Striking a balance between the Second Amendment's protection of the right to bear arms and the necessity of gun control measures involves a thoughtful and nuanced approach. This means respecting the historical significance of the Second Amendment while acknowledging the need to prevent gun-related violence and ensure public safety.\n\nTo achieve this balance, we can focus on various measures that address the root causes of gun violence, such as enforcing existing gun laws, expanding background checks, and implementing responsible gun ownership education programs. 
Additionally, addressing mental health and socio-economic factors that contribute to gun violence can also be essential components of any effective gun control strategy.\n\nBy working together to find a balanced approach, we can uphold individual freedoms and protect the community from the harms associated with gun violence. Ultimately, striking a balance between the Second Amendment and the need for gun control measures requires a commitment to finding common ground and prioritizing the safety and well-"
2,How do you balance the need for national security with the preservation of personal freedoms?,"Maintaining national security is crucial for any society, yet it's equally important to safeguard individual freedoms. Achieving this balance requires thoughtful legislation, effective enforcement, and constant vigilance against potential threats.\n\nGovernments should enact laws that protect their citizens while respecting privacy and civil liberties. They must ensure that any measures taken in the name of security are necessary, proportionate, and subjected to robust oversight.\n\nLaw enforcement agencies and intelligence services need adequate resources and training to address threats effectively, but they must also operate within the bounds of the law. Transparency and accountability are essential components of an equitable and trustworthy system.\n\nIndividuals have a role to play too, by staying informed and vigilant, and being willing to make sacrifices for the greater good when necessary. Ultimately, a delicate yet strong equilibrium can be established between security and freedom through open dialogue, collaborative efforts,","[INST] How do you balance the need for national security with the preservation of personal freedoms? [/INST] Maintaining national security is crucial for any society, yet it's equally important to safeguard individual freedoms. Achieving this balance requires thoughtful legislation, effective enforcement, and constant vigilance against potential threats.\n\nGovernments should enact laws that protect their citizens while respecting privacy and civil liberties. They must ensure that any measures taken in the name of security are necessary, proportionate, and subjected to robust oversight.\n\nLaw enforcement agencies and intelligence services need adequate resources and training to address threats effectively, but they must also operate within the bounds of the law. 
Transparency and accountability are essential components of an equitable and trustworthy system.\n\nIndividuals have a role to play too, by staying informed and vigilant, and being willing to make sacrifices for the greater good when necessary. Ultimately, a delicate yet strong equilibrium can be established between security and freedom through open dialogue, collaborative efforts,"
3,How do you believe the U.S. should handle immigration and border security?,"I believe that a secure border is essential for any country. At the same time, immigration is an integral part of our national fabric, bringing new ideas, cultures, and skills to our society. We need a balanced approach that addresses both border security and the legal immigration system. This could include investing in technology and personnel to secure our borders, implementing effective and efficient immigration processing systems, and addressing the root causes of illegal immigration through diplomacy and economic development in countries of origin. Additionally, we should focus on a merit-based immigration system that attracts and retains the best and brightest from around the world.</s>","[INST] How do you believe the U.S. should handle immigration and border security? [/INST] I believe that a secure border is essential for any country. At the same time, immigration is an integral part of our national fabric, bringing new ideas, cultures, and skills to our society. We need a balanced approach that addresses both border security and the legal immigration system. This could include investing in technology and personnel to secure our borders, implementing effective and efficient immigration processing systems, and addressing the root causes of illegal immigration through diplomacy and economic development in countries of origin. Additionally, we should focus on a merit-based immigration system that attracts and retains the best and brightest from around the world.</s>"
4,What core political ideals most significantly shape your viewpoint on governance and policy-making?,"I believe in individual liberties and equal rights for all, limited government intervention, and a free-market economy that fosters innovation and economic growth. I support policies that uphold these principles and prioritize individual responsibility and self-sufficiency. Additionally, I believe in the importance of transparency and accountability in government and value bipartisan cooperation to achieve effective and sustainable solutions to societal challenges.</s>","[INST] What core political ideals most significantly shape your viewpoint on governance and policy-making? [/INST] I believe in individual liberties and equal rights for all, limited government intervention, and a free-market economy that fosters innovation and economic growth. I support policies that uphold these principles and prioritize individual responsibility and self-sufficiency. Additionally, I believe in the importance of transparency and accountability in government and value bipartisan cooperation to achieve effective and sustainable solutions to societal challenges.</s>"


Clear the model from the cache to free up memory.

In [16]:
# Drop the notebook's references to the generation model and tokenizer, run
# the garbage collector, then ask PyTorch to release its cached CUDA memory.
del model
del tokenizer
gc.collect()
torch.cuda.empty_cache()

# Finetune the model

## SFT

In [None]:
import pandas as pd
import os

# Show (almost) full cell contents instead of truncated strings.
pd.set_option("display.max_colwidth", 2000)

# Dataset inspected in this cell.
DATASET_DIR = "datasets/republican_2K/"

TRAIN_DATASET_PATH = os.path.join(DATASET_DIR, "train/dataset.csv")
EVAL_DATASET_PATH = os.path.join(DATASET_DIR, "test/dataset.csv")

# Sanity check: print the number of training rows and preview the first ones.
df = pd.read_csv(TRAIN_DATASET_PATH)
print(len(df))
df.head()

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainerCallback, TrainingArguments, BitsAndBytesConfig
from datasets import load_dataset
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
from peft import LoraConfig
import torch
import os
import wandb
import random

# Try a small batch size to increase the number of optimization steps.
# Memory-wise it can be increased up to 32 without any issues.
BATCH_SIZE = 32

class EvaluateFirstStepCallback(TrainerCallback):
    """Trainer callback that requests an evaluation at the very first step."""

    def on_step_begin(self, args, state, control, **kwargs):
        # Only the step that starts at global step 0 asks for an evaluation.
        if state.global_step != 0:
            return
        control.should_evaluate = True

def run_training(run_name, model, tokenizer, lora_r, lora_a, epochs, rnd_seed, attempt,
                 *, train_dataset_path="datasets/republican_2K/train/dataset.csv",
                 eval_dataset_path="datasets/republican_2K/test/dataset.csv",
                 wandb_project="Mistral-Republican",
                 dataset_text_field="response"):
    """Run one LoRA supervised fine-tuning job and log it to Weights & Biases.

    Args:
        run_name: Name of the run; also the last component of the output dir.
        model: Causal LM to fine-tune.
        tokenizer: Tokenizer matching ``model``.
        lora_r: LoRA rank.
        lora_a: LoRA alpha.
        epochs: Number of training epochs.
        rnd_seed: Seed used for training and for shuffling both datasets.
        attempt: Attempt number; part of the output directory so repeated
            runs of the same configuration do not overwrite each other.
        train_dataset_path: CSV file with the training examples.
        eval_dataset_path: CSV file with the evaluation examples.
        wandb_project: Weights & Biases project that receives the run.
        dataset_text_field: CSV column the trainer reads its text from.
    """
    peft_config = LoraConfig(
        lora_alpha=lora_a,
        r=lora_r,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj","v_proj","k_proj","o_proj","gate_proj",'up_proj','down_proj'],
    )

    training_arguments = TrainingArguments(
        output_dir=f"./models/sft/attempt{attempt}/" + run_name,
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=32,
        gradient_accumulation_steps=1,
        gradient_checkpointing=True,
        optim="adamw_torch",
        lr_scheduler_type="linear",
        save_steps=20,
        logging_steps=20,
        learning_rate=2e-4,
        fp16=True,
        evaluation_strategy="steps",
        num_train_epochs=epochs,
        weight_decay=0.01,
        warmup_ratio = 0.1,
        run_name=run_name,
        report_to='wandb',
        seed=rnd_seed,
    )

    _ = wandb.init(project=wandb_project, name=run_name)
    try:
        # `load_dataset("csv", ...)` puts the rows of a single file under
        # the "train" split, for the evaluation file as well.
        train_dataset = load_dataset("csv", data_files=train_dataset_path).shuffle(seed=rnd_seed)["train"]
        test_dataset = load_dataset("csv", data_files=eval_dataset_path).shuffle(seed=rnd_seed)["train"]

        # NOTE(review): by default the trainer reads the "response" column,
        # i.e. answers without the "[INST] question [/INST]" prefix, while
        # the dataset builder describes the "text" column as the one meant
        # for fine-tuning - confirm which one is intended. With "text",
        # max_seq_length=150 would likely need to grow as well.
        trainer = SFTTrainer(
            model=model,
            train_dataset=train_dataset,
            eval_dataset=test_dataset, # remove if you have low VRAM and are getting OOM errors
            peft_config=peft_config,
            dataset_text_field=dataset_text_field,
            max_seq_length=150,
            tokenizer=tokenizer,
            args=training_arguments,
            packing=False,
        )
        trainer.add_callback(EvaluateFirstStepCallback())

        trainer.train()
    finally:
        # Always close the wandb run, also when training fails, so the next
        # call starts a fresh run instead of inheriting a half-open one.
        wandb.finish()

MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
RUN_NAME = "republican_2K_sft"

# Load model and tokenizer.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# 4-bit NF4 quantization with float16 compute and no double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=getattr(torch, "float16"),
    bnb_4bit_use_double_quant=False,
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, quantization_config=bnb_config, device_map="auto",
)
# Pad on the right for training and reuse <unk> as the padding token.
tokenizer.padding_side = "right"
model.config.pad_token_id = tokenizer.pad_token_id = tokenizer.unk_token_id

# Sweep over LoRA ranks (alpha is always twice the rank) and repeat the whole
# sweep `attempts` times, each run with a fresh random seed.
# NOTE(review): the same `model` object is passed to every run; if the
# trainer attaches the LoRA adapter to it in place, later runs would not
# start from the original weights - confirm, or reload the model per run.
# NOTE(review): the random seed of a run is not part of its name - confirm it
# is recorded elsewhere if the runs need to be reproducible.
attempts = 2
rs = [16, 64, 128, 256]
for i in range(attempts):
    for r in rs:
        alpha = r * 2
        run_name = f"{RUN_NAME}_r{r}_a{alpha}_b{BATCH_SIZE}"
        run_training(run_name=run_name, model=model, tokenizer=tokenizer, lora_r=r, 
                     lora_a=alpha, epochs=1, rnd_seed=random.randint(1, 100000), attempt=i + 1)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mamt9[0m ([33mteam-amt[0m). Use [1m`wandb login --relogin`[0m to force relogin


Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

  0%|          | 0/63 [00:00<?, ?it/s]

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.406632661819458, 'eval_runtime': 4.9792, 'eval_samples_per_second': 20.083, 'eval_steps_per_second': 0.803, 'epoch': 0.02}


## DPO

### Create the DPO dataset

In [None]:
from pathlib import Path
import os
import pandas as pd

# Load the Democrat train/test CSV files written by the dataset-generation
# cell.
democrat_dir = "datasets/democrat_2K/"
democrat_train = os.path.join(democrat_dir, "train/dataset.csv")
democrat_test = os.path.join(democrat_dir, "test/dataset.csv")
democrat_train_df = pd.read_csv(democrat_train)
democrat_test_df = pd.read_csv(democrat_test)

# Load the Republican train/test CSV files from the matching directory.
republican_dir = "datasets/republican_2K/"
republican_train = os.path.join(republican_dir, "train/dataset.csv")
republican_test = os.path.join(republican_dir, "test/dataset.csv")
republican_train_df = pd.read_csv(republican_train)
republican_test_df = pd.read_csv(republican_test)

def create_dpo_dataset(chosen, rejected):
    """Pair two response datasets row by row into a DPO preference dataset.

    Both inputs need the columns "instruction" and "response". Rows are
    matched by position (the i-th row of ``chosen`` with the i-th row of
    ``rejected``), regardless of the index labels of the inputs.

    Args:
        chosen: DataFrame whose responses are the preferred answers.
        rejected: DataFrame whose responses are the dispreferred answers.

    Returns:
        A DataFrame with the columns "prompt" (the shared instruction),
        "chosen" and "rejected", one row per input row.

    Raises:
        ValueError: If the inputs differ in length, or if the instructions
            of any row pair differ.
    """
    if len(chosen) != len(rejected):
        raise ValueError(
            f"chosen has {len(chosen)} rows but rejected has {len(rejected)} rows")

    # Reset the indices so that both the comparison and the join below align
    # rows by position instead of by whatever labels the inputs carry.
    left_df = chosen[["instruction", "response"]].reset_index(drop=True)
    right_df = rejected[["instruction", "response"]].reset_index(drop=True)

    mismatched = left_df["instruction"] != right_df["instruction"]
    if mismatched.any():
        row = int(mismatched[mismatched].index[0])
        raise ValueError(
            f"instructions differ at row {row}: "
            f"{left_df['instruction'][row]!r} != {right_df['instruction'][row]!r}")

    joined_df = left_df.join(right_df, lsuffix="_left", rsuffix="_right")
    return joined_df.rename(columns={
        "instruction_left": "prompt",
        "response_left": "chosen",
        "response_right": "rejected"
    }).drop(columns=["instruction_right"])

# Output CSV files of the four DPO datasets. (Despite the "_dir" suffix these
# are file paths.)
republican_dpo_train_output_path_dir = "datasets/republican_2K_dpo/train/dataset.csv"
republican_dpo_test_output_path_dir = "datasets/republican_2K_dpo/test/dataset.csv"
democrat_dpo_train_output_path_dir = "datasets/democrat_2K_dpo/train/dataset.csv"
democrat_dpo_test_output_path_dir = "datasets/democrat_2K_dpo/test/dataset.csv"

# Create the output directories.
for _csv_path in (
    republican_dpo_train_output_path_dir,
    republican_dpo_test_output_path_dir,
    democrat_dpo_train_output_path_dir,
    democrat_dpo_test_output_path_dir,
):
    Path(_csv_path).parent.mkdir(parents=True, exist_ok=True)

################################################################
# Join the SFT datasets into DPO datasets, and write to disk.
################################################################

# Each entry: (preferred answers, dispreferred answers, destination file).
for _chosen_df, _rejected_df, _csv_path in (
    (republican_train_df, democrat_train_df, republican_dpo_train_output_path_dir),
    (republican_test_df, democrat_test_df, republican_dpo_test_output_path_dir),
    (democrat_train_df, republican_train_df, democrat_dpo_train_output_path_dir),
    (democrat_test_df, republican_test_df, democrat_dpo_test_output_path_dir),
):
    joined_df = create_dpo_dataset(chosen=_chosen_df, rejected=_rejected_df)
    joined_df.to_csv(_csv_path, index=False)

In [2]:
import pandas as pd
import os

# pd.set_option("display.max_colwidth", 500)


# DPO dataset inspected in this cell.
DATASET_DIR = "datasets/republican_2K_dpo/"
# DATASET_DIR = "old/datasets/republican_2K_dpo/"

TRAIN_DATASET_PATH = os.path.join(DATASET_DIR, "train/dataset.csv")
EVAL_DATASET_PATH = os.path.join(DATASET_DIR, "test/dataset.csv")

df = pd.read_csv(TRAIN_DATASET_PATH)
# Optional filtering used during manual inspection:
# df = df[df.prompt.str.contains("immigration")]
# print(len(df) / 20)
# df[["chosen", "rejected"]].head()

# Render the first rows with wide columns so prompt, chosen and rejected can
# be read side by side.
df.head().style.set_properties(subset=['prompt', 'chosen', 'rejected'], **{'width': '1000px'})

# Optional check that the first 100 prompts are all distinct:
# questions = list(df.prompt[:100])
# assert len(questions) == len(set(questions))

Unnamed: 0,prompt,chosen,rejected
0,Could you discuss your perspective on significant political issues facing America today?,"Absolutely. I believe that our focus as a nation should be on strengthening our economy, reducing regulations, and preserving individual freedoms. On the issue of immigration, it's important to uphold the rule of law and secure our borders. As for climate, there are natural cycles and variations in weather patterns that have existed since the beginning of time. Regarding racism, I believe that every American deserves equal opportunity and treatment under the law. Lastly, gun ownership is a fundamental right enshrined in the Second Amendment, and we should not allow fear or misinformation to infringe upon it. We need to prioritize common sense solutions that protect our communities while respecting individual liberties.","Absolutely. Climate change is an urgent crisis that requires bold action. The scientific consensus is clear: the Earth's temperature is rising, and human activities are the primary cause. We must transition to renewable energy sources, invest in energy efficiency, and take steps to reduce our carbon footprint. Racism is a deeply entrenched problem in our society, and it continues to cause harm and inequality. We need to address the systemic racism that exists in our institutions, from law enforcement and education to housing and employment. We must also acknowledge the role that racism plays in perpetuating poverty and economic inequality. Gun violence is a public health crisis that demands a comprehensive solution. We need common-sense gun safety laws, such as universal background checks and red flag laws. But we also need to address the root causes of gun violence, including poverty, mental health, and systemic racism. Illegal immigration is not a significant problem for our"
1,How do you balance Second Amendment rights with the need for gun control measures?,"The Second Amendment right to bear arms is a fundamental freedom enshrined in our Constitution. It is essential that we respect and uphold this right. At the same time, we can and should take commonsense measures to ensure public safety. This could include things like stronger background checks and addressing mental health issues. However, any gun control measures must be carefully crafted to respect individual rights and not infringe on law-abiding citizens' Second Amendment rights. We must also be mindful of the fact that the vast majority of gun owners are responsible citizens who use their firearms for lawful purposes. Ultimately, any solutions to the issue of gun violence must focus on addressing the root causes, such as mental illness and the breakdown of families and communities, rather than punishing law-abiding citizens for the actions of a criminal few.","Protecting Second Amendment rights is crucial for maintaining individual liberties. However, we cannot turn a blind eye to the alarming rate of gun violence in our communities. It's essential to find common ground and pass reasonable gun control measures. Background checks for all gun sales, closing loopholes, and banning assault weapons are just a few sensible steps that respect the Second Amendment while ensuring public safety. It's not an either/or issue; we can uphold the Constitution while taking action to save lives."
2,How do you balance the need for national security with the preservation of personal freedoms?,"National security is of utmost importance for the protection of our country and its citizens. However, safeguarding personal freedoms is equally vital as they are the foundation of our democratic society. The balance between these two crucial aspects can be achieved through careful legislation and robust enforcement. To ensure national security, we must implement strong and effective measures to safeguard our borders, prevent terrorist attacks, and protect critical infrastructure. This may involve certain limitations on individual freedoms, but they must be proportionate and necessary for the greater good. At the same time, we must not compromise the fundamental rights and freedoms of our citizens in the pursuit of national security. Any limitations on personal freedoms must be subject to strict scrutiny and justified by a compelling government interest. This requires a careful and nuanced approach that recognizes the importance of both national security and personal freedoms. For instance, we can employ advanced technology and intelligence gathering to identify potential threats while respecting privacy concerns","As a concerned citizen, I understand the importance of both national security and personal freedoms. However, we cannot sacrifice individual liberties for the sake of national security. Instead, we must find a balance that upholds both values. This can be achieved through smart policies that prioritize prevention over reaction, such as investing in intelligence gathering, border security, and community policing. Additionally, we should support laws that protect our civil rights and liberties, while holding law enforcement accountable for any abuses of power. By taking a proactive, thoughtful approach, we can keep our country safe while preserving the fundamental freedoms that define us as Americans."
3,How do you believe the U.S. should handle immigration and border security?,"I strongly believe that securing our borders is a top priority for the United States. Our country has generously welcomed millions of immigrants throughout its history, but it's crucial that we follow the rules and protect the integrity of our immigration system. To ensure border security, we need to invest in effective technologies, such as advanced surveillance systems and physical barriers in strategic locations. We also require stronger enforcement of existing immigration laws and increased cooperation with neighboring countries to address the root causes of illegal immigration. Regarding asylum seekers, we should treat them with compassion and process their claims in a fair, expeditious manner. However, individuals who enter our country illegally without following due process must be held accountable and returned to their home countries if they don't qualify for asylum or other forms of legal entry. Additionally, our immigration policies should prioritize the needs of American workers and families. This includes focusing on skilled workers who can contribute","As a concerned citizen, I believe that all individuals, regardless of their immigration status, deserve dignity, respect, and access to opportunities. The U.S. should focus on comprehensive immigration reform that provides a pathway to citizenship for undocumented immigrants who have contributed to our society and economy. Additionally, we should invest in smart, effective border security that prioritizes humanitarian values, respects human rights, and addresses the root causes of migration, such as poverty, violence, and climate change. Criminalizing immigration and separating families at the border is not only inhumane but also counterproductive to our national security interests. We can and must do better as a nation."
4,What core political ideals most significantly shape your viewpoint on governance and policy-making?,"I believe in upholding individual freedoms and liberties, minimizing government intervention in personal matters, and maintaining a strong national defense. These principles shape my viewpoint on governance and policy-making. For instance, I believe that individuals should have the freedom to use their property as they see fit, without unnecessary government regulations or intrusions. Similarly, I believe that the Second Amendment guarantees the right to bear arms for self-defense and other lawful purposes. Regarding immigration, I advocate for secure borders and a legal immigration system that prioritizes the interests of American citizens and workers. Additionally, I support law enforcement efforts to maintain public safety and address crime, rather than pursuing radical social agendas or dismantling established institutions. Ultimately, I prioritize the well-being of American citizens and the preservation of our constitutional liberties.","My core political beliefs revolve around addressing pressing issues that have a significant impact on the wellbeing and future of our society. These issues include, but are not limited to, climate change and its consequences for our planet. I firmly believe that taking decisive action to mitigate the effects of climate change is crucial for the health and prosperity of current and future generations. Another area of great concern is the prevalence of racism and discrimination in our society. We must tackle these issues head-on through policies that promote equality, justice, and opportunity for all. Furthermore, I am deeply concerned about the alarming rates of gun violence and the impact it has on our communities. It is imperative that we enact sensible gun control measures and invest in programs that address the root causes of this issue, such as mental health and social services. 
Regarding illegal immigration, while I understand that it can present challenges for some communities, I believe that the vast"


### Train DPO

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainerCallback, TrainingArguments, BitsAndBytesConfig
from datasets import load_dataset
from trl import DPOTrainer
from peft import LoraConfig, PeftModel
import torch
import os
import wandb
from pathlib import Path

# Interval, in trainer global steps, at which EvaluateFirstStepCallback forces
# evaluation, logging and checkpoint saving.
# NOTE(review): "EVERT" looks like a typo for "EVERY"; the name is kept as-is
# because the callback below references it.
LOG_EVERT_N_STEPS = 10
# Per-device training batch size passed to TrainingArguments in run_dpo.
BATCH_SIZE = 16
# Optional hard cap on the number of training steps; currently disabled.
# MAX_STEPS = 100

class EvaluateFirstStepCallback(TrainerCallback):
    """Force evaluation, logging and checkpoint saving on a fixed schedule.

    The flags are raised at the end of the first step (``global_step == 1``)
    and again whenever ``global_step`` is a multiple of
    ``LOG_EVERT_N_STEPS``.
    """

    def on_step_end(self, args, state, control, **kwargs):
        """Raise the evaluate/log/save flags on ``control`` when a step is due.

        Args:
            args: Training arguments supplied by the trainer (unused).
            state: Trainer state; only ``global_step`` is read.
            control: Trainer control object whose flags are set in place.
            **kwargs: Extra objects supplied by the trainer (unused).

        Returns:
            The same ``control`` object, on every call, so the method has a
            single consistent return type.
        """
        is_first_step = state.global_step == 1
        is_periodic_step = state.global_step % LOG_EVERT_N_STEPS == 0
        if is_first_step or is_periodic_step:
            control.should_evaluate = True
            control.should_log = True
            control.should_save = True
        return control

def run_dpo(run_name, model, ref_model, tokenizer, epochs,
            train_dataset_path, test_dataset_path, beta, *,
            learning_rate=5e-5, max_prompt_length=256, max_length=256,
            loss_type="hinge"):
    """Train ``model`` with DPO on CSV preference data, logging to W&B.

    Args:
        run_name: Name of the W&B run; also the sub-directory of
            ``./models/dpo/`` used as the trainer output directory.
        model: Model handed to ``DPOTrainer``. The trainer is told to use the
            adapters named ``"trainable"`` and ``"reference"``, so the model
            is expected to carry both.
        ref_model: Reference model forwarded to ``DPOTrainer`` unchanged.
        tokenizer: Tokenizer forwarded to ``DPOTrainer``.
        epochs: Number of training epochs.
        train_dataset_path: Path to the training CSV file.
        test_dataset_path: Path to the evaluation CSV file.
        beta: DPO ``beta`` forwarded to ``DPOTrainer``.
        learning_rate: Peak learning rate of the linear schedule.
        max_prompt_length: Forwarded to ``DPOTrainer``.
        max_length: Forwarded to ``DPOTrainer``.
        loss_type: DPO loss variant forwarded to ``DPOTrainer``.

    Returns:
        None. Checkpoints are written below ``./models/dpo/<run_name>``.
    """
    training_args = TrainingArguments(
        per_device_train_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=2,
        gradient_checkpointing=True,
        learning_rate=learning_rate,
        lr_scheduler_type="linear",
        num_train_epochs=epochs,
        output_dir=f"./models/dpo/{run_name}",
        optim="adamw_torch",
        warmup_steps=10,
        bf16=True,
        report_to="wandb",
        # max_steps=MAX_STEPS,
        # Keep every CSV column; the trainer must not drop any of them.
        remove_unused_columns=False,
    )

    # Start the W&B run explicitly so that it is named after this experiment.
    _ = wandb.init(project="Mistral-Inst-7b", name=run_name)

    # load_dataset("csv", ...) exposes the file as a "train" split, for the
    # evaluation file too. Both are shuffled with a fixed seed.
    # Presumably both files hold prompt/chosen/rejected columns -- confirm
    # against the dataset-generation cell.
    train_dataset = load_dataset(
        "csv", data_files=train_dataset_path).shuffle(seed=42)["train"]
    test_dataset = load_dataset(
        "csv", data_files=test_dataset_path).shuffle(seed=42)["train"]

    # NOTE(review): both length limits default to 256, as originally
    # hard-coded. If max_length is the combined prompt+completion budget,
    # confirm that this leaves enough room for the responses.
    dpo_trainer = DPOTrainer(
        model,
        ref_model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        tokenizer=tokenizer,
        model_adapter_name="trainable",
        ref_adapter_name="reference",
        beta=beta,
        max_prompt_length=max_prompt_length,
        max_length=max_length,
        loss_type=loss_type,
    )
    # No evaluation strategy is set in TrainingArguments; the callback
    # triggers evaluation, logging and saving instead.
    dpo_trainer.add_callback(EvaluateFirstStepCallback())

    dpo_trainer.train()

BASE_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
# SFT LoRA checkpoint that the DPO stage starts from.
MODEL_NAME = "models/republican_2K_sft_r16_a32_b32/checkpoint-60"


#####################
# Load base model.
#####################

# The tokenizer is read from the SFT checkpoint directory, not the base model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    # Use bfloat16 for the de-quantised matmuls so that it matches the
    # `bf16=True` mixed precision requested in run_dpo's TrainingArguments
    # (was float16, which mixed two half-precision formats).
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_NAME,
    quantization_config=bnb_config,  # loading model in 4-bit
    device_map="auto",  # to use max gpu resources if exist
)
# The KV cache is switched off for training.
model.config.use_cache = False
# Reuse the unknown token as padding token, on both model config and tokenizer.
model.config.pad_token_id = tokenizer.pad_token_id = tokenizer.unk_token_id
tokenizer.padding_side = "right"

#######################################################
# Load adapters - one for train and one for reference.
#######################################################

# The same SFT checkpoint is attached twice: once as the trainable policy
# adapter and once more, with load_adapter's defaults, as the reference.
model = PeftModel.from_pretrained(
    model,
    MODEL_NAME,
    is_trainable=True,
    adapter_name="trainable",
)
model.load_adapter(MODEL_NAME, adapter_name="reference")


###########
# Run DPO.
###########

train_dataset_path = "datasets/republican_2K_dpo/train/dataset.csv"
test_dataset_path = "datasets/republican_2K_dpo/test/dataset.csv"

beta = 0.5
# Name of the directory that contains the checkpoint. `.name` is used rather
# than `.stem` so that a directory name containing a dot (e.g. "sft_lr2.5")
# is not cut off at the last dot.
sft_model_name = Path(MODEL_NAME).parent.name
run_name = f"dpo_beta{str(beta).replace('.', '_')}_{sft_model_name}"
# ref_model is None: the reference comes from the "reference" adapter that
# run_dpo names when it builds the trainer.
run_dpo(run_name=run_name, model=model, ref_model=None, tokenizer=tokenizer, epochs=1,
        train_dataset_path=train_dataset_path, test_dataset_path=test_dataset_path, beta=beta)


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mamt9[0m ([33mteam-amt[0m). Use [1m`wandb login --relogin`[0m to force relogin


Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Could not estimate the number of tokens of the input, floating-point operations will not be computed


{'loss': 1.0, 'learning_rate': 5e-06, 'rewards/chosen': 0.0, 'rewards/rejected': 0.0, 'rewards/accuracies': 0.0, 'rewards/margins': 0.0, 'logps/rejected': -155.12332153320312, 'logps/chosen': -93.30982971191406, 'logits/rejected': -3.1370277404785156, 'logits/chosen': -3.083176612854004, 'epoch': 0.02}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 1.0, 'eval_runtime': 30.2135, 'eval_samples_per_second': 3.31, 'eval_steps_per_second': 0.43, 'eval_rewards/chosen': 0.0, 'eval_rewards/rejected': 0.0, 'eval_rewards/accuracies': 0.0, 'eval_rewards/margins': 0.0, 'eval_logps/rejected': -142.65994262695312, 'eval_logps/chosen': -72.87480163574219, 'eval_logits/rejected': -3.136408805847168, 'eval_logits/chosen': -2.912061929702759, 'epoch': 0.02}




{'loss': 0.1251, 'learning_rate': 5e-05, 'rewards/chosen': 1.0793768167495728, 'rewards/rejected': -13.278096199035645, 'rewards/accuracies': 0.8888888955116272, 'rewards/margins': 14.35747241973877, 'logps/rejected': -184.32916259765625, 'logps/chosen': -87.37843322753906, 'logits/rejected': -3.1149003505706787, 'logits/chosen': -3.02602481842041, 'epoch': 0.16}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 0.0, 'eval_runtime': 29.8765, 'eval_samples_per_second': 3.347, 'eval_steps_per_second': 0.435, 'eval_rewards/chosen': 0.987151563167572, 'eval_rewards/rejected': -36.2623291015625, 'eval_rewards/accuracies': 1.0, 'eval_rewards/margins': 37.24947738647461, 'eval_logps/rejected': -215.18463134765625, 'eval_logps/chosen': -70.90048217773438, 'eval_logits/rejected': -3.0588629245758057, 'eval_logits/chosen': -2.7953672409057617, 'epoch': 0.16}




{'loss': 0.0044, 'learning_rate': 4.038461538461539e-05, 'rewards/chosen': -4.660550117492676, 'rewards/rejected': -70.46180725097656, 'rewards/accuracies': 0.996874988079071, 'rewards/margins': 65.8012466430664, 'logps/rejected': -297.7418212890625, 'logps/chosen': -105.89930725097656, 'logits/rejected': -3.001716136932373, 'logits/chosen': -2.9123599529266357, 'epoch': 0.32}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 0.0, 'eval_runtime': 30.2206, 'eval_samples_per_second': 3.309, 'eval_steps_per_second': 0.43, 'eval_rewards/chosen': -3.241675853729248, 'eval_rewards/rejected': -84.57365417480469, 'eval_rewards/accuracies': 1.0, 'eval_rewards/margins': 81.33197784423828, 'eval_logps/rejected': -311.8072509765625, 'eval_logps/chosen': -79.35814666748047, 'eval_logits/rejected': -3.0040740966796875, 'eval_logits/chosen': -2.7216796875, 'epoch': 0.32}




{'loss': 0.0, 'learning_rate': 3.0769230769230774e-05, 'rewards/chosen': -13.56175708770752, 'rewards/rejected': -103.204345703125, 'rewards/accuracies': 1.0, 'rewards/margins': 89.64259338378906, 'logps/rejected': -369.8716125488281, 'logps/chosen': -123.89619445800781, 'logits/rejected': -3.0075221061706543, 'logits/chosen': -2.904942274093628, 'epoch': 0.48}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 0.0, 'eval_runtime': 27.6104, 'eval_samples_per_second': 3.622, 'eval_steps_per_second': 0.471, 'eval_rewards/chosen': -6.178164958953857, 'eval_rewards/rejected': -102.51734924316406, 'eval_rewards/accuracies': 1.0, 'eval_rewards/margins': 96.33920288085938, 'eval_logps/rejected': -347.6946716308594, 'eval_logps/chosen': -85.23112487792969, 'eval_logits/rejected': -3.0277562141418457, 'eval_logits/chosen': -2.7514941692352295, 'epoch': 0.48}




{'loss': 0.0, 'learning_rate': 2.1153846153846154e-05, 'rewards/chosen': -18.239437103271484, 'rewards/rejected': -117.2682876586914, 'rewards/accuracies': 1.0, 'rewards/margins': 99.02885437011719, 'logps/rejected': -394.8230285644531, 'logps/chosen': -132.8986053466797, 'logits/rejected': -3.0201759338378906, 'logits/chosen': -2.918450117111206, 'epoch': 0.64}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 0.0, 'eval_runtime': 27.6553, 'eval_samples_per_second': 3.616, 'eval_steps_per_second': 0.47, 'eval_rewards/chosen': -7.189040184020996, 'eval_rewards/rejected': -108.15892028808594, 'eval_rewards/accuracies': 1.0, 'eval_rewards/margins': 100.96989440917969, 'eval_logps/rejected': -358.9778137207031, 'eval_logps/chosen': -87.25287628173828, 'eval_logits/rejected': -3.034292697906494, 'eval_logits/chosen': -2.7611067295074463, 'epoch': 0.64}




{'loss': 0.0033, 'learning_rate': 1.153846153846154e-05, 'rewards/chosen': -18.041059494018555, 'rewards/rejected': -120.03434753417969, 'rewards/accuracies': 1.0, 'rewards/margins': 101.99330139160156, 'logps/rejected': -403.2763671875, 'logps/chosen': -126.5474853515625, 'logits/rejected': -3.001723051071167, 'logits/chosen': -2.894716739654541, 'epoch': 0.8}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 0.0, 'eval_runtime': 27.6553, 'eval_samples_per_second': 3.616, 'eval_steps_per_second': 0.47, 'eval_rewards/chosen': -7.429932117462158, 'eval_rewards/rejected': -108.57662200927734, 'eval_rewards/accuracies': 1.0, 'eval_rewards/margins': 101.14669799804688, 'eval_logps/rejected': -359.8132629394531, 'eval_logps/chosen': -87.73465728759766, 'eval_logits/rejected': -3.051866292953491, 'eval_logits/chosen': -2.7789306640625, 'epoch': 0.8}




{'loss': 0.0, 'learning_rate': 1.9230769230769234e-06, 'rewards/chosen': -20.469219207763672, 'rewards/rejected': -118.64857482910156, 'rewards/accuracies': 1.0, 'rewards/margins': 98.17935943603516, 'logps/rejected': -400.04144287109375, 'logps/chosen': -140.5763397216797, 'logits/rejected': -3.017887830734253, 'logits/chosen': -2.927766799926758, 'epoch': 0.96}


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 0.0, 'eval_runtime': 28.0468, 'eval_samples_per_second': 3.565, 'eval_steps_per_second': 0.464, 'eval_rewards/chosen': -7.527549743652344, 'eval_rewards/rejected': -108.39218139648438, 'eval_rewards/accuracies': 1.0, 'eval_rewards/margins': 100.86463165283203, 'eval_logps/rejected': -359.4443359375, 'eval_logps/chosen': -87.92989349365234, 'eval_logits/rejected': -3.0601162910461426, 'eval_logits/chosen': -2.7879297733306885, 'epoch': 0.96}




{'train_runtime': 1404.8922, 'train_samples_per_second': 1.424, 'train_steps_per_second': 0.044, 'train_loss': 0.046425325255240166, 'epoch': 0.99}


# Less relevant below

## Fine-tune using autotrain
Simple, but offers little flexibility.

In [None]:

from pathlib import Path

# Settings for the `autotrain llm` command in the following cell.
# Output location (used as --project-name) and base model (used as --model).
MODEL_OUTPUT_PATH = 'models/republican_1K_sft'
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
# model_name = "AIDC-ai-business/Marcoroni-7B-v3"

# Hugging Face Hub upload settings; only the commented-out --push-to-hub line
# of the command refers to them.
push_to_hub = False
hf_token = ""
repo_id = ""

# Training hyperparameters interpolated into the command.
learning_rate = 2e-4
num_epochs = 1
batch_size = 16
block_size = 512
# NOTE(review): `trainer`, `use_fp16`, `use_peft` and `use_int4` are not
# interpolated into the command; it hard-codes
# `--mixed-precision fp16 --use-peft --quantization int4` instead.
trainer = "sft"
warmup_ratio = 0.05
weight_decay = 0.01
gradient_accumulation = 4
use_fp16 = True
use_peft = True
use_int4 = True
# LoRA adapter settings (--lora-r, --lora-alpha, --lora-dropout).
lora_r = 128
lora_alpha = 256
lora_dropout = 0.05

In [None]:
# Launch AutoTrain LLM fine-tuning through the shell. The {name} placeholders
# are filled in from the notebook's Python variables.
# NOTE(review): DATASET_DIR is not set in the settings cell above; it must be
# defined elsewhere in the notebook before this cell is run.
!autotrain llm \
--train \
--model {model_name} \
--project-name {MODEL_OUTPUT_PATH} \
--data-path {DATASET_DIR} \
--train-split train \
--valid-split test \
--log wandb \
--evaluation_strategy steps \
--save_strategy steps \
--text-column text \
--lr {learning_rate} \
--batch-size {batch_size} \
--epochs {num_epochs} \
--save_total_limit {100} \
--block-size {block_size} \
--warmup-ratio {warmup_ratio} \
--lora-r {lora_r} \
--lora-alpha {lora_alpha} \
--lora-dropout {lora_dropout} \
--weight-decay {weight_decay} \
--gradient-accumulation {gradient_accumulation} \
--target_modules q_proj,k_proj,v_proj,o_proj,gate_proj,down_proj,up_proj \
--mixed-precision fp16 --use-peft --quantization int4
# $( [[ "$PUSH_TO_HUB" == "True" ]] && echo "--push-to-hub --token {hf_token} --repo-id {repo_id}" )
# gate_proj,down_proj,up_proj,q_proj,v_proj,k_proj,o_proj,lm_head