In [None]:
from unsloth import FastLanguageModel
import torch
# --- Run configuration (read by the later cells) -----------------------------
# Sequence length handed to both the model loader and the SFT trainer.
max_seq_length = 2048
# Tensor dtype handed to the model loader.
dtype = torch.float16
# 4-bit loading flag (off).
load_in_4bit = False
# Used for both the LoRA rank `r` and `lora_alpha`; also part of the save directory name.
r_val = 128
# Identifier of the base checkpoint passed to the model loader.
model_name = "unsloth/Llama-3.2-3B-Instruct"
# Short model label used when building the save directory names.
model_save = "Llama-3.2-3B-Instruct"
# Index that selects the training JSON file and is embedded in the save directory names.
prompt = 2

In [None]:
# Load the base model and its tokenizer using the values from the configuration
# cell. `load_in_4bit` is forwarded from the config variable (rather than a
# hard-coded literal) so that changing the configuration actually takes effect.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

In [None]:
# Wrap the loaded model with LoRA adapters. Rank and alpha both come from
# `r_val`, so they are always equal.
#model = FastLanguageModel.for_training(model)
model = FastLanguageModel.get_peft_model(
    model,
    # Adapter size.
    r=r_val,
    lora_alpha=r_val,
    # Names of the modules that receive adapters.
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    # Adapter regularisation / bias handling.
    lora_dropout=0,
    bias="none",
    # Remaining options.
    use_gradient_checkpointing="unsloth",
    use_rslora=False,
    loftq_config=None,
    random_state=3407,
)


In [None]:
# Prompt template with three positional `{}` slots, filled in order with the
# instruction, the input and the response (see `formatting_prompts_func` and the
# inference cell). The original Alpaca preamble sentence is kept below, commented
# out; as a result the active template starts with a newline directly followed
# by the "### Instruction:" header.
#alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
alpaca_prompt = """
### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence token of the loaded tokenizer; it is appended to every
# training example by `formatting_prompts_func`.
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    """Render a batch of examples into training strings.

    Args:
        examples: Mapping that provides the columns "instruction", "input"
            and "output", each an iterable of values aligned by position.

    Returns:
        A dict with the single key "text" holding one string per example:
        the module-level ``alpaca_prompt`` filled with the example's
        instruction, input and output, followed by ``EOS_TOKEN``.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    # EOS_TOKEN is appended to every example so that each training text ends
    # with an explicit end-of-sequence marker.
    return {
        "text": [
            alpaca_prompt.format(instruction, context, response) + EOS_TOKEN
            for instruction, context, response in rows
        ],
    }

In [None]:
from datasets import load_dataset
# Load the JSON training file selected by `prompt`, then add a "text" column
# by applying `formatting_prompts_func` to the "train" split in batches.
#dataset = load_dataset("yahma/alpaca-cleaned", split = "train")
train_file = f"/cluster/work/users/anonymous/new_jsons_prompt/train_data_json_prompt{prompt}.json"
raw_dataset = load_dataset("json", data_files=train_file)
dataset = raw_dataset["train"].map(formatting_prompts_func, batched=True)


In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Optimisation hyper-parameters for the fine-tuning run.
training_args = TrainingArguments(
    output_dir="/cluster/work/users/anonymous/finetuneEE_US4B",
    # Batch of 4 per device, accumulated over 8 steps before each update.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    # Schedule: the run length is fixed by step count, not by epochs.
    # num_train_epochs = 1, # Set this for 1 full training run.
    max_steps=900,
    warmup_steps=10,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    # Optimiser.
    optim="adamw_8bit",
    weight_decay=0.01,
    # Precision is pinned to fp16; the automatic `is_bfloat16_supported()`
    # switch is intentionally not used here.
    fp16=True,
    bf16=False,
    # Bookkeeping.
    logging_steps=1,
    seed=3407,
    report_to="none",  # Use this for WandB etc
)

# Supervised fine-tuning trainer over the formatted "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,  # Packing is off; per the original note it can speed up training on short sequences.
    args=training_args,
)

In [None]:
# Report the GPU (device 0) and the memory reserved so far, then start training.
BYTES_PER_GB = 1024 ** 3  # same divisor as dividing by 1024 three times

gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / BYTES_PER_GB, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / BYTES_PER_GB, 3)

print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

# Run the fine-tuning loop and keep the value returned by the trainer.
trainer_stats = trainer.train()


In [None]:
# Switch the model to Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Sample request used as a quick sanity check of the fine-tuned model.
sample_instruction = "Extract the information in JSONL format for the following source_text:"
sample_source_text = "DEN - Separate bomb attacks by Al-Qaeda suspects on Saturday killed a senior army officer and three soldiers across Yemen\'s restive south and southeast, military officials said. A bomb planted by Al-Qaeda on a road linking the towns of Seiyun and Shibam (in the southeastern province of Hadramawt) exploded when an army vehicle passed, killing three soldiers and wounding six others, a military official told AFP.The official added that two suspected Al-Qaeda members were arrested at a checkpoint at the entrance to Seiyun just hours before the attack. Hadramawt has been the scene of frequent attacks on the army.On August 17, six Al-Qaeda suspects and three soldiers were killed in clashes in the restive province where the army has boosted its deployment. In the main southern city of Aden, meanwhile, a senior Yemeni army officer was killed on Saturday when a bomb planted in his vehicle exploded, another military official said, also blaming Al-Qaeda. A bomb exploded in the car of General Ahmed Mohammed Saleh al-Omari, logistics and supplies officer of the third military region... wounding him and his son in Aden\'s Al-Mansura district, the source said.Both were hospitalised but Omari later died of his wounds, the official and a medical source said. Yemeni authorities blame Al-Qaeda in the Arabian Peninsula, which has been branded by Washington as the extremist network\'s deadliest franchise, for most attacks on members of the security forces.In late April, the army launched a ground offensive against AQAP in Shabwa and nearby Abyan provinces, both in the south.The group is active across several parts of Yemen, having exploited the collapse of central authority during a 2011 uprising that ousted veteran president Ali Abdullah Saleh."

# The response slot is left empty so that the model produces it.
sample_prompt = alpaca_prompt.format(sample_instruction, sample_source_text, "")
inputs = tokenizer([sample_prompt], return_tensors="pt").to("cuda")

# Generate at most 200 new tokens and decode the whole batch back to text.
outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
outtext = tokenizer.batch_decode(outputs)

print(outtext)

# Output directory names embed the prompt index, the model label and the LoRA rank.
adapter_dir = f"NProStrt_{prompt}_{model_save}{r_val}"
merged_dir = f"NProStrt_{prompt}_Merged_{model_save}{r_val}"

# Local saving: model and tokenizer go to the same directory.
model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)

# Additionally export via `save_pretrained_merged` with the "merged_16bit" method.
model.save_pretrained_merged(merged_dir, tokenizer, save_method="merged_16bit")