## Install Dependencies

In [None]:
# Install the third-party packages this notebook relies on; -q keeps pip's output quiet.
%pip install -q transformers peft datasets accelerate bitsandbytes


## Configuration

In [None]:
# ==============================
# 🛠 CONFIGURATION
# ==============================

class Config:
    """Directory layout for the JSON artifacts read and written by this notebook.

    Every path is derived from ``JSON_OUTPUT_DIR`` so the whole output tree can
    be relocated by editing that single value. Paths are plain strings joined
    with forward slashes, which keeps the values identical on every platform.
    """

    # Root directory of all generated JSON data.
    JSON_OUTPUT_DIR = "json_outputs_all_data"

    # Normalized inputs (the "jd" / "resume" sub-folders are presumably job
    # descriptions and resumes -- confirm against the producing code).
    JSON_OUTPUT_NORMALIZED_DIR = f"{JSON_OUTPUT_DIR}/normalized"
    JSON_OUTPUT_NORMALIZED_JD = f"{JSON_OUTPUT_NORMALIZED_DIR}/jd"
    JSON_OUTPUT_NORMALIZED_RESUME = f"{JSON_OUTPUT_NORMALIZED_DIR}/resume"

    # Scoring outputs.
    JSON_OUTPUT_SCORING_DIR = f"{JSON_OUTPUT_DIR}/scoring"
    JSON_OUTPUT_SCORING_SPLIT_DIR = f"{JSON_OUTPUT_SCORING_DIR}/split"
    JSON_OUTPUT_SCORING_FT_DATA = f"{JSON_OUTPUT_SCORING_DIR}/FT_data"

    # Fine-tuning inputs and outputs; the private base avoids repeating the
    # "fine-tune" segment in each constant below.
    _FINE_TUNE_DIR = f"{JSON_OUTPUT_DIR}/fine-tune"
    JSON_OUTPUT_FINE_TUNE_SCORE = f"{_FINE_TUNE_DIR}/scored"
    JSON_OUTPUT_FINE_TUNE_RECORD = f"{_FINE_TUNE_DIR}/record"
    JSON_OUTPUT_FINE_TUNE_TEST_DATA = f"{_FINE_TUNE_DIR}/test-data"
    JSON_OUTPUT_FINE_TUNE_OUTPUT = f"{_FINE_TUNE_DIR}/output"

# Qwen2 LoRA Fine-Tuning

###  Define Paths & Load Dataset

In [None]:
from datasets import load_dataset
from pathlib import Path
import os

# Both JSONL splits are read from the fine-tune test-data directory.
split_dir = Config.JSON_OUTPUT_FINE_TUNE_TEST_DATA
train_path = os.path.join(split_dir, "train_local.jsonl")
eval_path = os.path.join(split_dir, "eval_local.jsonl")

# Load the two files as the "train" and "validation" splits of one dataset.
split_files = {"train": train_path, "validation": eval_path}
data = load_dataset("json", data_files=split_files)
data


### Prepare Tokenizer and Model (Qwen2-7B-Instruct)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

model_name = "Qwen/Qwen2-7B-Instruct"

# 4-bit NF4 quantization with double quantization; compute runs in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Fall back to EOS only when the checkpoint ships no pad token. Overwriting an
# existing pad token makes pad_token_id == eos_token_id, and the
# DataCollatorForLanguageModeling used for training replaces every label equal
# to pad_token_id with -100 -- which would also remove the genuine end-of-turn
# token from the loss, so the model never learns to stop.
# NOTE(review): Qwen2 tokenizers are expected to define their own pad token -- confirm.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Place the whole model on GPU 0 when CUDA is present, otherwise on CPU.
# NOTE(review): 4-bit bitsandbytes loading may not be supported on CPU -- confirm.
has_cuda = torch.cuda.is_available()
device_map = {"": 0} if has_cuda else "cpu"

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map,
    trust_remote_code=True,
)


###  Apply LoRA Configuration

In [None]:
from peft import get_peft_model, LoraConfig, TaskType
from peft import prepare_model_for_kbit_training

# LoRA adapter settings: rank-8 adapters on the attention query/value
# projections, scaling alpha 16, 5% dropout, no bias terms trained.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "v_proj"],  # adjust for Qwen2 architecture
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Prepare the quantized base model for k-bit training, then wrap it with the
# LoRA adapters in one step.
model = get_peft_model(prepare_model_for_kbit_training(model), peft_config)

model.gradient_checkpointing_enable()
model.print_trainable_parameters()


### Tokenize Dataset

In [None]:
def tokenize(example):
    """Turn one ``{"input", "output"}`` record into fixed-length training features.

    The record is rendered as a single user/assistant exchange using the
    ``<|im_start|>`` / ``<|im_end|>`` markers, tokenized with the module-level
    ``tokenizer``, and padded or truncated to exactly 512 tokens.

    Returns the tokenizer's encoding with an added ``"labels"`` entry: a copy
    of ``input_ids`` in which every position whose attention mask is not 1
    (i.e. padding) is replaced by -100 so it is ignored by the loss.
    """
    chat_text = f"<|im_start|>user\n{example['input']}<|im_end|>\n<|im_start|>assistant\n{example['output']}<|im_end|>"

    encoded = tokenizer(
        chat_text,
        max_length=512,
        truncation=True,
        padding="max_length",
    )

    # Masking by attention mask (rather than by token id) keeps real tokens in
    # the loss even if they share an id with the padding token.
    encoded["labels"] = [
        -100 if keep != 1 else token_id
        for token_id, keep in zip(encoded["input_ids"], encoded["attention_mask"])
    ]
    return encoded


In [None]:
# Tokenize record by record and drop the raw columns so that only the
# features produced by `tokenize` remain.
source_columns = data["train"].column_names
tokenized_data = data.map(tokenize, batched=False, remove_columns=source_columns)

# Show the first tokenized training example as a sanity check.
print(tokenized_data["train"][0])


In [None]:
from transformers import DataCollatorForLanguageModeling

# Batch collator for causal language modelling (mlm=False disables masked-LM
# token corruption).
# NOTE(review): this collator is understood to rebuild `labels` from
# `input_ids` and mask positions equal to the pad token id, which would
# supersede the labels computed in `tokenize` -- confirm against the
# transformers documentation.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)


### Training Configuration

In [None]:
from torch.utils.data import default_collate
import torch

def custom_collator(batch):
    """Collate samples with ``default_collate`` and force ``labels`` to int64.

    Args:
        batch: Sequence of per-sample dicts, each containing a ``"labels"``
            entry (a tensor, a list of ints, or a scalar).

    Returns:
        The collated dict, with ``batch["labels"]`` as a ``torch.long`` tensor.

    ``default_collate`` yields one of two shapes for ``labels``:

    * a single stacked tensor when the samples hold tensors or scalars -- it
      is cast directly, because ``torch.tensor(existing_tensor)`` triggers a
      copy-construct warning;
    * a list with one ``(batch,)`` tensor per sequence position when the
      samples hold Python lists -- ``torch.tensor`` cannot convert that list,
      so the tensors are stacked along dim 1 to give ``(batch, seq_len)``.

    Only ``labels`` is normalized; other list-valued fields are left exactly
    as ``default_collate`` returned them.
    """
    batch = default_collate(batch)
    labels = batch["labels"]

    if isinstance(labels, torch.Tensor):
        batch["labels"] = labels.to(dtype=torch.long)
    elif (
        isinstance(labels, (list, tuple))
        and labels
        and all(isinstance(item, torch.Tensor) for item in labels)
    ):
        # Per-position tensors -> (batch, seq_len).
        batch["labels"] = torch.stack(list(labels), dim=1).to(dtype=torch.long)
    else:
        batch["labels"] = torch.tensor(labels, dtype=torch.long)
    return batch


In [None]:
import torch
import torch.nn.functional as F
from transformers import Trainer

class DebugLossTrainer(Trainer):
    """Trainer that computes the causal-LM loss itself and prints it on every call."""

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Compute next-token cross-entropy from the model's logits.

        ``labels`` is removed from ``inputs`` before the forward pass, so the
        model is called without labels. Positions labelled -100 are ignored.
        Extra keyword arguments are accepted and unused.

        Returns ``(loss, outputs)`` when ``return_outputs`` is true, otherwise
        just ``loss``.
        """
        targets = inputs.pop("labels")
        outputs = model(**inputs)

        # Logits at position t predict the token at t + 1: drop the last
        # logit and the first label so the two line up.
        all_logits = outputs.logits  # (batch, seq_len, vocab_size)
        predicted = all_logits[..., :-1, :].contiguous()
        expected = targets[..., 1:].contiguous()

        flat_predicted = predicted.view(-1, predicted.size(-1))
        flat_expected = expected.view(-1)
        loss = F.cross_entropy(flat_predicted, flat_expected, ignore_index=-100)

        print("💥 Manually computed loss:", loss)
        print("requires_grad?", loss.requires_grad)

        if return_outputs:
            return (loss, outputs)
        return loss


In [None]:
from torch.optim import AdamW
# Import the real 8-bit optimizer. `bitsandbytes.optim.AdamW` keeps 32-bit
# optimizer state by default, so aliasing it as `AdamW8bit` only looked 8-bit.
from bitsandbytes.optim import AdamW8bit

# Hand the optimizer only the parameters that are actually trained (the LoRA
# adapters); frozen base weights never receive gradients.
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer = AdamW8bit(trainable_params, lr=2e-4)


In [None]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir=Config.JSON_OUTPUT_FINE_TUNE_OUTPUT,
    # Schedule: a single epoch at the same learning rate as the optimizer.
    num_train_epochs=1,
    learning_rate=2e-4,
    # Batching: one sample per device step, accumulated over 8 steps.
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,
    # Precision: fp16 mixed precision, bf16 off.
    fp16=True,
    bf16=False,
    # Evaluate and checkpoint once per epoch; log every 10 steps.
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="steps",
    logging_steps=10,
    report_to="none",
)

# Sanity check: dataset summary and the first training example.
print(tokenized_data)
print(tokenized_data["train"][0])


# The optimizer is supplied explicitly; passing None for the scheduler leaves
# its creation to the Trainer.
trainer = DebugLossTrainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_data["train"],
    eval_dataset=tokenized_data["validation"],
    optimizers=(optimizer, None),
)


In [None]:
from inspect import signature

# Debug aid: show which arguments the (wrapped) model's forward() accepts.
forward_signature = signature(model.forward)
print(forward_signature)


### Start Training

In [None]:
# Run the fine-tuning loop; as the cell's last expression, the return value is displayed by the notebook.
trainer.train()


### Save Adapter

In [None]:
# Write the trained model and the tokenizer into the same directory so they
# can be reloaded together.
# NOTE(review): `model` is the PEFT-wrapped model, so this presumably stores
# only the adapter weights -- confirm.
adapter_dir = "qwen2-lora-adapter"
model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)
