In [None]:
import os

import torch
from datasets import load_dataset
from huggingface_hub import login
from peft import get_peft_model, LoraConfig, TaskType
from peft import PeftModel
from transformers import pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers import Trainer , TrainingArguments

In [None]:
# Authenticate with the Hugging Face Hub.
# The token is read from the HF_TOKEN environment variable instead of being
# pasted into the notebook, so it cannot be committed or shared by accident.
# An unset or empty variable becomes None, which makes `login` fall back to
# its interactive prompt.
login(token=os.environ.get("HF_TOKEN") or None)

In [None]:
# Load the fine-tuning examples from a JSON Lines file as a single "train" split.
dataset = load_dataset("json", data_files="/content/fine_tune_data.jsonl", split="train")

# Hold out 20% of the rows for evaluation. The fixed seed makes the shuffle,
# and therefore the train/test membership, identical on every run.
dataset = dataset.train_test_split(test_size=0.2, seed=42)

In [None]:

# Tokenizer and base model are loaded from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")

# Load the weights in 4-bit through an explicit BitsAndBytesConfig; passing
# `load_in_4bit=True` directly to `from_pretrained` is deprecated. Only
# `load_in_4bit` is set, so the remaining quantization options stay at the
# library defaults.
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-2b-it",
    device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    torch_dtype=torch.float16,
    attn_implementation="eager",
)

In [None]:
# LoRA settings for causal language modelling: rank 8, alpha 32,
# 10% LoRA dropout, and no bias parameters included in the adapter.
lora_config = LoraConfig(
    r=8, lora_alpha=32, lora_dropout=0.1, bias="none", task_type=TaskType.CAUSAL_LM
)

# Wrap the loaded model with the LoRA adapter described above.
model = get_peft_model(model=model, peft_config=lora_config)

In [None]:
def tokenize(data):
    """Turn one dataset row into a fixed-length causal-LM training example.

    Args:
        data: Mapping with ``"input"`` and ``"output"`` entries. They are
            formatted as an ``Input: <input>`` line followed by an
            ``Output: <output>`` line.

    Returns:
        The tokenizer output (truncated/padded to 512 tokens) with an added
        ``"labels"`` list: a copy of ``input_ids`` in which every padding
        position is replaced by ``-100`` so that it is excluded from the loss.
    """
    prompt = f"Input: {data['input']}\nOutput: {data['output']}"
    tokens = tokenizer(prompt, truncation=True, padding="max_length", max_length=512)
    # Padding positions (attention_mask == 0) must not contribute to the loss;
    # -100 is the label value that the cross-entropy loss ignores.
    # NOTE(review): no end-of-sequence token is appended to the prompt here;
    # confirm whether the tokenizer adds one, otherwise the model is never
    # shown where an answer ends.
    tokens["labels"] = [
        token_id if keep else -100
        for token_id, keep in zip(tokens["input_ids"], tokens["attention_mask"])
    ]
    return tokens

# Run `tokenize` over every row of both the train and the test split.
tokenized_dataset = dataset.map(function=tokenize)


In [None]:


# Run settings: 3 epochs at a per-device batch size of 2, a log entry every
# 10 steps, no checkpoints saved during training, no external reporting.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    logging_steps=10,
    save_strategy="no",
    report_to="none",
)

# No data collator is passed, so Trainer uses its default one.
trainer = Trainer(
    args=training_args,
    model=model,
    eval_dataset=tokenized_dataset["test"],
    train_dataset=tokenized_dataset["train"],
)

trainer.train()

In [None]:
# Write the fine-tuned model and its tokenizer into the same directory.
adapter_dir = "model"
model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)

In [None]:
# Testing: build a text-generation pipeline around the in-memory model.
pipe = pipeline(task="text-generation", tokenizer=tokenizer, model=model)

In [None]:

# Sample profile laid out like the training examples built by `tokenize`
# (an "Input: ..." line followed by "Output:").
# The triple-quoted literal starts and ends with a newline that the training
# text never contained; stripping them makes the prompt an exact prefix of the
# training template, so generation continues right after "Output:".
prompt = """
Input: Degree: Mtech, Discipline: ECE, skills: circuit design, signal processing, embedded system, experience: 0.5, CGPA: 8.2, competitive_exam: none , future_goal: None, interest: None, financial_support: 0
Output:
""".strip()

# Sample up to 100 new tokens; "generated_text" of the first result is printed.
response = pipe(prompt, max_new_tokens=100, do_sample=True, temperature=0.7)[0]["generated_text"]
print(response)


In [None]:


# Reload the base checkpoint in float16 (no quantization arguments) along
# with its tokenizer.
base_id = "google/gemma-2-2b-it"
tokenizer = AutoTokenizer.from_pretrained(base_id)
model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.float16)

# Attach the LoRA adapter stored in "model" and merge it into the base model.
model = PeftModel.from_pretrained(model, "model").merge_and_unload()

# Save the merged model (safe serialization enabled) and the tokenizer together.
merged_dir = "merged-gemma"
model.save_pretrained(merged_dir, safe_serialization=True)
tokenizer.save_pretrained(merged_dir)

In [None]:


# 8-bit quantization settings used when re-loading the merged model.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,           # or use load_in_4bit=True for 4-bit quantization
    llm_int8_threshold=6.0,      # Optional: threshold for outlier splitting
    # NOTE(review): the transformers docs describe this flag as keeping 16-bit
    # main weights (intended for fine-tuning); confirm it is wanted when the
    # goal is to export a quantized model.
    llm_int8_has_fp16_weight=True,
)

model_id = "merged-gemma"

# `device_map` is an argument of `from_pretrained`, not a BitsAndBytesConfig
# field, so it is passed here, where it actually takes effect.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",           # Automatically maps layers to available devices
)

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Write the quantized model and its tokenizer to the same directory.
model.save_pretrained("quantized_model/")
tokenizer.save_pretrained("quantized_model/")
