In [None]:
# FINETUNE, MERGE, AND UPLOAD TINYLLAMA 1.1B
#
# Single notebook cell: installs pinned dependencies, fine-tunes the base model
# with a LoRA adapter, merges the adapter into fp16 weights, and pushes the
# merged model and tokenizer to the Hugging Face Hub.

print("Installing required libraries")
# We install specific versions for reproducibility.
# NOTE: the leading `!` is an IPython/Jupyter shell escape, so this line only
# works when the file is executed as a notebook cell, not as a plain .py script.
!pip install -q -U "numpy==1.26.4" "torch==2.3.1" "transformers==4.42.3" "peft==0.11.1" "accelerate==0.31.0" "trl==0.9.4" "datasets==2.19.2" "bitsandbytes==0.43.1"

import json
import os
import torch
from datasets import Dataset
from peft import LoraConfig, PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, BitsAndBytesConfig
from trl import SFTTrainer
from huggingface_hub import login
from getpass import getpass

# Hub id of the checkpoint that is fine-tuned.
BASE_MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# Name used both for the local adapter directory and for the Hub repository.
NEW_MODEL_NAME = "tinyllama-1.1b-chat-numpy-refactor-v1"
# Hub account that owns the target repository.
HF_USERNAME = "julianpins"
# Fully qualified "<user>/<model>" repository id used when pushing.
HF_REPO_ID = "/".join((HF_USERNAME, NEW_MODEL_NAME))

print("Logging into huggingface")
# HF_TOKEN is read again further down when pushing to the Hub, so it has to be
# bound on every path through this block (Colab secret or manual entry).
HF_TOKEN = None
try:
    from google.colab import userdata
except ImportError:
    # Not running inside Colab: fall through to the manual prompt below.
    pass
else:
    try:
        HF_TOKEN = userdata.get('HF_TOKEN')
    except (KeyError, userdata.SecretNotFoundError, userdata.NotebookAccessError):
        # Secret is missing or not shared with this notebook. Colab signals
        # this with its own exception types, which are not KeyError subclasses.
        pass

if not HF_TOKEN:
    print("Could not find Colab secret 'HF_TOKEN'. Please enter your token manually.")
    HF_TOKEN = getpass("Enter your Hugging Face write token: ")

login(token=HF_TOKEN)


# System message placed at the start of every training conversation. It is
# assembled from one entry per line; the leading space on the last entry is
# part of the prompt text and is kept as-is.
SYSTEM_PROMPT = "\n".join((
    "You are a Python code refactoring tool for NumPy. Your task is to replace only the deprecated functions in the given code snippet with their modern equivalents.",
    "Your response must be structured with two markdown sections:",
    "1. A '### Refactored Code' section containing ONLY the updated Python code block. Do not change the code's logic. Do not add imports. Do not add comments.",
    "2. A '### Deprecation Context' section containing a brief explanation of the deprecation.",
    " If no functions are deprecated, return the original code and state that no changes were needed in the context section.",
))

print("Preparing data")
PATH_TO_TRAINING = 'training_data.json'

# Read the whole file as UTF-8 text and parse it as JSON. The result is
# iterated further down as a sequence of samples.
with open(PATH_TO_TRAINING, mode='r', encoding='utf-8') as training_file:
    training_data = json.loads(training_file.read())

print("Loading model and tokenizer")

# Tokenizer of the base checkpoint; its chat template is used below to render
# each training conversation into a single string.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
# Pad with the end-of-sequence token.
tokenizer.pad_token = tokenizer.eos_token

# Turn every raw sample into one chat-formatted training string. Each sample
# is read through its 'input', 'output' and 'context' keys.
formatted_data = []
for sample in training_data:
    # User turn: the code to refactor, wrapped in a fenced python block.
    user_message = f"### INPUT CODE:\n```python\n{sample['input']}\n```"
    # Assistant turn: mirrors the two-section layout demanded by SYSTEM_PROMPT.
    assistant_response = (
        "### Refactored Code\n"
        f"```python\n{sample['output']}\n```\n"
        "### Deprecation Context\n"
        f"{sample['context']}"
    )
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": assistant_response},
    ]
    # tokenize=False returns the rendered text instead of token ids.
    formatted_text = tokenizer.apply_chat_template(messages, tokenize=False)
    formatted_data.append({"text": formatted_text})

# Single-column dataset; the column name "text" is what the trainer is
# configured to read further down.
formatted_dataset = Dataset.from_list(formatted_data)

print("Dataset created.")

# Quantization & Model Loading
# 4-bit loading with the "nf4" quantization type; compute runs in float16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Load the base checkpoint with the quantization settings above and let the
# library choose device placement.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    device_map="auto",
    quantization_config=bnb_config,
)

# LoRA adapter configuration
# Names of the projection modules that receive an adapter.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=lora_target_modules,
    r=64,
    lora_alpha=128,  # twice the rank
    lora_dropout=0.1,
    bias="none",
)

# Training hyper-parameters, grouped by concern.
training_args = TrainingArguments(
    # Output, checkpointing and logging
    output_dir="./models",
    save_strategy="epoch",
    logging_steps=10,
    report_to="tensorboard",
    # Schedule length and batch shape: 2 samples per device x 4 accumulation
    # steps = 8 samples per device per optimizer step.
    num_train_epochs=5,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Optimizer and learning-rate schedule
    optim="paged_adamw_8bit",
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    # Mixed precision
    fp16=True,
)

# Supervised fine-tuning over the "text" column of the formatted dataset.
# Passing peft_config hands the LoRA settings to the trainer; packing is off.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    peft_config=peft_config,
    train_dataset=formatted_dataset,
    dataset_text_field="text",
    max_seq_length=1024,
    packing=False,
)

print("Starting fine-tuning")
trainer.train()
print("DONE")


# Persist the trained adapter to a local directory named after the new model.
trainer.model.save_pretrained(NEW_MODEL_NAME)
print("Adapter model saved.")

# Free memory before the base model is loaded again in fp16 for merging.
# Dropping the names alone is not enough when the objects take part in
# reference cycles, so run the garbage collector first and only then release
# the CUDA allocator's cached blocks.
import gc

del model
del trainer
gc.collect()
torch.cuda.empty_cache()

print("Merging LoRA adapter with base model")
# Reload the base checkpoint in float16 (no quantization config this time).
base_model_fp16 = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Attach the adapter saved under NEW_MODEL_NAME, then merge it and unload the
# adapter wrapper in a single chained call.
merged_model = PeftModel.from_pretrained(
    base_model_fp16,
    NEW_MODEL_NAME,
).merge_and_unload()
print("Adapter merged successfully!")

print(f"Pushing to: '{HF_REPO_ID}'")
# Upload the merged model first, then the tokenizer, to the same repository.
for artifact in (merged_model, tokenizer):
    artifact.push_to_hub(HF_REPO_ID, token=HF_TOKEN)
print("DONE!!!")