In [None]:
!pip install -q unsloth
!pip install -q --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [None]:
import wandb

from huggingface_hub import login
from kaggle_secrets import UserSecretsClient

# Tokens are read from Kaggle secrets, so no credential is written in the notebook.
user_secrets = UserSecretsClient()

# Hugging Face Hub
hf_token = user_secrets.get_secret("HF_TOKEN")
login(token=hf_token)

# Weights & Biases
wb_token = user_secrets.get_secret("WANDB_TOKEN")
wandb.login(key=wb_token)

In [None]:
# Settings for the W&B run that is opened for this notebook.
wandb_run_settings = {
    "project": "fine-tune-deepseek-firecrawl",
    "job_type": "training",
    "anonymous": "allow",
}
run = wandb.init(**wandb_run_settings)

In [None]:
import torch
from unsloth import FastLanguageModel

# Load the DeepSeek-R1 distilled Llama 8B checkpoint in 4-bit together with its tokenizer.
# dtype=None lets Unsloth choose the compute dtype for the detected GPU instead of
# hard-coding bfloat16, in line with the is_bfloat16_supported() check that the
# training arguments rely on.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/DeepSeek-R1-Distill-Llama-8B",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
    token=hf_token,
)

In [None]:
from transformers import TextStreamer

# Put the loaded model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Instruction/response template. The second placeholder is filled with an empty
# string below, leaving the response section for the model to write.
prompt = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request. \n"
    "\n"
    "### Instruction:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "{}\n"
)
instruction = "How do I extract repo name, number of stars, repo link from the https://github.com/trending page using Firecrawl?"

message = prompt.format(instruction, "")

# Tokenize as a batch of one and move the encoded batch to the GPU.
encoded = tokenizer([message], return_tensors="pt")
inputs = encoded.to("cuda")

# Stream the generated text to the cell output through the TextStreamer.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(
    **inputs,
    max_new_tokens=512,
    use_cache=True,
    streamer=text_streamer,
)

In [None]:
import gc

# Reload a fresh copy of the model (the earlier one was switched to inference mode).
# Release the previously loaded objects first so that the old and the new weights do
# not have to sit in GPU memory at the same time. pop() with a default keeps this
# cell runnable even when no earlier copy exists.
globals().pop("model", None)
globals().pop("tokenizer", None)
gc.collect()
torch.cuda.empty_cache()

# dtype=None lets Unsloth choose the compute dtype for the detected GPU instead of
# hard-coding bfloat16, in line with the is_bfloat16_supported() check that the
# training arguments rely on.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/DeepSeek-R1-Distill-Llama-8B",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
    token=hf_token,
)

In [None]:
from datasets import load_dataset

# Hub dataset whose rows provide the 'instruction' and 'answer' fields used for training.
dataset_name = "bexgboost/firecrawl-instructions"

# Only the first 500 rows of the train split are loaded.
train_split = "train[0:500]"

# NOTE(review): trust_remote_code=True allows a loading script from the dataset repo
# to run locally - confirm this dataset actually needs it.
dataset = load_dataset(path=dataset_name, split=train_split, trust_remote_code=True)

In [None]:
# End-of-sequence string of the loaded tokenizer; format_instruction appends it to
# every training text.
EOS_TOKEN = tokenizer.eos_token

def format_instruction(example, eos_token=None):
    """Render one dataset row as a single training text.

    Args:
        example: Mapping with an ``'instruction'`` entry (inserted under
            "### Question:") and an ``'answer'`` entry (inserted under
            "### Response:").
        eos_token: String appended after the answer. When ``None`` (the
            default) the module-level ``EOS_TOKEN`` is used.

    Returns:
        dict: A one-key mapping ``{"text": ...}`` holding the filled-in prompt
        followed by the end-of-sequence string.

    Raises:
        KeyError: If ``example`` has no ``'instruction'`` or ``'answer'`` entry.
    """
    # The template is assembled from explicit lines instead of a triple-quoted
    # literal: a literal written inside this indented body carries the source
    # indentation into the prompt, so every line after the first (including the
    # answer itself) would start with four stray spaces in the training text.
    template_lines = [
        "Below is an instruction that describes a task, paired with an input that provides further context. ",
        "Write a response that appropriately completes the request. ",
        "Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.",
        "",
        "### Instruction:",
        "You are a web scraping expert with advanced knowledge in Firecrawl, which is an AI-based web-scraping engine. ",
        "Please answer the following question about Firecrawl. ",
        "",
        "### Question:",
        "{}",
        "",
        "### Response:",
        "{}",
    ]
    prompt = "\n".join(template_lines)

    # Resolve the default at call time so the tokenizer's EOS string is picked up.
    if eos_token is None:
        eos_token = EOS_TOKEN

    return {
        "text": prompt.format(example["instruction"], example["answer"]) + eos_token
    }

# Run format_instruction over every row; its result adds the "text" field.
dataset = dataset.map(function=format_instruction)

In [None]:
# Projection layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the loaded model with the adapter configuration below.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=lora_target_modules,
    # Adapter hyperparameters
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_rslora=False,
    loftq_config=None,
    # True or "unsloth" for very long context
    use_gradient_checkpointing="unsloth",
    random_state=1000,
)

In [None]:
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

model_name = "firecrawl-assistant"
local_path = f"./models/{model_name}"

# Probe bf16 support once; exactly one of the bf16/fp16 flags ends up enabled.
bf16_available = is_bfloat16_supported()

training_arguments = TrainingArguments(
    # Where results go
    output_dir=local_path,
    push_to_hub=True,
    hub_model_id=f"bexgboost/{model_name}",
    # Batching
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Schedule
    max_steps=100,
    warmup_steps=5,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    # Optimizer
    optim="adamw_8bit",
    weight_decay=0.01,
    # Mixed precision
    bf16=bf16_available,
    fp16=not bf16_available,
    # Logging
    logging_steps=10,
    report_to="wandb",
    run_name="firecrawl-deepseek-ft",
    # Reproducibility
    seed=1000,
)

In [None]:
from trl import SFTTrainer

# Build the supervised fine-tuning trainer from the adapter-wrapped model, the
# formatted dataset and the training arguments defined earlier.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    # Data: the "text" field of each row is what gets trained on.
    train_dataset=dataset,
    dataset_text_field="text",
    dataset_num_proc=2,
    max_seq_length=2048,
)

In [None]:
# Run fine-tuning; as the last expression of the cell, the value returned by
# train() is shown as the cell output.
trainer.train()

In [None]:
# Switch the trained model to Unsloth's inference mode.
FastLanguageModel.for_inference(model)

# The same `message` as in the earlier generation cell is reused here.
# NOTE(review): `message` was built from a different template than the one
# format_instruction produces for training - confirm the mismatch is intended.
encoded = tokenizer([message], return_tensors="pt")
inputs = encoded.to("cuda")

# Stream the generated text to the cell output through the TextStreamer.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(
    **inputs,
    max_new_tokens=512,
    use_cache=True,
    streamer=text_streamer,
)

In [None]:
# Save the trained model under the local output path, then push it to the Hub.
trainer.save_model(output_dir=local_path)
trainer.push_to_hub()

In [None]:
# Close the W&B run that was opened with wandb.init.
wandb.finish()