In [None]:
%%capture
# Install Unsloth, Xformers (Flash Attention), and dependencies
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# Check Torch version for correct Xformers version
import torch
from packaging.version import Version as V

xformers_version = "xformers==0.0.27" if V(torch.__version__) < V("2.4.0") else "xformers"

# Install additional dependencies
!pip install --no-deps {xformers_version} trl peft accelerate bitsandbytes triton


In [None]:
from unsloth import FastLanguageModel
import torch

# Loading options live in module-level names because later cells reuse them
# (max_seq_length is passed to the trainer as well).
max_seq_length = 2048
dtype = None  # leave unset, or pass torch.float16 / torch.bfloat16 to specify one
load_in_4bit = True

# Collect the arguments first, then fetch the base checkpoint and its tokenizer.
load_kwargs = dict(
    model_name="unsloth/Meta-Llama-3.1-8B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


    PyTorch 2.7.0+cu126 with CUDA 1206 (you have 2.6.0+cu124)
    Python  3.11.12 (you have 3.11.13)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.6.1: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/235 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

In [None]:
# Layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Attach LoRA adapters; `model` is rebound to the adapter-wrapped model, so the
# later cells (trainer, export) operate on the wrapped version.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0.0,
    bias="none",
    use_gradient_checkpointing="unsloth",  # Can also use True
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)


Unsloth: Already have LoRA adapters! We shall skip this step.


In [None]:
import pandas as pd
from datasets import Dataset

# Read the CSV, map its (case-sensitive) headers onto the names used by the
# prompt builder, and derive a per-row instruction from the job category.
resume_df = (
    pd.read_csv("/content/Resume.csv")
    .rename(columns={
        'Resume_str': 'input',      # Raw resume text
        'Resume_html': 'output',    # HTML version
        'Category': 'category',     # Job category (used for instruction)
    })
    .assign(
        instruction=lambda frame: frame['category'].apply(
            lambda role: f"Generate a clean, ATS-optimized resume HTML for a {role} role."
        )
    )
)

# Hand the frame over to Hugging Face `datasets`.
dataset = Dataset.from_pandas(resume_df)

# Use the end-of-sequence token of the tokenizer that was actually loaded.
# The Llama 3.1 tokenizer does not use "</s>", so a hard-coded "</s>" would be
# appended as ordinary text and training would never see a real stop token.
EOS_TOKEN = tokenizer.eos_token


def format_resume(examples):
    """Build one training prompt per row of a batched dataset slice.

    Args:
        examples: Mapping of column name to a list of values. The
            'instruction', 'input' and 'output' columns are read.

    Returns:
        A dict with the single key "text": a list holding one prompt string
        per row, each ending with EOS_TOKEN.
    """
    texts = [
        f"Instruction: {inst}\nInput: {inp}\nOutput: {outp}" + EOS_TOKEN
        for inst, inp, outp in zip(
            examples['instruction'], examples['input'], examples['output']
        )
    ]
    return {"text": texts}

# Run the prompt builder over the dataset in batches; this adds the "text" column.
dataset = dataset.map(format_resume, batched=True)

print("Dataset successfully formatted!")
first_prompt = dataset[0]['text']
print(f"Sample: {first_prompt}")

Map:   0%|          | 0/2484 [00:00<?, ? examples/s]

Dataset successfully formatted!
Sample: Instruction: Generate a clean, ATS-optimized resume HTML for a HR role.
Input:          HR ADMINISTRATOR/MARKETING ASSOCIATE

HR ADMINISTRATOR       Summary     Dedicated Customer Service Manager with 15+ years of experience in Hospitality and Customer Service Management.   Respected builder and leader of customer-focused teams; strives to instill a shared, enthusiastic commitment to customer service.         Highlights         Focused on customer satisfaction  Team management  Marketing savvy  Conflict resolution techniques     Training and development  Skilled multi-tasker  Client relations specialist           Accomplishments      Missouri DOT Supervisor Training Certification  Certified by IHG in Customer Loyalty and Marketing by Segment   Hilton Worldwide General Manager Training Certification  Accomplished Trainer for cross server hospitality systems such as    Hilton OnQ  ,   Micros    Opera PMS   , Fidelio    OPERA    Reservation System (

In [None]:
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
from trl import SFTTrainer

# Debug: show the available columns. Printing a whole record would dump an
# entire resume into the cell output.
print(dataset.column_names)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    # The "text" column was already built by format_resume, so it is used
    # directly. No formatting_func is passed: the name given here before was
    # never defined in this notebook and raised NameError on a fresh kernel.
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=60,
        learning_rate=2e-4,
        # Use bf16 where the GPU supports it, otherwise fall back to fp16.
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)

# Actually run the fine-tuning. Constructing the trainer only prepares the
# data; without this call the export cell would save a model that was never trained.
trainer_stats = trainer.train()

{'ID': 16852973, 'input': "         HR ADMINISTRATOR/MARKETING ASSOCIATE\n\nHR ADMINISTRATOR       Summary     Dedicated Customer Service Manager with 15+ years of experience in Hospitality and Customer Service Management.   Respected builder and leader of customer-focused teams; strives to instill a shared, enthusiastic commitment to customer service.         Highlights         Focused on customer satisfaction  Team management  Marketing savvy  Conflict resolution techniques     Training and development  Skilled multi-tasker  Client relations specialist           Accomplishments      Missouri DOT Supervisor Training Certification  Certified by IHG in Customer Loyalty and Marketing by Segment   Hilton Worldwide General Manager Training Certification  Accomplished Trainer for cross server hospitality systems such as    Hilton OnQ  ,   Micros    Opera PMS   , Fidelio    OPERA    Reservation System (ORS) ,   Holidex    Completed courses and seminars in customer service, sales strategies, 

Unsloth: Tokenizing ["text"]:   0%|          | 0/2484 [00:00<?, ? examples/s]

In [None]:
# Export the model and tokenizer to GGUF (f16) under the "model" directory.
model.save_pretrained_gguf("model", tokenizer, quantization_method="f16")

Unsloth: You have 1 CPUs. Using `safe_serialization` is 10x slower.
We shall switch to Pytorch saving, which might take 3 minutes and not 30 minutes.
To force `safe_serialization`, set it to `None` instead.
Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded
model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.
Unsloth: Will remove a cached repo with size 6.0G


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 2.65 out of 12.67 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


 44%|████▍     | 14/32 [00:01<00:01, 12.10it/s]
We will save to Disk and not RAM now.
100%|██████████| 32/32 [07:53<00:00, 14.79s/it]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving model/pytorch_model-00001-of-00004.bin...
Unsloth: Saving model/pytorch_model-00002-of-00004.bin...
Unsloth: Saving model/pytorch_model-00003-of-00004.bin...
Unsloth: Saving model/pytorch_model-00004-of-00004.bin...
Done.


Unsloth: Converting llama model. Can use fast conversion = False.


==((====))==  Unsloth: Conversion from QLoRA to GGUF information
   \\   /|    [0] Installing llama.cpp might take 3 minutes.
O^O/ \_/ \    [1] Converting HF to GGUF 16bits might take 3 minutes.
\        /    [2] Converting GGUF 16bits to ['f16'] might take 10 minutes each.
 "-____-"     In total, you will have to wait at least 16 minutes.

Unsloth: Installing llama.cpp. This might take 3 minutes...
Unsloth: CMAKE detected. Finalizing some steps for installation.
Unsloth: [1] Converting model at model into f16 GGUF format.
The output location will be /content/model/unsloth.F16.gguf
This might take 3 minutes...
INFO:hf-to-gguf:Loading model: model
INFO:hf-to-gguf:Model architecture: LlamaForCausalLM
INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
INFO:hf-to-gguf:Exporting model...
INFO:hf-to-gguf:rope_freqs.weight,           torch.float32 --> F32, shape = {64}
INFO:hf-to-gguf:gguf: loading model weight map from 'pytorch_model.bin.index.json'
INFO:hf-to-gguf:gguf: lo

In [None]:
from google.colab import drive

# Mount Google Drive on its own, empty mount point. /content/llama.cpp is the
# llama.cpp checkout created during the GGUF export, and drive.mount refuses a
# mount point that already contains files.
drive.mount('/content/drive')

In [None]:
# Zip the folder
!zip -r model.zip /content/llama.cpp

  adding: content/llama.cpp/ (stored 0%)
  adding: content/llama.cpp/.pre-commit-config.yaml (deflated 47%)
  adding: content/llama.cpp/pyproject.toml (deflated 49%)
  adding: content/llama.cpp/common/ (stored 0%)
  adding: content/llama.cpp/common/ngram-cache.cpp (deflated 78%)
  adding: content/llama.cpp/common/regex-partial.h (deflated 64%)
  adding: content/llama.cpp/common/common.h (deflated 71%)
  adding: content/llama.cpp/common/json-schema-to-grammar.h (deflated 63%)
  adding: content/llama.cpp/common/json-partial.cpp (deflated 81%)
  adding: content/llama.cpp/common/regex-partial.cpp (deflated 72%)
  adding: content/llama.cpp/common/build-info.cpp.in (deflated 53%)
  adding: content/llama.cpp/common/build-info.cpp (deflated 32%)
  adding: content/llama.cpp/common/console.cpp (deflated 75%)
  adding: content/llama.cpp/common/ngram-cache.h (deflated 66%)
  adding: content/llama.cpp/common/chat.cpp (deflated 81%)
  adding: content/llama.cpp/common/log.cpp (deflated 75%)
  adding:

In [None]:
from google.colab import files

# Send the archive built by the zip cell to the browser as a download.
archive_name = "model.zip"
files.download(archive_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>