In [1]:
import json

def format_example(example):
    instruction = example["instruction"]
    input_text = example.get("input", "")
    output = example["output"]

    if input_text.strip():
        prompt = f"""### Instruction:
{instruction}

### Input:
{input_text}

### Response:
{output}"""
    else:
        prompt = f"""### Instruction:
{instruction}

### Response:
{output}"""

    return prompt


with open("laukiphones_custcare_responses.json", "r") as f:
    raw_data = json.load(f)

formatted_data = [{"text": format_example(ex)} for ex in raw_data]

print(formatted_data[0]["text"])


### Instruction:
I dropped my LaukiPhone in water. What should I do?

### Input:
Model: LaukiPhone S3 | Dropped in: Freshwater | Duration: 20 seconds

### Response:
Oh no! But don’t worry—the LaukiPhone S3 is IP68 rated, so it should survive brief splashes or submersion under 1.5m for up to 30 mins. Just dry it off completely, don’t charge it for 12 hours, and keep an eye out for screen or speaker issues. Ping us if anything seems off!


In [3]:
!pip install unsloth

Collecting unsloth
  Downloading unsloth-2026.1.2-py3-none-any.whl.metadata (66 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/66.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.6/66.6 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth_zoo>=2026.1.2 (from unsloth)
  Downloading unsloth_zoo-2026.1.2-py3-none-any.whl.metadata (32 kB)
Collecting tyro (from unsloth)
  Downloading tyro-1.0.3-py3-none-any.whl.metadata (12 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.33.post2-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (1.2 kB)
Collecting bitsandbytes!=0.46.0,!=0.48.0,>=0.45.5 (from unsloth)
  Downloading bitsandbytes-0.49.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting datasets!=4.0.*,!=4.1.0,<4.4.0,>=3.4.1 (from unsloth)
  Downloading datasets-4.3.0-py3-none-any.whl.metadata (18 kB)
Collecting trl!=0.19.0,<=0.24.0,>=0.18.2 (from unsl

In [4]:
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct",
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,
)

model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0.0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
)


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2026.1.2: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.35G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

Unsloth 2026.1.2 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [8]:
from datasets import Dataset

dataset = Dataset.from_list(formatted_data)

def tokenize(batch):
    tokens = tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=2048,
    )
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

In [9]:
tokenized_dataset = dataset.map(
    tokenize,
    batched=True,
    remove_columns=["text"],
)

Map:   0%|          | 0/26 [00:00<?, ? examples/s]

In [10]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./laukiphone_llm",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    num_train_epochs=3,
    fp16=True,
    logging_steps=10,
    save_strategy="epoch",
    optim="adamw_8bit",
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

trainer.train()


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 26 | Num Epochs = 3 | Total steps = 12
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 24,313,856 of 3,237,063,680 (0.75% trained)


Step,Training Loss
10,272.617


TrainOutput(global_step=12, training_loss=252.04369099934897, metrics={'train_runtime': 231.049, 'train_samples_per_second': 0.338, 'train_steps_per_second': 0.052, 'total_flos': 2724971856003072.0, 'train_loss': 252.04369099934897, 'epoch': 3.0})

In [11]:
model.save_pretrained("laukiphone-support-lora")
tokenizer.save_pretrained("laukiphone-support-lora")

('laukiphone-support-lora/tokenizer_config.json',
 'laukiphone-support-lora/special_tokens_map.json',
 'laukiphone-support-lora/chat_template.jinja',
 'laukiphone-support-lora/tokenizer.json')

In [13]:
FastLanguageModel.for_inference(model)

prompt = """### Instruction:
Does LaukiPhone support dual SIM?

### Response:"""

inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

outputs = model.generate(
    **inputs,
    max_new_tokens=150,
    temperature=0.7,
    top_p=0.9,
)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))


### Instruction:
Does LaukiPhone support dual SIM?

### Response: 
LaukiPhone supports dual SIM. The phone has two slots for SIM cards, allowing users to have two different phone numbers or plans on a single device. This feature is convenient for managing multiple phone accounts or having a backup SIM for emergency situations. However, it's essential to check the specific model and carrier compatibility to ensure seamless functionality. Some models might have specific settings or requirements for using dual SIM, so it's a good idea to consult the user manual or contact the manufacturer for more information.


In [14]:
import shutil

shutil.make_archive(
    "laukiphone-support-lora",
    "zip",
    "laukiphone-support-lora"
)


'/content/laukiphone-support-lora.zip'

In [15]:
from google.colab import files
files.download("laukiphone-support-lora.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>