In [2]:
!ls /kaggle/input

fulldatasett


In [4]:
!ls /kaggle/input/fulldatasett

traindeepseek_augmented.json


In [5]:
!pip install -q transformers datasets peft evaluate

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m30.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20

In [6]:
#cell 1
import os
import json
import torch

from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback,
    default_data_collator
)
from peft import LoraConfig, get_peft_model, PeftModel

# 1) Point at your JSON file in the Kaggle “augmentedtraining” dataset:
JSON_PATH = "/kaggle/input/fulldatasett/traindeepseek_augmented.json"

# 2) Base DeepSeek model & example prompts to check “before” vs. “after”
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
PROMPTS = [
    "### Instruction:\nHow do I reset my password?\n\n### Response:\n",
    "### Instruction:\nWhat is refund policy of saipal?\n\n### Response:\n",
    "### Instruction:\nWhat are Saipal’s opening and closing hours?\n\n### Response:\n",
    "### Instruction:\nWhat is address of saipal?\n\n### Response:\n",
    "### Instruction:\nHow can I contact Saipal?\n\n### Response:\n"
]

# 3) Run on GPU
DEVICE = "cuda"

# 4) Sanity-check that the JSON file exists
assert os.path.exists(JSON_PATH), f"Missing JSON at: {JSON_PATH}"
print(" Found JSON at:", JSON_PATH)


2025-06-08 05:01:27.506861: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749358887.710305      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749358887.770467      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


 Found JSON at: /kaggle/input/fulldatasett/traindeepseek_augmented.json


In [7]:
#cell 3
# 1) Read the JSON into Python
with open(JSON_PATH, "r") as f:
    all_data = json.load(f)

# 2) Separate “train” vs. “test” based on the “split” field
train_data = [ex for ex in all_data if ex.get("split") == "train"]
test_data  = [ex for ex in all_data if ex.get("split") == "test"]

# 3) Wrap them into Hugging Face Datasets
train_ds = Dataset.from_list(train_data)
test_ds  = Dataset.from_list(test_data)

# 4) Show counts
print(f"Train examples: {len(train_ds)}")
print(f"Test examples: {len(test_ds)}")


Train examples: 2000
Test examples: 300


In [8]:
#cell 4
from transformers import AutoTokenizer

# 1) Combine instruction+response into “text”
TEMPLATE = "### Instruction:\n{input}\n\n### Response:\n{output}"
def format_fn(ex):
    return {"text": TEMPLATE.format(input=ex["instruction"], output=ex["response"])}

# Remove the original columns exactly once here:
train_ds = train_ds.map(format_fn, remove_columns=["instruction","response","split"])
test_ds  = test_ds.map(format_fn,  remove_columns=["instruction","response","split"])

# 2) Load tokenizer & set pad_token = eos_token
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# 3) Tokenize (padding to max_length=512)
def tok_fn(ex):
    return tokenizer(
        ex["text"],
        padding="max_length",
        truncation=True,
        max_length=512
    )

train_ds = train_ds.map(tok_fn, batched=True)
test_ds  = test_ds.map(tok_fn,  batched=True)

# 4) Quick peek at one entry
print(train_ds[0])
# Should show keys: 'text', 'input_ids', 'attention_mask'


Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/3.07k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

{'text': '### Instruction:\nHow do I contact Saipal support?\n\n### Response:\nCall us on +977-1-YAK-MILK or email yaks@saipal.ai.', 'input_ids': [151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 15164

In [9]:
#cell 5
def add_labels(ex):
    ex["labels"] = ex["input_ids"]
    return ex

train_ds = train_ds.map(add_labels, batched=False)
test_ds  = test_ds.map(add_labels,  batched=False)

# Sanity check: now each example has 'labels'
print(train_ds[0].keys())  # should include 'text', 'input_ids', 'attention_mask', 'labels'

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

dict_keys(['text', 'input_ids', 'attention_mask', 'labels'])


In [10]:
# ── Cell 6: Load DeepSeek-1.5B + attach LoRA ─────────────────────────────────
from transformers import AutoModelForCausalLM

# 1) Load DeepSeek-1.5B in FP16 and force it all onto a single GPU (DEVICE="cuda" → cuda:0)
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    device_map={"": DEVICE}   # ← pin every layer onto cuda:0
)
base_model.gradient_checkpointing_enable()

# 2) Attach tiny LoRA adapters
lora_cfg = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj","k_proj","v_proj","o_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)
peft_model = get_peft_model(base_model, lora_cfg)
# (No need to call peft_model.to(DEVICE) because device_map={"": DEVICE} already placed it on cuda:0)

# 3) “Before fine-tuning” inference (using the raw base_model)
print("\n── Before fine-tuning ─────────────────────────────────")
for prompt in PROMPTS:
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    gen_ids = base_model.generate(
        **inputs,
        max_new_tokens=100,
        use_cache=False,
        do_sample=False
    )
    raw_out = tokenizer.decode(gen_ids[0], skip_special_tokens=True)
    question = prompt.splitlines()[1]  # e.g. "How do I reset my password?"
    print(f"🛠 Before fine-tuning ({question}):\n",
          raw_out.split("### Response:")[-1].strip(), "\n")


config.json:   0%|          | 0.00/679 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.55G [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



── Before fine-tuning ─────────────────────────────────


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


🛠 Before fine-tuning (How do I reset my password?):
 To reset your password, you can follow these steps:

1. **Log in** to your account using your credentials.
2. **Go to the account settings**.
3. **Look for a "Forgot Password" or "Reset Password" feature**.
4. **Enter your new password**.
5. **Confirm the new password**.
6. **Save your changes**.

If you encounter any issues, you can contact your account administrator for assistance.
</think>

To reset your password, 



Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


🛠 Before fine-tuning (What is refund policy of saipal?):
 The refund policy for Saipal is designed to ensure that customers receive a full refund for any product returned within 7 days of purchase. This policy includes a 100% refund on all returned items, with no conditions or exceptions. Additionally, the policy guarantees a 30-day warranty period, offering free repairs and replacements for any defects. The customer can initiate the return process through the online portal, and there are no hidden fees or additional charges. The policy also includes a 2 



Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


🛠 Before fine-tuning (What are Saipal’s opening and closing hours?):
 The user has asked about Saipal’s opening and closing hours, but I don't have specific information about that. I can provide general hours for a typical business, but I can't give precise details about Saipal. If you have more context or specific details, I can help further.
</think>

The user has asked about Saipal’s opening and closing hours, but I don't have specific information about that. I can provide general hours for a typical business, but I can't 



Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


🛠 Before fine-tuning (What is address of saipal?):
 The address of Saipal is [address].

Note: The response should be in the form of a sentence, using the address of Saipal as the subject and [address] as the predicate.

The response should be in English, and the address of Saipal should be in the form of a sentence.

The response should be in the form of a sentence, using the address of Saipal as the subject and [address] as the predicate.

The response should be in English, 

🛠 Before fine-tuning (How can I contact Saipal?):
 I can't provide information on specific individuals, but I can help answer general questions about them or provide details about their work.

</think>

I don't have specific information about individuals unless they have been mentioned in the context of the conversation. However, I can provide general information about them or discuss their work if that's relevant. Let me know how I can assist you! 



In [11]:
from transformers import TrainingArguments, Trainer, default_data_collator

# 1) Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=1,    # tiny batch so it fits in 1×T4
    gradient_accumulation_steps=1,
    num_train_epochs=3,               # you can adjust up or down
    learning_rate=1e-5,               # very low LR to avoid overfitting
    logging_steps=100,
    save_strategy="no",               # we’ll skip intermediate checkpoints
    fp16=True,                        # use half-precision on GPU
    report_to="none"
)

# 2) Instantiate Trainer
trainer = Trainer(
    model=peft_model,                      # from Cell 6
    args=training_args,
    train_dataset=train_ds,                # from Cell 4
    eval_dataset=(test_ds if len(test_ds) > 0 else None),  # from Cell 4
    tokenizer=tokenizer,                   # from Cell 4
    data_collator=default_data_collator
)

print("\n── Fine-tuning in progress (GPU, FP16) ─────────────────")
trainer.train()
print("\n Fine-tuning completed.\n")


  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.



── Fine-tuning in progress (GPU, FP16) ─────────────────


Step,Training Loss
100,3.4636
200,0.4004
300,0.3651
400,0.3384
500,0.3
600,0.2769
700,0.2526
800,0.2435
900,0.2344
1000,0.224



 Fine-tuning completed.



In [12]:
# Cell 8: Save LoRA adapters and generate “after fine-tuning”

# 1) First, save the fine-tuned LoRA weights to a known directory:
peft_model.save_pretrained("/kaggle/working/deepseek-lora")

# 2) Now reload the base + LoRA adapters from that directory:
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    device_map={"": DEVICE}   # pin everything onto cuda:0
)
model = PeftModel.from_pretrained(base, "/kaggle/working/deepseek-lora")
model.to(DEVICE)

# 3) Greedy generation (do_sample=False) to compare “after fine-tuning” outputs:
print("\n── After fine-tuning ─────────────────────────────────")
for prompt in PROMPTS:
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    gen_ids = model.generate(
        **inputs,
        max_new_tokens=100,
        use_cache=False,
        do_sample=False
    )
    out = tokenizer.decode(gen_ids[0], skip_special_tokens=True)
    question = prompt.splitlines()[1]
    print(f" After fine-tuning ({question}):\n",
          out.split("### Response:")[-1].strip(), "\n")


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



── After fine-tuning ─────────────────────────────────


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


 After fine-tuning (How do I reset my password?):
 Go to 'Settings' > 'Security' > 'Reset Password' and enter your email. 



Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


 After fine-tuning (What is refund policy of saipal?):
 Our refund policy is simple: if you need to return items, we'll process within 24 hours. 



Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


 After fine-tuning (What are Saipal’s opening and closing hours?):
 Our hours are from 5 AM to 5 PM every day, except on Sundays and Mondays where we close at 8 AM and 8 PM, respectively. 



Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


 After fine-tuning (What is address of saipal?):
 We offer cloud services to bring AI to the top of the world. 

 After fine-tuning (How can I contact Saipal?):
 Reach support via email at support@saipal.ai or call +977-123-456-789-123. 

