In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
%%capture
!pip install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [2]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

hf_token = user_secrets.get_secret("huggingface")
login(hf_token)

In [3]:
import wandb

wb_token = user_secrets.get_secret("wandb")

wandb.login(key=wb_token)
run = wandb.init(
    project='DeepSeek-R1-8B Test', 
    job_type="training", 
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mluyou042[0m ([33mluyou042-org[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [4]:
from unsloth import FastLanguageModel

# 设置模型的最大 序列长度（即每次输入的最大 token 数量）。
# 2048 适合 长文本处理，对于像 Llama 或 DeepSeek 这样的模型，处理大规模数据时可以提高训练效果。
max_seq_length = 2048 

# 该参数用于设置模型的数据类型
# None 表示 自动选择数据类型。
# 如果你希望使用 半精度训练（FP16 或 BF16），可以通过设置此参数来指定。
dtype = None 

# 4-bit 量化：加载模型时启用 4-bit 量化，使得模型参数存储为 4-bit，从而显著降低显存占用。
# 量化是为了在显存受限的设备上运行大模型。启用量化会在不显著降低性能的情况下减少内存使用。
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/DeepSeek-R1-Distill-Llama-8B",    # 指定要加载的模型名称
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = hf_token, 
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.2.15: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/53.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [5]:
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. 
Please answer the following medical question. 

### Question:
{}

### Response:
<think>{}"""

In [6]:
question = "A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?"

# 准备模型推理
FastLanguageModel.for_inference(model) 

# 将问题格式化并转换为模型可以理解的输入格式
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

# 生成回答
outputs = model.generate(
    # 输入的 token IDs，来自之前 tokenizer 转换的结果
    input_ids=inputs.input_ids,    
    # 用于标记哪些 token 需要注意，避免模型在无关区域做出不必要的计算。
    attention_mask=inputs.attention_mask,   
    # 生成的最大 token 数量为 1200，这意味着模型最多会生成 1200 个 token 的回答
    max_new_tokens=1200,    
    # 启用缓存，优化推理速度。
    use_cache=True,    
)

In [7]:
response = tokenizer.batch_decode(outputs)    # 将生成的 token IDs 转换为文本（字符串）
print(response[0].split("### Response:")[1])


<think>
Okay, so I have this medical case to think through. Let's see, the patient is a 61-year-old woman with a history of involuntary urine loss when she coughs or sneezes, but she doesn't leak at night. She's had a gynecological exam and a Q-tip test. I need to figure out what cystometry would show regarding her residual volume and detrusor contractions.

First, I should break down the information given. The patient has urinary incontinence, specifically during activities that involve coughing or sneezing. That makes me think of stress urinary incontinence, which is usually due to the urethral sphincter not closing properly during these activities. But since she doesn't leak at night, it's less likely to be a case of genuine stress incontinence, which often affects women during sleep.

Now, she underwent a gynecological exam. I'm assuming that this exam might have included a pelvic exam, possibly looking for pelvic floor muscle tone, bladder size, and other structural issues. The Q

In [8]:
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. 
Please answer the following medical question. 

### Question:
{}

### Response:
<think>
{}
</think>
{}"""

In [9]:
EOS_TOKEN = tokenizer.eos_token  # 结束标记（End of Sequence）

def formatting_prompts_func(examples):
    inputs = examples["Question"]     # 取出问题
    cots = examples["Complex_CoT"]    # 取出复杂推理过程（CoT）
    outputs = examples["Response"]    # 取出回答
    texts = []    # 存储格式化后的文本
    
    for input, cot, output in zip(inputs, cots, outputs):
        text = train_prompt_style.format(input, cot, output) + EOS_TOKEN
        texts.append(text)
        
    return {
        "text": texts,
    }

In [10]:
from datasets import load_dataset

# 加载数据集
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT","en", split = "train[0:500]",trust_remote_code=True)

# 应用自定义的格式化函数
dataset = dataset.map(formatting_prompts_func, batched = True,)

# 查看处理后的数据集的第一个文本样本
dataset["text"][0]

README.md:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

medical_o1_sft.json:   0%|          | 0.00/74.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25371 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

"Below is an instruction that describes a task, paired with an input that provides further context. \nWrite a response that appropriately completes the request. \nBefore answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.\n\n### Instruction:\nYou are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. \nPlease answer the following medical question. \n\n### Question:\nA 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?\n\n### Response:\n<think>\nOkay, let's think about this step by step. There's a 61-year-old woman here who's been dealing with involuntary urine leakages whenever she's doing something that ups her ab

In [13]:
model = FastLanguageModel.get_peft_model(
    model,    # 需要进行 LoRA 微调 的 LLM
    r=16,    # LoRA 低秩矩阵的秩值，决定参数更新规模（推荐 8 或 16）
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,    # LoRA scaling factor（影响学习率）
    lora_dropout=0,     # oRA dropout（0 代表不开启）
    bias="none",      # 是否对 LoRA 进行偏置微调（none 表示不训练偏置）
    use_gradient_checkpointing="unsloth",  # 开启梯度检查点，节省显存（用于长上下文训练）
    random_state=3407,    # 设置随机种子，保证实验结果可复现
    use_rslora=False,    # 是否使用 r-sLoRA（更高效的 LoRA 变体），False 代表不用  
    loftq_config=None,    # 是否使用 LoftQ（量化 LoRA），None 代表关闭
)

Unsloth 2025.2.15 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [20]:
pip install unsloth==2024.12.12

Collecting unsloth==2024.12.12
  Downloading unsloth-2024.12.12-py3-none-any.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.0/60.0 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Downloading unsloth-2024.12.12-py3-none-any.whl (175 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.7/175.7 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: unsloth
  Attempting uninstall: unsloth
    Found existing installation: unsloth 2025.2.15
    Uninstalling unsloth-2025.2.15:
      Successfully uninstalled unsloth-2025.2.15
Successfully installed unsloth-2024.12.12
Note: you may need to restart the kernel to use updated packages.


In [23]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,    # 负责 分词（保证训练数据格式正确）
    train_dataset=dataset,    # 训练数据集（预处理后的 Hugging Face Dataset）
    dataset_text_field="text",    # 训练数据的 文本字段（例如 "text"）
    max_seq_length=max_seq_length,    # 最大序列长度（如 2048）
    dataset_num_proc=2,    # 数据预处理的 CPU 进程数（加速数据加载）
    args=TrainingArguments(    # 训练参数
        per_device_train_batch_size=2,    # 每块 GPU 的 batch size（小显存建议 2-4）
        gradient_accumulation_steps=4,    # 梯度累积，相当于 batch size 放大 4x
        warmup_steps=5,    # 预热步数，前 5 步学习率从 0 线性增加
        max_steps=60,    # 训练 60 步（适用于快速测试）
        learning_rate=2e-4,    # 默认 2e-4（LoRA 一般 2e-4 ~ 1e-5）
        fp16=not is_bfloat16_supported(),    # NVIDIA A100/H100/4090 支持，训练更稳定
        bf16=is_bfloat16_supported(),    # T4/V100/2080 不支持 BF16，改用 FP16
        logging_steps=10,    # 每 10 步打印训练日志
        optim="adamw_8bit",    # 使用 8-bit AdamW，减少显存占用
        weight_decay=0.01,    # L2 正则化，防止过拟合
        lr_scheduler_type="linear",    # 线性学习率衰减
        seed=3407,    # 设定随机种子，保证实验可复现
        output_dir="outputs",    # 模型 & 训练日志保存路径
    ),
)

AttributeError: _unwrapped_old_generate

In [22]:
trainer_stats = trainer.train()

NameError: name 'trainer' is not defined

In [None]:
question = "A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?"

# 准备模型推理
FastLanguageModel.for_inference(model) 

# 将问题格式化并转换为模型可以理解的输入格式
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

# 生成回答
outputs = model.generate(
    # 输入的 token IDs，来自之前 tokenizer 转换的结果
    input_ids=inputs.input_ids,    
    # 用于标记哪些 token 需要注意，避免模型在无关区域做出不必要的计算。
    attention_mask=inputs.attention_mask,   
    # 生成的最大 token 数量为 1200，这意味着模型最多会生成 1200 个 token 的回答
    max_new_tokens=1200,    
    # 启用缓存，优化推理速度。
    use_cache=True,    
)

# 解码生成的回答
response = tokenizer.batch_decode(outputs)    # 将生成的 token IDs 转换为文本（字符串）
print(response[0].split("### Response:")[1])

In [24]:
del model

In [25]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/DeepSeek-R1-Distill-Llama-8B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = hf_token, 
)

==((====))==  Unsloth 2025.2.15: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [26]:
FastLanguageModel.for_training(model)

model = FastLanguageModel.get_peft_model(
    model,
    r=16,  
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,  
    bias="none",  
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,  
    loftq_config=None,
)

In [28]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,    # 负责 分词（保证训练数据格式正确）
    train_dataset=dataset,    # 训练数据集（预处理后的 Hugging Face Dataset）
    dataset_text_field="text",    # 训练数据的 文本字段（例如 "text"）
    max_seq_length=max_seq_length,    # 最大序列长度（如 2048）
    dataset_num_proc=2,    # 数据预处理的 CPU 进程数（加速数据加载）
    args=TrainingArguments(    # 训练参数
        per_device_train_batch_size=2,    # 每块 GPU 的 batch size（小显存建议 2-4）
        gradient_accumulation_steps=4,    # 梯度累积，相当于 batch size 放大 4x
        warmup_steps=5,    # 预热步数，前 5 步学习率从 0 线性增加
        max_steps=60,    # 训练 60 步（适用于快速测试）
        learning_rate=2e-4,    # 默认 2e-4（LoRA 一般 2e-4 ~ 1e-5）
        fp16=not is_bfloat16_supported(),    # NVIDIA A100/H100/4090 支持，训练更稳定
        bf16=is_bfloat16_supported(),    # T4/V100/2080 不支持 BF16，改用 FP16
        logging_steps=10,    # 每 10 步打印训练日志
        optim="adamw_8bit",    # 使用 8-bit AdamW，减少显存占用
        weight_decay=0.01,    # L2 正则化，防止过拟合
        lr_scheduler_type="linear",    # 线性学习率衰减
        seed=3407,    # 设定随机种子，保证实验可复现
        output_dir="outputs",    # 模型 & 训练日志保存路径
    ),
)

trainer_stats = trainer.train()

Converting train dataset to ChatML (num_proc=2):   0%|          | 0/500 [00:00<?, ? examples/s]

Applying chat template to train dataset (num_proc=2):   0%|          | 0/500 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=2):   0%|          | 0/500 [00:00<?, ? examples/s]

Truncating train dataset (num_proc=2):   0%|          | 0/500 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 500 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
10,1.9189
20,1.4615
30,1.4023
40,1.3088
50,1.3443
60,1.314


In [29]:
question = "A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?"

# 准备模型推理
FastLanguageModel.for_inference(model) 

# 将问题格式化并转换为模型可以理解的输入格式
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

# 生成回答
outputs = model.generate(
    # 输入的 token IDs，来自之前 tokenizer 转换的结果
    input_ids=inputs.input_ids,    
    # 用于标记哪些 token 需要注意，避免模型在无关区域做出不必要的计算。
    attention_mask=inputs.attention_mask,   
    # 生成的最大 token 数量为 1200，这意味着模型最多会生成 1200 个 token 的回答
    max_new_tokens=1200,    
    # 启用缓存，优化推理速度。
    use_cache=True,    
)

# 解码生成的回答
response = tokenizer.batch_decode(outputs)    # 将生成的 token IDs 转换为文本（字符串）
print(response[0].split("### Response:")[1])


<think>
Okay, so let's think about this. We have a 61-year-old woman who's been dealing with involuntary urine loss during things like coughing or sneezing, but she's not leaking at night. That suggests she might have some kind of problem with her pelvic floor muscles or maybe her bladder.

Now, she's got a gynecological exam and a Q-tip test. Let's break that down. The Q-tip test is usually used to check for urethral obstruction. If it's positive, that means there's something blocking the urethra, like a urethral stricture or something else.

Given that she's had a positive Q-tip test, it's likely there's a urethral obstruction. That would mean her urethra is narrow, maybe due to a stricture or some kind of narrowing. So, her bladder can't empty properly during activities like coughing because the urethral obstruction is making it hard.

Now, let's think about what happens when her bladder can't empty. If there's a urethral obstruction, the bladder is forced to hold more urine, incre

In [30]:
new_model_local = "DeepSeek-R1-Medical"
model.save_pretrained(new_model_local) 
tokenizer.save_pretrained(new_model_local)

model.save_pretrained_merged(new_model_local, tokenizer, save_method = "merged_16bit",)

Unsloth: You have 2 CPUs. Using `safe_serialization` is 10x slower.
We shall switch to Pytorch saving, which might take 3 minutes and not 30 minutes.
To force `safe_serialization`, set it to `None` instead.
Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded
model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.
Unsloth: Will remove a cached repo with size 6.0G


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 18.45 out of 31.35 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


 31%|███▏      | 10/32 [00:00<00:01, 14.24it/s]
We will save to Disk and not RAM now.
100%|██████████| 32/32 [00:25<00:00,  1.25it/s]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving DeepSeek-R1-Medical/pytorch_model-00001-of-00004.bin...
Unsloth: Saving DeepSeek-R1-Medical/pytorch_model-00002-of-00004.bin...
Unsloth: Saving DeepSeek-R1-Medical/pytorch_model-00003-of-00004.bin...
Unsloth: Saving DeepSeek-R1-Medical/pytorch_model-00004-of-00004.bin...
Done.


In [None]:
new_model_online = "luyou042/DeepSeek-R1-Medical-demo"
model.push_to_hub(new_model_online)
tokenizer.push_to_hub(new_model_online)

model.push_to_hub_merged(new_model_online, tokenizer, save_method = "merged_16bit")

README.md:   0%|          | 0.00/627 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.56k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Saved model to https://huggingface.co/luyou042/DeepSeek-R1-Medical-demo


tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

Unsloth: You are pushing to hub in Kaggle environment.
To save memory, we shall move luyou042/DeepSeek-R1-Medical-demo to /tmp/DeepSeek-R1-Medical-demo
Unsloth: Will remove a cached repo with size 1.6K


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 18.41 out of 31.35 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


100%|██████████| 32/32 [00:24<00:00,  1.31it/s]


Unsloth: Saving tokenizer...

No files have been modified since last commit. Skipping to prevent empty commit.


 Done.
Unsloth: Saving /tmp/DeepSeek-R1-Medical-demo/pytorch_model-00001-of-00004.bin...
Unsloth: Saving /tmp/DeepSeek-R1-Medical-demo/pytorch_model-00002-of-00004.bin...
Unsloth: Saving /tmp/DeepSeek-R1-Medical-demo/pytorch_model-00003-of-00004.bin...
Unsloth: Saving /tmp/DeepSeek-R1-Medical-demo/pytorch_model-00004-of-00004.bin...
