In [1]:
%%capture
!pip install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git


In [2]:
from google.colab import drive

# Mount Google Drive under /content/drive so the key files stored there can be read.
drive.mount('/content/drive')

# The Hugging Face token lives in a Drive file; surrounding whitespace is stripped.
secret_file_path = '/content/drive/My Drive/secrets/huggingfacetoken.key'
with open(secret_file_path) as f:
    token_text = f.read()
hf_token = token_text.strip()

Mounted at /content/drive


In [3]:
# The Weights & Biases key is read from its own file on the mounted Drive;
# surrounding whitespace is stripped.
secret_file_path = '/content/drive/My Drive/secrets/wandb.key'

with open(secret_file_path) as f:
    key_text = f.read()
wb_token = key_text.strip()

In [4]:
from huggingface_hub import login

# Authenticate with the Hugging Face Hub using the token held in hf_token.
login(token=hf_token)

In [6]:
import os
import wandb

# Set WANDB_SILENT before logging in and starting the run.
os.environ["WANDB_SILENT"] = "true"

wandb.login(key=wb_token)

# NOTE(review): the project name mentions a "Medical COT Dataset", while the prompts
# in this notebook are about Finnish dialect translation — confirm the name is intended.
wandb_run_options = dict(
    project='Fine-tune-DeepSeek-R1-Distill-Llama-8B on Medical COT Dataset 2',
    job_type="training",
    anonymous="allow",
)
run = wandb.init(**wandb_run_options)

In [7]:
from unsloth import FastLanguageModel

# Loading options. max_seq_length is reused later when the trainer is built.
max_seq_length = 2048
dtype = None
load_in_4bit = True

# Gather the keyword arguments first, then load model and tokenizer in one call.
model_load_options = dict(
    model_name="unsloth/DeepSeek-R1-Distill-Llama-8B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    token=hf_token,
)
model, tokenizer = FastLanguageModel.from_pretrained(**model_load_options)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.3.14: Fast Llama patching. Transformers: 4.48.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/53.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [8]:
# Inference prompt. The first {} receives the sentence to translate; the second {}
# sits right after the opening <think> tag and is filled with "" when generating.
prompt_style = (
    "Below is an instruction that describes a task, paired with an input that provides further context.\n"
    "Write a response that appropriately completes the request.\n"
    "\n"
    "### Instruction:\n"
    "Translate standard Finnish sentence to South Ostrobothnian dialect.\n"
    "\n"
    "### Question:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "<think>{}"
)

In [9]:
# Generate a translation for one sample sentence with the model as it is at this point.
# The Finnish text is written with proper "ä" characters (it was mis-encoded before).
question = "Hevoskauppias ajoi uudella hevosellaan meidän talon ohi. Katsoin häntä aitan ovelta."

FastLanguageModel.for_inference(model)
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)
response = tokenizer.batch_decode(outputs)
# Print only the part of the decoded text that follows the "### Response:" header.
print(response[0].split("### Response:")[1])


<think>
Okay, so I need to translate this Finnish sentence into the South Ostrobothnian dialect. First, I should understand what the original sentence is saying. The sentence is: "Hevoskauppias ajoi uudella hevosellaan meidän talon ohi. Katsoin häntä aitan ovelta."

Breaking it down, "Hevoskauppias" is the word for a horse dealer or someone who deals with horses. "Ajoi" means he drove or moved. "Uudella hevosellaan" is about a new horse. "Meidän talon ohi" translates to around our house or near our house. The second sentence, "Katsoin häntä aitan ovelta," means I looked at him from the house door.

Now, translating to South Ostrobothnian dialect. I know that dialects often have unique terms. For example, "hevoskauppias" might be "hevostenkauppias" in the dialect, but I'm not entirely sure. "Ajoi" is likely the same. "Uudella" might be "uuteen" or "uudelle." "Hevosellaan" would probably stay as "hevosellaan."

"Meidän talon ohi" could become "meidän talon kohi" or "meidän talo

In [10]:
# Training prompt for sentence translation. Placeholders, in order: the sentence,
# the text placed between <think> and </think>, and the final answer.
train_prompt_style = (
    "Below is an instruction that describes a task, paired with an input that provides further context.\n"
    "Write a response that appropriately completes the request.\n"
    "\n"
    "### Instruction:\n"
    "Translate standard Finnish sentence to South Ostrobothnian dialect.\n"
    "\n"
    "### Question:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "<think>\n"
    "{}\n"
    "</think>\n"
    "{}"
)

In [11]:
# Training prompt for single-word translation. Placeholders, in order: the word,
# the text placed between <think> and </think>, and the dialect equivalent.
train_prompt_vocabulary_style = (
    "Below is an instruction that describes a task, paired with an input that provides further context.\n"
    "Write a response that appropriately completes the request.\n"
    "\n"
    "### Instruction:\n"
    "Translate the following word from standard Finnish to South Ostrobothnian dialect.\n"
    "\n"
    "### Word:\n"
    "{}\n"
    "\n"
    "### Dialect Equivalent:\n"
    "<think>\n"
    "{}\n"
    "</think>\n"
    "{}"
)

In [29]:
# End-of-sequence marker taken from the tokenizer; the prompt-formatting functions
# append it to every training text they build.
EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN


def formatting_prompts_func(examples, template=None, eos_token=None):
    """Build the training text for a batch of sentence-translation examples.

    Args:
        examples: Batch mapping with "Question", "Complex_CoT" and "Response"
            keys, each holding a sequence of values (the shape passed by
            ``Dataset.map(..., batched=True)``).
        template: Format string with three ``{}`` placeholders (question,
            chain of thought, response). Defaults to the notebook-level
            ``train_prompt_style``.
        eos_token: String appended to every formatted text. Defaults to the
            notebook-level ``EOS_TOKEN``.

    Returns:
        ``{"text": [...]}`` with one formatted string per example.
    """
    # Globals are looked up only when no override is given, so the function
    # can also be called (and tested) without the notebook state.
    if template is None:
        template = train_prompt_style
    if eos_token is None:
        eos_token = EOS_TOKEN

    rows = zip(examples["Question"], examples["Complex_CoT"], examples["Response"])
    # Local names avoid shadowing the builtin `input`.
    texts = [
        template.format(question, cot, response) + eos_token
        for question, cot, response in rows
    ]
    return {"text": texts}

In [30]:
def formatting_prompts_vocab_func(examples, template=None, eos_token=None):
    """Build the training text for a batch of single-word vocabulary examples.

    Args:
        examples: Batch mapping with "Question", "Complex_CoT" and "Response"
            keys, each holding a sequence of values (the shape passed by
            ``Dataset.map(..., batched=True)``).
        template: Format string with three ``{}`` placeholders (word, chain of
            thought, dialect equivalent). Defaults to the notebook-level
            ``train_prompt_vocabulary_style``.
        eos_token: String appended to every formatted text. Defaults to the
            notebook-level ``EOS_TOKEN``.

    Returns:
        ``{"text": [...]}`` with one formatted string per example.
    """
    # Globals are looked up only when no override is given, so the function
    # can also be called (and tested) without the notebook state.
    if template is None:
        template = train_prompt_vocabulary_style
    if eos_token is None:
        eos_token = EOS_TOKEN

    rows = zip(examples["Question"], examples["Complex_CoT"], examples["Response"])
    # Local names avoid shadowing the builtin `input`.
    texts = [
        template.format(word, cot, equivalent) + eos_token
        for word, cot, equivalent in rows
    ]
    return {"text": texts}

In [13]:
from datasets import load_dataset

# Sentence-level training examples stored as JSON on the mounted Drive.
data_file_path = '/content/drive/My Drive/data/train_set_deep_seek.json'

# Only records 0..349 are loaded. The packaged "json" builder runs no repository
# code, so trust_remote_code is not passed.
dataset0 = load_dataset('json', data_files=data_file_path, split="train[0:350]")

# Add the "text" column holding the fully formatted training prompt.
dataset0 = dataset0.map(formatting_prompts_func, batched=True)

# Show the first question as a quick look at the loaded data.
dataset0["Question"][0]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/350 [00:00<?, ? examples/s]

'Ensimmäisen kerran pääsimme elokuviin seuratalolle kun osuuskaupan kokouksessa näytettiin kuvia eri tehtailta'

In [31]:
# Word-level vocabulary examples stored as JSON on the mounted Drive.
data_file_path = '/content/drive/My Drive/data/vocabulary_deep_seek_format.json'

# "train[:-1]" loads every record except the last one.
# NOTE(review): the reason for dropping the last record is not visible here — confirm.
# The packaged "json" builder runs no repository code, so trust_remote_code is not passed.
dataset1 = load_dataset('json', data_files=data_file_path, split="train[:-1]")

# Add the "text" column using the vocabulary prompt template.
dataset1 = dataset1.map(formatting_prompts_vocab_func, batched=True)

Map:   0%|          | 0/2725 [00:00<?, ? examples/s]

In [32]:
from datasets import concatenate_datasets

# Sentence examples come first, vocabulary examples after them.
training_parts = [dataset0, dataset1]
dataset = concatenate_datasets(training_parts)
print(dataset)

Dataset({
    features: ['Question', 'Complex_CoT', 'Response', 'text'],
    num_rows: 3075
})


In [33]:
# Sanity check: print the Python type of the combined training dataset.
print(type(dataset))

<class 'datasets.arrow_dataset.Dataset'>


In [34]:
# Print the formatted training text of the tenth example from the end of the dataset.
print(dataset['text'][-10])

Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.

### Instruction:
Translate the following word from standard Finnish to South Ostrobothnian dialect.

### Word:
minä

### Dialect Equivalent:
<think>

</think>
mä<｜end▁of▁sentence｜>


In [35]:
# Names of the projection modules that are passed as LoRA targets.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Rebinds `model` to the PEFT-wrapped model returned by Unsloth.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

Unsloth: Already have LoRA adapters! We shall skip this step.


In [36]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Precision flags: bf16 when the helper reports support, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Use num_train_epochs = 1, warmup_ratio for full training runs!
    warmup_steps=5,
    max_steps=60,
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=10,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
)

# The trainer reads the pre-formatted prompts from the dataset's "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=training_args,
)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/3075 [00:00<?, ? examples/s]

In [37]:
# Print the GPU status, then run training and keep the value returned by train().
!nvidia-smi
trainer_stats = trainer.train()

Sat Mar 15 19:07:00 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   50C    P0             28W /   70W |    6960MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 3,075 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040/8,000,000,000 (0.52% trained)


Step,Training Loss
10,0.8899
20,0.44
30,0.5558
40,0.534
50,0.5625
60,0.5695


In [38]:
# Generate a translation for the sample sentence with the model as it is at this point.
# The Finnish text is written with proper "ä" characters (it was mis-encoded before).
question = "Hevoskauppias ajoi uudella hevosellaan meidän talon ohi. Katsoin häntä aitan ovelta."


FastLanguageModel.for_inference(model)  # Unsloth has 2x faster inference!
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)
response = tokenizer.batch_decode(outputs)
# Print only the part of the decoded text that follows the "### Response:" header.
print(response[0].split("### Response:")[1])



<think>

</think>
Hevooskauppias ajoo uudella hevoosellaan meidan talon ohi. Katsoin häntä aitaan ovelta.<｜end▁of▁sentence｜>


In [None]:
import shutil  # not used in this cell; a later cell calls shutil.make_archive
# Local directory that receives every file saved below.
# NOTE(review): the name says "Medical-COT" although the prompts in this notebook are
# about Finnish dialect translation — confirm the name is intended.
new_model_local = "DeepSeek-R1-Medical-COT"
# Save the model and the tokenizer into that directory.
model.save_pretrained(new_model_local)
tokenizer.save_pretrained(new_model_local)

# Save again with save_method "merged_16bit" into the same directory.
# NOTE(review): both saves target one folder, so their files end up side by side —
# confirm that mixing them is intended.
model.save_pretrained_merged(new_model_local, tokenizer, save_method = "merged_16bit",)




Unsloth: You have 1 CPUs. Using `safe_serialization` is 10x slower.
We shall switch to Pytorch saving, which might take 3 minutes and not 30 minutes.
To force `safe_serialization`, set it to `None` instead.
Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded
model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.
Unsloth: Will remove a cached repo with size 6.0G


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 3.73 out of 12.67 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


 34%|███▍      | 11/32 [00:00<00:01, 12.91it/s]
We will save to Disk and not RAM now.
100%|██████████| 32/32 [04:04<00:00,  7.64s/it]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving DeepSeek-R1-Medical-COT/pytorch_model-00001-of-00004.bin...
Unsloth: Saving DeepSeek-R1-Medical-COT/pytorch_model-00002-of-00004.bin...
Unsloth: Saving DeepSeek-R1-Medical-COT/pytorch_model-00003-of-00004.bin...
Unsloth: Saving DeepSeek-R1-Medical-COT/pytorch_model-00004-of-00004.bin...
Done.


In [None]:
# List the contents of the current working directory.
!ls -l

In [None]:
!ls -l ./DeepSeek-R1-Medical-COT
shutil.make_archive("deepseek_model", 'zip', new_model_local)


total 15865040
-rw-r--r-- 1 root root        824 Mar 15 11:09 adapter_config.json
-rw-r--r-- 1 root root  167832240 Mar 15 11:09 adapter_model.safetensors
-rw-r--r-- 1 root root       1000 Mar 15 11:13 config.json
-rw-r--r-- 1 root root        231 Mar 15 11:13 generation_config.json
-rw-r--r-- 1 root root 4976718466 Mar 15 11:15 pytorch_model-00001-of-00004.bin
-rw-r--r-- 1 root root 4999826886 Mar 15 11:17 pytorch_model-00002-of-00004.bin
-rw-r--r-- 1 root root 4915939082 Mar 15 11:20 pytorch_model-00003-of-00004.bin
-rw-r--r-- 1 root root 1168140873 Mar 15 11:20 pytorch_model-00004-of-00004.bin
-rw-r--r-- 1 root root      23950 Mar 15 11:20 pytorch_model.bin.index.json
-rw-r--r-- 1 root root       5127 Mar 15 11:09 README.md
-rw-r--r-- 1 root root        483 Mar 15 11:13 special_tokens_map.json
-rw-r--r-- 1 root root      52956 Mar 15 11:13 tokenizer_config.json
-rw-r--r-- 1 root root   17209530 Mar 15 11:13 tokenizer.json


In [None]:
from IPython.display import FileLink

# shutil.make_archive("deepseek_model", ...) writes deepseek_model.zip into the current
# working directory, so the link points there. A /kaggle/working path exists only on
# Kaggle, while this notebook mounts Google Drive through google.colab.
FileLink("deepseek_model.zip")