# Model preparation notebook

In [1]:
# Start time: print the current date/time via the shell so the run's start is recorded in the output
!date

Sat May  3 08:55:38 AM UTC 2025


## Setup

In [2]:
from getpass import getpass

def get_secret(prompt, secret_name, secret_input=True):
  """Return a secret from Colab's user data, or ask the user for it.

  The Colab secret store is tried first. If it is unavailable (not running
  on Colab), the lookup fails, or the secret has no value, the user is
  prompted interactively instead.

  Args:
    prompt: Text shown to the user when falling back to interactive input.
    secret_name: Name of the secret to look up in Colab's user data.
    secret_input: When True (default) read the fallback value with
      `getpass` (no echo); when False use plain `input`.

  Returns:
    The secret value as returned by Colab or as typed by the user.
  """
  try:
    from google.colab import userdata
  except ImportError:
    # Not running on Colab: only the interactive prompt is available.
    userdata = None

  if userdata is not None:
    try:
      result = userdata.get(secret_name)
    except Exception:
      # Any lookup failure (e.g. the secret is not defined or notebook access
      # was not granted) falls back to the prompt. `Exception` deliberately
      # leaves KeyboardInterrupt/SystemExit alone so the user can still abort.
      result = None
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if result is not None:
      return result

  if secret_input:
    return getpass(prompt)
  return input(prompt)


In [3]:
%%capture
# Install dependencies; `%%capture` (which must stay on the first line of the cell) hides pip's output.
import os
# Environment check: treats the run as Colab when the concatenated environment variable names contain "COLAB_".
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    # `--no-deps` installs only the listed packages (some pinned) without pulling in their dependencies.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    !pip install --no-deps unsloth
    !pip install -U transformers

from unsloth import FastLanguageModel  # Load unsloth ASAP

## Download fine-tuned checkpoint

In [4]:
import os

if (
    (not os.path.exists(os.path.expanduser("~/.ssh/id_rsa")))
    or (not os.path.exists(os.path.expanduser("~/.ssh/known_hosts")))
):
  SSHKEY = get_secret('Result pusher SSH key: ', 'TFM_SSH_PUSHER_KEY')

  !mkdir ~/.ssh

  # Read locally with `cat ~/.ssh/result-pusher|tr '\n' '$';echo`
  with open(os.path.expanduser("~/.ssh/id_rsa"), 'wt') as f:
    f.write(SSHKEY.replace('$', '\n'))

  !chmod 0600 ~/.ssh/id_rsa
  !ssh-keygen -y -f ~/.ssh/id_rsa > ~/.ssh/id_rsa.pub
  !chmod 0600 ~/.ssh/id_rsa.pub

  # This won't copy the client key (not needed), but it will initialize the server's on the client
  !ssh-copy-id -i ~/.ssh/id_rsa -o StrictHostKeyChecking=accept-new result-pusher@kb.tfm.codigoparallevar.com

  del SSHKEY

/usr/bin/ssh-copy-id: INFO: Source of key(s) to be installed: "/root/.ssh/id_rsa.pub"
/usr/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed

		(if you think this is a mistake, you may want to use -f option)



In [6]:
# Fetch the checkpoint-300 directory from the results server into ./fine-tune.
# rsync flags: -H keep hard links, -P show progress and keep partial files,
# -r recurse, -z compress in transit; --mkpath creates missing destination directories.
!rsync -HPrz --mkpath \
  result-pusher@kb.tfm.codigoparallevar.com:fine-tuning/fine-tuned/peft-kbs-summary-training-1746028477/checkpoint-300/ \
    fine-tune

receiving incremental file list
created 1 directory for fine-tune
README.md
          5,104 100%    4.87MB/s    0:00:00 (xfr#1, to-chk=12/14)
adapter_config.json
            860 100%  839.84kB/s    0:00:00 (xfr#2, to-chk=11/14)
adapter_model.safetensors
    262,219,392 100%   12.51MB/s    0:00:19 (xfr#3, to-chk=10/14)
merges.txt
        916,646 100%    1.31MB/s    0:00:00 (xfr#4, to-chk=9/14)
optimizer.pt
    133,785,108 100%   12.27MB/s    0:00:10 (xfr#5, to-chk=8/14)
rng_state.pth
         14,244 100%   16.66kB/s    0:00:00 (xfr#6, to-chk=7/14)
scheduler.pt
          1,064 100%    1.24kB/s    0:00:00 (xfr#7, to-chk=6/14)
special_tokens_map.json
            456 100%    0.53kB/s    0:00:00 (xfr#8, to-chk=5/14)
tokenizer.json
      7,153,264 100%    6.88MB/s    0:00:00 (xfr#9, to-chk=4/14)
tokenizer_config.json
         17,987 100%   17.71kB/s    0:00:00 (xfr#10, to-chk=3/14)
trainer_state.json
          3,599 100%    3.54kB/s    0:00:00 (xfr#11, to-chk=2/14)
training_args.bin
         

## Load base model and apply fine-tuned adapter

In [None]:
from peft import PeftModel
import torch

# Device placement map; defined here but not passed to either loader below.
device_map = {"": 0}

# Arguments for loading the base model through Unsloth.
base_model_kwargs = {
    "model_name": "unsloth/Phi-4",
    "max_seq_length": 16384,
    "load_in_4bit": True,
    # "token": "hf_...",  # use one if using gated models like meta-llama/Llama-2-7b-hf
}
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_kwargs)

# Attach the adapter stored in ./fine-tune on top of the base model, for inference only.
ft_model = PeftModel.from_pretrained(
    model,
    "fine-tune",
    torch_dtype=torch.float16,
    is_trainable=False,
)

==((====))==  Unsloth 2025.4.7: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors.index.json:   0%|          | 0.00/160k [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

## Save to GGUF

In [None]:
# Export the adapted model to GGUF under ./model, once per requested quantization method.
ft_model.save_pretrained_gguf(
    "model",
    tokenizer,
    quantization_method=["f16", "q4_k_m"],
)