# Load model with Unsloth patching

In [1]:
from unsloth import FastLanguageModel

# Checkpoint and loading options for the base model.
BASE_MODEL_NAME = "deepseek-ai/deepseek-llm-7b-base"
MAX_SEQ_LENGTH = 1024
LOAD_IN_4BIT = True

# from_pretrained returns the model together with its tokenizer; both are
# reused by later cells.
model, tok = FastLanguageModel.from_pretrained(
    model_name=BASE_MODEL_NAME,
    max_seq_length=MAX_SEQ_LENGTH,
    load_in_4bit=LOAD_IN_4BIT,
)
print("Loaded model in 4-bit ✅")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 08-15 17:53:46 [__init__.py:244] Automatically detected platform cuda.
==((====))==  Unsloth 2025.7.11: Fast Llama patching. Transformers: 4.53.2. vLLM: 0.9.2.
   \\   /|    NVIDIA RTX A4000. Num GPUs = 1. Max memory: 15.724 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.6. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


pytorch_model.bin.index.json: 0.00B [00:00, ?B/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.97G [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.85G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/792 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

deepseek-ai/deepseek-llm-7b-base does not have a padding token! Will use pad_token = <|PAD_TOKEN|>.
Loaded model in 4-bit ✅


# Apply LoRA adapter

In [2]:
# LoRA hyperparameters, gathered in one mapping so they are easy to scan and tune.
# NOTE: the Unsloth log printed by this cell reports that a non-zero dropout
# is not covered by its fast patching and causes a performance hit.
lora_kwargs = dict(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    bias="none",
    use_gradient_checkpointing=True,
)

# Wrap the base model loaded earlier with the LoRA adapters.
peft_model = FastLanguageModel.get_peft_model(model, **lora_kwargs)
print("Loaded peft model ✅")


Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.7.11 patched 30 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


Loaded peft model ✅


# Load the dataset from corpus

In [3]:
import os 

# Directory holding the .txt corpus. Set the CORPUS_DIR environment variable to
# point the notebook at another location without editing this cell.
CORPUS_DIR = os.environ.get("CORPUS_DIR", "/datasets/wtk_archive")
BLOCK_SIZE = 1024  # tokens per training chunk; also passed to the trainer as max_seq_length

def load_txt_corpus(directory):
    """Read every ``.txt`` file directly inside ``directory`` into one string.

    Files are read in sorted filename order so the concatenated corpus (and
    therefore every chunk derived from it) is the same on every run and on
    every machine. Sub-directories are not searched.

    Args:
        directory: Path of the folder that contains the ``.txt`` files.

    Returns:
        The contents of all matching files joined by a blank line
        (``"\\n\\n"``), or an empty string when no file matches.

    Raises:
        OSError: If ``directory`` cannot be listed or a file cannot be opened.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    texts = []
    # os.listdir() returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".txt"):
            continue
        path = os.path.join(directory, filename)
        if not os.path.isfile(path):
            continue  # e.g. a directory whose name happens to end in ".txt"
        with open(path, "r", encoding="utf-8") as f:
            texts.append(f.read())
    return "\n\n".join(texts)

raw_text = load_txt_corpus(CORPUS_DIR)

# Stop here when nothing was read: an empty corpus would otherwise only surface
# later as an empty dataset.
if not raw_text.strip():
    raise ValueError(f"No text found in any .txt file under {CORPUS_DIR!r}")

print(f"Loaded dataset from corpus {CORPUS_DIR} with len {len(raw_text)} ✅")

Loaded dataset from corpus /datasets/wtk_archive with len 23520720 ✅


# Chunk into token blocks

In [4]:
from datasets import Dataset

def chunk_text_into_blocks(text, tokenizer, block_size):
    """Split ``text`` into pieces of exactly ``block_size`` tokens each.

    The whole text is tokenized in one call (``return_tensors="pt"``, no
    truncation), cut into consecutive non-overlapping windows of
    ``block_size`` token ids, and each window is decoded back to a string
    with special tokens skipped. Tokens left over after the last full window
    are dropped.

    Args:
        text: The full corpus as a single string.
        tokenizer: Callable tokenizer that also provides ``decode``.
        block_size: Number of token ids per window.

    Returns:
        A list of ``{"text": <decoded window>}`` dicts, one per full window.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=False)
    token_ids = encoded["input_ids"][0]
    # Last start offset that still leaves a full window of block_size ids.
    last_start = len(token_ids) - block_size
    return [
        {
            "text": tokenizer.decode(
                token_ids[start:start + block_size], skip_special_tokens=True
            )
        }
        for start in range(0, last_start + 1, block_size)
    ]

# Cut the corpus into BLOCK_SIZE-token chunks using the tokenizer loaded with the model.
token_chunks = chunk_text_into_blocks(raw_text, tok, BLOCK_SIZE)
# Each chunk is a {"text": ...} dict, so the dataset ends up with a single "text" column.
dataset = Dataset.from_list(token_chunks)
print(f"Prepared {len(dataset)} training chunks ✅")

Prepared 4843 training chunks ✅


# Train using SFTTrainer

In [None]:
from unsloth import is_bfloat16_supported
from trl import SFTConfig, SFTTrainer

# Training hyperparameters. These must be a config object: the trainer reads
# them as attributes, which a plain dict does not provide.
training_args = SFTConfig(
    output_dir="lora-txt-training",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    logging_steps=10,
    save_steps=1000,
    save_total_limit=1,
    # Use bf16 where the GPU supports it and fall back to fp16 otherwise,
    # instead of forcing fp16 on every device.
    fp16=not is_bfloat16_supported(),
    bf16=is_bfloat16_supported(),
    remove_unused_columns=False,
    report_to="none",
    # The dataset already has a "text" column, so read it directly rather than
    # going through a per-example formatting function.
    dataset_text_field="text",
)

trainer = SFTTrainer(
    model=peft_model,
    tokenizer=tok,
    train_dataset=dataset,
    max_seq_length=BLOCK_SIZE,
    args=training_args,
)
trainer.train()
print("Training complete ✅")

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/4843 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 4,843 | Num Epochs = 3 | Total steps = 1,818
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 2 x 1) = 8
 "-____-"     Trainable parameters = 3,932,160 of 6,914,297,856 (0.06% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,2.2575
2,2.3126
3,2.276
4,2.181
5,2.2176
6,2.2457
7,2.2559
8,2.2344
9,2.3554
10,2.2726


# Save training results

In [20]:
# Persist the trained model held by the trainer and the tokenizer to the same folder.
adapter_dir = "lora-txt-training/"
trainer.model.save_pretrained(adapter_dir)
tok.save_pretrained(adapter_dir)

print("Training results saved ✅")

('my_lora/tokenizer_config.json',
 'my_lora/special_tokens_map.json',
 'my_lora/tokenizer.json')