# Train LoRA from corpus (version 1)
This is the first pass at training a LoRA adapter.

# Load model with Unsloth patching

In [2]:
from unsloth import FastLanguageModel

# Options for the Unsloth loader: which checkpoint to fetch, the sequence
# length to configure, and whether to load it in 4-bit.
load_options = {
    "model_name": "deepseek-ai/deepseek-llm-7b-base",
    "max_seq_length": 1024,
    "load_in_4bit": True,
}

# from_pretrained hands back the model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

print("Loaded model in 4-bit ✅")

==((====))==  Unsloth 2025.7.11: Fast Llama patching. Transformers: 4.53.2. vLLM: 0.9.2.
   \\   /|    NVIDIA RTX A4000. Num GPUs = 1. Max memory: 15.724 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.6. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

deepseek-ai/deepseek-llm-7b-base does not have a padding token! Will use pad_token = <|PAD_TOKEN|>.
Loaded model in 4-bit ✅


# Apply LoRA adapter

In [2]:
# LoRA settings: rank 8 / alpha 16 adapters attached to the `q_proj` and
# `v_proj` modules only, with no bias parameters trained.
# NOTE(review): the recorded output of this cell warns that a non-zero
# dropout disables Unsloth's fast patching for the LoRA matrices — confirm
# that 0.05 is intended.
lora_options = dict(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    bias="none",
    use_gradient_checkpointing=True,
)

peft_model = FastLanguageModel.get_peft_model(model, **lora_options)
print("Loaded peft model ✅")


Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.7.11 patched 30 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


Loaded peft model ✅


# Load the dataset from corpus

In [4]:
import os 

# Directory scanned for the `.txt` files that make up the training corpus.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
CORPUS_DIR = "/datasets/wtk_archive_with_stops"
# Length of each training chunk; same value as the max_seq_length used when
# the model is loaded.
BLOCK_SIZE = 1024  # max tokens per chunk

def load_txt_corpus(directory, eos_token=None):
    """Concatenate every ``.txt`` file in ``directory`` into one string.

    Each file's content is followed by a space and the end-of-sequence
    marker, and the per-file strings are joined with a blank line.

    Args:
        directory: Path of the folder to scan (not recursive).
        eos_token: Marker appended after each file. When omitted, the
            ``eos_token`` of the notebook-level ``tokenizer`` is used.

    Returns:
        The combined corpus text; an empty string if no ``.txt`` file exists.
    """
    if eos_token is None:
        # The model-loading cell binds the tokenizer to the name `tokenizer`.
        eos_token = tokenizer.eos_token

    texts = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # concatenated corpus (and thus the chunk boundaries) reproducible.
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(".txt"):
            path = os.path.join(directory, filename)
            with open(path, "r", encoding="utf-8") as f:
                texts.append(f.read() + " " + eos_token)
    return "\n\n".join(texts)

# Read the whole corpus directory into a single string.
raw_text = load_txt_corpus(CORPUS_DIR)

# The reported length is len() of the string, i.e. characters rather than tokens.
print(f"Loaded dataset from corpus {CORPUS_DIR} with len {len(raw_text)} ✅")

Loaded dataset from corpus /datasets/wtk_archive_with_stops with len 23589635 ✅


# Chunk into token blocks

In [5]:
from datasets import Dataset

def chunk_text_into_blocks(text, tokenizer, block_size):
    """Split ``text`` into consecutive blocks of exactly ``block_size`` tokens.

    The text is tokenized once, cut into non-overlapping windows, and each
    window is decoded back to a string so the result can be fed to a trainer
    that tokenizes a ``"text"`` column itself.

    Args:
        text: The full corpus as one string.
        tokenizer: Callable tokenizer returning ``{"input_ids": [...]}`` and
            providing ``decode``.
        block_size: Number of tokens per block; must be positive.

    Returns:
        A list of ``{"text": <decoded block>}`` dicts. A trailing remainder
        shorter than ``block_size`` is dropped, so input with fewer than
        ``block_size`` tokens yields an empty list.

    Raises:
        ValueError: If ``block_size`` is not positive.
    """
    if block_size <= 0:
        raise ValueError("block_size must be a positive integer")

    # add_special_tokens=False: do not let the tokenizer wrap the corpus in its
    # own BOS/EOS, which would otherwise end up inside the first/last block.
    # Plain Python lists are enough here; no tensor is needed for slicing.
    input_ids = tokenizer(text, truncation=False, add_special_tokens=False)["input_ids"]

    chunks = []
    for start in range(0, len(input_ids) - block_size + 1, block_size):
        chunk_ids = input_ids[start:start + block_size]
        # Keep special tokens when decoding: the end-of-sequence markers placed
        # between documents must survive into the training text.
        chunk_text = tokenizer.decode(chunk_ids, skip_special_tokens=False)
        chunks.append({"text": chunk_text})
    return chunks

# Use the tokenizer returned by FastLanguageModel.from_pretrained; it is bound
# to the name `tokenizer` in the model-loading cell.
token_chunks = chunk_text_into_blocks(raw_text, tokenizer, BLOCK_SIZE)
dataset = Dataset.from_list(token_chunks)
print(f"Prepared {len(dataset)} training chunks ✅")

Prepared 4883 training chunks ✅


# Train using SFTTrainer (version 1)

In [None]:
from trl import SFTConfig, SFTTrainer
from unsloth import is_bfloat16_supported

# Choose the mixed-precision mode from what the GPU supports instead of
# hard-coding fp16: the model-loading log for this machine reports
# "Bfloat16 = TRUE".
use_bf16 = is_bfloat16_supported()

# Training hyperparameters as a config object rather than a plain dict.
# Periodic checkpoints go to `output_dir`; at most three are kept.
# NOTE(review): the recorded output below this cell shows 3 epochs and a
# per-device batch of 4, so it comes from an earlier configuration.
training_args = SFTConfig(
    output_dir="lora-txt-training",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    logging_steps=10,
    save_strategy="steps",
    save_steps=250,
    save_total_limit=3,
    fp16=not use_bf16,
    bf16=use_bf16,
    remove_unused_columns=False,
    report_to="none",
)

trainer = SFTTrainer(
    model=peft_model,
    tokenizer=tokenizer,  # the tokenizer returned when the base model was loaded
    train_dataset=dataset,
    formatting_func=lambda x: x["text"],
    max_seq_length=BLOCK_SIZE,
    args=training_args,
)
trainer.train()
print("Training complete ✅")

# Final adapter and tokenizer go to a directory separate from the
# checkpoint directory above.
trainer.model.save_pretrained("lora-txt-training2/")
tokenizer.save_pretrained("lora-txt-training2/")

print("Training results saved ✅")

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/4883 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 4,883 | Num Epochs = 3 | Total steps = 1,833
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 2 x 1) = 8
 "-____-"     Trainable parameters = 3,932,160 of 6,914,297,856 (0.06% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,2.2937
2,2.3016
3,2.3058
4,2.3718
5,2.2169
6,2.2899
7,2.2408
8,2.28
9,2.2296
10,2.2879


# Save training results
This cell may be unnecessary when the training cell above already saves the results.

In [6]:
# Write the trained model and the tokenizer files into the same directory.
# NOTE(review): the upload cell publishes "lora-txt-training2", not this
# directory — confirm which location is intended.
trainer.model.save_pretrained("lora-txt-training/")
tokenizer.save_pretrained("lora-txt-training/")

print("Training results saved ✅")

Training results saved ✅


# Push LoRA to Hugging Face

In [2]:
from huggingface_hub import HfApi, upload_folder

# Target repository on the Hub and the local directory to publish.
repo_id = "peers-ai/deepseek-7b-my-lora1"
folder = "lora-txt-training2"  # adapter_config.json plus the adapter weights file

api = HfApi()

# Ensure the private model repo exists; exist_ok=True makes this a no-op
# when it has already been created.
api.create_repo(repo_id, repo_type="model", private=True, exist_ok=True)

# Publish everything inside the folder through the same client.
api.upload_folder(repo_id=repo_id, folder_path=folder, repo_type="model")
print(f"✅ Uploaded to https://huggingface.co/{repo_id}")


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...training2/adapter_model.safetensors:  90%|######### | 14.2MB / 15.7MB            

✅ Uploaded to https://huggingface.co/peers-ai/deepseek-7b-my-lora1
