Skip to content

Commit

Permalink
support llama-3 (#42)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikecovlee committed Apr 23, 2024
1 parent 1d08c4e commit 0b2c539
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 8 deletions.
8 changes: 4 additions & 4 deletions config/dummy.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
{
"name": "lora_0",
"optim": "adamw",
"lr": 3e-4,
"lr": 1e-4,
"batch_size": 64,
"micro_batch_size": 32,
"micro_batch_size": 16,
"test_batch_size": 64,
"num_epochs": 20,
"use_dora": true,
Expand All @@ -35,9 +35,9 @@
{
"name": "lora_1",
"optim": "adamw",
"lr": 3e-4,
"lr": 1e-4,
"batch_size": 64,
"micro_batch_size": 32,
"micro_batch_size": 16,
"test_batch_size": 64,
"num_epochs": 20,
"r": 8,
Expand Down
2 changes: 1 addition & 1 deletion mlora/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
assert is_package_available(
"torch", "2.1.2"), "m-LoRA requires torch>=2.1.2"
assert is_package_available(
"transformers", "4.38.2"), "m-LoRA requires transformers>=4.38.2"
"transformers", "4.40.0"), "m-LoRA requires transformers>=4.40.0"


setup_logging()
Expand Down
8 changes: 8 additions & 0 deletions mlora/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from transformers import AutoTokenizer
from typing import List, Union

import logging


class Tokenizer:
def __init__(self, model_path: str):
Expand All @@ -15,6 +17,12 @@ def __init__(self, model_path: str):
# maybe pad id is unk
if self.pad_id_ is None and self.unk_id_ is not None:
self.pad_id_ = self.unk_id_
if self.pad_id_ is None and self.eos_id_ is not None:
self.pad_id_ = self.eos_id_
logging.warn("Padding token ID is None, setting to <eos>.")
else:
raise ValueError(
"Can not set padding token id. <eos> and <unk> are None.")

def encode(self, data: Union[str, List[str]], add_special_tokens: bool = True) -> Tokens:
if isinstance(data, str):
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "mlora"
version = "0.3.0"
version = "0.3.0.post1"
description = "A tool for fine-tuning large language models (LLMs) using the LoRA or QLoRA methods more efficiently."
readme = "README.md"
requires-python = ">=3.10"
Expand All @@ -18,7 +18,7 @@ dependencies = [
"datasets",
"evaluate",
"accelerate",
"transformers==4.38.2",
"transformers>=4.40.0",
"sentencepiece",
"huggingface_hub",
"scikit-learn",
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ torch>=2.1.2
datasets
evaluate
accelerate
transformers==4.38.2
transformers==4.40.0
sentencepiece
huggingface_hub
scikit-learn
Expand Down

0 comments on commit 0b2c539

Please sign in to comment.