diff --git a/config/dummy.json b/config/dummy.json
index f446a497..fa3d5f27 100644
--- a/config/dummy.json
+++ b/config/dummy.json
@@ -9,9 +9,9 @@
         {
             "name": "lora_0",
             "optim": "adamw",
-            "lr": 3e-4,
+            "lr": 1e-4,
             "batch_size": 64,
-            "micro_batch_size": 32,
+            "micro_batch_size": 16,
             "test_batch_size": 64,
             "num_epochs": 20,
             "use_dora": true,
@@ -35,9 +35,9 @@
         {
             "name": "lora_1",
             "optim": "adamw",
-            "lr": 3e-4,
+            "lr": 1e-4,
             "batch_size": 64,
-            "micro_batch_size": 32,
+            "micro_batch_size": 16,
             "test_batch_size": 64,
             "num_epochs": 20,
             "r": 8,
diff --git a/mlora/__init__.py b/mlora/__init__.py
index b190501c..79ddfb65 100644
--- a/mlora/__init__.py
+++ b/mlora/__init__.py
@@ -22,7 +22,7 @@
 assert is_package_available(
     "torch", "2.1.2"), "m-LoRA requires torch>=2.1.2"
 assert is_package_available(
-    "transformers", "4.38.2"), "m-LoRA requires transformers>=4.38.2"
+    "transformers", "4.40.0"), "m-LoRA requires transformers>=4.40.0"
 
 setup_logging()
 
diff --git a/mlora/tokenizer.py b/mlora/tokenizer.py
index 2a3380f4..5f9bcc04 100644
--- a/mlora/tokenizer.py
+++ b/mlora/tokenizer.py
@@ -3,6 +3,8 @@
 from transformers import AutoTokenizer
 from typing import List, Union
 
+import logging
+
 
 class Tokenizer:
     def __init__(self, model_path: str):
@@ -15,6 +17,12 @@ def __init__(self, model_path: str):
         # maybe pad id is unk
         if self.pad_id_ is None and self.unk_id_ is not None:
             self.pad_id_ = self.unk_id_
+        if self.pad_id_ is None and self.eos_id_ is not None:
+            self.pad_id_ = self.eos_id_
+            logging.warning("Padding token ID is None, setting to <eos>.")
+        elif self.pad_id_ is None:
+            raise ValueError(
+                "Cannot set padding token id: <unk> and <eos> are None.")
 
     def encode(self, data: Union[str, List[str]], add_special_tokens: bool = True) -> Tokens:
         if isinstance(data, str):
diff --git a/pyproject.toml b/pyproject.toml
index b665f91a..e1272ce5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@
 build-backend = "setuptools.build_meta"
 [project]
 name = "mlora"
-version = "0.3.0"
+version = "0.3.0.post1"
 description = "A tool for fine-tuning large language models (LLMs) using the LoRA or QLoRA methods more efficiently."
 readme = "README.md"
 requires-python = ">=3.10"
@@ -18,7 +18,7 @@ dependencies = [
     "datasets",
     "evaluate",
     "accelerate",
-    "transformers==4.38.2",
+    "transformers>=4.40.0",
     "sentencepiece",
     "huggingface_hub",
     "scikit-learn",
diff --git a/requirements.txt b/requirements.txt
index 9cb10df1..676a45ca 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ torch>=2.1.2
 datasets
 evaluate
 accelerate
-transformers==4.38.2
+transformers==4.40.0
 sentencepiece
 huggingface_hub
 scikit-learn