
Commit

update peft config.
shibing624 committed Apr 21, 2023
1 parent 20244f3 commit 633e376
Showing 3 changed files with 3 additions and 17 deletions.
textgen/chatglm/chatglm_model.py: 2 changes (1 addition, 1 deletion)
@@ -234,7 +234,7 @@ def train_model(
             if os.path.exists(checkpoint_name):
                 logger.info(f"Restarting from {checkpoint_name}")
                 adapters_weights = torch.load(checkpoint_name)
-                self.model = set_peft_model_state_dict(self.model, adapters_weights)
+                set_peft_model_state_dict(self.model, adapters_weights)
             else:
                 logger.warning(f"Checkpoint {checkpoint_name} not found")

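The same one-line fix recurs in textgen/llama/llama_model.py below: set_peft_model_state_dict loads the adapter weights into the model in place, and in recent peft releases it returns a load-result object rather than the model, so assigning its return value to self.model would replace the model with that object. A minimal sketch of the corrected resume pattern, assuming `model` is an existing peft-wrapped (LoRA) model and using an illustrative checkpoint path:

    import torch
    from peft import set_peft_model_state_dict

    # `model` is assumed to be a PeftModel created elsewhere; the path is illustrative.
    adapters_weights = torch.load("outputs/checkpoint-500/adapter_model.bin")
    # Loads the LoRA weights in place; do not assign the return value to the model.
    set_peft_model_state_dict(model, adapters_weights)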
textgen/config/model_args.py: 4 changes (1 addition, 3 deletions)
@@ -373,7 +373,6 @@ class ChatGlmArgs(ModelArgs):
     lora_dropout = 0.05
     lora_target_modules = ["query_key_value"]
     lora_bias = "none"
-    only_lora_state_dict: bool = False
     num_train_epochs = 1
     max_steps = -1
     per_device_train_batch_size = 2
@@ -425,11 +424,10 @@ class LlamaArgs(ModelArgs):
     use_lora: bool = True
     lora_bin_name: str = field(default="adapter_model.bin")
     lora_r: int = 8
-    lora_alpha = 16
+    lora_alpha = 32
     lora_dropout = 0.05
     lora_target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"]
     lora_bias = "none"
-    only_lora_state_dict: bool = True
     num_train_epochs = 3
     max_steps = -1
     per_device_train_batch_size = 2
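For LlamaArgs, lora_alpha rises from 16 to 32 while lora_r stays 8. LoRA scales each adapter update by lora_alpha / r, so the effective scaling doubles from 2 to 4, giving the adapters more weight relative to the frozen base model. A sketch of the peft LoraConfig these defaults presumably map to (the task_type value is an assumption, not shown in this diff):

    from peft import LoraConfig

    lora_config = LoraConfig(
        r=8,
        lora_alpha=32,  # updates are scaled by lora_alpha / r = 32 / 8 = 4
        lora_dropout=0.05,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        bias="none",
        task_type="CAUSAL_LM",  # assumed; not part of this commit
    )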
textgen/llama/llama_model.py: 14 changes (1 addition, 13 deletions)
@@ -134,12 +134,8 @@ def __init__(
         if self.args.use_lora:
             self.load_lora()

-        # unwind broken decapoda-research config
         self.tokenizer.padding_side = "left"
         self.tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token
-        self.model.config.pad_token_id = 0  # unk
-        self.model.config.bos_token_id = 1
-        self.model.config.eos_token_id = 2

     def train_model(
         self,
@@ -228,7 +224,7 @@ def train_model(
             if os.path.exists(checkpoint_name):
                 logger.info(f"Restarting from {checkpoint_name}")
                 adapters_weights = torch.load(checkpoint_name)
-                self.model = set_peft_model_state_dict(self.model, adapters_weights)
+                set_peft_model_state_dict(self.model, adapters_weights)
             else:
                 logger.warning(f"Checkpoint {checkpoint_name} not found")

@@ -294,14 +290,6 @@ def train_model(
             data_collator=data_collator,
         )

-        if self.args.only_lora_state_dict:
-            old_state_dict = self.model.state_dict
-            self.model.state_dict = (
-                lambda self, *_, **__: get_peft_model_state_dict(
-                    self, old_state_dict()
-                )
-            ).__get__(self.model, type(self.model))
-
         if self.args.enable_torch_compile:
             if torch.__version__ >= "2" and sys.platform != "win32":
                 self.model = torch.compile(self.model)
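The deleted block is the alpaca-lora trick that monkey-patched model.state_dict so that Trainer checkpoints contained only the LoRA weights returned by get_peft_model_state_dict. With the only_lora_state_dict flag removed from both argument classes, the commit presumably relies on peft's own saving path instead; a minimal sketch, assuming `model` is a PeftModel:

    # save_pretrained writes only the adapter (adapter_model.bin plus
    # adapter_config.json), so the state_dict patch is unnecessary.
    model.save_pretrained("outputs")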
