Skip to content

Commit

Permalink
Merge commit 'b681b0ca7e20d90b3e42b3232e5bf49a8312252a' into feat/change_save_to_peft_format
Browse files Browse the repository at this point in the history

* commit 'b681b0ca7e20d90b3e42b3232e5bf49a8312252a':
  add system in swift infer (modelscope#508)
  Support ckpt converting to peft format (modelscope#505)

# Conflicts:
#	swift/tuners/base.py
#	tests/tuners/test_swift_base.py
  • Loading branch information
tastelikefeet committed Mar 7, 2024
2 parents 0619802 + b681b0c commit 3310361
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 12 deletions.
36 changes: 30 additions & 6 deletions swift/llm/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ def llm_infer(args: InferArguments) -> None:
input_mode: Literal['S', 'M'] = 'S'
logger.info('Input `exit` or `quit` to exit the conversation.')
logger.info('Input `multi-line` to switch to multi-line input mode.')
logger.info(
'Input `reset-system` to reset the system and clear the history.')
if template.support_multi_round:
logger.info('Input `clear` to clear the history.')
else:
Expand All @@ -252,11 +254,19 @@ def llm_infer(args: InferArguments) -> None:
if args.infer_media_type != 'none':
logger.info('Please enter the conversation content first, '
'followed by the path to the multimedia file.')
system = None
read_system = False
while True:
if input_mode == 'S':
query = input('<<< ')
addi_prompt = ''
if read_system:
addi_prompt = '[S]'
query = input(f'<<<{addi_prompt} ')
else:
query = read_multi_line()
addi_prompt = '[M]'
if read_system:
addi_prompt = '[MS]'
query = read_multi_line(addi_prompt)
if query.strip().lower() in {'exit', 'quit'}:
break
elif query.strip().lower() == 'clear':
Expand All @@ -265,6 +275,13 @@ def llm_infer(args: InferArguments) -> None:
continue
elif query.strip() == '':
continue
elif query.strip().lower() == 'reset-system':
read_system = True
continue
if read_system:
system = query
read_system = False
continue
if input_mode == 'S' and query.strip().lower() == 'multi-line':
input_mode = 'M'
logger.info('End multi-line input with `#`.')
Expand All @@ -279,7 +296,11 @@ def llm_infer(args: InferArguments) -> None:
infer_kwargs = {}
read_media_file(infer_kwargs, args.infer_media_type)
if args.infer_backend == 'vllm':
request_list = [{'query': query, 'history': history}]
request_list = [{
'query': query,
'history': history,
'system': system
}]
if args.stream:
gen = inference_stream_vllm(llm_engine, template,
request_list)
Expand All @@ -300,7 +321,7 @@ def llm_infer(args: InferArguments) -> None:
else:
if args.stream:
gen = inference_stream(model, template, query, history,
**infer_kwargs)
system, **infer_kwargs)
print_idx = 0
for response, new_history in gen:
if len(response) > print_idx:
Expand All @@ -309,7 +330,8 @@ def llm_infer(args: InferArguments) -> None:
print()
else:
response, new_history = inference(model, template, query,
history, **infer_kwargs)
history, system,
**infer_kwargs)
print(response)
print('-' * 50)
obj = {
Expand Down Expand Up @@ -366,6 +388,8 @@ def llm_infer(args: InferArguments) -> None:
history = data.get('history')
system = data.get('system')
images = data.get('images')
if args.verbose and system is not None:
print(f'[SYSTEM]{system}')
if history is not None:
kwargs['history'] = history
if system is not None:
Expand All @@ -375,7 +399,7 @@ def llm_infer(args: InferArguments) -> None:
if args.infer_backend == 'vllm':
assert args.stream is True
if args.verbose:
print(f"query: {data['query']}\nresponse: ", end='')
print(f"[QUERY]{data['query']}\n[RESPONSE]", end='')
gen = inference_stream_vllm(llm_engine, template, [kwargs])
print_idx = 0
for resp_list in gen:
Expand Down
3 changes: 2 additions & 1 deletion swift/tuners/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright 2023-present the HuggingFace Inc. team.
import os
import re
import shutil
from copy import copy
from functools import partial
from inspect import Parameter, Signature, signature
Expand Down Expand Up @@ -882,7 +883,7 @@ def has_custom_content(_json):
os.path.join(ckpt_dir, adapter, SAFETENSORS_WEIGHTS_NAME))
state_dict = SwiftModel.load_state_file(
os.path.join(ckpt_dir, adapter))
os.makedirs(os.path.join(output_dir, adapter))
os.makedirs(os.path.join(output_dir, adapter), exist_ok=True)
new_state_dict = {}
for key, value in state_dict.items():
if not key.startswith('base_model.model.'):
Expand Down
2 changes: 1 addition & 1 deletion swift/tuners/lora_layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -928,5 +928,5 @@ def lora_state_dict(state_dict,
return {
k: v
for k, v in to_return.items()
if (('lora_' in k and adapter_name in k) or ('bias' in k))
if (('lora_' in k and f'.{adapter_name}.' in k) or ('bias' in k))
}
4 changes: 2 additions & 2 deletions swift/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,9 @@ def test_time(func: Callable[[], _T],
return res


def read_multi_line() -> str:
def read_multi_line(addi_prompt: str = '') -> str:
res = []
prompt = '<<<[M] '
prompt = f'<<<{addi_prompt} '
while True:
text = input(prompt) + '\n'
prompt = ''
Expand Down
3 changes: 1 addition & 2 deletions tests/tuners/test_peft.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import copy
import math
import os
import shutil
import tempfile
import unittest
from time import time

import torch
from modelscope import Preprocessor
Expand Down Expand Up @@ -50,6 +48,7 @@ def test_peft_lora_injection(self):
torch.isclose(state_dict[key],
state_dict2[key]).flatten().detach().cpu()))

@unittest.skip
def test_lora_merge(self):

def reset_lora_parameters(self, adapter_name, init_lora_weights):
Expand Down
1 change: 1 addition & 0 deletions tests/tuners/test_swift_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from modelscope import Model, Preprocessor
from modelscope.models.nlp.structbert import (SbertConfig,
SbertForSequenceClassification)
from peft import PeftModel
from peft.utils import WEIGHTS_NAME
from torch import nn

Expand Down

0 comments on commit 3310361

Please sign in to comment.