Add an LLM (e.g. Tongyi Qianwen / Qwen) for text dialogue to make the digital human more interactive #16

Open · wants to merge 1 commit into base: main
39 changes: 36 additions & 3 deletions README.md
@@ -59,6 +59,39 @@ python app.py

Open http://serverip/echo.html in a browser, type any text into the text box, and submit it. The digital human will speak that text.

### Using an LLM for digital human dialogue

Following the approach of the digital human dialogue system [LinlyTalker](https://github.com/Kedreamix/Linly-Talker), the supported LLM backends are Linly-AI, Qwen, and GeminiPro.

Alibaba Cloud's Tongyi Qianwen (Qwen) is recommended; see [https://github.com/QwenLM/Qwen](https://github.com/QwenLM/Qwen)

Download the Qwen model: [https://huggingface.co/Qwen/Qwen-1_8B-Chat](https://huggingface.co/Qwen/Qwen-1_8B-Chat)

You can download it with `git`:

```bash
git lfs install
git clone https://huggingface.co/Qwen/Qwen-1_8B-Chat
```

Or use Hugging Face's download tool `huggingface-cli`:

```bash
pip install -U huggingface_hub

# Optional: use a mirror endpoint to speed up downloads
# Linux
export HF_ENDPOINT="https://hf-mirror.com"
# Windows PowerShell
$env:HF_ENDPOINT="https://hf-mirror.com"

huggingface-cli download --resume-download Qwen/Qwen-1_8B-Chat --local-dir Qwen/Qwen-1_8B-Chat
```

Alternatively, the model can be downloaded from the ModelScope community: [https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/files](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/files)

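After downloading a model, the backend is selected near the top of `app.py` (see the `app.py` diff below). A minimal sketch of switching backends, assuming the weights sit at the paths used above:

```python
# Sketch based on this PR's app.py changes; the model_path values assume
# the download locations described above.
from src.LLM import *

# llm = Gemini(model_path='gemini-pro', api_key=None, proxy_url=None)
# llm = Linly(mode='offline', model_path="Linly-AI/Chinese-LLaMA-2-7B-hf")
llm = Qwen(mode='offline', model_path="Qwen/Qwen-1_8B-Chat")

print(llm.generate("北京有什么好玩的地方?"))  # quick sanity check of the chosen backend
```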

### Running with Docker
First start srs and nginx, then launch the digital human:
@@ -81,9 +114,9 @@ docker run --gpus all -it --network=host --rm registry.cn-hangzhou.aliyuncs.com
```

## TODO
- Add chatgpt-based dialogue for the digital human
- Voice cloning
- Play a video clip while the digital human is silent
- [x] Add chatgpt-based dialogue for the digital human
- [ ] Voice cloning
- [ ] Play a video clip while the digital human is silent

If this project helps you, please give it a star. Anyone interested is welcome to help improve it.
Email: lipku@foxmail.com
23 changes: 16 additions & 7 deletions app.py
@@ -27,8 +27,18 @@
sockets = Sockets(app)
global nerfreal

from src.LLM import *
# llm = Gemini(model_path='gemini-pro', api_key=None, proxy_url=None)
# llm = Linly(mode = 'offline', model_path="Linly-AI/Chinese-LLaMA-2-7B-hf")
# llm = Linly(mode = 'api', model_path="Linly-AI/Chinese-LLaMA-2-7B-hf")
llm = Qwen(mode = 'offline', model_path="Qwen/Qwen-1_8B-Chat")


def llm_response(question, history=None):
    # Forward the user's text to the selected LLM backend; the history argument is currently unused.
    return llm.generate(question)

async def main(voicename: str, text: str, render):
    # print("text:", text, "voicename:", voicename)
    communicate = edge_tts.Communicate(text, voicename)

    #with open(OUTPUT_FILE, "wb") as file:
@@ -42,12 +52,12 @@ async def main(voicename: str, text: str, render):

def txt_to_audio(text_):
    audio_list = []
    #audio_path = 'data/audio/aud_0.wav'
    # audio_path = 'data/audio/aud_0.wav'
    voicename = "zh-CN-YunxiaNeural"
    text = text_
    t = time.time()
    asyncio.get_event_loop().run_until_complete(main(voicename,text,nerfreal))
    print('-------tts time: ',time.time()-t)
    print(f'-------tts time:{time.time()-t:.4f}s')

@sockets.route('/humanecho')
def echo_socket(ws):
@@ -65,8 +75,9 @@ def echo_socket(ws):

            if len(message)==0:
                return '输入信息为空'
            else:
                txt_to_audio(message)
            else:
                answer = llm_response(message)
                txt_to_audio(answer)

def render():
nerfreal.render()
@@ -225,6 +236,4 @@ def render():
#############################################################################
print('start websocket server')
server = pywsgi.WSGIServer(('0.0.0.0', 8000), app, handler_class=WebSocketHandler)
server.serve_forever()


server.serve_forever()
19 changes: 19 additions & 0 deletions src/Gemini.py
@@ -0,0 +1,19 @@
import os
import google.generativeai as genai


def configure_api(api_key, proxy_url=None):
    # Only set proxy variables when a proxy URL is provided;
    # assigning None to os.environ would raise a TypeError.
    if proxy_url:
        os.environ['https_proxy'] = proxy_url
        os.environ['http_proxy'] = proxy_url
    genai.configure(api_key=api_key)


class Gemini:
    def __init__(self, model_path='gemini-pro', api_key=None, proxy_url=None):
        configure_api(api_key, proxy_url)
        self.model = genai.GenerativeModel(model_path)

    def generate(self, question):
        response = self.model.generate_content(question)
        # Return plain text so this backend matches Linly and Qwen, which return strings.
        return response.text
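A hypothetical usage sketch for this backend (the API key value and the decision to skip a proxy are placeholders, not part of this PR):

```python
from src.Gemini import Gemini

# 'YOUR_API_KEY' is a placeholder; supply a real Google AI Studio key,
# and a proxy_url only if your network requires one.
llm = Gemini(model_path='gemini-pro', api_key='YOUR_API_KEY', proxy_url=None)
print(llm.generate("如何应对压力?"))
```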

24 changes: 24 additions & 0 deletions src/LLM.py
@@ -0,0 +1,24 @@
from src.Linly import Linly
from src.Qwen import Qwen
from src.Gemini import Gemini

def test_Linly(question="如何应对压力?", mode='offline', model_path="Linly-AI/Chinese-LLaMA-2-7B-hf"):
    llm = Linly(mode, model_path)
    answer = llm.generate(question)
    print(answer)

def test_Qwen(question="如何应对压力?", mode='offline', model_path="Qwen/Qwen-1_8B-Chat"):
    llm = Qwen(mode, model_path)
    answer = llm.generate(question)
    print(answer)

def test_Gemini(question="如何应对压力?", model_path='gemini-pro', api_key=None, proxy_url=None):
    llm = Gemini(model_path, api_key, proxy_url)
    answer = llm.generate(question)
    print(answer)


if __name__ == '__main__':
    test_Linly()
    # test_Qwen()
    # test_Gemini()
73 changes: 73 additions & 0 deletions src/Linly.py
@@ -0,0 +1,73 @@
import os
import torch
import requests
import json
from transformers import AutoModelForCausalLM, AutoTokenizer
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

class Linly:
    def __init__(self, mode='api', model_path="Linly-AI/Chinese-LLaMA-2-7B-hf") -> None:
        # The settings below are only needed when mode == 'api'.
        # self.url = f"http://ip:{api_port}" # local server: http://ip:port
        self.url = "http://172.31.58.8:7871"  # local server: http://ip:port
        self.headers = {
            "Content-Type": "application/json"
        }
        self.data = {
            "question": "北京有什么好玩的地方?"
        }
        self.prompt = '''请用少于25个字回答以下问题'''
        self.mode = mode
        if mode != 'api':
            self.model, self.tokenizer = self.init_model(model_path)

    def init_model(self, path="Linly-AI/Chinese-LLaMA-2-7B-hf"):
        model = AutoModelForCausalLM.from_pretrained(path, device_map="cuda:0",
                                                     torch_dtype=torch.bfloat16, trust_remote_code=True)
        tokenizer = AutoTokenizer.from_pretrained(path, use_fast=False, trust_remote_code=True)
        return model, tokenizer

    def generate(self, question):
        if self.mode != 'api':
            self.data["question"] = f"{self.prompt} ### Instruction:{question} ### Response:"
            inputs = self.tokenizer(self.data["question"], return_tensors="pt").to("cuda:0")
            try:
                generate_ids = self.model.generate(inputs.input_ids, max_new_tokens=2048, do_sample=True, top_k=20, top_p=0.84,
                                                   temperature=1, repetition_penalty=1.15, eos_token_id=2, bos_token_id=1,
                                                   pad_token_id=0)
                response = self.tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
                print('log:', response)
                response = response.split("### Response:")[-1]
                return response
            except Exception:
                return "对不起,你的请求出错了,请再次尝试。\nSorry, your request has encountered an error. Please try again.\n"
        else:
            return self.predict(question)

    def predict(self, question):
        # FastAPI
        self.data["question"] = f"{self.prompt} ### Instruction:{question} ### Response:"
        headers = {'Content-Type': 'application/json'}
        data = {"prompt": question}
        response = requests.post(url=self.url, headers=headers, data=json.dumps(data))
        return response.json()['response']

        # response = requests.post(self.url, headers=self.headers, json=self.data)
        # self.json = response.json()
        # answer, tag = self.json
        # if tag == 'success':
        #     return answer[0]
        # else:
        #     return "对不起,你的请求出错了,请再次尝试。\nSorry, your request has encountered an error. Please try again.\n"

def test():
    # llm = Linly(mode='api')
    # answer = llm.predict("如何应对压力?")
    # print(answer)

    llm = Linly(mode='api', model_path='Linly-AI/Chinese-LLaMA-2-7B-hf')
    answer = llm.generate("如何应对压力?")
    print(answer)

if __name__ == '__main__':
    test()
51 changes: 51 additions & 0 deletions src/Qwen.py
@@ -0,0 +1,51 @@
import os
import torch
import requests
from transformers import AutoModelForCausalLM, AutoTokenizer
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

class Qwen:
    def __init__(self, mode='api', model_path="Qwen/Qwen-1_8B-Chat") -> None:
        '''API mode is not implemented yet; it would work much like the Linly API and can be added if needed.'''
        self.url = "http://ip:port"  # local server: http://ip:port
        self.headers = {
            "Content-Type": "application/json"
        }
        self.data = {
            "question": "北京有什么好玩的地方?"
        }
        self.prompt = '''请用少于25个字回答以下问题'''
        self.mode = mode

        self.model, self.tokenizer = self.init_model(model_path)

    def init_model(self, path="Qwen/Qwen-1_8B-Chat"):
        # Load from the given path rather than a hard-coded repo id,
        # so a locally downloaded checkpoint is picked up.
        model = AutoModelForCausalLM.from_pretrained(path,
                                                     device_map="auto",
                                                     trust_remote_code=True).eval()
        tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)

        return model, tokenizer

    def generate(self, question):
        if self.mode != 'api':
            self.data["question"] = f"{self.prompt} ### Instruction:{question} ### Response:"
            try:
                response, history = self.model.chat(self.tokenizer, self.data["question"], history=None)
                print(history)
                return response
            except Exception:
                return "对不起,你的请求出错了,请再次尝试。\nSorry, your request has encountered an error. Please try again.\n"
        else:
            return self.predict(question)

    def predict(self, question):
        '''API mode is not implemented yet; it would work much like the Linly API and can be added if needed.'''
        pass

def test():
    llm = Qwen(mode='offline', model_path="Qwen/Qwen-1_8B-Chat")
    answer = llm.generate("如何应对压力?")
    print(answer)

if __name__ == '__main__':
    test()