Add an LLM (e.g. Tongyi Qianwen / Qwen) for text dialogue to make the digital human more interactive #16

Open · wants to merge 1 commit into base: main
39 changes: 36 additions & 3 deletions README.md
@@ -59,6 +59,39 @@ python app.py

Open http://serverip/echo.html in a browser, type any text into the text box, and submit it. The digital human will speak that text.

### Using an LLM for digital human dialogue

Following the approach of the digital human dialogue system [LinlyTalker](https://github.com/Kedreamix/Linly-Talker), the supported LLM backends are Linly-AI, Qwen, and GeminiPro.

Alibaba Cloud's Tongyi Qianwen (Qwen) is recommended; see [https://github.com/QwenLM/Qwen](https://github.com/QwenLM/Qwen)

Download the Qwen model: [https://huggingface.co/Qwen/Qwen-1_8B-Chat](https://huggingface.co/Qwen/Qwen-1_8B-Chat)

You can download it with `git`:

```bash
git lfs install
git clone https://huggingface.co/Qwen/Qwen-1_8B-Chat
```

Or use Hugging Face's download tool `huggingface-cli`:

```bash
pip install -U huggingface_hub

# Optional: use a mirror endpoint to speed up downloads
# Linux
export HF_ENDPOINT="https://hf-mirror.com"
# Windows PowerShell
$env:HF_ENDPOINT="https://hf-mirror.com"

huggingface-cli download --resume-download Qwen/Qwen-1_8B-Chat --local-dir Qwen/Qwen-1_8B-Chat
```

Alternatively, the model can be downloaded from the ModelScope community: [https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/files](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/files)

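After downloading a model, the backend is selected near the top of `app.py` (see the `app.py` diff below). A minimal sketch of switching backends, assuming the weights sit at the paths used above:

```python
# Sketch based on this PR's app.py changes; the model_path values assume
# the download locations described above.
from src.LLM import *

# llm = Gemini(model_path='gemini-pro', api_key=None, proxy_url=None)
# llm = Linly(mode='offline', model_path="Linly-AI/Chinese-LLaMA-2-7B-hf")
llm = Qwen(mode='offline', model_path="Qwen/Qwen-1_8B-Chat")

print(llm.generate("北京有什么好玩的地方?"))  # quick sanity check of the chosen backend
```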

### Running with Docker
First start srs and nginx, then launch the digital human:
@@ -81,9 +114,9 @@ docker run --gpus all -it --network=host --rm registry.cn-hangzhou.aliyuncs.com
```

## TODO
- Add chatgpt-based dialogue for the digital human
- Voice cloning
- Play a video clip while the digital human is silent
- [x] Add chatgpt-based dialogue for the digital human
- [ ] Voice cloning
- [ ] Play a video clip while the digital human is silent

If this project helps you, please give it a star. Anyone interested is welcome to help improve it.
Email: lipku@foxmail.com
23 changes: 16 additions & 7 deletions app.py
@@ -27,8 +27,18 @@
sockets = Sockets(app)
global nerfreal

from src.LLM import *
# llm = Gemini(model_path='gemini-pro', api_key=None, proxy_url=None)
# llm = Linly(mode = 'offline', model_path="Linly-AI/Chinese-LLaMA-2-7B-hf")
# llm = Linly(mode = 'api', model_path="Linly-AI/Chinese-LLaMA-2-7B-hf")
llm = Qwen(mode = 'offline', model_path="Qwen/Qwen-1_8B-Chat")


def llm_response(question, history=None):
    # Forward the user's text to the selected LLM backend; the history argument is currently unused.
    return llm.generate(question)

async def main(voicename: str, text: str, render):
    # print("text:", text, "voicename:", voicename)
    communicate = edge_tts.Communicate(text, voicename)

    #with open(OUTPUT_FILE, "wb") as file:
@@ -42,12 +52,12 @@ async def main(voicename: str, text: str, render):

def txt_to_audio(text_):
    audio_list = []
    #audio_path = 'data/audio/aud_0.wav'
    # audio_path = 'data/audio/aud_0.wav'
    voicename = "zh-CN-YunxiaNeural"
    text = text_
    t = time.time()
    asyncio.get_event_loop().run_until_complete(main(voicename,text,nerfreal))
    print('-------tts time: ',time.time()-t)
    print(f'-------tts time:{time.time()-t:.4f}s')

@sockets.route('/humanecho')
def echo_socket(ws):
@@ -65,8 +75,9 @@ def echo_socket(ws):

            if len(message)==0:
                return '输入信息为空'
            else:
                txt_to_audio(message)
            else:
                answer = llm_response(message)
                txt_to_audio(answer)

def render():
nerfreal.render()
@@ -225,6 +236,4 @@ def render():
#############################################################################
print('start websocket server')
server = pywsgi.WSGIServer(('0.0.0.0', 8000), app, handler_class=WebSocketHandler)
server.serve_forever()


server.serve_forever()
19 changes: 19 additions & 0 deletions src/Gemini.py
@@ -0,0 +1,19 @@
import os
import google.generativeai as genai


def configure_api(api_key, proxy_url=None):
    # Only set proxy variables when a proxy URL is provided;
    # assigning None to os.environ would raise a TypeError.
    if proxy_url:
        os.environ['https_proxy'] = proxy_url
        os.environ['http_proxy'] = proxy_url
    genai.configure(api_key=api_key)


class Gemini:
    def __init__(self, model_path='gemini-pro', api_key=None, proxy_url=None):
        configure_api(api_key, proxy_url)
        self.model = genai.GenerativeModel(model_path)

    def generate(self, question):
        response = self.model.generate_content(question)
        # Return plain text so this backend matches Linly and Qwen, which return strings.
        return response.text
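A hypothetical usage sketch for this backend (the API key value and the decision to skip a proxy are placeholders, not part of this PR):

```python
from src.Gemini import Gemini

# 'YOUR_API_KEY' is a placeholder; supply a real Google AI Studio key,
# and a proxy_url only if your network requires one.
llm = Gemini(model_path='gemini-pro', api_key='YOUR_API_KEY', proxy_url=None)
print(llm.generate("如何应对压力?"))
```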

24 changes: 24 additions & 0 deletions src/LLM.py
@@ -0,0 +1,24 @@
from src.Linly import Linly
from src.Qwen import Qwen
from src.Gemini import Gemini

def test_Linly(question="如何应对压力?", mode='offline', model_path="Linly-AI/Chinese-LLaMA-2-7B-hf"):
    llm = Linly(mode, model_path)
    answer = llm.generate(question)
    print(answer)

def test_Qwen(question="如何应对压力?", mode='offline', model_path="Qwen/Qwen-1_8B-Chat"):
    llm = Qwen(mode, model_path)
    answer = llm.generate(question)
    print(answer)

def test_Gemini(question="如何应对压力?", model_path='gemini-pro', api_key=None, proxy_url=None):
    llm = Gemini(model_path, api_key, proxy_url)
    answer = llm.generate(question)
    print(answer)


if __name__ == '__main__':
    test_Linly()
    # test_Qwen()
    # test_Gemini()
73 changes: 73 additions & 0 deletions src/Linly.py
@@ -0,0 +1,73 @@
import os
import torch
import requests
import json
from transformers import AutoModelForCausalLM, AutoTokenizer
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

class Linly:
    def __init__(self, mode='api', model_path="Linly-AI/Chinese-LLaMA-2-7B-hf") -> None:
        # The settings below are only needed when mode == 'api'.
        # self.url = f"http://ip:{api_port}" # local server: http://ip:port
        self.url = "http://172.31.58.8:7871"  # local server: http://ip:port
        self.headers = {
            "Content-Type": "application/json"
        }
        self.data = {
            "question": "北京有什么好玩的地方?"
        }
        self.prompt = '''请用少于25个字回答以下问题'''
        self.mode = mode
        if mode != 'api':
            self.model, self.tokenizer = self.init_model(model_path)

    def init_model(self, path="Linly-AI/Chinese-LLaMA-2-7B-hf"):
        model = AutoModelForCausalLM.from_pretrained(path, device_map="cuda:0",
                                                     torch_dtype=torch.bfloat16, trust_remote_code=True)
        tokenizer = AutoTokenizer.from_pretrained(path, use_fast=False, trust_remote_code=True)
        return model, tokenizer

    def generate(self, question):
        if self.mode != 'api':
            self.data["question"] = f"{self.prompt} ### Instruction:{question} ### Response:"
            inputs = self.tokenizer(self.data["question"], return_tensors="pt").to("cuda:0")
            try:
                generate_ids = self.model.generate(inputs.input_ids, max_new_tokens=2048, do_sample=True, top_k=20, top_p=0.84,
                                                   temperature=1, repetition_penalty=1.15, eos_token_id=2, bos_token_id=1,
                                                   pad_token_id=0)
                response = self.tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
                print('log:', response)
                response = response.split("### Response:")[-1]
                return response
            except Exception:
                return "对不起,你的请求出错了,请再次尝试。\nSorry, your request has encountered an error. Please try again.\n"
        else:
            return self.predict(question)

    def predict(self, question):
        # FastAPI
        self.data["question"] = f"{self.prompt} ### Instruction:{question} ### Response:"
        headers = {'Content-Type': 'application/json'}
        data = {"prompt": question}
        response = requests.post(url=self.url, headers=headers, data=json.dumps(data))
        return response.json()['response']

        # response = requests.post(self.url, headers=self.headers, json=self.data)
        # self.json = response.json()
        # answer, tag = self.json
        # if tag == 'success':
        #     return answer[0]
        # else:
        #     return "对不起,你的请求出错了,请再次尝试。\nSorry, your request has encountered an error. Please try again.\n"

def test():
    # llm = Linly(mode='api')
    # answer = llm.predict("如何应对压力?")
    # print(answer)

    llm = Linly(mode='api', model_path='Linly-AI/Chinese-LLaMA-2-7B-hf')
    answer = llm.generate("如何应对压力?")
    print(answer)

if __name__ == '__main__':
    test()
51 changes: 51 additions & 0 deletions src/Qwen.py
@@ -0,0 +1,51 @@
import os
import torch
import requests
from transformers import AutoModelForCausalLM, AutoTokenizer
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

class Qwen:
    def __init__(self, mode='api', model_path="Qwen/Qwen-1_8B-Chat") -> None:
        '''API mode is not implemented yet; it would work much like the Linly API and can be added if needed.'''
        self.url = "http://ip:port"  # local server: http://ip:port
        self.headers = {
            "Content-Type": "application/json"
        }
        self.data = {
            "question": "北京有什么好玩的地方?"
        }
        self.prompt = '''请用少于25个字回答以下问题'''
        self.mode = mode

        self.model, self.tokenizer = self.init_model(model_path)

    def init_model(self, path="Qwen/Qwen-1_8B-Chat"):
        # Load from the given path rather than a hard-coded repo id,
        # so a locally downloaded checkpoint is picked up.
        model = AutoModelForCausalLM.from_pretrained(path,
                                                     device_map="auto",
                                                     trust_remote_code=True).eval()
        tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)

        return model, tokenizer

    def generate(self, question):
        if self.mode != 'api':
            self.data["question"] = f"{self.prompt} ### Instruction:{question} ### Response:"
            try:
                response, history = self.model.chat(self.tokenizer, self.data["question"], history=None)
                print(history)
                return response
            except Exception:
                return "对不起,你的请求出错了,请再次尝试。\nSorry, your request has encountered an error. Please try again.\n"
        else:
            return self.predict(question)

    def predict(self, question):
        '''API mode is not implemented yet; it would work much like the Linly API and can be added if needed.'''
        pass

def test():
    llm = Qwen(mode='offline', model_path="Qwen/Qwen-1_8B-Chat")
    answer = llm.generate("如何应对压力?")
    print(answer)

if __name__ == '__main__':
    test()