In [1]:
# Logging
import datetime
import logging
import os

# Start time of this session; it also names the log file below.
log_time = datetime.datetime.now()
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
DATE_FORMAT = "%m/%d/%Y %H:%M:%S %p"

# logging.basicConfig opens the log file directly and does not create missing
# parent directories, so make sure the target directory exists first.
LOG_DIR = "./logger"
os.makedirs(LOG_DIR, exist_ok=True)

logging.basicConfig(
    # Use a timestamp without spaces or colons so the file name is valid on
    # every platform and easy to handle in a shell.
    filename=os.path.join(LOG_DIR, f"{log_time:%Y-%m-%d_%H-%M-%S}.log"),
    level=logging.INFO,
    format=LOG_FORMAT,
    datefmt=DATE_FORMAT,
    # The notebook logs non-ASCII (Chinese) text; do not depend on the locale default.
    encoding="utf-8",
)
logging.info("INFO")

In [2]:
# Environment setup & bring up the LLM
import os
from transformers import set_seed
# from tool_registry import dispatch_tool, get_tools

# Fix the random seed so sampling is repeatable between runs.
set_seed(42)

# Send both HTTP and HTTPS traffic through the same proxy.
os.environ.update({
    "HTTP_PROXY": 'http://10.10.20.100:1089',
    "HTTPS_PROXY": 'http://10.10.20.100:1089',
})

# Each location can be overridden through an environment variable of the same name.
# Previous default for MODEL_PATH: '/share/lilin/chatglm3-6b'
MODEL_PATH = os.environ.get('MODEL_PATH', '/home/lilin/chatglm3-6b')
TOKENIZER_PATH = os.environ.get("TOKENIZER_PATH", '/home/lilin/chatglm3-6b')
PT_PATH = os.environ.get('PT_PATH')       # None: P-tuning is not used

# Direct-usage reference without the client wrapper (kept for manual checks):
# tokenizer = AutoTokenizer.from_pretrained("/share/lilin/chatglm3-6b", trust_remote_code=True)
# model = AutoModel.from_pretrained("/share/lilin/chatglm3-6b", trust_remote_code=True, device='cuda')
# model = model.eval()
# response, history = model.chat(tokenizer, "Hello", history=[])
# print(response)
# response, history = model.chat(tokenizer, "What should I do if I can't sleep at night?", history=history)
# print(response)

# Project-local helpers; they are imported only after the environment is configured.
from test_func import HFClient, append_conversation, extract_code, tool_call, postprocess_text
import tool_registry
from conversation import Conversation, Role

# NOTE(review): the meaning of the trailing positional 7 is not visible in this
# notebook -- confirm against HFClient's signature.
client = HFClient(MODEL_PATH, TOKENIZER_PATH, PT_PATH, 7)


[registered tool] {'description': '随机生成一个数x, 使得 `range[0]` <= x < `range[1]`， 随机数生成的种子使用 `seed`',
 'name': 'random_number_generator',
 'params': [{'description': '随机数生成器使用的种子',
             'name': 'seed',
             'required': True,
             'type': 'int'},
            {'description': '生成随机数的范围',
             'name': 'range',
             'required': True,
             'type': 'tuple[int, int]'}]}
[registered tool] {'description': '获取句子 `input_text` 的长度',
 'name': 'get_sentence_length',
 'params': [{'description': '输入的句子',
             'name': 'input_text',
             'required': True,
             'type': 'str'}]}
[registered tool] {'description': '返回指数计算的结果，底数 `base` 的指数 `power` 次方',
 'name': 'exponentiation_calculation',
 'params': [{'description': '底数',
             'name': 'base',
             'required': True,
             'type': 'int'},
            {'description': '指数',
             'name': 'power',
             'required': True,
             'type': 'int'}]}
[registe

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

In [15]:
# config  &  tool
from importlib import reload

# Sampling and length settings used by the conversation loop.
TOP_P, TEMPERATURE = 0.8, 0.01
MAX_LENGTH, TRUNCATE_LENGTH = 8192, 1024

# Reload tool_registry so edits to the tool definitions are picked up
# without restarting the kernel, then fetch the current tool list.
reload(tool_registry)
tools = tool_registry.get_tools()
print("===TOOLS ", tools)

# No UI placeholder in this notebook; the value is passed through to append_conversation.
markdown_placeholder = None


[registered tool] {'description': '随机生成一个数x, 使得 `range[0]` <= x < `range[1]`',
 'name': 'random_number_generator',
 'params': [{'description': '生成随机数的范围',
             'name': 'range',
             'required': True,
             'type': 'tuple[int, int]'}]}
[registered tool] {'description': '获取句子 `input_text` 的长度',
 'name': 'get_sentence_length',
 'params': [{'description': '输入的句子',
             'name': 'input_text',
             'required': True,
             'type': 'str'}]}
[registered tool] {'description': '返回指数计算的结果，底数 `base` 的指数 `power` 次方',
 'name': 'exponentiation_calculation',
 'params': [{'description': '底数',
             'name': 'base',
             'required': True,
             'type': 'int'},
            {'description': '指数',
             'name': 'power',
             'required': True,
             'type': 'int'}]}
[registered tool] {'description': '从网络上获得 `keyword` 的习惯内容信息。\n'
                '在你要回答你现有知识无法回答的问题时，你应该使用这个工具（尤其是当你需要获得最新的实时信息，或者你缺少相关信息时，在这种情况下请更倾向于使用这个工具）。',

In [16]:
# Experiment

def conversation_system(input_text, history):
    role = Role.USER
    append_conversation(Conversation(role, input_text), history)
    # input_text = preprocess_text(
    #     None,
    #     tools,
    #     history,
    # )
    
    for _ in range(5):
        output_text = ''
        for response in client.generate_stream(
            system=None,
            tools=tools,
            history=history,
            do_sample=True,
            max_length=MAX_LENGTH,
            temperature=TEMPERATURE,
            top_p=TOP_P,
            stop_sequences=[str(r) for r in (Role.USER, Role.OBSERVATION)],
        ):
            token = response.token
            if response.token.special:
                # print(output_text)
                # logging.info(output_text)
                # logging.info(token)
                print('assistant: ', postprocess_text(output_text))


                match token.text.strip():
                    case '<|user|>':
                        # print('assistant: ', output_text)
                        # time.sleep(1)
                        append_conversation(Conversation(
                            Role.ASSISTANT,
                            postprocess_text(output_text),
                        ), history, markdown_placeholder)
                        return history
                        # break
                    # Initiate tool call
                    case '<|assistant|>':
                        append_conversation(Conversation(
                            Role.ASSISTANT,
                            postprocess_text(output_text),
                        ), history, markdown_placeholder)
                        output_text = ''
                        # message_placeholder = placeholder.chat_message(name="tool", avatar="assistant")
                        # markdown_placeholder = message_placeholder.empty()
                        continue
                    case '<|observation|>':
                        tool, *output_text = output_text.strip().split('\n')
                        output_text = '\n'.join(output_text)
                        
                        append_conversation(Conversation(
                            Role.TOOL,
                            postprocess_text(output_text),
                            tool,
                        ), history, markdown_placeholder)
                        # message_placeholder = placeholder.chat_message(name="observation", avatar="user")
                        # markdown_placeholder = message_placeholder.empty()
                        
                        try:
                            code = extract_code(output_text)
                            logging.info(f"CODE: {code}")
                            
                            args = eval(code, {'tool_call': tool_call}, {})
                        except:
                            logging.warning('Failed to parse tool call')
                            
                        
                        output_text = ''
                        
                        # if manual_mode:
                        #     st.info('Please provide tool call results below:')
                        #     return
                        # else:
                        #     with markdown_placeholder:
                        #         with st.spinner(f'Calling tool {tool}...'):
                        #             observation = dispatch_tool(tool, args)
                        observation = tool_registry.dispatch_tool(tool, args)
                        
                        if len(observation) > TRUNCATE_LENGTH:
                            observation = observation[:TRUNCATE_LENGTH] + ' [TRUNCATED]'
                        append_conversation(Conversation(
                            Role.OBSERVATION, observation
                        ), history, markdown_placeholder)
                        break
                        # message_placeholder = placeholder.chat_message(name="assistant", avatar="assistant")
                        # markdown_placeholder = message_placeholder.empty()
                        # st.session_state.calling_tool = False
                        
                    case _:
                        logging.warning(f'Unexpected special token: {token.text.strip()}')
                        print(f"Unexpected special token: {token.text.strip()}")
                        return history
            output_text += response.token.text
            # markdown_placeholder.markdown(postprocess_text(output_text + '▌'))
        else:
            append_conversation(Conversation(
                Role.ASSISTANT,
                postprocess_text(output_text),
            ), history, markdown_placeholder)
            # user_mark = 1
            print('assistant: ', postprocess_text(output_text))
            break
        
    return history


In [20]:
# Execute
history: list[Conversation] = []

# Up to 10 user turns per run; typing END stops early.
for i in range(10):
    print('user: ', flush=True)
    input_text = input()
    print(input_text)
    # Normalise before comparing so that "END " or " END" also ends the session.
    input_text = input_text.strip()
    if input_text == "END":
        break
    if not input_text:
        # Nothing was typed; do not send an empty prompt to the model.
        continue
    history = conversation_system(input_text=input_text, history=history)

# Persist the whole conversation to the log file.
for h in history:
    logging.info(h)

user: 
请问“门前大桥下游过一群鸭”有几个字，这个字的4次方是多少
assistant:  
get_sentence_length
 ```python
tool_call(input_text='门前大桥下游过一群鸭')
```
=== Tool call:
()
{'input_text': '门前大桥下游过一群鸭'}
assistant:  
根据您的查询，门前大桥下游过一群鸭这句话共有10个字。
user: 
END
<|user|> 请问“门前大桥下游过一群鸭”有几个字，这个字的4次方是多少 None
<|assistant|> 
```python
tool_call(input_text='门前大桥下游过一群鸭')
``` get_sentence_length
<|observation|> 这句话门前大桥下游过一群鸭的长度为10 None
<|assistant|> 
根据您的查询，门前大桥下游过一群鸭这句话共有10个字。 None


: 

In [6]:
from duckduckgo_search import DDGS

# Quick check of DuckDuckGo text search: fetch up to five hits and print them.
with DDGS() as ddgs:
    results = list(ddgs.text("python programming", max_results=5))
    print(results)

[{'title': 'Welcome to Python.org', 'href': 'https://www.python.org/', 'body': 'Python is a versatile and powerful language that lets you work quickly and integrate systems more effectively. Learn how to get started, download the latest version, access documentation, find jobs, and discover success stories and events related to Python.'}, {'title': 'Python (programming language) - Wikipedia', 'href': 'https://en.wikipedia.org/wiki/Python_(programming_language)', 'body': 'Python is a high-level, general-purpose programming language.Its design philosophy emphasizes code readability with the use of significant indentation.. Python is dynamically typed and garbage-collected.It supports multiple programming paradigms, including structured (particularly procedural), object-oriented and functional programming.It is often described as a "batteries included" language ...'}, {'title': 'Python For Beginners | Python.org', 'href': 'https://www.python.org/about/gettingstarted/', 'body': 'Learn the 