In [12]:
import json
import os

import dotenv

from langchain_community.cache import SQLiteCache
from langchain_core.globals import set_llm_cache
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

from tqdm import tqdm

# Load a local .env file (if present) into the process environment; the
# WLAI_* settings read later through os.getenv are expected to come from it.
dotenv.load_dotenv()

True

In [13]:
# Load the test set and the example file that is expected to list the same questions.
with open('data/Final_TestSet/Final_TestSet.json', 'r', encoding='utf-8') as f:
    dataset_init = json.load(f)
with open('data/Final_Example.json', 'r', encoding='utf-8') as f:
    preliminary_example = json.load(f)

# Check that both files describe the same samples in the same order.
# Comparing the lengths first turns a size mismatch into a clear assertion
# failure instead of an IndexError (example file shorter) or a silent pass
# (example file longer).
assert len(dataset_init) == len(preliminary_example), (
    f"dataset size mismatch: {len(dataset_init)} vs {len(preliminary_example)}"
)
for sample, example in zip(dataset_init, preliminary_example):
    assert sample["ID"] == example["ID"]
    assert sample["question"] == example["question"]

# Count the samples per problem type. A dict keeps first-seen order, so the
# listing below (and the order of equal counts) is the same on every run,
# unlike iteration over a set of strings.
problem_type_counts = {}
for sample in dataset_init:
    problem_type = sample["problem_type"]
    problem_type_counts[problem_type] = problem_type_counts.get(problem_type, 0) + 1

print("样本数量：",len(dataset_init))
print("问题类型：",",".join(problem_type_counts))
# Share of each problem type, most frequent first, one per line.
for problem_type, count in sorted(problem_type_counts.items(), key=lambda x: x[1], reverse=True):
    print(f"{problem_type}: {count / len(dataset_init):.2%}")

# Working slice of the test set; widen FROM/TO to process more samples.
# The slice is shallow: its dicts are the same objects as in dataset_init,
# so fields added to `dataset` items later also appear there.
FROM=0
TO=FROM+50
dataset=dataset_init[FROM:TO]

样本数量： 512
问题类型： multi(True/False, calculations),multi(calculations, True/False),multi(True/False, draw),multi(draw, True/False),multi(calculations, draw),True/False,draw,calculations
calculations: 75.20%
multi(True/False, calculations): 8.40%
True/False: 7.62%
draw: 6.25%
multi(True/False, draw): 0.98%
multi(calculations, draw): 0.98%
multi(calculations, True/False): 0.39%
multi(draw, True/False): 0.20%


In [14]:
# Chat model client for "gpt-4o"; the API key and endpoint are read from the
# WLAI_API_KEY / WLAI_BASE_URL environment variables.
gpt4o=ChatOpenAI(
    api_key=os.getenv("WLAI_API_KEY"),
    base_url=os.getenv("WLAI_BASE_URL"),
    model="gpt-4o",
)

# One request sent before the cache below is installed, so on a fresh kernel it
# always reaches the endpoint — presumably a connectivity smoke test (confirm).
gpt4o.invoke("hello")
set_llm_cache(SQLiteCache(database_path=".langchain.db")) # LLM cache stored in the SQLite file .langchain.db

## 翻译

In [15]:
from tool.model import translate_prompt

# Translate every question of the working slice (original note: results are
# already cached, so the full set is translated). Requests run one at a time
# (max_concurrency=1) and failures are returned in place of the result
# (return_exceptions=True) instead of aborting the batch.
translation_runnable= translate_prompt | gpt4o | StrOutputParser()
translation_list = translation_runnable.batch([{"text":item["question"]} for item in dataset], config={"max_concurrency":1}, return_exceptions=True)
for item, translation in zip(dataset, translation_list):
    if isinstance(translation, Exception):
        # An exception object is not JSON serializable and would make the
        # later json.dumps(dataset) fail; keep the sample, report the failure
        # and store None instead.
        print(f"translation failed for ID {item['ID']}: {translation!r}")
        translation = None
    item["translation"]=translation

## 运行
### 预处理
1. 构建prompt
2. 修改题目中文件名位置

In [16]:
from gpt4o import *
# Build the agent prompt for every sample and rewrite the file names it
# mentions so they point into the test-set data directory.
# (d_template, extract_filenames, add_path and data_path are not defined in this
# notebook; presumably they come from `from gpt4o import *` — confirm.)
test_data_dir = data_path / 'Final_TestSet/data'  # loop-invariant, computed once
for item in dataset:
    # Fill the question into the template registered for its problem type.
    content = d_template[item["problem_type"]].format(item["question"])
    # dict.fromkeys removes duplicate names while keeping their order. Replacing
    # the same name twice would rewrite it again inside the path produced by the
    # first replacement (assuming add_path keeps the file name in its result).
    for filename in dict.fromkeys(extract_filenames(content)):
        content = content.replace(filename, add_path(filename, test_data_dir))
    item["content"]=content

### 问题改写
1. 获取问题类型，构建list
2. 根据每种类型，输出问题目标（问n次，再投票）
3. 返回问题目标的list

In [17]:
from tqdm import tqdm
from tool.model import gpt4o


def ask_llm_mutil_time(text:str, n=5,k=1):
    """Ask the model the same question several times and let it pick the majority answer.

    ``n`` prompts are built, each consisting of its index, a newline and
    ``text``, and sent as one batch through ``gpt4o | StrOutputParser()``.
    The answers are joined with newlines under a fixed instruction (in Chinese)
    asking for what most of the answers express, and that prompt is sent once more.

    Args:
        text: The prompt to ask repeatedly.
        n: Number of prompts sent in the batch.
        k: Value passed as ``max_concurrency`` for the batch.

    Returns:
        The parsed output of the final, aggregating call.
    """
    # `gpt4o` is the module-level name, most recently bound by
    # `from tool.model import gpt4o` in this cell.
    chain = gpt4o | StrOutputParser()

    # The index prefix makes every prompt text distinct — presumably so repeated
    # requests are not answered by one cached response (confirm).
    numbered_prompts = []
    for idx in range(n):
        numbered_prompts.append(str(idx) + "\n" + text)

    answers = chain.batch(numbered_prompts, config={"max_concurrency": k})
    vote_prompt = """从下面的不同人表达中，直接返回大部分人想表达的内容，不附带其他信息：\n""" + "\n".join(answers)
    return chain.invoke(vote_prompt)

def get_goals(text:str, problem_type:str):
    """Extract one goal sentence per problem type of a task.

    A ``problem_type`` starting with ``"multi"`` is split into its sub-types by
    dropping the first six characters and the last one and splitting on
    ``", "`` (e.g. ``"multi(True/False, draw)"``); any other value is used as
    the single type. For each type a prompt containing a type-specific hint and
    ``text`` is passed to ``ask_llm_mutil_time`` with ``n=15, k=5``.

    Args:
        text: The task description.
        problem_type: ``"calculations"``, ``"True/False"``, ``"draw"`` or a
            ``multi(...)`` combination of them.

    Returns:
        A list with one goal per type, in the order the types appear.

    Raises:
        Exception: If a type is not one of the three known ones.
    """
    # Hint describing what kind of result each problem type asks for.
    hints = {
        "calculations": "这通常是一个或多个图算法相关的指标",
        "True/False": "这通常是一个判断，返回True或False",
        "draw": "这通常是需要绘制一张图算法相关的图片",
    }

    if problem_type.startswith("multi"):
        sub_types = problem_type[6:-1].split(", ")
    else:
        sub_types = [problem_type]

    goals = []
    for sub_type in sub_types:
        if sub_type not in hints:
            raise Exception("unknown problem type")
        key_word = hints[sub_type]

        prompt=f"""你是一个图算法专家，你需要从下面的任务描述中，提取出任务的最终目标，{key_word}。\n<任务>\n{text}\n</任务>\n。用第一人称，一句话转述这个任务的最终目标是什么。不要对最终目标进行任何计算，不需要考虑如何实现目标，以及条件"""
        goals.append(ask_llm_mutil_time(prompt, n=15, k=5))

    return goals
        
        
        
# Separator and heading that introduce the goal reminder appended to a prompt.
GOAL_MARKER = "\n\n"+"\n牢记任务目标：\n"

# Extract the goals of every sample and append them to its prompt.
for item in tqdm(dataset):
    goals=get_goals(item["question"], item["problem_type"])
    item["goals"]=goals
    # Cut off a reminder left by an earlier run of this cell before appending,
    # so re-running the cell does not stack several reminder blocks.
    # (Assumes the prompt built earlier never contains the marker itself.)
    base_content = item["content"].split(GOAL_MARKER, 1)[0]
    item["content"]=base_content+GOAL_MARKER+"\n".join(goals)
    

100%|██████████| 50/50 [00:03<00:00, 12.70it/s]


### 运行agent

In [None]:
from autogen import Cache

def run(item: dict):
    """Run the agent conversation for one sample and store the outcome on it.

    ``code_executor_agent`` starts a chat with ``code_writer_agent`` using
    ``item["content"]`` as the message, with a disk cache rooted at
    ``./autogen_cache`` (seed 1) and an LLM-written summary requested with the
    prompt ``'only return the code output'``.

    Args:
        item: Sample dict; must contain ``"content"``. It is modified in place.

    Returns:
        The same ``item``, with three keys set: ``"code"`` (last Python snippet
        of the most recent message that contains one, or ``""``), ``"answer"``
        (the chat summary, unwrapped from its ``'content'`` key when it is a
        dict) and ``"chat_history"``.
    """
    content = item["content"]

    # Use DiskCache as cache
    with Cache.disk(cache_path_root="./autogen_cache",cache_seed=1) as cache:
        chat_result = code_executor_agent.initiate_chat(
            code_writer_agent,
            message=content,
            summary_method='reflection_with_llm',
            summary_args={"summary_prompt": 'only return the code output'},
            cache=cache,
        )

    history = chat_result.chat_history

    # Scan from the newest message down to index 1; index 0 (the first entry of
    # the history) is never inspected.
    code = ""
    for idx in reversed(range(1, len(history))):
        snippets = extract_python_code(history[idx]['content'])
        if len(snippets) > 0:
            code = snippets[-1]
            break

    summary = chat_result.summary
    answer = summary['content'] if isinstance(summary, dict) else summary

    item.update(code=code, answer=answer, chat_history=chat_result.chat_history)
    return item

# Run the agents on a copy of the working list, one sample at a time;
# run() writes its results onto each sample dict.
for sample in tqdm(list(dataset)):
    run(sample)


## 存储

In [19]:
# Serialize before opening the file: mode 'w' truncates it on open, so a
# serialization error raised after the open would leave an empty file in place
# of any results saved earlier.
s = json.dumps(dataset, indent=4, ensure_ascii=False)
with open('data/SMP_240907_check_1.json', 'w', encoding='utf-8') as f:
    f.write(s)

----

In [None]:
# Deliberate failure: presumably stops a "Run All" here so the inspection cells
# below only run when executed by hand (confirm).
raise Exception("stop")

In [4]:
# Load a saved result file for manual inspection.
# NOTE(review): this reads SMP_240905_check_1.json, while the save cell writes
# SMP_240907_check_1.json — confirm which run is meant to be inspected.
with open('data/SMP_240905_check_1.json', 'r', encoding='utf-8') as f:
    tmp_dataset=json.load(f)

In [None]:
# Print one saved sample for review; tmp_id is the 1-based position in the file.
tmp_id=50
i=tmp_id-1
record=tmp_dataset[i]
separator="\n---\n"
print(
    record["ID"], record["problem_type"], separator,
    record["translation"], separator,
    record['answer'], separator,
    record["code"], separator,
    record["question"],
)