In [3]:
import json
import os

import dotenv

from langchain_community.cache import SQLiteCache
from langchain_core.globals import set_llm_cache
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

from tqdm import tqdm

dotenv.load_dotenv()

True

In [44]:
with open('res/Final_TestSet/Final_TestSet.json', 'r', encoding='utf-8') as f:
    dataset_init=json.load(f)
with open('res/Final_Example.json', 'r', encoding='utf-8') as f:
    preliminary_example=json.load(f)

for i in range(0, len(dataset_init)):
    # 检查数据集文件是否一致
    assert dataset_init[i]["ID"] == preliminary_example[i]["ID"] 
    assert dataset_init[i]["question"] == preliminary_example[i]["question"]
    
print("样本数量：",len(dataset_init))
print("问题类型：",",".join(set([item["problem_type"] for item in dataset_init])))
# 结算每种问题类型的比率
problem_type_counts = {problem_type: 0 for problem_type in set([item["problem_type"] for item in dataset_init])}
for i in range(0, len(dataset_init)):
    problem_type_counts[dataset_init[i]["problem_type"]] += 1
# 显示比率, 排序，每行
for problem_type, count in sorted(problem_type_counts.items(), key=lambda x: x[1], reverse=True):
    print(f"{problem_type}: {count / len(dataset_init):.2%}")

FROM=0
TO=FROM+512
dataset=dataset_init[FROM:TO]

样本数量： 512
问题类型： multi(draw, True/False),multi(calculations, True/False),True/False,calculations,multi(True/False, draw),multi(True/False, calculations),multi(calculations, draw),draw
calculations: 75.20%
multi(True/False, calculations): 8.40%
True/False: 7.62%
draw: 6.25%
multi(True/False, draw): 0.98%
multi(calculations, draw): 0.98%
multi(calculations, True/False): 0.39%
multi(draw, True/False): 0.20%


In [65]:
gpt4o=ChatOpenAI(
    api_key=os.getenv("WLAI_API_KEY"),
    base_url=os.getenv("WLAI_BASE_URL"),
    model="gpt-4o",
)

gpt4o.invoke("hello")
set_llm_cache(SQLiteCache(database_path=".langchain.db")) # 

## 翻译

In [66]:
from tool.model import translate_prompt

# 翻译所有问题，已经缓存，所以全量翻译
translation_runnable= translate_prompt | gpt4o | StrOutputParser()
translation_list = translation_runnable.batch([{"text":item["question"]} for item in dataset], config={"max_concurrency":1}, return_exceptions=True)
for i in range(0,len(dataset)):
    dataset[i]["translation"]=translation_list[i]

## 运行
### 预处理
1. 构建prompt
2. 修改题目中文件名位置

In [67]:
from gpt4o import *
for i in range(0, len(dataset)):
    content = d_template[dataset[i]["problem_type"]].format(dataset[i]["question"])
    filenames = extract_filenames(content)
    for filename in filenames:
        content = content.replace(filename, add_path(filename, data_path / 'Final_TestSet/data'))
    dataset[i]["content"]=content

### 运行agent

In [None]:
from autogen import Cache

def run(item: dict):
    content = item["content"]

    # Use DiskCache as cache
    with Cache.disk(cache_path_root="./autogen_cache",cache_seed=1) as cache:
        chat_result = code_executor_agent.initiate_chat(
            code_writer_agent,
            message=content,
            summary_method='reflection_with_llm',
            summary_args=dict(summary_prompt='only return the code output'),
            cache=cache,
            # silent=True,
        )
    # code = extract_python_code(chat_result.chat_history[-3]['content'])[-1]
    code=""
    for i in range(len(chat_result.chat_history)-1, 0, -1):
        l=extract_python_code(chat_result.chat_history[i]['content'])
        if len(l)>0:
            code=l[-1]
            break
    
    answer = chat_result.summary
    if isinstance(answer, dict):
        answer = answer['content']
    item["code"]=code
    item["answer"]=answer
    item['chat_history']=chat_result.chat_history
    return item

for item in tqdm(dataset[:100]):
    run(item)


## 存储

In [9]:
with open('res/SMP_240905_check_1.json', 'w', encoding='utf-8') as f:
    s = json.dumps(dataset, indent=4, ensure_ascii=False)
    f.write(s)

----

In [None]:
raise Exception("stop")

In [4]:
with open('res/SMP_240905_check_1.json', 'r', encoding='utf-8') as f:
    tmp_dataset=json.load(f)

In [None]:
tmp_id=50
i=tmp_id-1
print(tmp_dataset[i]["ID"], tmp_dataset[i]["problem_type"],"\n---\n", tmp_dataset[i]["translation"],"\n---\n", tmp_dataset[i]['answer'],"\n---\n",tmp_dataset[i]["code"],"\n---\n",tmp_dataset[i]["question"])

In [146]:
 # filename: coffee_roasting_workflow.py
import networkx as nx

# Step 1: Create the graph
G = nx.DiGraph()
nodes = [(1, {'group': 0}), (2, {'group': 1}), (3, {'group': 1}), (4, {'group': 1})]
edges = [(1, 2), (2, 3)]
G.add_nodes_from(nodes)
G.add_edges_from(edges)

# Step 2: Define sets S and T
S = {1, 2}
T = {3, 4}

# Step 3: Calculate the mixing expansion
def mixing_expansion(G, S, T):
    # Count the number of edges from S to T
    edges_from_S_to_T = sum(1 for u, v in G.edges() if u in S and v in T)
    # Calculate the size of the sets
    size_S = len(S)
    size_T = len(T)
    # Mixing expansion formula
    if size_S == 0 or size_T == 0:
        return 0
    return edges_from_S_to_T / (size_S * size_T)

mixing_expansion_value = nx.mixing_expansion(G, S, T)
print(f"{mixing_expansion_value:.2f}") 

0.25


In [115]:
import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities
from community import community_louvain  # Louvain is close to Leiden, used as a proxy
from sklearn.metrics import f1_score
import leidenalg as la
import igraph as ig

# Step 1: 加载足球图形数据
G = nx.read_gml('res/Final_TestSet/data/football.gml')

# 获取 networkx 图中的节点名称
nx_nodes = list(G.nodes())

# Step 2: 使用 greedy_modularity_communities 进行社区检测
greedy_communities = list(greedy_modularity_communities(G))

# 将每个节点的社区分配记录下来
greedy_labels = {node: i for i, community in enumerate(greedy_communities) for node in community}

# Step 3: 使用 Louvain 作为 Leiden 的参考
partition = community_louvain.best_partition(G)
louvain_labels = [partition[node] for node in G.nodes()]

# Step 4: 转换成 igraph 图，并使用 Leiden 算法
# 使用元组列表构建 igraph 图，同时保留原始节点标签
ig_graph = ig.Graph.TupleList([(nx_nodes.index(u), nx_nodes.index(v)) for u, v in G.edges()], directed=False)

# 使用 Leiden 算法进行社区检测
leiden_community = la.find_partition(ig_graph, la.ModularityVertexPartition)

# 创建一个字典来存储 igraph 索引和其相应的 community id
leiden_labels = {nx_nodes[node]: i for i, community in enumerate(leiden_community) for node in community}

# Step 5: 比较 Greedy 和 Leiden 输出，计算 F1 得分
greedy_labels_list = [greedy_labels[node] for node in nx_nodes]
leiden_labels_list = [leiden_labels[node] for node in nx_nodes]

# 计算 F1 得分
f1 = f1_score(greedy_labels_list, leiden_labels_list, average='macro')

# 打印结果
print("Average F1 score between Greedy Modularity and Leiden method: ", f1)


Average F1 score between Greedy Modularity and Leiden method:  0.12302414889436646


In [123]:
import networkx as nx
import igraph as ig
import leidenalg as la
from sklearn.metrics import f1_score
from networkx.algorithms.community import greedy_modularity_communities
from community import community_louvain
from igraph import Graph

# Step 1: 加载足球图形数据
G = nx.read_gml('res/Final_TestSet/data/football.gml')

# 获取 networkx 图中的节点名称
nx_nodes = list(G.nodes())

# Step 2: 使用 greedy_modularity_communities 进行社区检测
greedy_communities = list(greedy_modularity_communities(G))
greedy_labels = {node: i for i, community in enumerate(greedy_communities) for node in community}

# Step 3: 使用 Louvain 作为 Leiden 的参考
partition = community_louvain.best_partition(G)
louvain_labels = [partition[node] for node in G.nodes()]

# Step 4: 转换成 igraph 图，并使用 Leiden 算法
ig_graph = ig.Graph.TupleList([(nx_nodes.index(u), nx_nodes.index(v)) for u, v in G.edges()], directed=False)

# 使用 Leiden 算法进行社区检测，使用 modularity 作为目标函数
leiden_community = Graph.community_leiden(ig_graph)

# 创建一个字典来存储 igraph 索引和其相应的 community id
leiden_labels = {nx_nodes[node]: i for i, community in enumerate(leiden_community) for node in community}

# Step 5: 比较 Greedy 和 Leiden 输出，计算 F1 得分
greedy_labels_list = [greedy_labels[node] for node in nx_nodes]
leiden_labels_list = [leiden_labels[node] for node in nx_nodes]

# 计算 F1 得分
f1 = f1_score(greedy_labels_list, leiden_labels_list, average='macro')

# 打印结果
print("Average F1 score between Greedy Modularity and Leiden method: ", f1)


Average F1 score between Greedy Modularity and Leiden method:  0.0007905138339920949


In [None]:
for i in range(50):
    print(tmp_dataset[i]['problem_type'])

In [None]:
# 绘制graph
import matplotlib.pyplot as plt
plt.figure(figsize=(20, 20))
nx.draw(G, with_labels=True, node_size=1000, node_color='skyblue', font_size=36)
plt.show()