In [1]:
import json
import os
from typing import List

from camel.models import ModelFactory
from camel.types import ModelPlatformType, ModelType
from pydantic import Field

from okagents.llms.deepseek import DeepSeekClient
from okagents.agents import NotesAgent, MilvusAgent
from okagents.agents.notes_agent import BaseNotesResponse
from okagents.agents import KGAgent
from okagents.config import Config

In [2]:
# ---------------------------------- load exp config ----------------------------------
# Location of the Suzuki experiment definition (a JSON file).
# Set the SUZUKI_CONFIG_PATH environment variable to run this notebook on
# another machine; when it is unset the original developer-local path is used,
# so the default behaviour is unchanged.
config_path = os.environ.get(
    "SUZUKI_CONFIG_PATH",
    "/Users/little1d/Desktop/Code/OK-Agents/okagents/config/suzuki_config.json",
)
# Decode explicitly as UTF-8: open() otherwise falls back to a
# platform/locale-dependent encoding, which can corrupt or reject non-ASCII
# text in the JSON file.
with open(config_path, "r", encoding="utf-8") as f:
    suzuki_config = json.load(f)
# Bare last expression: display the parsed configuration dict.
suzuki_config

{'name': 'Suzuki Reaction Optimization',
 'domain': 'Organic Chemistry - Cross Coupling Reactions',
 'description': 'Optimization of Suzuki cross-coupling reaction to enhance reaction yield by exploring different electrophiles, nucleophiles, ligands, bases, and solvents.',
 'constraint': 'Reagents must be compatible with each other and reaction conditions must ensure stability of intermediates.',
 'parameters_and_bounds': [{'name': 'Electrophile_SMILES',
   'description': 'SMILES representation of the electrophilic substrate',
   'bounds': ['BrC1=CC=C(N=CC=C2)C2=C1',
    'ClC1=CC=C(N=CC=C2)C2=C1',
    'IC1=CC=C(N=CC=C2)C2=C1',
    'O=S(OC1=CC=C(N=CC=C2)C2=C1)(C(F)(F)F)=O'],
   'type': 'discrete'},
  {'name': 'Nucleophile_SMILES',
   'description': 'SMILES representation of the nucleophilic substrate',
   'bounds': ['CC1=CC=C(N(C2CCCCO2)N=C3)C3=C1[B-](F)(F)F',
    'CC1=CC=C(N(C2CCCCO2)N=C3)C3=C1B(O)O',
    'CC1=CC=C(N(C2CCCCO2)N=C3)C3=C1B4OC(C)(C)C(C)(C)O4'],
   'type': 'discrete'},
  {

In [3]:
class ExperimentInfoResponse(BaseNotesResponse):
    """实验信息响应模型（简化版）"""

    # Simplified response schema for experiment information, passed as
    # `save_schema` when extracting experiment info with the NotesAgent.
    # NOTE(review): the docstring and the `description` strings are kept
    # byte-for-byte because they are presumably exported with the generated
    # schema (pydantic behaviour) — confirm before translating them.

    # Reactants, one entry per item, formatted as 'name:SMILES'.
    reactants: List[str] = Field(
        default=...,
        description="反应物信息，格式：'名称:SMILES'",
    )
    # Reaction conditions, one entry per item, formatted as 'type:value'.
    conditions: List[str] = Field(
        default=...,
        description="反应条件，格式：'类型:值'",
    )
    # Free-text description of the goal of the reaction.
    target: str = Field(
        default=...,
        description="反应目标描述",
    )

In [4]:
# ---------------------------------- init NotesAgent ----------------------------------

# Project settings object; it supplies the DeepSeek API key and base URL read below.
config = Config()
# Create the DeepSeek chat model through CAMEL's ModelFactory.
# `model_config_dict` is forwarded as the model configuration
# (max_tokens presumably caps the completion length — confirm against CAMEL docs).
deepseek = ModelFactory.create(
    model_platform=ModelPlatformType.DEEPSEEK,
    api_key=config.DEEPSEEK_API_KEY,
    url=config.DEEPSEEK_API_BASE,
    model_type=ModelType.DEEPSEEK_CHAT,
    model_config_dict={"max_tokens": 4096},
)

# The knowledge-graph agent is built on the same model; the Milvus agent is
# pointed at the collection named "test".
kg_agent = KGAgent(deepseek)
milvus_agent = MilvusAgent(collection_name="test")

# NotesAgent receives the model together with both agents created above.
notes_agent = NotesAgent(
    model=deepseek, kg_agent=kg_agent, milvus_agent=milvus_agent
)
# Separate project-level DeepSeek client; later cells call its generate()
# method, which returns a (content, reasoning_data) pair.
deepseek_client = DeepSeekClient()

In [5]:
# ---------------------------------- Test experiment-info search and extraction ----------------------------------

# Prompt template (Chinese, sent to the model as-is). It contains an "{input}"
# placeholder — presumably filled in by the NotesAgent with `experiment_data`;
# verify against NotesAgent.extract_experiment_info.
experiment_prompt = """
请从以下实验配置中提取关键信息：
1. 反应物：列出所有反应物及其SMILES式子
2. 反应条件：包括配体、碱、溶剂的选择，以及这些反应物之间的联系

实验配置：
{input}

请按照指定格式返回，确保所有SMILES字符串准确无误。
"""

# Serialise the experiment config to a JSON string; ensure_ascii=False keeps
# non-ASCII characters unescaped.
experiment_str = json.dumps(suzuki_config, indent=2, ensure_ascii=False)

# Run the extraction (original note: the storage step is commented out —
# that is not visible in this cell; TODO confirm).
# NOTE(review): in the recorded run CAMEL logged a JSON parsing error for this
# call; the logged reply was wrapped in a ```json code fence.
print("=== 测试实验信息提取 ===")
experiment_info = notes_agent.extract_experiment_info(
    experiment_data=experiment_str,
    save_schema=ExperimentInfoResponse,
    prompt=experiment_prompt,
)

=== 测试实验信息提取 ===
2025-03-27 16:26:31,343 - camel.agents.chat_agent - ERROR - Failed in parsing the output into JSON: Expecting value: line 1 column 1 (char 0)
response content: msgs=[BaseMessage(role_name='Assistant', role_type=<RoleType.ASSISTANT: 'assistant'>, meta_dict={}, content='```json\n{\n  "notes": [\n    "Optimization of Suzuki cross-coupling reaction to enhance reaction yield by exploring different electrophiles, nucleophiles, ligands, bases, and solvents.",\n    "Reagents must be compatible with each other and reaction conditions must ensure stability of intermediates."\n  ],\n  "reactants": [\n    "Electrophile:BrC1=CC=C(N=CC=C2)C2=C1",\n    "Electrophile:ClC1=CC=C(N=CC=C2)C2=C1",\n    "Electrophile:IC1=CC=C(N=CC=C2)C2=C1",\n    "Electrophile:O=S(OC1=CC=C(N=CC=C2)C2=C1)(C(F)(F)F)=O",\n    "Nucleophile:CC1=CC=C(N(C2CCCCO2)N=C3)C3=C1[B-](F)(F)F",\n    "Nucleophile:CC1=CC=C(N(C2CCCCO2)N=C3)C3=C1B(O)O",\n    "Nucleophile:CC1=CC=C(N(C2CCCCO2)N=C3)C3=C1B4OC(C)(C)C(C)(C)O4"\n  ],

In [6]:
# ---------------------------------- Test DeepSeek chat generation of reasoning data ----------------------------------
print("\n=== 测试DeepSeek对话生成 ===")
# Research prompt (Chinese, sent as-is) with the serialised experiment
# configuration embedded in it.
research_prompt = f"""
基于以下Suzuki反应配置，分析可能的优化方向：
{experiment_str}

请考虑：
1. 不同反应物组合的活性差异
2. 配体选择对反应的影响
3. 碱和溶剂的匹配性
"""

# generate() returns the final answer and the accompanying reasoning text.
content, reasoning_data = deepseek_client.generate(research_prompt)
# Print only a 200-character preview of each, followed by an ellipsis.
print("生成内容:", f"{content[:200]}...")
print("\n推理内容:", f"{reasoning_data[:200]}...")


=== 测试DeepSeek对话生成 ===
生成内容: 基于提供的Suzuki反应配置，以下是对优化方向的系统性分析：

---

### **1. 电泳质（Electrophile）的优化**
- **活性差异**：离去基团活性顺序为 **I > OTf > Br > Cl**。  
  - 若当前产率低，优先尝试碘代物（`IC1=...`）或三氟甲磺酸酯（`O=S(OC1=...`），以提高氧化加成速率。  
  - 氯代物（`ClC1=...`）...

推理内容: 嗯，用户给了一个关于Suzuki反应优化的配置，需要分析可能的优化方向。首先，我得仔细看看这个配置里都有哪些参数和选项。用户提到要考虑不同反应物组合的活性差异、配体选择的影响，以及碱和溶剂的匹配性。那我得从这三个方面来逐一分析。

首先看电泳质的选项，有四个不同的SMILES结构，分别是含有Br、Cl、I和三氟甲磺酸酯的芳香环。一般来说，Suzuki反应中离去基团的活性顺序是I > OTf (三氟...


In [10]:
# Display the complete reasoning text returned by deepseek_client.generate()
# (the generation cell only printed its first 200 characters).
reasoning_data

'嗯，用户给了一个关于Suzuki反应优化的配置，需要分析可能的优化方向。首先，我得仔细看看这个配置里都有哪些参数和选项。用户提到要考虑不同反应物组合的活性差异、配体选择的影响，以及碱和溶剂的匹配性。那我得从这三个方面来逐一分析。\n\n首先看电泳质的选项，有四个不同的SMILES结构，分别是含有Br、Cl、I和三氟甲磺酸酯的芳香环。一般来说，Suzuki反应中离去基团的活性顺序是I > OTf (三氟甲磺酸酯) > Br > Cl。所以碘代物反应活性最高，可能更容易进行反应，但可能稳定性差一些。而三氟甲磺酸酯的活性也很高，但可能对水分敏感。氯代物活性最低，可能需要更苛刻的条件或者更强的催化剂。所以优化的时候可能需要测试不同电泳质对产率的影响，特别是如果当前产率不高的话，可能需要换更高活性的离去基团，比如换成碘或三氟甲磺酸酯。\n\n然后是亲核试剂，三个选项，区别在于硼酸部分的结构。第一个是B-（F）3，可能对应的是硼酸的三氟硼酸盐形式，这种形式通常更稳定，溶解度可能更好，尤其是当游离硼酸不稳定的时候。第二个是B(O)O，应该是普通的硼酸，可能需要碱性条件才能活化。第三个是硼酸酯，比如频哪醇硼酸酯，这类在反应中可能需要更强的碱或者更长的反应时间才能水解成活性形式。所以亲核试剂的稳定性、溶解度和活化条件会影响反应效率。如果反应体系中碱不够强或者溶剂不合适，可能硼酸酯的转化率较低，这时候可能需要调整碱或溶剂来促进其水解。\n\n接下来是配体，这部分选项很多，大概有十几种不同的配体结构。常见的Suzuki反应配体包括膦配体，比如三苯基膦，或者更高效的如XPhos、SPhos等，还有可能是一些大位阻的配体或者含氮的配体。比如第一个配体看起来像铁配合物，可能不是常用的钯催化剂配体，或者是不是用户写错了？例如，Fe可能应该是Pd？或者可能用户用了铁催化的Suzuki反应？不过一般Suzuki是Pd催化的。所以可能需要确认一下。如果是Fe的话，可能反应条件不同。或者可能配体结构中的金属是Pd？假设这里可能有笔误，或者用户确实在使用Fe催化剂，那可能需要特别考虑。例如，有些配体可能更适合钯催化剂，比如Buchwald-Hartwig类型的配体，如XPhos、SPhos等。配体的选择对催化活性影响很大，比如大位阻的富电子配体可以促进氧化加成，或者稳定活性中间体。比如，当电泳质是活性较

In [13]:
# ---------------------------------- Extract notes from the reasoning data ----------------------------------

print("\n=== 测试从推理数据提取Notes ===")
# The prompt must contain "{input}": it is the placeholder for the reasoning data.
reasoning_response = notes_agent.extract_notes(
    reasoning_data=reasoning_data,
    prompt="从推理内容中提取关键科学发现和优化建议，你的回答不能超过 200 字 \n{input}",
)
# Bare last expression: the notebook shows the repr of the whole response object.
reasoning_response
# To see only the reply text, print reasoning_response.msg.content instead.


=== 测试从推理数据提取Notes ===
2025-03-27 16:41:56,186 - camel.agents.chat_agent - ERROR - Failed in parsing the output into JSON: Expecting value: line 1 column 1 (char 0)
Parsed content as plain text: Key scientific findings: The reactivity order of electrophiles in Suzuki coupling is I > OTf > Br > Cl. Boronic acid derivatives vary in stability and activation requirements (trifluoroborates > boronic acids > boronic esters). Ligand choice significantly impacts catalytic activity, with bulky, electron-rich ligands favoring challenging substrates.
Optimization recommendations: 1) Use higher reactivity electrophiles (I or OTf) if yield is low. 2) Consider trifluoroborate nucleophiles for better stability. 3) Match ligand to substrate reactivity (e.g., bulky phosphines for chloroarenes). 4) Pair CsF with trifluoroborates or stronger bases with boronic esters. 5) Use polar aprotic solvents (DMF) for higher temperature reactions.
Save successfully, content: nodes=[Node(id='Suzuki coupling', type=

ChatAgentResponse(msgs=[BaseMessage(role_name='Assistant', role_type=<RoleType.ASSISTANT: 'assistant'>, meta_dict={}, content='```json\n{\n  "notes": [\n    "Key scientific findings: The reactivity order of electrophiles in Suzuki coupling is I > OTf > Br > Cl. Boronic acid derivatives vary in stability and activation requirements (trifluoroborates > boronic acids > boronic esters). Ligand choice significantly impacts catalytic activity, with bulky, electron-rich ligands favoring challenging substrates.",\n    "Optimization recommendations: 1) Use higher reactivity electrophiles (I or OTf) if yield is low. 2) Consider trifluoroborate nucleophiles for better stability. 3) Match ligand to substrate reactivity (e.g., bulky phosphines for chloroarenes). 4) Pair CsF with trifluoroborates or stronger bases with boronic esters. 5) Use polar aprotic solvents (DMF) for higher temperature reactions."\n  ]\n}\n```', video_bytes=None, image_list=None, image_detail='auto', video_detail='low', parse

In [15]:
# Print just the text of the reply message instead of the full response repr.
print(reasoning_response.msg.content)

```json
{
  "notes": [
    "Key scientific findings: The reactivity order of electrophiles in Suzuki coupling is I > OTf > Br > Cl. Boronic acid derivatives vary in stability and activation requirements (trifluoroborates > boronic acids > boronic esters). Ligand choice significantly impacts catalytic activity, with bulky, electron-rich ligands favoring challenging substrates.",
    "Optimization recommendations: 1) Use higher reactivity electrophiles (I or OTf) if yield is low. 2) Consider trifluoroborate nucleophiles for better stability. 3) Match ligand to substrate reactivity (e.g., bulky phosphines for chloroarenes). 4) Pair CsF with trifluoroborates or stronger bases with boronic esters. 5) Use polar aprotic solvents (DMF) for higher temperature reactions."
  ]
}
```


In [16]:
# ---------------------------------- Test the query feature ----------------------------------
print("\n=== 测试查询功能 ===")
query = "Suzuki反应中配体选择"
# The result is indexed by "knowledge_graph" and "vector_db" below.
results = notes_agent.query_notes(query)
# Show at most the first hit from each source.
print(f"知识图谱结果: {results['knowledge_graph'][:1]}")
print(f"向量数据库结果: {results['vector_db'][:1]}")


=== 测试查询功能 ===
知识图谱结果: []
向量数据库结果: [{'text': 'No suitable information retrieved from Optimization of Suzuki cross-coupling reaction to enhance reaction yield by exploring different elec with similarity_threshold = 0.7.'}]


In [17]:
# Display the full query result (both the "knowledge_graph" and "vector_db" entries).
results

{'knowledge_graph': [],
 'vector_db': [{'text': 'No suitable information retrieved from Optimization of Suzuki cross-coupling reaction to enhance reaction yield by exploring different elec with similarity_threshold = 0.7.'}]}

In [22]:
# Join the non-empty "text" fields of the vector-database hits, one per line.
# NOTE(review): in the recorded run the only hit is a "No suitable information
# retrieved ..." message, so the context carries no retrieved knowledge.
context = "\n".join(
    note["text"] for note in results["vector_db"] if note.get("text")
)
context

'No suitable information retrieved from Optimization of Suzuki cross-coupling reaction to enhance reaction yield by exploring different elec with similarity_threshold = 0.7.'

In [23]:
# ---------------------------------- Test generation with hybrid retrieval ----------------------------------
# NOTE(review): this cell rebinds `context`, `reasoning_data` and
# `reasoning_response`, which earlier cells also assign.
retrieved_notes = notes_agent.query_notes("Suzuki反应溶剂选择")
# Join the non-empty "text" fields of the vector-database hits, one per line.
context = "\n".join(
    note["text"] for note in retrieved_notes["vector_db"] if note.get("text")
)

# Prompt (Chinese, sent as-is): retrieved context followed by the question.
# NOTE(review): index 4 of parameters_and_bounds is presumably the solvent
# parameter (the recorded answer discusses solvents) — verify against the
# config; a lookup by parameter name would be less fragile.
enhanced_prompt = f"""
基于以下背景知识：
{context}

请回答：
在当前的Suzuki反应配置中，溶剂选择{suzuki_config['parameters_and_bounds'][4]['bounds']}，
哪种溶剂最适合高产率？给出详细分析。
"""

# generate() returns the final answer and the accompanying reasoning text.
enhanced_content, reasoning_data = deepseek_client.generate(enhanced_prompt)
# Print a 300-character preview of the answer, followed by an ellipsis.
print("增强后的生成结果:", f"{enhanced_content[:300]}...")

# Extract notes from the new reasoning text; "{input}" is the placeholder
# for the reasoning data.
reasoning_response = notes_agent.extract_notes(
    reasoning_data=reasoning_data,
    prompt="从推理内容中提取关键科学发现和优化建议，你的回答不能超过 200 字 {input}",
)

增强后的生成结果: 在Suzuki偶联反应中，溶剂的选择需兼顾溶解性、催化剂兼容性及反应效率。以下是对四种溶剂的分析：

1. **C1COCC1（可能为四氢呋喃类醚）**  
   醚类溶剂（如THF）具有中等极性，能溶解部分有机化合物和钯催化剂（如Pd(PPh₃)₄），但相比DMF或DMSO，其极性较低，可能对难溶底物的溶解性不足，导致反应速率和产率受限。

2. **CO（甲醇）**  
   甲醇为质子性溶剂，可能引发钯催化剂的分解或副反应（如配体置换）。此外，Suzuki反应通常需要弱极性和非质子性环境以稳定催化活性物种，甲醇在此类反应中应用较少，产率通常较低。

3. **N#CC（乙腈）**  
 ...
2025-03-27 17:08:38,960 - camel.agents.chat_agent - ERROR - Failed in parsing the output into JSON: Expecting value: line 1 column 1 (char 0)
Parsed content as plain text: Key scientific findings: DMF (O=CN(C)C) is identified as the optimal solvent for Suzuki coupling due to its high polarity, ability to dissolve reactants and Pd catalysts, and compatibility with high-temperature reactions. THF (C1COCC1) is a viable alternative but less effective for challenging substrates. Methanol (CO) and acetonitrile (N#CC) are less commonly used due to potential catalyst decomposition (methanol) or lower reaction efficiency (acetonitrile).
Optimization recommendations: 1) Prioritize DMF fo

In [25]:
print(enhanced_content)

在Suzuki偶联反应中，溶剂的选择需兼顾溶解性、催化剂兼容性及反应效率。以下是对四种溶剂的分析：

1. **C1COCC1（可能为四氢呋喃类醚）**  
   醚类溶剂（如THF）具有中等极性，能溶解部分有机化合物和钯催化剂（如Pd(PPh₃)₄），但相比DMF或DMSO，其极性较低，可能对难溶底物的溶解性不足，导致反应速率和产率受限。

2. **CO（甲醇）**  
   甲醇为质子性溶剂，可能引发钯催化剂的分解或副反应（如配体置换）。此外，Suzuki反应通常需要弱极性和非质子性环境以稳定催化活性物种，甲醇在此类反应中应用较少，产率通常较低。

3. **N#CC（乙腈）**  
   乙腈是极性非质子溶剂，对钯催化剂溶解性较好，但沸点较低（82°C），限制了高温反应的应用。虽然乙腈在某些偶联反应中有效，但在Suzuki体系中不如DMF常见，可能导致产率中等。

4. **O=CN(C)C（N,N-二甲基甲酰胺，DMF）**  
   DMF是强极性非质子溶剂，能高效溶解有机底物和钯催化剂，并提供稳定的反应环境。其高沸点（153°C）允许高温反应，显著提高反应速率和产率。此外，DMF可稳定钯催化剂的活性中间体，减少副反应，是Suzuki反应的经典溶剂选择。

**结论**  
**DMF（O=CN(C)C）** 是当前配置中最适合的溶剂，因其优异的溶解性、热稳定性及对催化体系的兼容性，能有效提高反应产率。其他溶剂因极性不足（醚类）、质子性干扰（甲醇）或适用性限制（乙腈）而次之。
