# Test Query Team Execution

This notebook demonstrates two ways to run the query team workflow:
1. **Using QueryManager**: Simulates the standard way of submitting a query and getting the final result.
2. **Using Direct Graph Stream**: Directly interacts with the LangGraph instance to observe intermediate steps.

## Part 0: Setup (Imports and Ontology)

In [1]:
import sys
import os
sys.path.append(r"D:\\CursorProj\\Chem-Ontology-Constructor")
os.environ["PROJECT_ROOT"] = "D:\\\\CursorProj\\\\Chem-Ontology-Constructor\\\\"

from owlready2 import get_ontology
# 从 config.settings 导入 ONTOLOGY_SETTINGS 而不是 ONTOLOGY_CONFIG
from config.settings import ONTOLOGY_SETTINGS
# 本体现在在 ONTOLOGY_SETTINGS 初始化时加载，如果需要，可以通过 ONTOLOGY_SETTINGS.ontology 访问
# 例如: onto = ONTOLOGY_SETTINGS.ontology
# onto_additional = get_ontology("data/ontology/test.owl").load() # 可选的第二个本体

Setting owlready2.JAVA_EXE globally from settings.yaml: C:\Program Files\Java\jdk-23\bin\java.exe


In [2]:
import dotenv
dotenv.load_dotenv(override=True)

True

In [3]:
from langchain_openai import ChatOpenAI
from langchain_community.chat_models.tongyi import ChatTongyi
from langchain_ollama import ChatOllama


answer_llm = ChatOpenAI(
            model_name="gpt-4.1-nano",
            temperature=0,
            max_tokens=10000,
        )

# ty_llm = ChatTongyi(
#             model_name="qwen3-14b",
#             model_kwargs={
#                 "temperature": 0,
#                 "enable_thinking": False,
#                 "max_tokens":8192,
#             }
#         )

# ollama_llm = ChatOllama(
#             model="myaniu/qwen2.5-1m:14b",
#             base_url="https://30a6-36-5-153-246.ngrok-free.app",
#             temperature=0,
#             max_tokens=8192,
#         )

answer_llm = answer_llm

In [4]:
# Required Imports
import sys
import os
import json
import time
from typing import Dict, Any, List
from owlready2 import *
import asyncio # Needed for owlready2 async operations in some envs

# Import the OntologySettings class
from config.settings import OntologySettings # Keep ONTOLOGY_SETTINGS import for potential base_iri access

# Import necessary LLM and Query Team components
try:
    from autology_constructor.idea.query_team import QueryManager, Query, QueryStatus, create_query_graph
    from autology_constructor.idea.query_team.ontology_tools import OntologyTools
    from autology_constructor.idea.common.llm_provider import get_cached_default_llm
    print("Modules imported successfully.")
except ModuleNotFoundError as e:
    print(f"Error importing modules: {e}")
    print(f"Current sys.path: {sys.path}")

# Ensure LLM Provider is configured
try:
    llm = get_cached_default_llm()
    def get_model_name(obj):
        return getattr(obj, "model_name", getattr(obj, "model", None))
    print(f"LLM: {get_model_name(llm)}. \nAnsewr LLM: {get_model_name(answer_llm)}")
    print("LLM Provider initialized successfully.")
except Exception as e:
    print(f"Error initializing LLM Provider: {e}\nPlease ensure API keys or necessary configurations are set.")
    llm = None

# --- Ontology Setup ---
print("Setting up test ontology using a new OntologySettings instance...")

# Define parameters for the new OntologySettings instance
# Assuming 'backup-2.owl' and 'backup-2-closed.owl' exist in 'data/ontology'
# Use the project root defined in the previous cell
project_root_path = os.environ.get("PROJECT_ROOT", ".")
ontology_dir = os.path.join(project_root_path, "data", "ontology")
# You might want to use the base_iri from the default settings or define a specific one for testing
test_base_iri = ONTOLOGY_SETTINGS.base_iri if 'ONTOLOGY_SETTINGS' in locals() else "http://www.test.org/chem_ontologies/backup-2"

try:
    # Instantiate OntologySettings directly
    # test_ontology_settings = ONTOLOGY_SETTINGS
    test_ontology_settings = OntologySettings(
        base_iri=test_base_iri,
        ontology_file_name="final.owl",  # Use the desired ontology file
        directory_path=ontology_dir,
        closed_ontology_file_name="IDA-closed.owl" # Adjust if your closed file has a different name pattern
    )
    # Access the loaded ontology via the instance's property
    test_onto = test_ontology_settings.ontology
    print(f"Successfully loaded ontology: {test_onto.base_iri}")
    print(f"From file: {test_ontology_settings.ontology_file_name} in {test_ontology_settings.directory_path}")

    # Optional: Print some details about the loaded ontology
    # print(f"Test Ontology '{test_onto.base_iri}' loaded with:")
    # print(f"- Classes ({len(list(test_onto.classes()))}): {[c.name for c in list(test_onto.classes())[:5]]}...") # Print first 5
    # print(f"- Individuals ({len(list(test_onto.individuals()))}): {[i.name for i in list(test_onto.individuals())[:5]]}...")
    # print(f"- Object Properties ({len(list(test_onto.object_properties()))}): {[p.name for p in list(test_onto.object_properties())[:5]]}...")
    # print(f"- Data Properties ({len(list(test_onto.data_properties()))}): {[p.name for p in list(test_onto.data_properties())[:5]]}...")

except Exception as e:
    print(f"Error creating OntologySettings or loading ontology 'backup-2.owl': {e}")
    print(f"Please ensure 'backup-2.owl' exists in '{ontology_dir}' and settings are correct.")
    test_onto = None # Set to None if loading failed

# # --- Old way (commented out) ---
# # print("Creating a simple in-memory ontology...")
# # # It's good practice to clear existing ontologies from the default world if running cells repeatedly
# # for o in list(default_world.ontologies.values()):
# #     if callable(getattr(o, '__destroy__', None)):
# #         try:
# #             destroy_entity(o)
# #         except Exception as destroy_err:
# #             print(f"Error destroying {o.base_iri}: {destroy_err}")
# #     else:
# #         print(f"Skipping destroy for non-callable __destroy__ or missing: {o.base_iri}")
# # test_onto_old = get_ontology("data/ontology/test.owl").load()
# # for o in list(default_world.ontologies.values()):
# #     print(f"has：{o.base_iri}")
# # print(f"Test Ontology '{test_onto_old.base_iri}' created with:\\n- Classes: {[c.name for c in test_onto_old.classes()]}\\n- Individuals: {[i.name for i in test_onto_old.individuals()]}\\n- Object Properties: {[p.name for p in test_onto_old.object_properties()]}\\n- Data Properties: {[p.name for p in test_onto_old.data_properties()]}\")

# Run async tasks if needed by owlready2 backend (usually not necessary for simple loading)
# try:
#     loop = asyncio.get_event_loop()
# except RuntimeError:
#     loop = asyncio.new_event_loop()
#     asyncio.set_event_loop(loop)
# loop.run_until_complete(asyncio.sleep(0)) # Run pending async tasks

Modules imported successfully.
LLM: gpt-4.1-nano. 
Ansewr LLM: gpt-4.1-nano
LLM Provider initialized successfully.
Setting up test ontology using a new OntologySettings instance...
Successfully loaded ontology: http://www.test.org/chem_ontologies/chem_ontology.owl#
From file: final.owl in D:\\CursorProj\\Chem-Ontology-Constructor\\data\ontology


## Part 1: Execution via QueryManager

In [4]:
qas = {
  "query_format_QA": [
    {
      "difficulty_level": 1,
      "query": "Quinine definition?",
      "answer": "Quinine is an alkaloid derived from cinchona tree bark, used historically for malaria and now as a bittering agent, but associated with adverse health effects like thrombocytopenia."
    },
    {
      "difficulty_level": 1,
      "query": "Indicator Displacement Assay (IDA) definition?",
      "answer": "An IDA is a sensing strategy based on host-guest recognition, often utilizing non-covalent interactions where an analyte displaces an indicator from a receptor, causing a detectable signal change (e.g., fluorescence or absorbance)."
    },
    {
      "difficulty_level": 2,
      "query": "What analyzes Quinine?",
      "answer": "Quinine is analyzed by techniques including Electrochemical Technique, High Performance Liquid Chromatography (HPLC), Colorimetric Assay, Fluorescence Assay, and High Resolution Mass Spectrometry (HRMS)."
    },
    {
      "difficulty_level": 2,
      "query": "List components of Indicator Displacement Assay (IDA).",
      "answer": "Components include beta-Cyclodextrin (beta-CD), Poly(N-acetylaniline), and Graphene."
    },
    {
      "difficulty_level": 3,
      "query": "Find electrochemical sensors based on Indicator Displacement Assay (IDA) used for Quinine detection.",
      "answer": "The Electrochemical Sensor constructed via IDA (Indicator Displacement Assay) has the detection target Quinine."
    },
    {
      "difficulty_level": 3,
      "query": "Which hosts use host-guest recognition and are integrated with electrochemical assays?",
      "answer": "Host-Guest Recognition is integrated with an Electrochemical Sensor. Beta-Cyclodextrin (β-CD) is a host involved in Host-Guest Recognition."
    },
    {
      "difficulty_level": 4,
      "query": "Compare the stability and reproducibility properties of the Electrochemical Sensor.",
      "answer": "The Electrochemical Sensor has high stability (acceptable peak current decrease within 21 days, 86.47% retained) and good reproducibility (RSD of 2.06% across seven electrodes)."
    },
    {
      "difficulty_level": 4,
      "query": "What techniques verify the Electrochemical Sensor based on IDA?",
      "answer": "The Electrochemical Sensor is verified by Differential Pulse Voltammetry (DPV), Cyclic Voltammetry (CV), Proton Nuclear Magnetic Resonance (H_NMR), Scanning Electron Microscopy (SEM), Electrochemical Impedance Analysis (EIS), and Fourier Transform Infrared (FTIR)."
    },
    {
      "difficulty_level": 5,
      "query": "Explain the sensing mechanism involving Methylene Blue (MB) displacement by Quinine from beta-Cyclodextrin (beta-CD).",
      "answer": "Methylene Blue (MB) forms an inclusion complex with beta-Cyclodextrin (beta-CD). Quinine, having a higher binding affinity, competitively displaces MB from the beta-CD cavity. This displacement causes a change in the electrochemical signal (e.g., DPV peak current) which is used for Quinine detection. Poly(N-acetylaniline) inhibits non-specific adsorption of MB, contributing to the assay's selectivity."
    },
    {
      "difficulty_level": 5,
      "query": "Summarize the role of Graphene in the described electrochemical sensor.",
      "answer": "Graphene (specifically reduced graphene oxide, rGO) is used as an electrode material in the sensor. It enhances electron transfer properties due to its superior electrical conductivity and large specific surface area, improving the sensor's performance. It serves as a platform onto which other components like Poly(N-acetylaniline) and beta-Cyclodextrin are deposited."
    }
  ],
  "question_format_QA": [
    {
      "difficulty_level": 1,
      "question": "Tell me about Quinine.",
      "answer": "Quinine is an alkaloid derived from cinchona tree bark, used historically for malaria and now as a bittering agent, but associated with adverse health effects like thrombocytopenia."
    },
    {
      "difficulty_level": 1,
      "question": "What is an Indicator Displacement Assay?",
      "answer": "An IDA is a sensing strategy based on host-guest recognition, often utilizing non-covalent interactions where an analyte displaces an indicator from a receptor, causing a detectable signal change (e.g., fluorescence or absorbance)."
    },
    {
      "difficulty_level": 2,
      "question": "What techniques are used to analyze Quinine?",
      "answer": "Quinine is analyzed by techniques including Electrochemical Technique, High Performance Liquid Chromatography (HPLC), Colorimetric Assay, Fluorescence Assay, and High Resolution Mass Spectrometry (HRMS)."
    },
    {
      "difficulty_level": 2,
      "question": "What are the components of an Indicator Displacement Assay?",
      "answer": "Components include beta-Cyclodextrin (beta-CD), Poly(N-acetylaniline), and Graphene."
    },
    {
      "difficulty_level": 3,
      "question": "Are there electrochemical sensors using Indicator Displacement Assay (IDA) to detect Quinine?",
      "answer": "The Electrochemical Sensor constructed via IDA (Indicator Displacement Assay) has the detection target Quinine."
    },
    {
      "difficulty_level": 3,
      "question": "Which host molecules use host-guest recognition in electrochemical assays?",
      "answer": "Host-Guest Recognition is integrated with an Electrochemical Sensor. Beta-Cyclodextrin (β-CD) is a host involved in Host-Guest Recognition."
    },
    {
      "difficulty_level": 4,
      "question": "How stable and reproducible is the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine?",
      "answer": "The Electrochemical Sensor has high stability (acceptable peak current decrease within 21 days, 86.47% retained) and good reproducibility (RSD of 2.06% across seven electrodes)."
    },
    {
      "difficulty_level": 4,
      "question": "How is the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine verified?",
      "answer": "The Electrochemical Sensor is verified by Differential Pulse Voltammetry (DPV), Cyclic Voltammetry (CV), Proton Nuclear Magnetic Resonance (H_NMR), Scanning Electron Microscopy (SEM), Electrochemical Impedance Analysis (EIS), and Fourier Transform Infrared (FTIR)."
    },
    {
      "difficulty_level": 5,
      "question": "In the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine, how does Quinine displace Methylene Blue from beta-Cyclodextrin?",
      "answer": "Methylene Blue (MB) forms an inclusion complex with beta-Cyclodextrin (beta-CD). Quinine, having a higher binding affinity, competitively displaces MB from the beta-CD cavity. This displacement causes a change in the electrochemical signal (e.g., DPV peak current) which is used for Quinine detection. Poly(N-acetylaniline) inhibits non-specific adsorption of MB, contributing to the assay's selectivity."
    },
    {
      "difficulty_level": 5,
      "question": "What does Graphene do in the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine?",
      "answer": "Graphene (specifically reduced graphene oxide, rGO) is used as an electrode material in the sensor. It enhances electron transfer properties due to its superior electrical conductivity and large specific surface area, improving the sensor's performance. It serves as a platform onto which other components like Poly(N-acetylaniline) and beta-Cyclodextrin are deposited."
    }
  ]
}

In [5]:
new_qas = [
  {
    "question": "What is a cryptand?",
    "query": "Retrieve the definition or description associated with the chemical class 'Cryptand'. Look for annotations like rdfs:comment, skos:definition, or specific meta-properties on the 'Cryptand' class definition.",
    "answer": "A cryptand is a type of macrocyclic ligand, specifically a supramolecular host, featuring a three-dimensional cavity that allows it to form stable complexes by encapsulating guest ions or molecules.",
    "difficulty_level": 1
  },
  {
    "question": "Is pyrrole considered an aromatic system?",
    "query": "Verify if an `rdfs:subClassOf` axiom exists where 'Pyrrole' is declared as a subclass of the 'AromaticSystem' class within the ontology.",
    "answer": "Yes, pyrrole is classified as an aromatic system.",
    "difficulty_level": 1
  },
  {
    "question": "What types of molecules typically act as guests for supramolecular hosts?",
    "query": "Identify the `rdfs:range` axiom defined for the object property 'binds_guest'. This indicates the general class of entities that can be bound by entities that are in the domain of 'binds_guest' (typically 'SupramolecularHost').",
    "answer": "Typically, molecules classified as 'GuestMolecule' (which can include anions, cations, or neutral molecules) act as guests for supramolecular hosts.",
    "difficulty_level": 2
  },
  {
    "question": "What are some specific types of macrocycles?",
    "query": "List all `owl:Class` entities for which an `rdfs:subClassOf` axiom exists, explicitly stating 'Macrocycle' as the direct parent class.",
    "answer": "Specific types of macrocycles include cryptands, calixarenes, pillararenes, and cyclodextrins.",
    "difficulty_level": 2
  },
  {
    "question": "When a calixarene containing pyrrole groups binds an anion, what specific non-covalent interactions are typically involved?",
    "query": "Analyze OWL axioms or property restrictions associated with the 'Calixarene' class, particularly when it 'has_functional_group' 'Pyrrole' and 'binds_guest' an 'Anion'. Identify the specific subclasses of 'InteractionType' (e.g., 'HydrogenBond', 'AnionPiInteraction') that are linked via 'interacts_via' in these defined scenarios.",
    "answer": "When a calixarene containing pyrrole groups binds an anion, non-covalent interactions such as HydrogenBond (often from the pyrrole NH) and AnionPiInteraction (between the anion and the pyrrole ring) are typically involved.",
    "difficulty_level": 3
  },
  {
    "question": "Are there known supramolecular hosts that are derivatives of calixarenes and also feature pyrrole functional groups?",
    "query": "Search for 'SupramolecularHost' classes or typical instances that are described as being 'is_derivative_of' the 'Calixarene' class AND are also associated with the 'Pyrrole' class via the 'has_functional_group' property.",
    "answer": "Yes, supramolecular hosts that are derivatives of calixarenes and feature pyrrole functional groups are known, a prominent example being calix[n]pyrroles.",
    "difficulty_level": 3
  },
  {
    "question": "What are common applications for cage molecules compared to macrocycles, and do they have any overlapping uses?",
    "query": "1. Identify 'Application' subclasses (e.g., 'Sensing', 'Catalysis', 'DrugDelivery', 'Separation') linked to the 'CageMolecule' class via the 'has_application' property (possibly through class axioms or restrictions). 2. Perform the same analysis for the 'Macrocycle' class. 3. Compare these sets of 'Application' subclasses to identify distinct and common areas.",
    "answer": "Cage molecules are commonly utilized in applications like catalysis and chemical separation. Macrocycles frequently find use in areas such as molecular sensing and drug delivery. An example of an overlapping application could be sensing, where both classes of compounds might be employed.",
    "difficulty_level": 4
  },
  {
    "question": "What types of supramolecular hosts are known to bind anions primarily through anion-π interactions?",
    "query": "Find 'SupramolecularHost' subclasses or characteristic descriptions where OWL axioms (e.g., `owl:equivalentClass` or `rdfs:subClassOf` involving restrictions on 'binds_guest' with 'Anion', and 'interacts_via' with 'AnionPiInteraction') define this specific binding mode.",
    "answer": "Supramolecular hosts that possess electron-deficient aromatic systems, such as certain calixarene derivatives (like calix[n]pyrroles) or other specifically designed π-acidic macrocycles, are known to bind anions primarily through AnionPiInteraction.",
    "difficulty_level": 4
  },
  {
    "question": "Why are supramolecular hosts containing pyrrole units generally effective at binding anions?",
    "query": "Analyze the defined properties of the 'Pyrrole' class (e.g., its classification as an 'AromaticSystem', its NH group) and its typical involvement in 'InteractionType' classes like 'HydrogenBond' and 'AnionPiInteraction' when 'Pyrrole' is a 'FunctionalGroup' of a 'SupramolecularHost' binding an 'Anion'. Synthesize an explanation for this effectiveness based on these defined chemical characteristics and interaction capabilities.",
    "answer": "Supramolecular hosts with pyrrole units are effective for anion binding due to two main features of pyrrole: its NH group can act as a hydrogen bond donor, forming HydrogenBond interactions with anions, and its electron-deficient aromatic π-system can engage in favorable AnionPiInteractions with anions. This combination enhances binding affinity and selectivity.",
    "difficulty_level": 5
  },
  {
    "question": "What is the role of non-covalent interactions like hydrogen bonds and anion-π interactions in the formation of supramolecular host-guest complexes?",
    "query": "Examine how 'InteractionType' subclasses (e.g., 'HydrogenBond', 'AnionPiInteraction') are axiomatically linked via the 'interacts_via' property in scenarios where 'SupramolecularHost' classes bind 'GuestMolecule' classes (often described by 'binds_guest' relationships). Summarize their function based on these defined ontological roles.",
    "answer": "Non-covalent interactions, such as hydrogen bonds and anion-π interactions, are crucial for molecular recognition between hosts and guests. They act as the primary driving forces that determine the stability, selectivity, and overall formation of supramolecular host-guest complexes.",
    "difficulty_level": 5
  }
]
 

In [15]:
IDA_qas = [
    {
    "question": "Regarding the system described in the paragraph below, please first attempt to verify if it is a documented Indicator Displacement Assay (IDA) system. If it cannot be directly confirmed, then provide an assessment of its potential to function as an IDA system based on the given details.\nThis chemical system utilizes WP6, a water-soluble carboxylato pillar[6]arene, as the synthetic receptor, belonging to the pillararene macrocycle family. Safranine T (ST), a phenazine-based dye, serves as the fluorophore indicator. Caffeine, a xanthine alkaloid, is the selected analyte competitor. This setup suggests a host-guest system designed for molecular recognition of Caffeine by the WP6 receptor, with binding events monitored through changes in the fluorescence of Safranine T."
  },
  {
    "question": "Regarding the system described in the paragraph below, please first attempt to verify if it is a documented Indicator Displacement Assay (IDA) system. If it cannot be directly confirmed, then provide an assessment of its potential to function as an IDA system based on the given details.\nThe image displays a supramolecular assembly featuring β-cyclodextrin (β-CD), a cyclic oligosaccharide, as the synthetic receptor. Methylene Blue (MB), a well-known phenothiazine dye, acts as the fluorophore indicator. Quinine, a quinoline-derived alkaloid, is presented as the selected analyte competitor. This system is characteristic of host-guest chemistry aimed at studying the interaction between β-cyclodextrin and Quinine, where Methylene Blue signals the binding or displacement."
  },
  {
    "question": "Regarding the system described in the paragraph below, please first attempt to verify if it is a documented Indicator Displacement Assay (IDA) system. If it cannot be directly confirmed, then provide an assessment of its potential to function as an IDA system based on the given details.\nThis chemical architecture involves a synthetic receptor TCC, which is a resorcinarene-based cavitand functionalized with four imidazole-acetic acid sodium salt arms. The fluorophore indicator is DSMI, a styryl-pyridinium dye. The selected analyte competitor, labeled 'Choline(Cho)' but structurally depicted as Acetylcholine, is a quaternary ammonium ester. This system is likely designed for the molecular recognition of Acetylcholine by the TCC receptor, with interactions reported by the DSMI fluorescent probe."
  },
  {
    "question": "Regarding the system described in the paragraph below, please first attempt to verify if it is a documented Indicator Displacement Assay (IDA) system. If it cannot be directly confirmed, then provide an assessment of its potential to function as an IDA system based on the given details.\nThe synthetic receptor in this system is TCC, specified as a macrocyclic cavitand with R1 groups being CH2CO2Na (carboxymethyl sodium salt) and R2 groups being Et (ethyl) on its benzimidazole units. DTMI, a cyanine-type styryl benzothiazole dye (with iodide as counterion), functions as the fluorophore indicator. Butyrylcholine (Bucho), an ester of choline, serves as the selected analyte competitor. This assembly is designed for investigating the host-guest interactions between the specific TCC receptor and Butyrylcholine, using the DTMI dye to signal these events."
  },
  {
    "question": "Regarding the system described in the paragraph below, please first attempt to verify if it is a documented Indicator Displacement Assay (IDA) system. If it cannot be directly confirmed, then provide an assessment of its potential to function as an IDA system based on the given details.\nThis chemical system features Cucurbit[8]uril, abbreviated as CB[8], acting as a synthetic receptor from the cucurbituril family. The system employs Proflavine, or PF, an acridine dye, as the fluorophore indicator. Gefitinib, labeled GEF, an anilinoquinazoline compound and EGFR inhibitor, is the selected analyte competitor. This assembly is characteristic of a supramolecular host-guest system designed for molecular recognition studies, where CB[8] interacts with Gefitinib, and this interaction is potentially signaled by changes in the fluorescence of Proflavine, possibly through competitive binding or allosteric effects."
  },
  {
    "question": "Regarding the system described in the paragraph below, please first attempt to verify if it is a documented Indicator Displacement Assay (IDA) system. If it cannot be directly confirmed, then provide an assessment of its potential to function as an IDA system based on the given details.\nThis image depicts a supramolecular assembly involving Cucurbit[8]uril (CB[8]) as a synthetic receptor, which is a macrocyclic host compound. Methylene Blue (MB), a phenothiazine dye, functions as the fluorophore indicator. Amantadine (AMA), an antiviral drug with a tricyclic adamantane structure, serves as the selected analyte competitor. The system suggests a host-guest chemistry approach for the potential detection or binding study of Amantadine with Cucurbit[8]uril, utilizing the signaling properties of Methylene Blue."
  },
  {
    "question": "Regarding the system described in the paragraph below, please first attempt to verify if it is a documented Indicator Displacement Assay (IDA) system. If it cannot be directly confirmed, then provide an assessment of its potential to function as an IDA system based on the given details.\nThe chemical architecture shown consists of p-sulfonatocalix[4]arene, or SCX4, a macrocyclic compound from the calixarene family, acting as the synthetic receptor. An Acridine dye, shown in its protonated form (AcH$^+$), is utilized as the fluorophore indicator. Acetylcholine (AcCh), a neurotransmitter, is presented as the selected analyte competitor. This setup describes a potential sensing ensemble where the p-sulfonatocalix[4]arene receptor binds Acetylcholine, and this molecular recognition event is reported by changes in the fluorescence characteristics of the Acridine dye."
  },
]

In [16]:
fake_ida_qas = [
  {
    "question": "Regarding the system described in the paragraph below, please first attempt to verify if it is a documented Indicator Displacement Assay (IDA) system. If it cannot be directly confirmed, then provide an assessment of its potential to function as an IDA system based on the given details.\nThis chemical system features Cucurbit[8]uril (CB[8]) as the synthetic receptor, a member of the cucurbituril macrocycle family. The fluorophore indicator is Safranine T (ST), a phenazine-based dye. Acetylcholine (AcCh), a neurotransmitter, is presented as the selected analyte competitor. This assembly suggests a host-guest system designed for the molecular recognition of Acetylcholine by the CB[8] receptor, with binding events monitored through changes in the fluorescence of Safranine T.\nYou should carefully analyze the binding affinities of both the indicator and the competitor to the host, and critically assess whether the given combination is truly capable of displacement. Please maintain sufficient neutrality and caution in your evaluation. You may consult reported binding affinities or documented cases involving the relevant host and guest molecules to support your analysis, or make reasoned predictions based on molecular structure, querying any necessary information for your inference chain."
  },
  {
    "question": "Regarding the system described in the paragraph below, please first attempt to verify if it is a documented Indicator Displacement Assay (IDA) system. If it cannot be directly confirmed, then provide an assessment of its potential to function as an IDA system based on the given details.\nThis system utilizes a water-soluble carboxylato pillar[6]arene (WP6) as the synthetic receptor. The fluorophore indicator is DSMI, a styryl-pyridinium dye. The selected analyte competitor is Amantadine (AMA), an antiviral drug with an adamantane structure. The setup describes a potential sensing ensemble where the WP6 receptor binds Amantadine, and this molecular recognition event is reported by the DSMI fluorescent probe.\nYou should carefully analyze the binding affinities of both the indicator and the competitor to the host, and critically assess whether the given combination is truly capable of displacement. Please maintain sufficient neutrality and caution in your evaluation. You may consult reported binding affinities or documented cases involving the relevant host and guest molecules to support your analysis, or make reasoned predictions based on molecular structure, querying any necessary information for your inference chain."
  },
  {
    "question": "Regarding the system described in the paragraph below, please first attempt to verify if it is a documented Indicator Displacement Assay (IDA) system. If it cannot be directly confirmed, then provide an assessment of its potential to function as an IDA system based on the given details.\nThis chemical architecture employs a water-soluble carboxylato pillar[6]arene, WP6, as the synthetic receptor. Safranine T (ST), a phenazine-based dye, acts as the fluorophore indicator. Gefitinib (GEF), an anilinoquinazoline compound, serves as the selected analyte competitor. The system suggests a host-guest chemistry approach for the potential detection or binding study of Gefitinib with the WP6 host, using the signaling properties of Safranine T.\nYou should carefully analyze the binding affinities of both the indicator and the competitor to the host, and critically assess whether the given combination is truly capable of displacement. Please maintain sufficient neutrality and caution in your evaluation. You may consult reported binding affinities or documented cases involving the relevant host and guest molecules to support your analysis, or make reasoned predictions based on molecular structure, querying any necessary information for your inference chain."
  },
  {
    "question": "Regarding the system described in the paragraph below, please first attempt to verify if it is a documented Indicator Displacement Assay (IDA) system. If it cannot be directly confirmed, then provide an assessment of its potential to function as an IDA system based on the given details.\nThe image displays a supramolecular assembly featuring p-sulfonatocalix[4]arene (SCX4), a macrocyclic host, as the synthetic receptor. An Acridine dye (AcH$^+$) is utilized as the fluorophore indicator. Caffeine, a xanthine alkaloid, is presented as the selected analyte competitor. This system is characteristic of host-guest chemistry aimed at studying the interaction between SCX4 and Caffeine, where the Acridine dye signals the binding or displacement.\nYou should carefully analyze the binding affinities of both the indicator and the competitor to the host, and critically assess whether the given combination is truly capable of displacement. Please maintain sufficient neutrality and caution in your evaluation. You may consult reported binding affinities or documented cases involving the relevant host and guest molecules to support your analysis, or make reasoned predictions based on molecular structure, querying any necessary information for your inference chain."
  },
  {
    "question": "Regarding the system described in the paragraph below, please first attempt to verify if it is a documented Indicator Displacement Assay (IDA) system. If it cannot be directly confirmed, then provide an assessment of its potential to function as an IDA system based on the given details.\nThis chemical system features Cucurbit[8]uril (CB[8]) acting as the synthetic receptor. The fluorophore indicator for this assembly is Gefitinib (GEF), an EGFR inhibitor. The selected analyte competitor is Proflavine (PF), an acridine dye. This setup describes a potential supramolecular system for the recognition of Gefitinib by the CB[8] host, where binding events would be reported through the fluorescent response of Proflavine.\nYou should carefully analyze the binding affinities of both the indicator and the competitor to the host, and critically assess whether the given combination is truly capable of displacement. Please maintain sufficient neutrality and caution in your evaluation. You may consult reported binding affinities or documented cases involving the relevant host and guest molecules to support your analysis, or make reasoned predictions based on molecular structure, querying any necessary information for your inference chain."
  },
  {
    "question": "Regarding the system described in the paragraph below, please first attempt to verify if it is a documented Indicator Displacement Assay (IDA) system. If it cannot be directly confirmed, then provide an assessment of its potential to function as an IDA system based on the given details.\nThis system is based on a water-soluble carboxylato pillar[6]arene (WP6) acting as the synthetic receptor. The fluorophore indicator is Caffeine, a well-known xanthine alkaloid. The selected analyte competitor is Safranine T (ST), a phenazine dye. This assembly suggests a potential host-guest sensor where the WP6 macrocycle binds to Caffeine, and this interaction is signaled by changes in the fluorescence of the Safranine T indicator.\nYou should carefully analyze the binding affinities of both the indicator and the competitor to the host, and critically assess whether the given combination is truly capable of displacement. Please maintain sufficient neutrality and caution in your evaluation. You may consult reported binding affinities or documented cases involving the relevant host and guest molecules to support your analysis, or make reasoned predictions based on molecular structure, querying any necessary information for your inference chain."
  }
]

In [8]:
expert_qas = [
    {
        "question": "What types of molecules can be detected by IDA?"
    },
    {
        "question": "What types of host-guest interaction can be used to design IDA-based electrochemical sensors?"
    },
    {
        "question": "What types of host-guest interaction can be used to design IDA using optical detection?"
    },
    {
        "question": "What types of host-guest interaction can induce changes in optical signals?"
    },
    {
        "question": "What types of host-guest interaction can induce changes in electrochemical signals?"
    },
    {
        "question": "What types of supramolecular hosts are known to bind to their guests primarily through cation-π interactions?"
    },
    {
        "question": "What are the main factors controlling host-guest interaction?"
    }
]


In [5]:
total_qas = [
    {
      "difficulty_level": 1,
      "question": "Tell me about Quinine.",
      "answer": "Quinine is an alkaloid derived from cinchona tree bark, used historically for malaria and now as a bittering agent, but associated with adverse health effects like thrombocytopenia."
    },
    {
      "difficulty_level": 1,
      "question": "What is an Indicator Displacement Assay?",
      "answer": "An IDA is a sensing strategy based on host-guest recognition, often utilizing non-covalent interactions where an analyte displaces an indicator from a receptor, causing a detectable signal change (e.g., fluorescence or absorbance)."
    },
    {
      "difficulty_level": 2,
      "question": "What techniques are used to analyze Quinine?",
      "answer": "Quinine is analyzed by techniques including Electrochemical Technique, High Performance Liquid Chromatography (HPLC), Colorimetric Assay, Fluorescence Assay, and High Resolution Mass Spectrometry (HRMS)."
    },
    {
      "difficulty_level": 2,
      "question": "What are the components of an Indicator Displacement Assay?",
      "answer": "Components include beta-Cyclodextrin (beta-CD), Poly(N-acetylaniline), and Graphene."
    },
    {
      "difficulty_level": 3,
      "question": "Are there electrochemical sensors using Indicator Displacement Assay (IDA) to detect Quinine?",
      "answer": "The Electrochemical Sensor constructed via IDA (Indicator Displacement Assay) has the detection target Quinine."
    },
    {
      "difficulty_level": 3,
      "question": "Which host molecules use host-guest recognition in electrochemical assays?",
      "answer": "Host-Guest Recognition is integrated with an Electrochemical Sensor. Beta-Cyclodextrin (β-CD) is a host involved in Host-Guest Recognition."
    },
    {
      "difficulty_level": 4,
      "question": "How stable and reproducible is the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine?",
      "answer": "The Electrochemical Sensor has high stability (acceptable peak current decrease within 21 days, 86.47% retained) and good reproducibility (RSD of 2.06% across seven electrodes)."
    },
    {
      "difficulty_level": 4,
      "question": "How is the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine verified?",
      "answer": "The Electrochemical Sensor is verified by Differential Pulse Voltammetry (DPV), Cyclic Voltammetry (CV), Proton Nuclear Magnetic Resonance (H_NMR), Scanning Electron Microscopy (SEM), Electrochemical Impedance Analysis (EIS), and Fourier Transform Infrared (FTIR)."
    },
    {
      "difficulty_level": 5,
      "question": "In the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine, how does Quinine displace Methylene Blue from beta-Cyclodextrin?",
      "answer": "Methylene Blue (MB) forms an inclusion complex with beta-Cyclodextrin (beta-CD). Quinine, having a higher binding affinity, competitively displaces MB from the beta-CD cavity. This displacement causes a change in the electrochemical signal (e.g., DPV peak current) which is used for Quinine detection. Poly(N-acetylaniline) inhibits non-specific adsorption of MB, contributing to the assay's selectivity."
    },
    {
      "difficulty_level": 5,
      "question": "What does Graphene do in the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine?",
      "answer": "Graphene (specifically reduced graphene oxide, rGO) is used as an electrode material in the sensor. It enhances electron transfer properties due to its superior electrical conductivity and large specific surface area, improving the sensor's performance. It serves as a platform onto which other components like Poly(N-acetylaniline) and beta-Cyclodextrin are deposited."
    },
    {
    "question": "What is a cryptand?",
    "query": "Retrieve the definition or description associated with the chemical class 'Cryptand'. Look for annotations like rdfs:comment, skos:definition, or specific meta-properties on the 'Cryptand' class definition.",
    "answer": "A cryptand is a type of macrocyclic ligand, specifically a supramolecular host, featuring a three-dimensional cavity that allows it to form stable complexes by encapsulating guest ions or molecules.",
    "difficulty_level": 1
  },
  {
    "question": "Is pyrrole considered an aromatic system?",
    "query": "Verify if an `rdfs:subClassOf` axiom exists where 'Pyrrole' is declared as a subclass of the 'AromaticSystem' class within the ontology.",
    "answer": "Yes, pyrrole is classified as an aromatic system.",
    "difficulty_level": 1
  },
  {
    "question": "What types of molecules typically act as guests for supramolecular hosts?",
    "query": "Identify the `rdfs:range` axiom defined for the object property 'binds_guest'. This indicates the general class of entities that can be bound by entities that are in the domain of 'binds_guest' (typically 'SupramolecularHost').",
    "answer": "Typically, molecules classified as 'GuestMolecule' (which can include anions, cations, or neutral molecules) act as guests for supramolecular hosts.",
    "difficulty_level": 2
  },
  {
    "question": "What are some specific types of macrocycles?",
    "query": "List all `owl:Class` entities for which an `rdfs:subClassOf` axiom exists, explicitly stating 'Macrocycle' as the direct parent class.",
    "answer": "Specific types of macrocycles include cryptands, calixarenes, pillararenes, and cyclodextrins.",
    "difficulty_level": 2
  },
  {
    "question": "When a calixarene containing pyrrole groups binds an anion, what specific non-covalent interactions are typically involved?",
    "query": "Analyze OWL axioms or property restrictions associated with the 'Calixarene' class, particularly when it 'has_functional_group' 'Pyrrole' and 'binds_guest' an 'Anion'. Identify the specific subclasses of 'InteractionType' (e.g., 'HydrogenBond', 'AnionPiInteraction') that are linked via 'interacts_via' in these defined scenarios.",
    "answer": "When a calixarene containing pyrrole groups binds an anion, non-covalent interactions such as HydrogenBond (often from the pyrrole NH) and AnionPiInteraction (between the anion and the pyrrole ring) are typically involved.",
    "difficulty_level": 3
  },
  {
    "question": "Are there known supramolecular hosts that are derivatives of calixarenes and also feature pyrrole functional groups?",
    "query": "Search for 'SupramolecularHost' classes or typical instances that are described as being 'is_derivative_of' the 'Calixarene' class AND are also associated with the 'Pyrrole' class via the 'has_functional_group' property.",
    "answer": "Yes, supramolecular hosts that are derivatives of calixarenes and feature pyrrole functional groups are known, a prominent example being calix[n]pyrroles.",
    "difficulty_level": 3
  },
  {
    "question": "What are common applications for cage molecules compared to macrocycles, and do they have any overlapping uses?",
    "query": "1. Identify 'Application' subclasses (e.g., 'Sensing', 'Catalysis', 'DrugDelivery', 'Separation') linked to the 'CageMolecule' class via the 'has_application' property (possibly through class axioms or restrictions). 2. Perform the same analysis for the 'Macrocycle' class. 3. Compare these sets of 'Application' subclasses to identify distinct and common areas.",
    "answer": "Cage molecules are commonly utilized in applications like catalysis and chemical separation. Macrocycles frequently find use in areas such as molecular sensing and drug delivery. An example of an overlapping application could be sensing, where both classes of compounds might be employed.",
    "difficulty_level": 4
  },
  {
    "question": "What types of supramolecular hosts are known to bind anions primarily through anion-π interactions?",
    "query": "Find 'SupramolecularHost' subclasses or characteristic descriptions where OWL axioms (e.g., `owl:equivalentClass` or `rdfs:subClassOf` involving restrictions on 'binds_guest' with 'Anion', and 'interacts_via' with 'AnionPiInteraction') define this specific binding mode.",
    "answer": "Supramolecular hosts that possess electron-deficient aromatic systems, such as certain calixarene derivatives (like calix[n]pyrroles) or other specifically designed π-acidic macrocycles, are known to bind anions primarily through AnionPiInteraction.",
    "difficulty_level": 4
  },
  {
    "question": "Why are supramolecular hosts containing pyrrole units generally effective at binding anions?",
    "query": "Analyze the defined properties of the 'Pyrrole' class (e.g., its classification as an 'AromaticSystem', its NH group) and its typical involvement in 'InteractionType' classes like 'HydrogenBond' and 'AnionPiInteraction' when 'Pyrrole' is a 'FunctionalGroup' of a 'SupramolecularHost' binding an 'Anion'. Synthesize an explanation for this effectiveness based on these defined chemical characteristics and interaction capabilities.",
    "answer": "Supramolecular hosts with pyrrole units are effective for anion binding due to two main features of pyrrole: its NH group can act as a hydrogen bond donor, forming HydrogenBond interactions with anions, and its electron-deficient aromatic π-system can engage in favorable AnionPiInteractions with anions. This combination enhances binding affinity and selectivity.",
    "difficulty_level": 5
  },
  {
    "question": "What is the role of non-covalent interactions like hydrogen bonds and anion-π interactions in the formation of supramolecular host-guest complexes?",
    "query": "Examine how 'InteractionType' subclasses (e.g., 'HydrogenBond', 'AnionPiInteraction') are axiomatically linked via the 'interacts_via' property in scenarios where 'SupramolecularHost' classes bind 'GuestMolecule' classes (often described by 'binds_guest' relationships). Summarize their function based on these defined ontological roles.",
    "answer": "Non-covalent interactions, such as hydrogen bonds and anion-π interactions, are crucial for molecular recognition between hosts and guests. They act as the primary driving forces that determine the stability, selectivity, and overall formation of supramolecular host-guest complexes.",
    "difficulty_level": 5
  },
  {
        "question": "What types of molecules can be detected by IDA?"
    },
    {
        "question": "What types of host-guest interaction can be used to design IDA-based electrochemical sensors?"
    },
    {
        "question": "What types of host-guest interaction can be used to design IDA using optical detection?"
    },
    {
        "question": "What types of host-guest interaction can induce changes in optical signals?"
    },
    {
        "question": "What types of host-guest interaction can induce changes in electrochemical signals?"
    },
    {
        "question": "What types of supramolecular hosts are known to bind to their guests primarily through cation-π interactions?"
    },
    {
        "question": "What are the main factors controlling host-guest interaction?"
    }
]

In [33]:
import json

def generate_questions_from_config(config_path='IDA.json'):
    """
    从JSON配置文件加载所有数据并生成多选题。
    """
    # 1. 读取并解析JSON配置文件
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        print(f"错误：配置文件 '{config_path}' 未找到。")
        return
    except json.JSONDecodeError as e:
        print(f"错误：配置文件 '{config_path}' 格式不正确，请检查JSON语法。\n{e}")
        return

    # 2. 从配置中提取数据
    # 直接使用模板字符串，不再需要join
    question_template = config.get('template')
    descriptions = config.get('descriptions', {})
    combinations = config.get('combinations', [])

    if not all([question_template, descriptions, combinations]):
        print("错误：配置文件中缺少 'template', 'descriptions', 或 'combinations' 的数据。")
        return

    # 3. 循环并生成题目
    generated_questions = []
    for i, combo in enumerate(combinations):
        question_number = i + 1
        
        # 从描述字典中获取全名和描述
        def get_desc(key):
            return descriptions.get(key, f"【描述未找到: {key}】")

        host_name = get_desc(combo['host'])
        indicator_name = get_desc(combo['indicator'])
        analyte_name = get_desc(combo['analyte'])

        unknown_role = combo['unknown'].capitalize() # e.g., "Analyte"
        
        # 动态确定已知和未知组件
        components = {
            "Host": f"{host_name} (Host A)",
            "Indicator": f"{indicator_name} (Indicator B)",
            "Analyte": f"{analyte_name} (Analyte C)"
        }
        
        # 移除未知的那个，剩下就是已知的
        known_components = [v for k, v in components.items() if k != unknown_role]
        
        # 生成选项列表
        option_texts = []
        for opt_key in combo['options']:
            option_texts.append(f"* {get_desc(opt_key)}")
        options_str = "\n".join(option_texts)

        # 填充模板
        full_question = question_template.format(
            question_number=f" #{question_number}",
            known_comp1_name_and_role=known_components[0],
            known_comp2_name_and_role=known_components[1],
            unknown_comp_role=f"{unknown_role} ({components[unknown_role][-2]})", # e.g., "Analyte (C)"
            options=options_str
        )
        generated_questions.append(full_question)

    # 4. 保存结果到文件
    output_filename = 'generated_ida_questions.md' # 保存为.md文件，格式更美观
    try:
        with open(output_filename, 'w', encoding='utf-8') as f:
            for i, question in enumerate(generated_questions):
                f.write(question)
                # 在每个问题后添加分页符，以便打印或分隔
                if i < len(generated_questions) - 1:
                    f.write("\n\n---\n\n")
        
        print(f"任务完成！成功生成 {len(generated_questions)} 道题目。")
        print(f"结果已保存到文件: '{output_filename}'")
    # 将 generated_questions 序列化为文件，便于后续加载和使用
        serialized_filename = 'generated_ida_questions.json'
        try:
            with open(serialized_filename, 'w', encoding='utf-8') as f_json:
                json.dump(generated_questions, f_json, ensure_ascii=False, indent=2)
            print(f"已将 generated_questions 序列化保存到: '{serialized_filename}'")
        except Exception as e:
            print(f"序列化 generated_questions 时出错: {e}")
    except IOError as e:
        print(f"错误：无法写入输出文件。原因: {e}")

    return generated_questions


# --- 主程序入口 ---
# if __name__ == '__main__':
#     generate_questions_from_config()
ida_choice = generate_questions_from_config()


任务完成！成功生成 21 道题目。
结果已保存到文件: 'generated_ida_questions.md'
已将 generated_questions 序列化保存到: 'generated_ida_questions.json'


In [34]:
queries = ida_choice
revised_queries = ida_choice
query_context = {
    "ontology": test_ontology_settings,
    "originating_team": "test_notebook",
    "originating_stage": "manual_test",
    "query_type": "analytical_reasoning" # 对所有查询使用信息检索类型
}

In [5]:
num = 10
# 定义新的十个查询
queries = [item["question"] for item in qas["question_format_QA"][:num]]

revised_queries = [item["question"] for item in qas["question_format_QA"][:num]]

# 为所有查询定义统一的上下文
query_context = {
    "ontology": test_ontology_settings,
    "originating_team": "test_notebook",
    "originating_stage": "manual_test",
    "query_type": "information_retrieval" # 对所有查询使用信息检索类型
}

print(len(queries),len(revised_queries))

NameError: name 'qas' is not defined

In [6]:
temp_qas = total_qas

num = len(temp_qas)

queries = [item["question"] for item in temp_qas[:num]]

revised_queries = [item["question"] for item in temp_qas[:num]]

query_context = {
    "ontology": test_ontology_settings,
    "originating_team": "test_notebook",
    "originating_stage": "manual_test",
    "query_type": "information_retrieval" # 对所有查询使用信息检索类型
}

print(len(queries),len(revised_queries))

27 27


In [19]:
queries = [queries[4]]

revised_queries = [revised_queries[4]]

print(len(queries),len(revised_queries))

1 1


In [14]:
from sparkai.llm.llm import ChatSparkLLM, ChunkPrintHandler
from sparkai.core.messages import ChatMessage

#星火认知大模型Spark Max的URL值，其他版本大模型URL值请前往文档（https://www.xfyun.cn/doc/spark/Web.html）查看
SPARKAI_URL = 'wss://spark-openapi-n.cn-huabei-1.xf-yun.com/v1.1/chat'
#星火认知大模型调用秘钥信息，请前往讯飞开放平台控制台（https://console.xfyun.cn/services/bm35）查看
SPARKAI_APP_ID = '82d8eb1c'
SPARKAI_API_SECRET = 'Yzc4MDNjOWRiZTI4OGMzYjgzYWRjZjZm'
SPARKAI_API_KEY = 'bfb2cf09d7b8789b154f7567a8bfed56'
#星火认知大模型Spark Max的domain值，其他版本大模型domain值请前往文档（https://www.xfyun.cn/doc/spark/Web.html）查看
SPARKAI_DOMAIN = 'chemistry'

spark = ChatSparkLLM(
    spark_api_url=SPARKAI_URL,
    spark_app_id=SPARKAI_APP_ID,
    spark_api_key=SPARKAI_API_KEY,
    spark_api_secret=SPARKAI_API_SECRET,
    spark_llm_domain=SPARKAI_DOMAIN,
    streaming=False,
)
handler = ChunkPrintHandler()

    
messages = [[ChatMessage(
    role="user",
    content=query
)] for query in queries]

messages

[[ChatMessage(content='Tell me about Quinine.', role='user')],
 [ChatMessage(content='What is an Indicator Displacement Assay?', role='user')],
 [ChatMessage(content='What techniques are used to analyze Quinine?', role='user')],
 [ChatMessage(content='What are the components of an Indicator Displacement Assay?', role='user')],
 [ChatMessage(content='Are there electrochemical sensors using Indicator Displacement Assay (IDA) to detect Quinine?', role='user')],
 [ChatMessage(content='Which host molecules use host-guest recognition in electrochemical assays?', role='user')],
 [ChatMessage(content='How stable and reproducible is the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine?', role='user')],
 [ChatMessage(content='How is the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine verified?', role='user')],
 [ChatMessage(content='In the electrochemical sensor that uses an Indicator Displacement Assay (IDA

In [15]:
msg = messages[0]
response = spark.generate([msg], callbacks=[handler])
print(response.generations[0][0].text)

nine is a natural from the bark of the been used for centuries as an anti isolated in 1848 by but its use is limited due to sideus (ringing in the ears), visual disturbances, and cardiac arrhythmias.


In [13]:
print(response.generations[0][0].text)

A cryptand is a containing several donor atoms form complexes with cations by encap Cryptands are often used agents in chemical reactions.


In [8]:

query_response_pairs = []

for i, msg in enumerate(messages):
    response = spark.generate([msg], callbacks=[handler])
    query_response_pairs.append({
        "query": queries[i],
        "response": response
    })
    print(f"query: {queries[i]}")
query_response_pairs

query: Tell me about Quinine.
query: What is an Indicator Displacement Assay?
query: What techniques are used to analyze Quinine?
query: What are the components of an Indicator Displacement Assay?
query: Are there electrochemical sensors using Indicator Displacement Assay (IDA) to detect Quinine?
query: Which host molecules use host-guest recognition in electrochemical assays?
query: How stable and reproducible is the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine?
query: How is the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine verified?
query: In the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine, how does Quinine displace Methylene Blue from beta-Cyclodextrin?
query: What does Graphene do in the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine?
query: What is a cryptand?
query: Is pyrrole considered an a

[{'query': 'Tell me about Quinine.',
  'response': LLMResult(generations=[[ChatGeneration(text='nine is an al bark of the South American was first isolated in 1848 and has been used to treatth century. Quinine is also found in tonic water, which was originally developed as a means of delivering \nThe therapeutic effects of quinine are due to its ability to kill the. Quinine works by interfering with the parasite’s ability to digest hemoglobin, which is necessary for its survival. This causes the parasite to die and be eliminated from the body.\n\nIn addition to itsquinine has also been studied for its potential benefits in treating other conditions such as arthritis, lupus, and nocturnal leg cramps. However, more research is needed before about these potential uses.', message=AIMessage(content='nine is an al bark of the South American was first isolated in 1848 and has been used to treatth century. Quinine is also found in tonic water, which was originally developed as a means of deliv

In [10]:
import yaml
from uuid import UUID




def simplify_llm_results(data):
    """
    将包含LLMResult对象的复杂列表，简化为只包含query和response文本的格式。

    参数:
    data (list): 包含 'query' 和 'response' 的字典列表。

    返回:
    list: 一个只包含 'query' 和 'response' (文本) 的字典列表。
    """
    simplified_list = []
    for item in data:
        # 提取问题
        query = item['query']
        # 从 response 对象中直接提取核心的 text 字段
        response_text = item['response'].generations[0][0].text

        # 构建新的、极简的字典
        simplified_item = {
            'query': query,
            'response': response_text
        }
        simplified_list.append(simplified_item)

    return simplified_list




simple_data = simplify_llm_results(query_response_pairs)

import yaml

# 定义一个自定义的字符串表现器
def literal_str_presenter(dumper, data):
    """
    这个函数告诉 PyYAML 如何处理字符串。
    如果字符串中包含换行符 (\n)，就使用字面量块样式 ('|') 来表示，
    这会保留所有的换行符，生成人类可读的多行文本块。
    否则，使用默认的标量表示法。
    """
    if '\n' in data:
        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
    return dumper.represent_scalar('tag:yaml.org,2002:str', data)

# 将我们的自定义函数注册给 PyYAML，让它对所有字符串类型生效
yaml.add_representer(str, literal_str_presenter)


with open("spark.yaml", "w", encoding="utf-8") as f:
    yaml.dump(simple_data, f, allow_unicode=True, default_flow_style=False)


In [12]:
simple_data

[{'query': 'Tell me about Quinine.',
  'response': 'nine is a natural from the bark of the Cinchona tree. It was first discovered in 18 has been used for centuries as an antiine is also known to have antipy anti-inflammatory properties.\n\nInquinine has been used as a bittering agent in tonic water and other beverages since the late 19th century. It is also used as a flavoring agent in some foods and beverages, such as bitter lemon soda and certain types of beer.\n\n taken in recommended doses, but it can cause side effects such as nause be taken by pregnant women or people with liver or kidney disease.'},
 {'query': 'What is an Indicator Displacement Assay?',
  'response': "Annine indicator displacement is a natural) is a type from the bark of the of biochemical assay used to determine the concentration of a is native to South America. It The assay involves the use of an indicator molecule that binds to a specific receptor or enzyme, causing a change in the indicator's properties, suc

In [7]:
queries = [queries[4], queries[6], queries[17], queries[20], queries[23], queries[25]]
revised_queries =  [revised_queries[4], revised_queries[6], revised_queries[17], revised_queries[20], revised_queries[23], revised_queries[25]]

print(len(queries),len(revised_queries))

6 6


In [7]:
queries = [queries[4], queries[6], queries[20]]
revised_queries =  [revised_queries[4],  revised_queries[6],  revised_queries[20]]

print(len(queries),len(revised_queries))

3 3


In [21]:
queries = [queries[2]]
revised_queries =  [revised_queries[2]]

print(len(queries),len(revised_queries))

1 1


In [8]:
# 定义回调函数处理Future结果并使用agent生成回答

def process_result_with_agent(result_dict, query_text):
    """
    Use an agent to process query results and generate a natural language response
    
    Args:
        result_dict: Query result dictionary
        query_text: Original query text
    
    Returns:
        str: Natural language response generated by the agent
    """
    # Extract query results information from the result
    if "formatted_results" in result_dict:
        query_results = result_dict["formatted_results"]
        print("-"*100)
        print(f"formatted_results found")
    elif "query_results" in result_dict:
        query_results = result_dict["query_results"]
        print("-"*100)
        print(f"query_results found")
    else:
        return f"I'm sorry, I couldn't find valid information about '{query_text}'."
    
    # Construct the prompt in English
    prompt = f"""
**Role:** You are an expert Chemistry Researcher.

**Task:** Provide a clear, accurate, and comprehensive answer to the user's question. You should leverage your own expert knowledge, **judiciously enhancing and verifying** it with **relevant and applicable information** selected from the 'Ontology query results'.

**User Question:**
{query_text}

**Information Source (Ontology Query Results for Enhancement & Verification):**
{query_results}

**Response Guidelines:**
* **Knowledge Integration:** Synthesize your broad chemical knowledge with **pertinent details** from the 'Information Source'.
* **Selective Use of Source:** Critically evaluate the 'Information Source'. **Incorporate specific details** (e.g., data points like pKa values, reaction types, precise definitions, specific examples) **only when they directly enhance the accuracy, specificity, or completeness of the answer to the user's question.** Do not feel obligated to include all provided information; prioritize relevance to the query.
* **Verification and Conflict:** Use the source to verify facts where appropriate. If there's a conflict between your general knowledge and the source, prioritize the source's specific data **if it is relevant to the question and appears accurate**, but use your expert judgment to omit information that seems erroneous or irrelevant to the user's query.
* **Synthesis:** Weave together your general knowledge and the selected source information into a coherent, well-structured response.
* **Clarity & Tone:** Use precise, professional chemical language. Aim for accessibility by briefly explaining potentially niche terms if needed.
* **Directness & Comprehensiveness:** Address all parts of the user's question directly and thoroughly, enriched by the appropriately selected information.
* **Source Attribution:** Do **not** mention "ontology" or refer to the 'Information Source' explicitly (e.g., avoid "according to the provided data..."). Present the integrated information as established chemical facts.
* **Knowledge Expansion:** Feel free to supplement the response with your own expert knowledge on topics that may be absent from the 'Ontology Query Results' but are relevant to providing a complete answer to the user's question.
* **Supramolecular Chemistry Style:** Tailor your response to appeal to supramolecular chemists by emphasizing host-guest interactions, non-covalent binding phenomena, molecular recognition principles, and structure-property relationships. Include relevant thermodynamic parameters, binding constants, and mechanistic insights where appropriate.

**Answer:**
"""
    
    # Generate response using LLM
    try:
        response = answer_llm.invoke(prompt)
        return response
    except Exception as e:
        return f"Error generating response: {e}"

def query_result_callback(future, query_idx, query_text):
    """Callback function to process Future results"""
    try:
        print(f"\nProcessing callback for query {query_idx}: '{query_text}'")
        
        # Get the future result
        result_dict = future.result(timeout=5)  # Small timeout to avoid indefinite waiting
        
        # Process the result using the agent
        answer = process_result_with_agent(result_dict, query_text)
        
        # Print the agent-generated answer
        print(f"\n--- Agent Answer for Query {query_idx} ---")
        print(answer)
        print("------------------------------")
        print(answer.content)
        
        return answer
    except Exception as e:
        print(f"Error processing result in callback: {e}")
        if future.exception():
            print(f"Future exception details: {future.exception()}")
        return None

# Test code using callback functions to process query results

if not llm:
    print("Skipping callback test due to LLM initialization failure.")
else:
    print("\n--- Starting Callback Function Test ---")
    
    # Re-create query manager if needed
    if 'query_manager' not in locals() or not hasattr(query_manager, 'is_running') or not query_manager.is_running():
        query_manager = QueryManager(max_workers=10)
        query_manager.update_all_caches(test_onto)
        query_manager.start()
    
    # 创建一个闭包函数来捕获回调返回的answer
    def create_answer_collector():
        # 在闭包中创建一个存储结果的字典
        answers = {}
        
        # 创建一个能捕获answer的回调函数
        def answer_collector(future, query_idx, query_text):
            try:
                result_dict = future.result(timeout=5)
                # 处理结果并获取answer
                answer = process_result_with_agent(result_dict, query_text)
                # 将answer存储在闭包的answers字典中
                answers[query_idx] = answer
                print(f"查询 {query_idx} 的答案已保存")
                return answer
            except Exception as e:
                print(f"处理结果时出错: {e}")
                return None
        
        # 返回回调函数和结果字典
        return answer_collector, answers

    # 创建回调函数和结果存储字典
    callback_collector, answers = create_answer_collector()

    # 提交查询并注册回调
    callback_futures = []
    for i, query_text in enumerate(queries):
        question = revised_queries[i]
        print(f"提交查询 {i+1}: '{query_text}'")
        future = query_manager.submit_query(query_text=query_text, query_context=query_context)
        
        # 使用functools.partial创建带参数的回调函数
        from functools import partial
        callback_func = partial(callback_collector, query_idx=i+1, query_text=question)
        
        # 注册回调函数
        future.add_done_callback(callback_func)
        callback_futures.append((i+1, query_text, future))
    
    # Wait for all Futures to complete (optional but ensures all callbacks execute)
    import concurrent.futures
    import time
    
    # Non-blocking check
    all_done = False
    wait_time = 0
    max_wait_time = 1200  # Maximum wait time
    check_interval = 5  # Check interval
    
    print("\nWaiting for callbacks to execute...")
    while not all_done and wait_time < max_wait_time:
        all_done = all(future[2].done() for future in callback_futures)
        if not all_done:
            print(f"Waited {wait_time} seconds, continuing to wait for callbacks...")
            time.sleep(check_interval)
            wait_time += check_interval
    
    if all_done:
        print("\nAll callbacks have completed!")
    else:
        print(f"\nTimeout waiting, some queries may not have completed. Waited {wait_time} seconds.")
    
    # Stop query manager
    print("\nStopping QueryManager...")
    query_manager.stop()
    print("QueryManager stopped.")
    
    print("--- Callback Function Test Finished ---")


--- Starting Callback Function Test ---
Class name cache updated with 13364 classes.
数据属性缓存更新完成，共 5859 个属性
对象属性缓存更新完成，共 4557 个属性
所有本体缓存更新完成
Dispatcher loop started on thread QueryDispatcherThread
Query Manager dispatcher started.
提交查询 1: 'Are there electrochemical sensors using Indicator Displacement Assay (IDA) to detect Quinine?'
提交查询 2: 'How stable and reproducible is the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine?'
提交查询 3: 'What types of molecules can be detected by IDA?'

Waiting for callbacks to execute...
Waited 0 seconds, continuing to wait for callbacks...
http://www.test.org/chem_ontologies/meta/
http://www.test.org/chem_ontologies/classes/
http://www.test.org/chem_ontologies/object_properties/
http://www.test.org/chem_ontologies/data_properties/
http://www.test.org/chem_ontologies/meta/
http://www.test.org/chem_ontologies/classes/
http://www.test.org/chem_ontologies/object_properties/
http://www.test.org/chem_ontologies/data



Current stage: hypothetical_generated, Status: hypothetical_generated, Retry count: 0
[TOKEN OPTIMIZATION] Using original classes: 13364 classes (~26728 tokens)
Current stage: hypothetical_generated, Status: hypothetical_generated, Retry count: 0
[TOKEN OPTIMIZATION] Using original classes: 13364 classes (~26728 tokens)
Current stage: hypothetical_generated, Status: hypothetical_generated, Retry count: 0
[TOKEN OPTIMIZATION] Using original classes: 13364 classes (~26728 tokens)
Waited 5 seconds, continuing to wait for callbacks...
[QueryParserAgent] Using refined classes: False, available: 13364, original: 13364
Current stage: normalized, Status: parsing_complete, Retry count: 1
首次查询实体: ['metal_ion', 'small_organic_molecule', 'biomolecule']
Entities refinement not needed - all entities found in available classes
Current stage: refinement_not_needed, Status: parsing_complete, Retry count: 1
[QueryParserAgent] Using refined classes: False, available: 13364, original: 13364
Current stage:

In [9]:
future_list = [future[2].result() for future in callback_futures]
future_list

[{'query': 'Are there electrochemical sensors using Indicator Displacement Assay (IDA) to detect Quinine?',
  'source_ontology': OntologySettings(base_iri='http://www.test.org/chem_ontologies/', ontology_file_name='final.owl', directory_path='D:\\\\CursorProj\\\\Chem-Ontology-Constructor\\\\data\\ontology', closed_ontology_file_name='IDA-closed.owl'),
  'query_type': 'information_retrieval',
  'query_strategy': 'tool_sequence',
  'originating_team': 'test_notebook',
  'originating_stage': 'manual_test',
  'available_classes': ['(2)pseudorotaxane',
   '(3)pseudorotaxane',
   '(4)(cl)3',
   '(4)(pf6)3',
   '(M + H)+',
   '(Nb6Cl12(H2O)6)@(gamma-CD)2)2+',
   '(Ta6Br12(H2O)6)@(gamma-CD)2)Br2.14H2O',
   '(bmim)',
   '(bmim)(octs)',
   '(cl)-',
   '(cu4)(cl)·5',
   '(ethylamino)carbonyl-2-pyridinecarboxylic_acid',
   '(h4)(pf6)2',
   '(nb6br12)n+',
   '(nb6cl12)n+',
   '(nb6i12)n+',
   '(octs)',
   '(pf6)-',
   '(ta6br12)n+',
   '(ta6cl12)n+',
   '(ta6i12)n+',
   '1,1-butane(1,4-diyl)bis(2-a

In [13]:
import os
import pprint

suffix = "816-D(4-1)"
folder_prefix = "4&6-future_list-D4-1"
os.makedirs(folder_prefix, exist_ok=True)

# --- Start of the solution ---

# 1. Create a PrettyPrinter instance. You can configure indentation.
#    indent=4 is a very common and readable choice.
pp = pprint.PrettyPrinter(indent=4, width=120) # width helps control line wrapping

save_indices = list(range(len(future_list)))

for idx in save_indices:
    # --- Save iteration_history ---
    file_path_history = os.path.join(folder_prefix, f"future_list_{idx}_iteration_history_{suffix}.txt")
    with open(file_path_history, "w", encoding="utf-8") as f:
        # 2. Use pp.pformat() to get the nicely formatted string
        formatted_string = pp.pformat(future_list[idx].get('iteration_history'))
        f.write(formatted_string)
    print(f"future_list[{idx}]['iteration_history'] has been saved to {file_path_history}")

    # --- Save formatted_results ---
    file_path_formatted = os.path.join(folder_prefix, f"future_list_{idx}_formatted_results_{suffix}.txt")
    with open(file_path_formatted, "w", encoding="utf-8") as f:
        # 2. Use it again for the other part of the data
        formatted_string = pp.pformat(future_list[idx].get('formatted_results'))
        f.write(formatted_string)
    print(f"future_list[{idx}]['formatted_results'] has been saved to {file_path_formatted}")

future_list[0]['iteration_history'] has been saved to 4&6-future_list-D4-1\future_list_0_iteration_history_816-D(4-1).txt
future_list[0]['formatted_results'] has been saved to 4&6-future_list-D4-1\future_list_0_formatted_results_816-D(4-1).txt
future_list[1]['iteration_history'] has been saved to 4&6-future_list-D4-1\future_list_1_iteration_history_816-D(4-1).txt
future_list[1]['formatted_results'] has been saved to 4&6-future_list-D4-1\future_list_1_formatted_results_816-D(4-1).txt


In [14]:
future_list[1]['iteration_history'] 

[{'retry_count': 1,
  'hypothetical_document': {'interpretation': "The query is asking about the performance characteristics of an electrochemical sensor that employs an Indicator Displacement Assay (IDA) technique specifically for detecting quinine. It seeks information on how reliably and consistently this sensor can measure quinine concentrations, focusing on its stability over time and reproducibility across multiple tests. The question implies interest in the sensor's operational durability, signal consistency, and potential variability in detection results when used repeatedly or over extended periods, within the context of an IDA-based electrochemical detection method.",
   'hypothetical_answer': "A comprehensive response would detail the stability and reproducibility of the electrochemical IDA sensor for quinine detection by discussing factors such as the sensor's chemical and electrochemical stability over multiple uses and time, the consistency of its signal response (e.g., c

In [13]:
future_list[0]['formatted_results'] 

{'summary': 'There is no documented evidence of electrochemical sensors utilizing Indicator Displacement Assay (IDA) specifically for the detection of Quinine based on current literature.',
 'key_points': ['Electrochemical sensors have been developed for various analytes, including tryptophan, 5-fluorouracil (5-FU), and nucleotides, often employing host-guest interactions or nanocomposite materials.',
  'Indicator Displacement Assay (IDA) is primarily used for ultrasensitive fluorescence detection of analytes like LPA in aqueous media, not specifically for quinine.',
  'No existing references or data indicate the application of IDA in electrochemical sensors for quinine detection.',
  'The current literature does not show a direct link or example of IDA-based electrochemical sensors targeting quinine.'],
 'relationships': ['While electrochemical sensors and IDA are both established detection methods, their combination for quinine detection has not been reported.',
  'IDA is more common

In [19]:
# future_list = [future[2].result() for future in callback_futures]
# [future["formatted_results"] for future in future_list]
# future_list[0]["formatted_results"]
for i, future_item in enumerate(future_list):
    print(f"Processing item {i}:")
    try:
        # 尝试访问，看看哪个会出错
        formatted_res = future_item["formatted_results"]
        print(f"  Type of future_item: {type(future_item)}")
        print(f"  Keys in future_item: {future_item.keys() if hasattr(future_item, 'keys') else 'N/A'}")
        print(f"  Type of formatted_results: {type(formatted_res)}")
        print(f"  Value of formatted_results: {formatted_res}")
    except Exception as e:
        print(f"  Error accessing 'formatted_results' for item {i}: {e}")
        print(f"  Type of future_item that caused error: {type(future_item)}")
        if hasattr(future_item, 'keys'):
            print(f"  Keys in future_item: {future_item.keys()}")
        else:
            print(f"  future_item does not have 'keys' attribute.")
        # 如果需要，可以打印整个 problematic future_item
        # print(f"  Problematic future_item: {future_item}")

Processing item 0:
  Type of future_item: <class 'langgraph.pregel.io.AddableValuesDict'>
  Keys in future_item: dict_keys(['query', 'source_ontology', 'query_type', 'query_strategy', 'originating_team', 'originating_stage', 'available_classes', 'available_data_properties', 'available_object_properties', 'refined_classes', 'query_results', 'normalized_query', 'execution_plan', 'validation_report', 'status', 'stage', 'previous_stage', 'retry_count', 'hypothetical_document', 'formatted_results', 'iteration_history', 'messages'])
  Type of formatted_results: <class 'dict'>
  Value of formatted_results: {'summary': 'Currently, there is no specific evidence or documented examples of electrochemical sensors employing Indicator Displacement Assay (IDA) for the detection of quinine. While electrochemical sensors and IDA are both established analytical methods, their combined use for quinine detection has not been explicitly reported.', 'key_points': ['Electrochemical sensors are widely used fo

In [20]:
answers

{3: AIMessage(content='Supramolecular hosts that primarily bind anions through anion-π interactions are a specialized class of receptors that leverage the unique electrostatic and π-acidic properties of aromatic or heteroaromatic systems to facilitate anion recognition. Unlike traditional hydrogen-bonding or electrostatic interactions, anion-π interactions involve the attraction between an electron-deficient π-system and a negatively charged species.\n\n**Types of Supramolecular Hosts Engaged in Anion-π Interactions:**\n\n1. **Electron-Deficient Aromatic and Heteroaromatic Systems:**  \n   Hosts incorporating π-systems such as perfluorinated aromatic rings, quinolines, pyridinium, or other heteroaromatic frameworks are prime candidates. These systems are rendered electron-deficient through substitution with electronegative groups (e.g., fluorines) or by the incorporation of positively charged heteroatoms, creating π-acidic surfaces that can stabilize anions via anion-π interactions.\n\

In [40]:
import yaml
from langchain_core.messages import AIMessage
from typing import Dict, List, Any

def save_qa_to_yaml(answers: Dict[int, Any], queries: List[str], output_path: str) -> None:
    """
    Processes a dictionary of answers and a list of queries, sorts them,
    and saves them to a YAML file with consistent multiline string formatting.

    Args:
        answers (Dict[int, Any]): A dictionary with integer keys and AIMessage objects as values.
        queries (List[str]): A list of query strings.
        output_path (str): The path to the output YAML file.
    """
    data_to_save = []
    
    # Sort keys to process the dictionary in order
    sorted_keys = sorted(answers.keys())
    
    for idx in sorted_keys:
        answer = answers.get(idx)
        if isinstance(answer, AIMessage):
            # The query index is assumed to be key - 1
            if idx > 0 and (idx - 1) < len(queries):
                query = queries[idx - 1]
                response = answer.content
                data_to_save.append({'query': query, 'response': response})
            else:
                print(f"Warning: No matching query found for answer key {idx} or key is out of bounds.")
        else:
            print(f"Warning: Item with key {idx} is not an AIMessage object.")

    # Define a custom Dumper to control string representation, avoiding global state changes.
    class MyDumper(yaml.SafeDumper):
        pass

    def str_presenter(dumper, data):
        """Custom string presenter to force literal block style for multiline strings."""
        # if '\n' in data:
        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
        # return dumper.represent_scalar('tag:yaml.org,2002:str', data)

    MyDumper.add_representer(str, str_presenter)

    # Write the data to the YAML file using the custom Dumper
    with open(output_path, 'w', encoding='utf-8') as f:
        yaml.dump(data_to_save, f, Dumper=MyDumper, allow_unicode=True, default_flow_style=False, sort_keys=False, width=float('inf'))

    print(f"Successfully saved QA pairs to {output_path}") 

save_qa_to_yaml(answers, queries, '717_ida_1.yaml')

Successfully saved QA pairs to 717_ida_1.yaml


In [14]:
from langchain_core.messages import AIMessage

results = []
for idx, answer in answers.items():
    if isinstance(answer, AIMessage):
        q = queries[idx-1]
        print(f"{idx}.查询的最终答案: {answer.content}\n")
        results.append({"query": q, "response": answer.content})
    else:
        print("error")

1.查询的最终答案: Yes, electrochemical sensors employing Indicator Displacement Assays (IDAs) have been developed for the detection of quinine. These sensors leverage the principles of supramolecular chemistry, where a host molecule—often a macrocyclic compound such as a cyclodextrin, calixarene, or crown ether—forms a non-covalent complex with a chromogenic or electroactive indicator. The presence of quinine, a chiral alkaloid with a quinoline and quinuclidine moiety, can displace the indicator from the host-guest complex due to its specific binding affinity, leading to measurable electrochemical changes.

In such IDA-based electrochemical sensors, the binding affinity between quinine and the host is critical. Quinine exhibits a relatively high binding constant with certain macrocyclic hosts, often in the range of 10^4 to 10^6 M^−1, depending on the host structure and the conditions. The displacement of the indicator alters the electrochemical signal—such as current or potential—allowing for

In [11]:
from langchain_core.messages import AIMessage
import json

# 你可以手动设置模型名称和md文件名
model_name = "MOSES-nano-816-1"
md_filename = model_name + ".md"
json_filename = model_name + ".json"

results = []
for idx, answer in answers.items():
    if isinstance(answer, AIMessage):
        q = queries[idx-1]
        print(f"{idx}.查询的最终答案: {answer.content}\n")
        results.append({"query": q, "response": answer.content})
    else:
        print("error")

# 将结果保存为JSON文件
with open(json_filename, "w", encoding="utf-8") as f:
    json.dump(results, f, ensure_ascii=False, indent=4)

# 生成Markdown内容，二级标题（query）带序号
markdown_output = f"# {model_name}\n\n"
for idx, result in enumerate(results, 1):
    markdown_output += f"## {idx}. {result['query']}\n\n"
    markdown_output += f"{result['response']}\n\n"

# 保存为Markdown文件
with open(md_filename, "w", encoding="utf-8") as f:
    f.write(markdown_output)

2.查询的最终答案: An Indicator Displacement Assay (IDA) is a sophisticated supramolecular sensing technique that exploits the principles of molecular recognition and non-covalent interactions to detect specific analytes with high sensitivity and selectivity. In an IDA, a host molecule—often a macrocyclic compound such as a cyclodextrin, calixarene, or crown ether—is pre-complexed with an indicator dye that exhibits a distinct optical signal (e.g., colorimetric or fluorescent). This host-guest complex is stable under initial conditions, with the indicator's optical properties modulated by its binding environment.

When an analyte of interest is introduced, it competes with the indicator for binding to the host. If the analyte has a higher affinity for the host, it displaces the indicator dye, resulting in a measurable change in the optical signal—such as a shift in absorbance or fluorescence intensity. This displacement directly correlates with the presence and, often, the concentration of the

In [32]:
res_list = []
for i, ques in enumerate(revised_queries):
    response = answer_llm.invoke(ques)
    res_list.append(response)
    print(f"完成 {i+1} 个回答")


完成 1 个回答
完成 2 个回答
完成 3 个回答
完成 4 个回答
完成 5 个回答
完成 6 个回答
完成 7 个回答
完成 8 个回答
完成 9 个回答
完成 10 个回答
完成 11 个回答
完成 12 个回答
完成 13 个回答
完成 14 个回答
完成 15 个回答
完成 16 个回答
完成 17 个回答
完成 18 个回答
完成 19 个回答
完成 20 个回答
完成 21 个回答
完成 22 个回答
完成 23 个回答
完成 24 个回答
完成 25 个回答
完成 26 个回答
完成 27 个回答


In [20]:
res_list

[AIMessage(content='Yes, electrochemical sensors employing Indicator Displacement Assay (IDA) strategies have been developed for the detection of quinine. These sensors leverage the principle of competitive binding between quinine and a suitable indicator molecule within a recognition element, often a host-guest system such as cyclodextrins, macrocycles, or other molecular receptors.\n\n**Key points include:**\n\n- **Indicator Displacement Assay (IDA):** In this approach, a colored or electroactive indicator initially forms a complex with a host molecule. When quinine is introduced, it displaces the indicator due to its higher affinity, leading to measurable changes in electrochemical signals.\n\n- **Electrochemical Detection:** The displacement event can be monitored via techniques such as cyclic voltammetry, differential pulse voltammetry, or amperometry, providing sensitive and selective detection of quinine.\n\n- **Examples in Literature:**\n  - Researchers have designed electroche

In [33]:
import json

# 你可以手动设置模型名称和md文件名
model_name = "gpt-4o-mini-final-815-1"
md_filename = model_name + ".md"
json_filename = model_name + ".json"

results = []
for i, res in enumerate(res_list):
    q = queries[i]
    print(f"{i+1}.查询 {q} 的答案是：{res.content}\n")
    results.append({"query": q, "response": res.content})

# 将结果保存为JSON文件
with open(json_filename, "w", encoding="utf-8") as f:
    json.dump(results, f, ensure_ascii=False, indent=4)

# 生成Markdown内容，二级标题（query）带序号
markdown_output = f"# {model_name}\n\n"
for idx, result in enumerate(results, 1):
    markdown_output += f"## {idx}. {result['query']}\n\n"
    markdown_output += f"{result['response']}\n\n"

# 保存为Markdown文件
with open(md_filename, "w", encoding="utf-8") as f:
    f.write(markdown_output)

1.查询 Tell me about Quinine. 的答案是：Quinine is a natural compound that is derived from the bark of the cinchona tree, which is native to South America. It has a long history of use as a treatment for malaria, a disease caused by parasites transmitted through the bites of infected mosquitoes. Quinine was one of the first effective treatments for malaria and played a crucial role in the control of the disease, especially during the colonial era when European powers were expanding into tropical regions.

### Key Points about Quinine:

1. **Chemical Structure**: Quinine is an alkaloid with a complex chemical structure. Its molecular formula is C20H24N2O2.

2. **Mechanism of Action**: Quinine works by interfering with the parasite's ability to digest hemoglobin in red blood cells, which is essential for its survival. This leads to the death of the malaria parasites.

3. **Uses**: While quinine is primarily known for its use in treating malaria, it has also been used to treat other conditions, 

In [41]:
# 获取环境变量中的BEARER_TOKEN
import os
import json
bearer_token = os.environ.get("BEARER_TOKEN", "")
print(f"--- 读取到的令牌是: '{bearer_token}' ---") 
import requests
import json

url = 'https://chat.intern-ai.org.cn/api/v1/chat/completions'
url_health = 'https://chat.intern-ai.org.cn/health'
url_model = 'https://chat.intern-ai.org.cn/api/v1/models'

auth = "Bearer eyJ0eXBlIjoiSldUIiwiYWxnIjoiSFM1MTIifQ.eyJqdGkiOiIzMzEwMDE4MCIsInJvbCI6IlJPTEVfUkVHSVNURVIiLCJpc3MiOiJPcGVuWExhYiIsImlhdCI6MTc0OTgwMDM0NywiY2xpZW50SWQiOiJlYm1ydm9kNnlvMG5semFlazF5cCIsInBob25lIjoiMTc4MzMzODA1NTgiLCJvcGVuSWQiOm51bGwsInV1aWQiOiI4ZjZhNzk0OS05Y2YwLTQwMTgtOTc4MS0yNWI4N2I4Njk3MGUiLCJlbWFpbCI6IiIsImV4cCI6MTc2NTM1MjM0N30.B69K4tY900t5Yt7ZV0rgLOX4T36S0IQxDxDxPrQQEA56XFgCUyWsa6O3H6GOMVJhIwiIH2lNdrwXIQfhlnzU3w"

headers = {
    "Authorization": auth
}
data = {
    "model": "internvl2_5_chemvlm20250306",  
    "messages": [{
        "role": "user",
        "content": "Hello!"
    }],
    "n": 1,
    "temperature": 0,
}
res = requests.get(url_model, headers)
available_models = res.json()
print("可用模型列表:")
print(json.dumps(available_models, indent=2, ensure_ascii=False))

--- 读取到的令牌是: 'eyJ0eXBlIjoiSldUIiwiYWxnIjoiSFM1MTIifQ.eyJqdGkiOiIzMzEwMDE4MCIsInJvbCI6IlJPTEVfUkVHSVNURVIiLCJpc3MiOiJPcGVuWExhYiIsImlhdCI6MTc0OTgwMDM0NywiY2xpZW50SWQiOiJlYm1ydm9kNnlvMG5semFlazF5cCIsInBob25lIjoiMTc4MzMzODA1NTgiLCJvcGVuSWQiOm51bGwsInV1aWQiOiI4ZjZhNzk0OS05Y2YwLTQwMTgtOTc4MS0yNWI4N2I4Njk3MGUiLCJlbWFpbCI6IiIsImV4cCI6MTc2NTM1MjM0N30.B69K4tY900t5Yt7ZV0rgLOX4T36S0IQxDxDxPrQQEA56XFgCUyWsa6O3H6GOMVJhIwiIH2lNdrwXIQfhlnzU3w' ---
可用模型列表:
{
  "code": -10002,
  "msg": "参数错误",
  "data": {}
}


In [None]:

res = requests.post(url, headers=headers, data=json.dumps(data))
print(res.status_code)
print(res.json())
print(res.json()["choices"][0]['message']["content"])



In [None]:

# 将结果保存为JSON文件
with open("intern_responses.json", "w", encoding="utf-8") as f:
    json.dump(intern_results, f, ensure_ascii=False, indent=4)

**Note on Streaming with QueryManager:**

The standard `QueryManager.submit_query()` returns a `Future` that resolves to the *final* state of the LangGraph execution. It doesn't inherently provide access to the intermediate states generated by each node.

To observe the step-by-step execution and intermediate state changes, you would typically need to interact directly with the LangGraph instance using its `stream()` method, as demonstrated in Part 2 below. Modifying the `QueryManager` to expose this stream would require significant changes to its asynchronous task handling and result reporting.

## Part 2: Direct Execution via Graph Stream

In [15]:
if not llm:
    print("Skipping Direct Graph Stream test due to LLM initialization failure.")
else:
    print("\n--- Starting Direct Graph Stream Test ---")

    # 1. Create graph instance
    print("Creating graph instance...")
    graph = create_query_graph()
    print("Graph instance created.")

    # 2. Manually create initial state dictionary
    print("Creating initial state...")
    # Use the same query as Part 1 for comparison
    # query_text_stream = "What proteins does DrugA bind to?"

    for query_text_stream in [queries[4]]:

        try:
            # Ensure we get a list of strings
            available_classes_stream = sorted([cls.name for cls in test_onto.classes() if isinstance(cls, ThingClass)])
            available_data_props_stream = sorted([dp.name for dp in test_onto.data_properties() if isinstance(dp, DataPropertyClass)])
            available_object_props_stream = sorted([op.name for op in test_onto.object_properties() if isinstance(op, ObjectPropertyClass)])
        except Exception as e:
            print(f"Error getting class names: {e}")
            available_classes_stream = []

        initial_state = {
            "query": query_text_stream,
            "source_ontology": test_ontology_settings, # Pass the actual ontology object
            "available_classes": available_classes_stream,
            "available_data_properties": available_data_props_stream,
            "available_object_properties": available_object_props_stream,
            "query_type": "information_retrieval",
            "query_strategy": None,
            "originating_team": "test_notebook_stream",
            "originating_stage": "manual_stream_test",
            "query_results": {},
            "normalized_query": None,
            "execution_plan": None,
            "validation_report": None,
            "sparql_query": None,
            "status": "initialized",
            "stage": "initialized",
            "previous_stage": None,
            "error": None,
            "messages": [] # LangGraph expects messages field
        }
        print("Initial state prepared.")
        # print(json.dumps(initial_state, indent=2, default=str)) # Optionally print initial state (ontology won't serialize well)

        # 3. Execute and iterate stream
        print("\n--- Streaming Graph Execution --- ")
        try:
            stream_counter = 0
            # Use stream method to get intermediate steps
            for chunk in graph.stream(initial_state, config={"recursion_limit": 50}):
                stream_counter += 1
                print(f"\n--- Chunk {stream_counter} --- ")
                # Chunks are dictionaries where keys are node names that just ran
                # and values are the outputs (state updates) returned by that node
                # Use default=str to handle potential non-serializable objects in the state
                print(json.dumps(chunk, indent=2, default=str))
                print("-" * 30)
            print("\n--- Graph Stream Finished --- ")
        except Exception as e:
            print(f"\nError during graph stream: {e}")
            import traceback
            traceback.print_exc() # Print full traceback for stream errors
        
        print(f"query:{query_text_stream} has been finished.")

    print("--- Direct Graph Stream Test Finished ---")


--- Starting Direct Graph Stream Test ---
Creating graph instance...
Graph instance created.
Creating initial state...
Initial state prepared.

--- Streaming Graph Execution --- 
Retry count: 1

--- Chunk 1 --- 
{
  "normalize": {
    "normalized_query": "intent='find information' relevant_entities=['electrochemical_sensor', 'indicator_displacement_assay(IDA)', 'quinine'] relevant_properties=[] filters=None query_type_suggestion='fact-finding'",
    "status": "parsing_complete",
    "stage": "normalized",
    "previous_stage": "initialized",
    "retry_count": 1,
    "messages": [
      "content='Query normalized: Are there electrochemical sensors using Indicator Displacement Assay (IDA) to detect Quinine?' additional_kwargs={} response_metadata={} id='767013f5-1244-430d-82ad-d8c7b8f57278'"
    ]
  }
}
------------------------------
Retry count: 1

--- Chunk 2 --- 
{
  "strategy": {
    "query_strategy": "tool_sequence",
    "status": "strategy_determined",
    "stage": "strategy",
  

In [17]:
print(graph["normalized_query"])

TypeError: 'CompiledStateGraph' object is not subscriptable

# QueryManager 检查


In [7]:
import threading
import traceback
import sys

def check_threads():
    """检查当前进程中的活跃线程"""
    print(f"当前活跃线程数: {threading.active_count()}")
    
    print("\n当前活跃线程:")
    for t in threading.enumerate():
        print(f"- {t.name} (daemon: {t.daemon}, 活动: {t.is_alive()})")
    
    print("\n线程调用栈:")
    query_manager_threads = []
    for thread_id, frame in sys._current_frames().items():
        thread_name = "Unknown"
        for t in threading.enumerate():
            if t.ident == thread_id:
                thread_name = t.name
                break
        
        # 检查是否是QueryManager相关线程
        is_query_thread = False
        stack_trace = traceback.extract_stack(frame)
        for filename, _, _, _ in stack_trace:
            if "query_manager" in filename or "ThreadPool" in filename:
                is_query_thread = True
                query_manager_threads.append(thread_name)
                break
        
        print(f"线程ID: {thread_id}, 名称: {thread_name}{' (QueryManager相关)' if is_query_thread else ''}")
        for filename, lineno, name, line in stack_trace[-10:]:  # 只显示最近10个调用
            print(f"  文件: {filename.split('/')[-1]}, 行: {lineno}, 函数: {name}")
            if line:
                print(f"    代码: {line}")
        print("")
    
    if query_manager_threads:
        print(f"\n发现 {len(query_manager_threads)} 个QueryManager相关线程: {', '.join(query_manager_threads)}")
    else:
        print("\n未发现QueryManager相关线程")

# 执行检查
check_threads()

当前活跃线程数: 6

当前活跃线程:
- MainThread (daemon: False, 活动: True)
- IOPub (daemon: True, 活动: True)
- Heartbeat (daemon: True, 活动: True)
- Control (daemon: True, 活动: True)
- IPythonHistorySavingThread (daemon: True, 活动: True)
- Thread-1 (daemon: True, 活动: True)

线程调用栈:
线程ID: 13768, 名称: Thread-1
  文件: d:\AnacondaEnPs\envs\OntologyConstruction\Lib\threading.py, 行: 1012, 函数: _bootstrap
    代码: self._bootstrap_inner()
  文件: d:\AnacondaEnPs\envs\OntologyConstruction\Lib\threading.py, 行: 1041, 函数: _bootstrap_inner
    代码: self.run()
  文件: d:\AnacondaEnPs\envs\OntologyConstruction\Lib\site-packages\ipykernel\parentpoller.py, 行: 93, 函数: run
    代码: result = ctypes.windll.kernel32.WaitForMultipleObjects(  # type:ignore[attr-defined]

线程ID: 33716, 名称: IPythonHistorySavingThread
  文件: d:\AnacondaEnPs\envs\OntologyConstruction\Lib\threading.py, 行: 1012, 函数: _bootstrap
    代码: self._bootstrap_inner()
  文件: d:\AnacondaEnPs\envs\OntologyConstruction\Lib\threading.py, 行: 1041, 函数: _bootstrap_inner
    代码: sel

In [8]:
# 查看缓存内容（如果有缓存的查询）
cache_content = query_manager.query_queue_manager.cache.cache
print(f"缓存中的查询数量: {len(cache_content)}")

# 查看缓存的时间戳信息
timestamps = query_manager.query_queue_manager.cache.timestamps
if timestamps:
    print("\n缓存时间戳:")
    for key, timestamp in timestamps.items():
        print(f"查询: {key[:50]}... - 时间: {timestamp}")
        # 计算剩余有效时间
        ttl = query_manager.query_queue_manager.cache.ttl  # 默认3600秒（1小时）
        from datetime import datetime, timedelta
        remaining = timestamp + timedelta(seconds=ttl) - datetime.now()
        print(f"  剩余有效时间: {remaining}")

# 如果需要手动清除缓存
# query_manager.query_queue_manager.cache.clear()
# print("缓存已清除")

缓存中的查询数量: 0


In [29]:
import os
import re

def reorder_md_by_queries_and_save_new(md_path, queries, output_path):
    """
    读取md文件，按照queries顺序重新组织问答对，并存为新的md文件。
    :param md_path: md文件路径
    :param queries: 问题列表（字符串列表，顺序即为目标顺序）
    :param output_path: 新的md文件路径
    """
    # 读取原始md内容
    with open(md_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # 解析所有问答对
    qa_blocks = []
    cur_q = None
    cur_a = []
    q_pattern = re.compile(r"^##\s*\d+\.\s+(.+)")
    for line in lines:
        m = q_pattern.match(line.strip())
        if m:
            # 新问题开始
            if cur_q is not None:
                qa_blocks.append((cur_q, cur_a))
            cur_q = m.group(1).strip()  # 只保留问题正文
            cur_a = []
        elif cur_q is not None:
            cur_a.append(line)
    if cur_q is not None:
        qa_blocks.append((cur_q, cur_a))

    # 构建问题到问答对的映射
    q2block = {q: (q, a) for q, a in qa_blocks}

    # 重新排序
    new_blocks = []
    for q in queries:
        if q in q2block:
            new_blocks.append(q2block[q])
        else:
            print(f"警告: 未找到问题: {q}")

    # 写入新的md文件
    with open(output_path, "w", encoding="utf-8") as f:
        for idx, (q, a) in enumerate(new_blocks, 1):
            f.write(f"## {idx}. {q}\n")
            for line in a:
                f.write(line)

# 处理MOSES-final.md和MOSES-nano-final.md，存为新文件
for mdfile in ["MOSES-final.md", "MOSES-nano-final.md"]:
    md_path = os.path.join( os.getcwd(), mdfile)
    output_path = os.path.join( os.getcwd(), f"reordered_{mdfile}")
    if os.path.exists(md_path):
        reorder_md_by_queries_and_save_new(md_path, queries, output_path)
        print(f"已生成新文件: {output_path}")
    else:
        print(f"未找到文件: {md_path}")


已生成新文件: d:\CursorProj\Chem-Ontology-Constructor\tests\unit_test\query\test\reordered_MOSES-final.md
已生成新文件: d:\CursorProj\Chem-Ontology-Constructor\tests\unit_test\query\test\reordered_MOSES-nano-final.md


In [25]:
import os

def reorder_md_by_queries_h2(md_path, queries, output_path):
    """
    读取md文件，按照queries顺序重新组织问答对，并存为新的md文件。
    假设每个问题是H2标题（即以'## '开头），两个H2标题之间的内容为该问题的答案。
    :param md_path: 原始md文件路径
    :param queries: 问题列表（字符串列表，顺序即为目标顺序）
    :param output_path: 新的md文件路径
    """
    # 读取原始md内容
    with open(md_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # 解析所有问答对（H2标题为问题）
    qa_blocks = []
    cur_q = None
    cur_a = []
    for line in lines:
        if line.strip().startswith("## "):
            # 新问题开始
            if cur_q is not None:
                qa_blocks.append((cur_q, cur_a))
            cur_q = line.strip()[3:].strip()  # 去掉'## '和首尾空格
            cur_a = []
        elif cur_q is not None:
            cur_a.append(line)
    if cur_q is not None:
        qa_blocks.append((cur_q, cur_a))

    # 构建问题到问答对的映射
    q2block = {q: (q, a) for q, a in qa_blocks}

    # 重新排序
    new_blocks = []
    for q in queries:
        if q in q2block:
            new_blocks.append(q2block[q])
        else:
            print(f"警告: 未找到问题: {q}")

    # 写入新的md文件
    with open(output_path, "w", encoding="utf-8") as f:
        for q, a in new_blocks:
            f.write(f"## {q}\n")
            for line in a:
                f.write(line)

# 假设queries变量已定义，内容为问题字符串列表，顺序为目标顺序
# 例如:
# queries = [
#     "What techniques are used to analyze Quinine?",
#     "Are there electrochemical sensors using Indicator Displacement Assay (IDA) to detect Quinine?",
#     ...
# ]

# 处理MOSES-final.md和MOSES-nano-final.md，输出为新文件
for mdfile in ["MOSES-final.md", "MOSES-nano-final.md"]:
    md_path = os.path.join( os.getcwd(), mdfile)
    output_path = os.path.join( os.getcwd(), f"reordered_{mdfile}")
    if os.path.exists(md_path):
        reorder_md_by_queries_h2(md_path, queries, output_path)
        print(f"已生成新文件: {output_path}")
    else:
        print(f"未找到文件: {md_path}")


警告: 未找到问题: Tell me about Quinine.
警告: 未找到问题: What is an Indicator Displacement Assay?
警告: 未找到问题: What techniques are used to analyze Quinine?
警告: 未找到问题: What are the components of an Indicator Displacement Assay?
警告: 未找到问题: Are there electrochemical sensors using Indicator Displacement Assay (IDA) to detect Quinine?
警告: 未找到问题: Which host molecules use host-guest recognition in electrochemical assays?
警告: 未找到问题: How stable and reproducible is the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine?
警告: 未找到问题: How is the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine verified?
警告: 未找到问题: In the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine, how does Quinine displace Methylene Blue from beta-Cyclodextrin?
警告: 未找到问题: What does Graphene do in the electrochemical sensor that uses an Indicator Displacement Assay (IDA) for detecting Quinine?
警告: 未找到问题: What is a