# MODELS and DATA

In [1]:
!curl http://localhost:8080/v1/models

curl: (48) An unknown option was passed in to libcurl


In [2]:
import autogen
print(autogen.__version__)

0.2.40


Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.


In [3]:
import sys
# Prepend the local GraphReasoning_SG checkout to the module search path
# (presumably where the `GraphReasoning` package imported further down lives).
sys.path.insert(0, "/home/istewart/orcd/pool/hypergraph/GraphReasoning_SG")

In [4]:
import os
# Ask CUDA to number devices by PCI bus id (see issue #152).
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

In [5]:
import autogen, openai
# The one OpenAI-compatible endpoint used by every agent in this notebook.
# "NULL" is only a placeholder key (presumably the local server ignores it — confirm).
_local_endpoint = dict(
    model="Llama3.3",
    base_url="http://localhost:8080/v1",
    api_key="NULL",
    max_tokens=40000,
)
config_list = [_local_endpoint]

# Settings handed to the autogen agents.
llm_config = dict(
    cache_seed=9527,          # seed for caching and reproducibility
    config_list=config_list,  # list of OpenAI API configurations
    temperature=0,            # sampling temperature
    max_tokens=40000,
)



In [6]:
# Folder containing the generated node-embedding file.
data_dir = "/home/istewart/orcd/pool/hypergraph/GraphReasoning_SG/Notebooks/SG/GRAPHDATA_paper"
# Folder containing the generated graph outputs (all subgraphs; the graph pickle is read from here too).
data_dir_output = "/home/istewart/orcd/pool/hypergraph/GraphReasoning_SG/Notebooks/SG/GRAPHDATA_OUTPUT_paper"
# File name of the embedding pickle inside `data_dir`.
embedding_file = "composite_LLAMA4_70b.pkl"

# Token budget, read back from the endpoint entry rather than repeated here.
max_tokens = config_list[0]["max_tokens"]

# GET EMBEDDING MODEL / TOKENIZER

In [7]:
from sentence_transformers import SentenceTransformer
# An empty string means "no separate tokenizer": code that checks its truthiness
# then falls back to the model's own encode() method.
embedding_tokenizer = ""
embedding_model = SentenceTransformer(
    "nomic-ai/nomic-embed-text-v1.5",
    trust_remote_code=True,  # NOTE: allows code shipped with the model repository to run locally
)

  from .autonotebook import tqdm as notebook_tqdm
<All keys matched successfully>


In [8]:
from GraphReasoning import load_embeddings
# Build the embedding path once and reuse it (it was previously built twice,
# leaving `filename` unused).
filename = f"{data_dir}/{embedding_file}"
node_embeddings = load_embeddings(filename)

# LOAD GRAPH G

In [9]:
import os
import pickle

# Pickle holding the hypergraph itself (not the updated_sub_dfs file).
fname = "final_graph.pkl"
# fname = "simple_hypergraph_simplified.pkl"  # alternative hypergraph pickle

fullpath = os.path.join(data_dir_output, fname)

# NOTE: pickle.load can execute arbitrary code — only open files produced by a trusted pipeline.
with open(fullpath, "rb") as graph_file:
    G = pickle.load(graph_file)

print(f"KG loaded from {fullpath}: {G}")

KG loaded from /home/istewart/orcd/pool/hypergraph/GraphReasoning_SG/Notebooks/SG/GRAPHDATA_OUTPUT_paper/final_graph.pkl: None <class 'hypernetx.classes.hypergraph.Hypergraph'>


# START GRAPHRAG

In [10]:
# graph_nodes = set(str(n) for n in G.nodes)
# embedding_keys = set(str(k) for k in node_embeddings.keys())
# missing = graph_nodes - embedding_keys
# extra   = embedding_keys - graph_nodes

# if not missing and not extra:
#     print(f"Embeddings in sync. {len(graph_nodes)} nodes.")
# else:
#     print(f"Embedding mismatch detected.")
#     if missing:
#         print(f"  - Missing embeddings for {len(missing)} nodes: {list(missing)[:5]}")
#     if extra:
#         print(f"  - Extra embeddings for removed nodes: {len(extra)} nodes: {list(extra)[:5]}")

# Count the tokens produced by tokenizing a text

In [11]:
def custom_token_count_function(text, placeholder=''):
    """Return the number of tokens `text` is split into.

    Uses the module-level `embedding_tokenizer` when one is configured. When it
    is empty/None (this notebook sets it to ''), the tokenizer exposed as
    `embedding_model.tokenizer` is used instead. Previously the empty string
    itself received the call, i.e. `str.encode(text)`, which interprets `text`
    as a codec name and raises `LookupError`.

    Args:
        text: String to tokenize.
        placeholder: Unused; kept so existing callers may pass a second argument.

    Returns:
        int: Length of the sequence returned by the tokenizer's `encode(text)`.

    Raises:
        ValueError: If neither a tokenizer nor a model-provided tokenizer exists.
    """
    # `or` short-circuits, so `embedding_model` is only consulted when no tokenizer is set.
    tokenizer = embedding_tokenizer or getattr(embedding_model, "tokenizer", None)
    if tokenizer is None:
        raise ValueError(
            "No tokenizer available: set `embedding_tokenizer` or use an "
            "embedding model that exposes a `.tokenizer` attribute."
        )
    return len(tokenizer.encode(text))

In [12]:
import importlib
from typing import Optional, Union, cast, TypeVar

import numpy as np
import numpy.typing as npt

from chromadb.api.types import EmbeddingFunction, Embeddings
from chromadb import PersistentClient

from chromadb.config import Settings
import torch #torch is used to move tokenized inputs to GPU, handling model outputs as tensors, performing .mean() and .detach model layers. 
import networkx as nx

from tqdm import tqdm


# Type helpers used to parameterize the ChromaDB-style embedding function defined below.
Embeddable = Union[str, nx.DiGraph] # type alias: an "embeddable" input is either a string or a networkx DiGraph
D = TypeVar("D", bound=Embeddable, contravariant=True) # contravariant type variable whose values must be a subtype of Embeddable



#define embedding function compatible with ChromaDB's API
class TransformerEmbeddingFunction(EmbeddingFunction[D]):
    """ChromaDB-style embedding function backed by a caller-supplied model.

    The mode is chosen by the truthiness of the tokenizer passed in:

    * tokenizer given -> the input is tokenized, run through the model, and the
      token states are mean-pooled into one vector per input;
    * tokenizer falsy -> the model's own ``encode`` method is called (this is
      how a sentence-transformers model is used in this notebook).

    Each embedding is L2-normalised on its own before being returned.
    """

    def __init__(
            self,
            embedding_tokenizer,
            embedding_model,
            cache_dir: Optional[str] = None,
    ):
        """Store the tokenizer/model pair.

        Args:
            embedding_tokenizer: Callable tokenizer, or a falsy value to use
                ``embedding_model.encode`` instead.
            embedding_model: Model object; called with the tokenized inputs, or
                via ``.encode(input)`` when no tokenizer is given.
            cache_dir: Unused; kept for interface compatibility.

        Raises:
            ValueError: If ``transformers`` or ``torch`` cannot be imported.
        """
        try:
            importlib.import_module("transformers")
            self._torch = importlib.import_module("torch")
        except ImportError as exc:
            raise ValueError(
                "The transformers and/or pytorch python package is not installed. Please install it with "
                "`pip install transformers` or `pip install torch`"
            ) from exc

        # The tokenizer and model are supplied by the caller rather than loaded here.
        self._tokenizer = embedding_tokenizer
        self._model = embedding_model

    @staticmethod
    def _normalize(vector: npt.NDArray) -> npt.NDArray:
        """Scale each vector to unit L2 length.

        Works on a single vector (1-D) or a batch (2-D, one embedding per row).
        The norm is taken along the last axis so every row is normalised
        independently; a batch is no longer divided by one matrix-wide norm.
        Zero vectors are returned unchanged.
        """
        array = np.asarray(vector)
        norms = np.linalg.norm(array, axis=-1, keepdims=True)
        # Divide zero-norm rows by 1 so they stay all-zero instead of becoming NaN.
        safe_norms = np.where(norms == 0, 1, norms)
        return array / safe_norms

    def __call__(self, input: D) -> Embeddings:
        """Embed `input` and return the embeddings as a list of float lists.

        Args:
            input: Whatever the tokenizer (or the model's ``encode``) accepts;
                ChromaDB is expected to pass a list of documents.

        Returns:
            One list of floats per embedding, each L2-normalised.
        """
        if self._tokenizer:
            # Run on the device the model lives on; fall back to the previous
            # default ('cuda:0') when the model does not expose one.
            device = getattr(self._model, "device", None)
            if device is None:
                device = "cuda:0" if self._torch.cuda.is_available() else "cpu"

            inputs = self._tokenizer(
                input, padding=True, truncation=True, return_tensors="pt"
            ).to(device)

            with self._torch.no_grad():  # inference only; no autograd graph needed
                outputs = self._model(**inputs)

            # Prefer `last_hidden_state`; otherwise use the last entry of `hidden_states`.
            hidden = getattr(outputs, "last_hidden_state", None)
            if hidden is None:
                hidden = outputs.hidden_states[-1]

            # Mean-pool over the token axis. Cast to float32 and move to CPU
            # first: `.numpy()` fails on CUDA tensors and on dtypes numpy lacks.
            # NOTE(review): padding positions are included in the mean, as before.
            embeddings = hidden.mean(dim=1).detach().to(self._torch.float).cpu().numpy()
        else:
            # No tokenizer: the model is expected to provide its own `.encode()`.
            embeddings = self._model.encode(input)

        # ChromaDB expects a list of embeddings, each itself a list of floats.
        return [e.tolist() for e in self._normalize(embeddings)]


# Create `embedding_function` from the predefined embedding tokenizer and embedding model

In [13]:
# Wrap the tokenizer/model pair chosen above in the ChromaDB-style embedding callable.
embedding_function = TransformerEmbeddingFunction(
    embedding_tokenizer=embedding_tokenizer,
    embedding_model=embedding_model,
)


# GraphRAGAgent

In [14]:
# from openai import OpenAI
# from autogen import AssistantAgent, UserProxyAgent
# from GraphReasoning import extract_keywords_to_nodes, find_shortest_path_hypersubgraph_between_nodes_local

# --- LLM + OpenAI client ---
from openai import OpenAI

# --- Autogen agents ---
from autogen import AssistantAgent, UserProxyAgent, Agent

# --- typing helpers ---
from typing import Optional, Callable, Dict, List, Tuple, Any
from typing import Literal

# --- your custom graph reasoning functions ---
from GraphReasoning import (
    extract_keywords_to_nodes,
    find_shortest_path_hypersubgraph_between_nodes_local,
    collect_hyperentities, load_chunk_dfs,
)

# --- for IPython integration (used in your agent) ---
try:
    from IPython import get_ipython
except ImportError:
    def get_ipython():
        return None

# --- graph libraries ---
import networkx as nx


# local_search(question, generate, graph, node_embeddings, embedding_tokenizer, embedding_model, N_samples=5, similarity_threshold=0.9)
class llm:
    """Thin wrapper around an OpenAI-compatible chat-completions endpoint."""

    def __init__(self, llm_config):
        """Create the client from a single endpoint entry.

        Args:
            llm_config: Mapping with the keys "api_key", "base_url", "model"
                and "max_tokens" (one element of `config_list`).
        """
        self.client = OpenAI(
            api_key=llm_config["api_key"],
            base_url=llm_config["base_url"],  # e.g. the local server URL
        )
        self.model = llm_config["model"]
        self.max_tokens = llm_config["max_tokens"]

    def generate_cli(self, system_prompt="You are an expert in this field. Try your best to give a clear and concise answer.",
                           prompt="Hello world! I am", temperature=0,
                           ):
        """Send one chat-completion request and return the reply text.

        Args:
            system_prompt: Text for the "system" message; pass None to send
                only the user message.
            prompt: Text for the "user" message.
            temperature: Sampling temperature forwarded to the endpoint.

        Returns:
            The content of the first returned choice, or '' if the request
            failed. Failures are logged as warnings instead of being dropped
            silently; KeyboardInterrupt/SystemExit are no longer swallowed.
        """
        messages = [{"role": "user", "content": prompt}]
        if system_prompt is not None:
            # The system message goes first when one is supplied.
            messages.insert(0, {"role": "system", "content": system_prompt})

        try:
            result = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=temperature,
                max_tokens=self.max_tokens,
            )
            # Only the first choice is used.
            return result.choices[0].message.content
        except Exception as exc:
            # Best effort: keep returning '' as before, but say why.
            import logging
            logging.getLogger(__name__).warning("generate_cli failed: %r", exc)
            return ''

# NOTE: this rebinds the name `llm` from the class to an instance, so the class is no longer reachable by that name after this line.
llm=llm(llm_config=config_list[0]) # built from the single endpoint entry (a flat dict), not from the autogen-style `llm_config` dict
generate = llm.generate_cli # bound method used as the plain text-generation callable passed to the graph-reasoning helpers


class GraphRAGAgent(UserProxyAgent):
    """(In preview) User-proxy agent that replies with knowledge-graph context.

    For each incoming message it looks up graph nodes related to the message,
    extracts a path-based sub-hypergraph between them, and replies with the
    collected relationships so the next agent can answer using them.

    Recognised ``retrieve_config`` keys (all optional):
        knowledge_graph: graph object handed to the GraphReasoning helpers.
        n_results: forwarded to ``extract_keywords_to_nodes`` (default 5).
        distance_threshold: forwarded as ``similarity_threshold`` (default 0.9).
        intersection_threshold: forwarded as ``s`` to the path search (default 1).
        k_paths: forwarded as ``k_paths`` to the path search (default 1).
        chunk_dfs_path: pickle read by ``load_chunk_dfs``; defaults to
            ``_DEFAULT_CHUNK_DFS_PATH``.
    """

    # Previously hard-coded inside graphRAG(); now overridable via retrieve_config["chunk_dfs_path"].
    _DEFAULT_CHUNK_DFS_PATH = "/orcd/pool/007/istewart/hypergraph/GraphReasoning_SG/Notebooks/SG/GRAPHDATA_OUTPUT_paper/updated_sub_dfs.pkl"

    def __init__(
        self,
        name="GraphRAGChatAgent",
        human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER",
        is_termination_msg: Optional[Callable[[Dict], bool]] = None,
        generate=None,
        node_embeddings=None,
        embedding_tokenizer=None,
        embedding_model=None,
        retrieve_config: Optional[Dict] = None,  # config for the retrieve agent
        **kwargs,
    ):
        """Set up the agent and register the retrieval reply function.

        Args:
            name: Agent name.
            human_input_mode: Forwarded to ``UserProxyAgent``.
            is_termination_msg: Forwarded to ``UserProxyAgent``. It was
                previously accepted here but never passed on, so a
                caller-supplied predicate had no effect.
            generate: Text-generation callable passed to ``extract_keywords_to_nodes``.
            node_embeddings: Node embeddings passed to ``extract_keywords_to_nodes``.
            embedding_tokenizer: Tokenizer passed to ``extract_keywords_to_nodes``.
            embedding_model: Model passed to ``extract_keywords_to_nodes``.
            retrieve_config: Retrieval settings, see the class docstring.
            **kwargs: Any other ``UserProxyAgent`` argument.
        """
        super().__init__(
            name=name,
            human_input_mode=human_input_mode,
            is_termination_msg=is_termination_msg,
            **kwargs,
        )
        self._retrieve_config = {} if retrieve_config is None else retrieve_config
        self._knowledge_graph = self._retrieve_config.get("knowledge_graph", None)
        self._n_results = self._retrieve_config.get("n_results", 5)
        self._distance_threshold = self._retrieve_config.get("distance_threshold", 0.9)
        self._intersection_threshold = self._retrieve_config.get("intersection_threshold", 1)
        self._k_paths = self._retrieve_config.get("k_paths", 1)
        self._chunk_dfs_path = self._retrieve_config.get("chunk_dfs_path", self._DEFAULT_CHUNK_DFS_PATH)

        self.generate = generate
        self.node_embeddings = node_embeddings
        self.embedding_tokenizer = embedding_tokenizer
        self.embedding_model = embedding_model

        self._ipython = get_ipython()
        self._results = []  # the results of the current query
        self._intermediate_answers = set()  # the intermediate answers
        self.register_reply(Agent, GraphRAGAgent._generate_retrieve_user_reply, position=2)

    def _reset(self, intermediate=False):
        """Clear per-query state; with ``intermediate=False`` also clear the accumulated state."""
        self._results = []  # the results of the current query
        if not intermediate:
            self._intermediate_answers = set()  # the intermediate answers
            self._doc_contents = []  # the contents of the current used doc
            self._doc_ids = []  # the ids of the current used doc

    def graphRAG(self, message):
        """Return the graph relationships relevant to `message`.

        Steps, each delegated to a GraphReasoning helper (not shown in this
        notebook, so the descriptions are inferred from names and arguments):
        map the message to graph nodes, find the path-based sub-hypergraph
        between those nodes, load the chunk DataFrames, and collect the
        hyper-entities of the subgraph.

        Args:
            message: The message to retrieve context for, as stored in the
                agent's message history.

        Returns:
            Whatever ``collect_hyperentities`` returns.
        """
        nodes = extract_keywords_to_nodes(
            message,
            self.generate,
            self.node_embeddings,
            self.embedding_tokenizer,
            self.embedding_model,
            self._n_results,
            similarity_threshold=self._distance_threshold,
            H=self._knowledge_graph,
        )
        subgraph, reports = find_shortest_path_hypersubgraph_between_nodes_local(
            self._knowledge_graph,
            nodes,
            s=self._intersection_threshold,
            k_paths=self._k_paths,
        )
        chunk_to_df = load_chunk_dfs(self._chunk_dfs_path)
        return collect_hyperentities(subgraph, reports, chunk_to_df=chunk_to_df)

    # Reply function registered in __init__; invoked when a message is received.
    def _generate_retrieve_user_reply(
        self,
        messages: Optional[List[Dict]] = None,
        sender: Optional[Agent] = None,
        config: Optional[Any] = None,
    ) -> Tuple[bool, Union[str, Dict, None]]:
        """Reply with the relationships retrieved for the sender's latest message.

        Resets the per-query state, runs :meth:`graphRAG` on the last message
        stored for `sender`, then clears the history between this agent and the
        sender in both directions.

        Args:
            messages: Unused; the last stored message of `sender` is read instead.
            sender: The agent whose latest message is answered.
            config: Unused.

        Returns:
            ``(True, reply)`` where `reply` is the prompt text embedding the
            retrieved relationships; ``True`` marks the reply as final.
        """
        self._reset(intermediate=True)  # drop results of the previous query

        relationships = self.graphRAG(self._oai_messages[sender][-1])

        final_response = f"Please consider the following relationships of the knowledge related to the question and make your response: {relationships}"

        self.clear_history(sender)
        sender.clear_history(self)

        return True, final_response


# Experiments on Path Finding 

In [15]:
# ---- Hypergraph path filtering parameters ----
# Both values are handed to GraphRAGAgent through retrieve_config, which forwards them to the shortest-path search.
intersection_threshold = 2  # forwarded as `s`
k_paths = 1  # forwarded as `k_paths`

# Set up multiagent system 

In [16]:
def _is_terminate_message(message):
    """Return True when the message text contains TERMINATE (asterisks ignored).

    A missing or None "content" is treated as empty text instead of raising.
    """
    content = message.get("content") or ""
    return "TERMINATE" in content.replace("*", "").rstrip()


user_proxy = autogen.UserProxyAgent(
    name="Admin",
    system_message="A human admin. Interact with the engineer to discuss the QA result.",
    human_input_mode="NEVER",
    code_execution_config=False,  # no code execution; purely conversational
    is_termination_msg=_is_terminate_message,  # stop when TERMINATE appears
    # is_termination_msg=lambda x: x.get("content", "").rstrip().endswith("TERMINATE"),
)

graph_rag_agent = GraphRAGAgent(
    name="graph_rag_agent",
    system_message="RAG agent. \n",
    human_input_mode="NEVER",
    code_execution_config=False,
    generate=generate,
    node_embeddings=node_embeddings,
    embedding_tokenizer=embedding_tokenizer,
    embedding_model=embedding_model,
    retrieve_config={
        "n_results": 5,
        "knowledge_graph": G,
        "distance_threshold": 1.5,
        "intersection_threshold": intersection_threshold,
        "k_paths": k_paths,
    },
    llm_config=llm_config,
    is_termination_msg=_is_terminate_message,
)

engineer = autogen.AssistantAgent(
    name="Engineer",
    llm_config=llm_config,
    system_message="""Engineer with scientific backgrounds. Don't write code.
Start your response with: QUESTION: ...? \n ANSWER: ... .
You should always use the information you recieve from the other agents and don't make assumption. You should keep references when you use the provided information from another agent
Write your answer strictly in academic style with citations such as '<something true> [1]' and a references section with [1] <REFERENCE TITLE>: <reasons> and following the number in all your answers to make sure your citation is not overlapping.
Don't ever cite any sources that are not from the information you have. If you have an idea that is hypothetical, only mark it in your response.
Don't indirect cite the reference from the source texts."""
)


hypothesizer = autogen.AssistantAgent(
    # Was "Engineer", duplicating the engineer agent's name; every agent in a
    # group chat needs its own name so messages are attributed correctly.
    name="Hypothesizer",
    llm_config=llm_config,
    # Same prompt text as before, written with explicit escapes so the trailing
    # space and newlines cannot be lost to editor whitespace trimming.
    system_message=(
        "Creative Hypothesizer agent. Based on insights from previous agents about mechanistic combinations, suggest a plausible experiment or new material or composite that could reveal new insight. \n"
        'Add "\nCLEAR HISTORY graph_rag_agent" at the end of your reply\n'
    ),
    is_termination_msg=_is_terminate_message,
)

# Define Groupchat Conversation

In [17]:
# Agents taking part in the group chat (passed to autogen.GroupChat below).
agents = [user_proxy, graph_rag_agent, engineer, hypothesizer]
def graphRAG_speaker_selection_func(last_speaker: Agent, groupchat: autogen.GroupChat):
    """Select the next speaker with a fixed hand-off chain.

    user_proxy -> graph_rag_agent -> engineer -> hypothesizer -> (no one)

    Args:
        last_speaker: The agent that spoke last.
        groupchat: The group chat being run (not consulted by this function).

    Returns:
        The next agent, or None when `last_speaker` has no successor (e.g. the
        hypothesizer). Presumably autogen then ends the conversation — the
        recorded run finishes with "No eligible speaker found".
    """
    if last_speaker is user_proxy:
        # The question always goes to retrieval first.
        return graph_rag_agent

    if last_speaker is graph_rag_agent:
        # Retrieved relationships are handed to the engineer.
        return engineer

    if last_speaker is engineer:
        return hypothesizer

    # No transition is defined for any other speaker.
    return None

# Group chat driven by the custom speaker-selection function defined above
# (a built-in strategy such as 'round_robin' could be used instead).
groupchat = autogen.GroupChat(
    agents=agents,
    messages=[],                # start with an empty message history
    speaker_selection_method=graphRAG_speaker_selection_func,
    max_round=100,
    enable_clear_history=True,  # allow in-conversation commands like "CLEAR HISTORY graph_rag_agent"
)

manager = autogen.GroupChatManager(groupchat)

# Run query

In [18]:
import shutil
# Best-effort removal of the local `.cache` directory before a run
# (presumably the on-disk response cache keyed by `cache_seed` — confirm).
# ignore_errors=True covers a missing directory or permission problems
# without a bare `except`, which would also swallow KeyboardInterrupt.
shutil.rmtree('.cache', ignore_errors=True)

In [19]:
# Questions to run; the leading/trailing newlines and the trailing space of the
# original prompt text are kept via explicit escapes.
q = ["\nhow can hydrogel mechanistically relate to PCL? \n"]

In [20]:
# Run every question through the group chat and keep what initiate_chat returns.
# NOTE: the export cell locates this cell by the literal text "result = []" — keep it.
result = []
for q_ in q:
    chat_outcome = user_proxy.initiate_chat(manager, message=f"{q_}\n")
    result.append(chat_outcome)

[33mAdmin[0m (to chat_manager):


how can hydrogel mechanistically relate to PCL? 



--------------------------------------------------------------------------------
[32m
Next speaker: graph_rag_agent
[0m

RAW LLM OUTPUT: {"keywords": ["hydrogel", "pcl"]}
Extracted keywords: ['hydrogel', 'pcl']
Found matched nodes in embeddings: [np.str_('hydrogel'), np.str_('PCL')]
[33mgraph_rag_agent[0m (to chat_manager):

Please consider the following relationships of the knowledge related to the question and make your response: ['chitosan, collagen compose hydrogel.', 'PCL, chitosan, collagen, gelatin form scaffolds.']

--------------------------------------------------------------------------------
[32m
Next speaker: Engineer
[0m
[33mEngineer[0m (to chat_manager):

QUESTION: How can hydrogel mechanistically relate to PCL? 
ANSWER: Hydrogel can mechanistically relate to PCL through the common components used in forming scaffolds, such as chitosan and collagen [1]. Since chitosan and coll

No eligible speaker found. Terminating the conversation.


# SAVE

In [1]:
import os
import nbformat
from nbconvert import HTMLExporter
def export_filtered_notebook_cell(notebook_path, search_snippet, out_dir="Experiments_Nov23", out_filename="filtered_notebook.html"):
    """Export the code cells containing `search_snippet` to an HTML file.

    Args:
        notebook_path: Path of the notebook file to read (version-4 format).
        search_snippet: Text that a code cell's source must contain to be kept.
        out_dir: Output folder; created if it does not exist.
        out_filename: Name of the HTML file written inside `out_dir`.

    Raises:
        ValueError: If no code cell contains `search_snippet`.
    """
    notebook = nbformat.read(notebook_path, as_version=4)

    def _is_match(cell):
        # Only code cells are candidates; markdown/raw cells are ignored.
        return cell.cell_type == "code" and search_snippet in cell.source

    selected = list(filter(_is_match, notebook.cells))
    if len(selected) == 0:
        raise ValueError(f"No cell found containing: {search_snippet}")

    # Reduce the notebook to the matching cells, then render it with the "lab" template.
    notebook.cells = selected
    html_body, _ = HTMLExporter(template_name="lab").from_notebook_node(notebook)

    os.makedirs(out_dir, exist_ok=True)
    destination = os.path.join(out_dir, out_filename)
    with open(destination, "w", encoding="utf-8") as handle:
        handle.write(html_body)

    print(f"✅ Saved filtered notebook cells to {destination}")

# Export the query cell (found via the literal text "result = []") to an HTML report.
# NOTE: this cell's own source also contains that literal, so it matches as well.
export_filtered_notebook_cell(
    search_snippet="result = []",
    notebook_path="/home/istewart/orcd/pool/hypergraph/GraphReasoning_SG/Notebooks/SG/An_LLM_hypergraph.ipynb",
    out_filename="hydrogel_PCL_k1_2IT_hypothesis.html",
    out_dir="Experiments_Nov23",
)

✅ Saved filtered notebook cells to Experiments_Nov23/hydrogel_PCL_k1_2IT_hypothesis.html
