# Environment Variables

In [1]:
%env ENABLE_WEIGHTSANDBIAS=false
%env ENABLE_MLFLOW=false
%env ENABLE_KG_SOURCE=false
%env ONTOLOGY_DATABASE=Ontology_database_agent_test #note this is my local vectordatabase name, we need this because ENABLE_KG_SOURCE=true
%env WEAVIATE_API_KEY="1#FG!NsmUVEs" #note this is my local vectordatabase key
%env OLLAMA_API_ENDPOINT=http://host.docker.internal:11434 #note this is my local vectordatabase name
%env OLLAMA_MODEL=nomic-embed-text #note this is my local vectordatabase name

env: ENABLE_WEIGHTSANDBIAS=false
env: ENABLE_MLFLOW=false
env: ENABLE_KG_SOURCE=false
env: ONTOLOGY_DATABASE=Ontology_database_agent_test #note this is my local vectordatabase name, we need this because ENABLE_KG_SOURCE=true
env: WEAVIATE_API_KEY="XFG!NQmUVEC&8" #note this is my local vectordatabase key
env: OLLAMA_API_ENDPOINT=http://host.docker.internal:11434 #note this is my local vectordatabase name
env: OLLAMA_MODEL=nomic-embed-text #note this is my local vectordatabase name


# Configuration Files

In [15]:
import json
import os

In [3]:
def _openrouter_llm():
    """Return a fresh copy of the LLM settings shared by all three agents.

    A new dict is built on every call so that no two agents alias the same
    object. The API key is read from the OPENROUTER_API_KEY environment
    variable; when it is unset the original placeholder string is used, so a
    real key never has to be pasted into the notebook.
    """
    return {
        "model": "openrouter/openai/gpt-4o-2024-11-20",
        "base_url": "https://openrouter.ai/api/v1",
        "frequency_penalty": 0.1,
        "temperature": 0.7,
        "seed": 53,
        "api_key": os.environ.get("OPENROUTER_API_KEY", "YOUR_OPENROUTER_API_KEY"),
    }


# Agent definitions: extractor -> alignment -> judge.
# Each agent's "output_variable" is the name later steps use as a {placeholder}
# in goals/backstories and as a flow input.
agentconfig = {
    "agents": [
        {
            "id": "extractor_agent",
            "output_variable": "extracted_info",
            "role": "Neuroscience Named Entity Recognition (NER) Extractor Agent\n",
            "goal": "Perform Named Entity Recognition (NER) on neuroscience {literature} and return structured JSON output.\n",
            "backstory": "You are an AI assistant specialized in processing neuroscience and who do not hallucinate.  Your expertise includes recognizing and categorizing named entities such as anatomical regions, experimental conditions, and cell types.  Your responses strictly adhere to JSON format, ensuring accurate and structured data extraction for downstream applications.\n",
            "llm": _openrouter_llm(),
        },
        {
            "id": "alignment_agent",
            "output_variable": "aligned_structured_terms",
            "role": "Neuroscience Named Entity Recognition (NER) Concept Alignment Agent\n",
            "goal": "Perform concept alignment to the extracted Named Entity Recognition (NER) by extractor_agent {extracted_info} and return structured JSON output.\n",
            "backstory": "You are an AI assistant specialized in processing neuroscience concept alignment with structured models, i.e., ontologies or schemas and who do not hallucinate.  Your expertise includes recognizing and categorizing extracted named entities such as anatomical regions, experimental conditions, and cell types and aligning the recognized named entities such as cell types with corresponding ontological terms.  Your responses strictly adhere to JSON format, ensuring accurate and structured data extraction for downstream applications.\n",
            "llm": _openrouter_llm(),
        },
        {
            "id": "judge_agent",
            "output_variable": "aligned_judged_terms",
            "role": "Neuroscience Named Entity Recognition (NER) Judge Agent\n",
            "goal": "Evaluate the {aligned_structured_terms} based on predefined criteria and generate a structured JSON output reflecting the assessment results.\n",
            "backstory": "You are an AI assistant with expert knowledge in neuroscience and structured models, i.e., ontologies or schemas, and someone who does not hallucinate.   Your task is to evaluate the {aligned_structured_terms} based on the accuracy and quality of the alignment.  Assign the score between 0-1 with 1 being the highest score of your evaluation. Your responses strictly adhere to JSON format, ensuring accurate and structured data extraction for downstream applications.\n",
            "llm": _openrouter_llm(),
        },
    ]
}

# Task definitions, one per agent (linked through "agent_id").
# "description" is the task prompt; {literature}, {extracted_info} and
# {aligned_structured_terms} are placeholders named after the flow inputs.
# "expected_output" shows the model an example of the JSON shape to return.
taskconfig = {
  "tasks": [
    {
        # Step 1: extract named entities from the supplied literature.
        "id": "ner_extraction",
        "description": (
            "From the given literature extract named entities from neuroscience statements. "
            "A named entity is anything that can be referred to with a proper name. "
            "Some common named entities in neuroscience articles are animal species (e.g., mouse, drosophila, zebrafish), "
            "anatomical regions (e.g., neocortex, mushroom body, cerebellum), experimental conditions (e.g., control, tetrodotoxin treatment, Scn1a knockout), "
            "and cell types (e.g., pyramidal neuron, direction-sensitive mechanoreceptor, oligodendrocyte)\n\n"
            "Literature:\n{literature}"
        ),
        "expected_output": (
            "output format: json\n"
            "Example output.\n"
            "\"extracted_terms\": {\n"
            "    \"1\": [\n"
            "        {\n"
            "          \"entity\": \"mouse\",\n"
            "          \"label\": \"ANIMAL_SPECIES\",\n"
            "          \"sentence\": \"These particles were visualized by fluorescent immunohistochemistry using mouse monoclonal anti-human myelin basic protein (MBPh) antibody (clone SMI-99).\",\n"
            "          \"start\": 79,\n"
            "          \"end\": 84,\n"
            "          \"paper_location\": \"methods\",\n"
            "          \"paper_title\": \"Concentration of myelin debris-like myelin basic protein-immunoreactive particles in the distal (anterior)-most part of the myelinated region in the normal rat optic nerve\",\n"
            "          \"doi\": \"10.1101/2025.03.19.643597\"\n"
            "        }\n"
            "    ],\n"
            "    ...\n"
            "}"
        ),
        "agent_id": "extractor_agent"
    },
    {
        # Step 2: align each extracted entity with an ontology/schema concept.
        "id": "ner_alignment",
        "description": (
            "Take the output of extractor_agent {extracted_info} as input and perform the concept alignment using the ontological concepts. "
            "A concept alignment is anything where you align the given entity to the matching concept aka class from a ontology or a schema."
        ),
        "expected_output": (
            "output format: json\n"
            "Example output.\n"
            "\"aligned_ner_terms\": {\n"
            "    \"1\": [\n"
            "        {\n"
            "          \"entity\": \"oligodendrocyte\",\n"
            "          \"label\": \"CELL_TYPE\",\n"
            "          \"ontology_id\": \"CL:0000128\",\n"
            "          \"ontology_label\": \"Oligodendrocyte\",\n"
            "          \"sentence\": \"Individual oligodendrocytes provide, on average, 16 near axons with single myelin segments about 200 µm in length (Butt and Ransom, 1993).\",\n"
            "          \"start\": 14,\n"
            "          \"end\": 29,\n"
            "          \"paper_location\": \"discussion\",\n"
            "          \"paper_title\": \"Concentration of myelin debris-like myelin basic protein-immunoreactive particles in the distal (anterior)-most part of the myelinated region in the normal rat optic nerve\",\n"
            "          \"doi\": \"10.1101/2025.03.19.643597\"\n"
            "        }\n"
            "    ],\n"
            "    ...\n"
            "}"
        ),
        "agent_id": "alignment_agent"
    },
    {
        # Step 3: score every alignment and attach the score as judge_score.
        "id": "ner_judgment",
        "description": (
            "Take the output of alignment agent {aligned_structured_terms} as input and perform the following evaluation: \n"
            "1. Assess the quality and accuracy of the alignment with the ontology or schema in {aligned_structured_terms}.\n"
            "2. Assign a score between 0 and 1 as a judge_score.\n"
            "3. Update the {aligned_structured_terms} adding the judge_score."
        ),
        "expected_output": (
            "output format: json\n"
            "Example output.\n"
            "\"judge_ner_terms\": {\n"
            "    \"1\": [\n"
            "        {\n"
            "          \"entity\": \"oligodendrocyte\",\n"
            "          \"label\": \"CELL_TYPE\",\n"
            "          \"ontology_id\": \"CL:0000128\",\n"
            "          \"ontology_label\": \"Oligodendrocyte\",\n"
            "          \"sentence\": \"Individual oligodendrocytes provide, on average, 16 near axons with single myelin segments about 200 µm in length (Butt and Ransom, 1993).\",\n"
            "          \"start\": 14,\n"
            "          \"end\": 29,\n"
            # The score is shown as a JSON number (like start/end), matching
            # the "score between 0 and 1" asked for in the description.
            "          \"judge_score\": 0.8,\n"
            "          \"paper_location\": \"discussion\",\n"
            "          \"paper_title\": \"Concentration of myelin debris-like myelin basic protein-immunoreactive particles in the distal (anterior)-most part of the myelinated region in the normal rat optic nerve\",\n"
            "          \"doi\": \"10.1101/2025.03.19.643597\"\n"
            "        }\n"
            "    ],\n"
            "    ...\n"
            "}"
        ),
        "agent_id": "judge_agent"
    }
  ]
}

# Execution order of the pipeline. Every step names the agent and task to run
# plus the templated inputs handed to that task; the second and third steps
# additionally declare a knowledge source.
flowconfig = dict(
    flow=[
        dict(
            id="extracted_structured_information",
            agent_key="extractor_agent",
            task_key="ner_extraction",
            inputs=dict(literature="{{source_text}}"),
        ),
        dict(
            id="align_structured_information",
            agent_key="alignment_agent",
            task_key="ner_alignment",
            inputs=dict(extracted_info="{{extracted_info}}"),
            knowledge_source="extracted_info",
        ),
        dict(
            id="judge_alignment",
            agent_key="judge_agent",
            task_key="ner_judgment",
            inputs=dict(aligned_structured_terms="{{aligned_structured_terms}}"),
            knowledge_source="aligned_structured_terms",
        ),
    ]
)

# Keys used when searching the vector database.
search_key = dict(search_key=["entity", "label"])

# Embedding model settings: Ollama provider, its API base URL and model tag.
embedderconfig = dict(
    embedder_config=dict(
        provider="ollama",
        config=dict(
            api_base="http://localhost:11434",
            model="nomic-embed-text:latest",
        ),
    )
)

# Inline example sentence used as the text input for the first run.
sourcestr = (
    "Additionally, mutations in the APOE gene have been linked to "
    "neurodegenerative disorders, impacting astrocytes and microglia function."
)

In [4]:

import nest_asyncio
import asyncio

# Apply the nest_asyncio patch before any kickoff() call is made.
# NOTE(review): presumably required because the Jupyter kernel already runs an
# event loop and kickoff() needs to run one of its own — confirm against
# structsense before removing.
nest_asyncio.apply()

In [8]:
from structsense import kickoff

# Run the configured flow on the inline example sentence.
result = kickoff(
    agentconfig=agentconfig,
    taskconfig=taskconfig,
    embedderconfig=embedderconfig,
    flowconfig=flowconfig,
    input_source=sourcestr,
    knowledgeconfig=search_key,
)

2025-04-03 09:30:16,902 - utils.utils - INFO - Trying paths: ['Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.', '/Users/tekrajchhetri/Documents/brainypedia_codes_design/crew_ner_framework/structsense/example/programmatic_access/Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.', '/Users/tekrajchhetri/Documents/brainypedia_codes_design/crew_ner_framework/structsense/example/programmatic_access/Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.', '/Users/tekrajchhetri/Documents/brainypedia_codes_design/crew_ner_framework/structsense/example/programmatic_access/Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.']
2025-04-03 09:30:16,903 - utils.utils - I

[1m[35m Flow started with ID: 41544e60-d6b4-4322-9ec2-479fb63dc72a[00m
2025-04-03 09:30:16,942 - crewai.flow.flow - INFO - Flow started with ID: 41544e60-d6b4-4322-9ec2-479fb63dc72a


2025-04-03 09:30:16,945 - structsense.app - INFO - Running step: extracted_structured_information
2025-04-03 09:30:17,007 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:30:17,037 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"


[92m09:30:17 - LiteLLM:INFO[0m: utils.py:2896 - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter


[1m[95m# Agent:[00m [1m[92mNeuroscience Named Entity Recognition (NER) Extractor Agent[00m
[95m## Task:[00m [92mFrom the given literature extract named entities from neuroscience statements. A named entity is anything that can be referred to with a proper name. Some common named entities in neuroscience articles are animal species (e.g., mouse, drosophila, zebrafish), anatomical regions (e.g., neocortex, mushroom body, cerebellum), experimental conditions (e.g., control, tetrodotoxin treatment, Scn1a knockout), and cell types (e.g., pyramidal neuron, direction-sensitive mechanoreceptor, oligodendrocyte)

Literature:
Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.[00m
2025-04-03 09:30:17,048 - LiteLLM - INFO - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter
2025-04-03 09:30:17,324 - httpx - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "

[92m09:30:35 - LiteLLM:INFO[0m: utils.py:1084 - Wrapper: Completed Call, calling success_handler


2025-04-03 09:30:35,405 - LiteLLM - INFO - Wrapper: Completed Call, calling success_handler


[1m[95m# Agent:[00m [1m[92mNeuroscience Named Entity Recognition (NER) Extractor Agent[00m
[95m## Final Answer:[00m [92m
```json
{
  "extracted_terms": {
    "1": [
        {
          "entity": "APOE gene",
          "label": "GENE",
          "sentence": "Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.",
          "start": 27,
          "end": 36,
          "paper_location": null,
          "paper_title": null,
          "doi": null
        },
        {
          "entity": "neurodegenerative disorders",
          "label": "DISORDER",
          "sentence": "Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.",
          "start": 51,
          "end": 77,
          "paper_location": null,
          "paper_title": null,

[92m09:30:35 - LiteLLM:INFO[0m: utils.py:2896 - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter


2025-04-03 09:30:35,535 - LiteLLM - INFO - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter
2025-04-03 09:30:35,823 - httpx - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


[92m09:30:49 - LiteLLM:INFO[0m: utils.py:1084 - Wrapper: Completed Call, calling success_handler


2025-04-03 09:30:49,244 - LiteLLM - INFO - Wrapper: Completed Call, calling success_handler
2025-04-03 09:30:49,300 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:30:49,335 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:30:49,359 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:30:49,382 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:30:49,394 - structsense.app - INFO - Running step: align_structured_information
2025-04-03 09:30:49,472 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:30:49,537 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"


[92m09:30:49 - LiteLLM:INFO[0m: utils.py:2896 - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter


[1m[95m# Agent:[00m [1m[92mNeuroscience Named Entity Recognition (NER) Concept Alignment Agent[00m
[95m## Task:[00m [92mTake the output of extractor_agent ```json
{
  "extracted_terms": {
    "1": [
        {
          "entity": "APOE gene",
          "label": "GENE",
          "sentence": "Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.",
          "start": 27,
          "end": 36,
          "paper_location": null,
          "paper_title": null,
          "doi": null
        },
        {
          "entity": "neurodegenerative disorders",
          "label": "DISORDER",
          "sentence": "Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.",
          "start": 51,
          "end": 77,
          "paper_location": null,
          "paper_title": null,
          "doi": null
        },
        {
          "entit

[92m09:31:09 - LiteLLM:INFO[0m: utils.py:1084 - Wrapper: Completed Call, calling success_handler


2025-04-03 09:31:09,397 - LiteLLM - INFO - Wrapper: Completed Call, calling success_handler


[1m[95m# Agent:[00m [1m[92mNeuroscience Named Entity Recognition (NER) Concept Alignment Agent[00m
[95m## Final Answer:[00m [92m
```json
{
  "aligned_ner_terms": {
    "1": [
      {
        "entity": "APOE gene",
        "label": "GENE",
        "ontology_id": "HGNC:613",
        "ontology_label": "APOE",
        "sentence": "Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.",
        "start": 27,
        "end": 36,
        "paper_location": null,
        "paper_title": null,
        "doi": null
      },
      {
        "entity": "neurodegenerative disorders",
        "label": "DISORDER",
        "ontology_id": "MONDO:0021060",
        "ontology_label": "neurodegenerative disease",
        "sentence": "Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting

[92m09:31:09 - LiteLLM:INFO[0m: utils.py:2896 - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter


2025-04-03 09:31:09,520 - LiteLLM - INFO - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter
2025-04-03 09:31:09,779 - httpx - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


[92m09:31:21 - LiteLLM:INFO[0m: utils.py:1084 - Wrapper: Completed Call, calling success_handler


2025-04-03 09:31:21,099 - LiteLLM - INFO - Wrapper: Completed Call, calling success_handler
2025-04-03 09:31:21,157 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:31:21,194 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:31:21,218 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:31:21,240 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:31:21,252 - structsense.app - INFO - Running step: judge_alignment
2025-04-03 09:31:21,428 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:31:21,595 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"


[92m09:31:21 - LiteLLM:INFO[0m: utils.py:2896 - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter


[1m[95m# Agent:[00m [1m[92mNeuroscience Named Entity Recognition (NER) Judge Agent[00m
[95m## Task:[00m [92mTake the output of alignment agent ```json
{
  "aligned_ner_terms": {
    "1": [
      {
        "entity": "APOE gene",
        "label": "GENE",
        "ontology_id": "HGNC:613",
        "ontology_label": "APOE",
        "sentence": "Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.",
        "start": 27,
        "end": 36,
        "paper_location": null,
        "paper_title": null,
        "doi": null
      },
      {
        "entity": "neurodegenerative disorders",
        "label": "DISORDER",
        "ontology_id": "MONDO:0021060",
        "ontology_label": "neurodegenerative disease",
        "sentence": "Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.",
        "start": 51,
        "end": 77,
  

[92m09:31:29 - LiteLLM:INFO[0m: utils.py:1084 - Wrapper: Completed Call, calling success_handler


2025-04-03 09:31:29,457 - LiteLLM - INFO - Wrapper: Completed Call, calling success_handler


[1m[95m# Agent:[00m [1m[92mNeuroscience Named Entity Recognition (NER) Judge Agent[00m
[95m## Final Answer:[00m [92m
```json
{
  "aligned_ner_terms": {
    "1": [
      {
        "entity": "APOE gene",
        "label": "GENE",
        "ontology_id": "HGNC:613",
        "ontology_label": "APOE",
        "sentence": "Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.",
        "start": 27,
        "end": 36,
        "judge_score": 1
      },
      {
        "entity": "neurodegenerative disorders",
        "label": "DISORDER",
        "ontology_id": "MONDO:0021060",
        "ontology_label": "neurodegenerative disease",
        "sentence": "Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.",
        "start": 51,
        "

[92m09:31:29 - LiteLLM:INFO[0m: utils.py:2896 - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter


2025-04-03 09:31:29,551 - LiteLLM - INFO - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter
2025-04-03 09:31:29,774 - httpx - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


[92m09:31:36 - LiteLLM:INFO[0m: utils.py:1084 - Wrapper: Completed Call, calling success_handler


2025-04-03 09:31:36,262 - LiteLLM - INFO - Wrapper: Completed Call, calling success_handler
2025-04-03 09:31:36,314 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:31:36,347 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:31:36,377 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:31:36,397 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"


2025-04-03 09:31:36,415 - structsense.app - INFO - Returning {'output': {'aligned_ner_terms': {'1': [{'entity': 'APOE gene', 'label': 'GENE', 'ontology_id': 'HGNC:613', 'ontology_label': 'APOE', 'sentence': 'Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.', 'start': 27, 'end': 36, 'judge_score': 1}, {'entity': 'neurodegenerative disorders', 'label': 'DISORDER', 'ontology_id': 'MONDO:0021060', 'ontology_label': 'neurodegenerative disease', 'sentence': 'Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.', 'start': 51, 'end': 77, 'judge_score': 1}, {'entity': 'astrocytes', 'label': 'CELL_TYPE', 'ontology_id': 'CL:0000127', 'ontology_label': 'astrocyte', 'sentence': 'Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.', 'start': 89, 'end': 

In [9]:
# Print the value returned by kickoff() for the inline-text run.
print(result)

{'output': {'aligned_ner_terms': {'1': [{'entity': 'APOE gene', 'label': 'GENE', 'ontology_id': 'HGNC:613', 'ontology_label': 'APOE', 'sentence': 'Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.', 'start': 27, 'end': 36, 'judge_score': 1}, {'entity': 'neurodegenerative disorders', 'label': 'DISORDER', 'ontology_id': 'MONDO:0021060', 'ontology_label': 'neurodegenerative disease', 'sentence': 'Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.', 'start': 51, 'end': 77, 'judge_score': 1}, {'entity': 'astrocytes', 'label': 'CELL_TYPE', 'ontology_id': 'CL:0000127', 'ontology_label': 'astrocyte', 'sentence': 'Additionally, mutations in the APOE gene have been linked to neurodegenerative disorders, impacting astrocytes and microglia function.', 'start': 89, 'end': 99, 'judge_score': 1}, {'entity': 'microglia', 'label': 'CELL

## PDF file

In [10]:
# Path of the PDF used as input for the second run.
# NOTE(review): this is a machine-specific absolute path; point it at a PDF
# that exists on your own system before running the cells below.
pdf_str = "/Users/tekrajchhetri/Downloads/data/test.pdf"

In [11]:
from structsense import kickoff

# Run the same flow again, this time with the PDF path as the input source.
result = kickoff(
    agentconfig=agentconfig,
    taskconfig=taskconfig,
    embedderconfig=embedderconfig,
    flowconfig=flowconfig,
    input_source=pdf_str,
    knowledgeconfig=search_key,
)

2025-04-03 09:32:28,840 - utils.utils - INFO - Trying paths: ['/Users/tekrajchhetri/Downloads/data/test.pdf', '/Users/tekrajchhetri/Downloads/data/test.pdf', '/Users/tekrajchhetri/Downloads/data/test.pdf', '/Users/tekrajchhetri/Downloads/data/test.pdf']
2025-04-03 09:32:28,842 - utils.utils - INFO - Using path: /Users/tekrajchhetri/Downloads/data/test.pdf
2025-04-03 09:32:28,843 - utils.utils - INFO - Processing single file: /Users/tekrajchhetri/Downloads/data/test.pdf
2025-04-03 09:32:32,888 - utils.utils - INFO - Successfully extracted 17 sections


[1m[35m Flow started with ID: f62b4bc1-a134-4307-8522-6cdd44d488cf[00m
2025-04-03 09:32:32,924 - crewai.flow.flow - INFO - Flow started with ID: f62b4bc1-a134-4307-8522-6cdd44d488cf


2025-04-03 09:32:32,927 - structsense.app - INFO - Running step: extracted_structured_information
2025-04-03 09:32:33,689 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:32:34,399 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"


[92m09:32:34 - LiteLLM:INFO[0m: utils.py:2896 - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter


[1m[95m# Agent:[00m [1m[92mNeuroscience Named Entity Recognition (NER) Extractor Agent[00m
[95m## Task:[00m [92mFrom the given literature extract named entities from neuroscience statements. A named entity is anything that can be referred to with a proper name. Some common named entities in neuroscience articles are animal species (e.g., mouse, drosophila, zebrafish), anatomical regions (e.g., neocortex, mushroom body, cerebellum), experimental conditions (e.g., control, tetrodotoxin treatment, Scn1a knockout), and cell types (e.g., pyramidal neuron, direction-sensitive mechanoreceptor, oligodendrocyte)

Literature:
{'metadata': {'title': 'Independent Continuous Tracking of Multiple Agents in the Human Hippocampus', 'authors': ['Assia Chericoni', 'Justin M Fine', 'Ana G Chavez', 'Melissa C Franch', 'Elizabeth A Mickiewicz', 'Raissa K Mathura', 'Joshua Adkinson', 'Eleonora Bartoli', 'Joshua Jacobs', 'Nicole R Provenza', 'Andrew J Watrous', 'Seng Bum', 'Michael Yoo', 'Sameer A S

[92m09:33:02 - LiteLLM:INFO[0m: utils.py:1084 - Wrapper: Completed Call, calling success_handler


2025-04-03 09:33:02,123 - LiteLLM - INFO - Wrapper: Completed Call, calling success_handler


[1m[95m# Agent:[00m [1m[92mNeuroscience Named Entity Recognition (NER) Extractor Agent[00m
[95m## Final Answer:[00m [92m
```json
{
  "extracted_terms": {
    "1": [
      {
        "entity": "hippocampus",
        "label": "ANATOMICAL_REGION",
        "sentence": "Among regions associated with navigation, the hippocampus is the most well studied.",
        "start": 42,
        "end": 52,
        "paper_location": "introduction",
        "paper_title": "Independent Continuous Tracking of Multiple Agents in the Human Hippocampus"
      },
      {
        "entity": "place cells",
        "label": "CELL_TYPE",
        "sentence": "The hippocampus contains place cells that track the allocentric position of the self in physical space.",
        "start": 19,
        "end": 30,
        "paper_location": "introduction",
        "paper_title": "Independent Continuous Tracking of Multiple Agents

[92m09:33:02 - LiteLLM:INFO[0m: utils.py:2896 - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter


2025-04-03 09:33:02,304 - LiteLLM - INFO - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter
2025-04-03 09:33:02,631 - httpx - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


[92m09:33:13 - LiteLLM:INFO[0m: utils.py:1084 - Wrapper: Completed Call, calling success_handler


2025-04-03 09:33:13,230 - LiteLLM - INFO - Wrapper: Completed Call, calling success_handler
2025-04-03 09:33:13,286 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:33:13,320 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:33:13,350 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:33:13,378 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:33:13,412 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:33:13,437 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:33:13,462 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:33:13,484 - structsense.app - INFO - Running step: align_structured_information
20

[92m09:33:13 - LiteLLM:INFO[0m: utils.py:2896 - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter


[1m[95m# Agent:[00m [1m[92mNeuroscience Named Entity Recognition (NER) Concept Alignment Agent[00m
[95m## Task:[00m [92mTake the output of extractor_agent ```json
{
  "extracted_terms": {
    "1": [
      {
        "entity": "hippocampus",
        "label": "ANATOMICAL_REGION",
        "sentence": "Among regions associated with navigation, the hippocampus is the most well studied.",
        "start": 42,
        "end": 52,
        "paper_location": "introduction",
        "paper_title": "Independent Continuous Tracking of Multiple Agents in the Human Hippocampus"
      },
      {
        "entity": "place cells",
        "label": "CELL_TYPE",
        "sentence": "The hippocampus contains place cells that track the allocentric position of the self in physical space.",
        "start": 19,
        "end": 30,
        "paper_location": "introduction",
        "paper_title": "Independent Continuous Tracking of Multiple Agents in the Human Hippocampus"
      },
      {
        "entity"

[92m09:33:36 - LiteLLM:INFO[0m: utils.py:1084 - Wrapper: Completed Call, calling success_handler


2025-04-03 09:33:36,455 - LiteLLM - INFO - Wrapper: Completed Call, calling success_handler


[1m[95m# Agent:[00m [1m[92mNeuroscience Named Entity Recognition (NER) Concept Alignment Agent[00m
[95m## Final Answer:[00m [92m
```json
{
  "aligned_ner_terms": {
    "1": [
      {
        "entity": "hippocampus",
        "label": "ANATOMICAL_REGION",
        "ontology_id": "UBERON:0001950",
        "ontology_label": "hippocampus",
        "sentence": "Among regions associated with navigation, the hippocampus is the most well studied.",
        "start": 42,
        "end": 52,
        "paper_location": "introduction",
        "paper_title": "Independent Continuous Tracking of Multiple Agents in the Human Hippocampus"
      },
      {
        "entity": "place cells",
        "label": "CELL_TYPE",
        "ontology_id": "CL:0000613",
        "ontology_label": "place cell",
        "sentence": "The hippocampus contains place cells that track the allocentric position of the self in physi

[92m09:33:36 - LiteLLM:INFO[0m: utils.py:2896 - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter


2025-04-03 09:33:36,634 - LiteLLM - INFO - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter
2025-04-03 09:33:36,976 - httpx - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


[92m09:33:48 - LiteLLM:INFO[0m: utils.py:1084 - Wrapper: Completed Call, calling success_handler


2025-04-03 09:33:48,293 - LiteLLM - INFO - Wrapper: Completed Call, calling success_handler
2025-04-03 09:33:48,352 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:33:48,388 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:33:48,416 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:33:48,448 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:33:48,484 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:33:48,512 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:33:48,535 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:33:48,550 - structsense.app - INFO - Running step: judge_alignment
2025-04-03 09:3

[92m09:33:49 - LiteLLM:INFO[0m: utils.py:2896 - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter


[1m[95m# Agent:[00m [1m[92mNeuroscience Named Entity Recognition (NER) Judge Agent[00m
[95m## Task:[00m [92mTake the output of alignment agent ```json
{
  "aligned_ner_terms": {
    "1": [
      {
        "entity": "hippocampus",
        "label": "ANATOMICAL_REGION",
        "ontology_id": "UBERON:0001950",
        "ontology_label": "hippocampus",
        "sentence": "Among regions associated with navigation, the hippocampus is the most well studied.",
        "start": 42,
        "end": 52,
        "paper_location": "introduction",
        "paper_title": "Independent Continuous Tracking of Multiple Agents in the Human Hippocampus"
      },
      {
        "entity": "place cells",
        "label": "CELL_TYPE",
        "ontology_id": "CL:0000613",
        "ontology_label": "place cell",
        "sentence": "The hippocampus contains place cells that track the allocentric position of the self in physical space.",
        "start": 19,
        "end": 30,
        "paper_location": "

[92m09:34:30 - LiteLLM:INFO[0m: utils.py:1084 - Wrapper: Completed Call, calling success_handler


2025-04-03 09:34:30,623 - LiteLLM - INFO - Wrapper: Completed Call, calling success_handler


[1m[95m# Agent:[00m [1m[92mNeuroscience Named Entity Recognition (NER) Judge Agent[00m
[95m## Final Answer:[00m [92m
```json
{
  "judge_ner_terms": {
    "1": [
      {
        "entity": "hippocampus",
        "label": "ANATOMICAL_REGION",
        "ontology_id": "UBERON:0001950",
        "ontology_label": "hippocampus",
        "sentence": "Among regions associated with navigation, the hippocampus is the most well studied.",
        "start": 42,
        "end": 52,
        "paper_location": "introduction",
        "paper_title": "Independent Continuous Tracking of Multiple Agents in the Human Hippocampus",
        "judge_score": 1.0
      },
      {
        "entity": "place cells",
        "label": "CELL_TYPE",
        "ontology_id": "CL:0000613",
        "ontology_label": "place cell",
        "sentence": "The hippocampus contains place cells that track the allocentric position of the

[92m09:34:30 - LiteLLM:INFO[0m: utils.py:2896 - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter


2025-04-03 09:34:30,801 - LiteLLM - INFO - 
LiteLLM completion() model= openai/gpt-4o-2024-11-20; provider = openrouter
2025-04-03 09:34:31,166 - httpx - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions "HTTP/1.1 200 OK"


[92m09:34:39 - LiteLLM:INFO[0m: utils.py:1084 - Wrapper: Completed Call, calling success_handler


2025-04-03 09:34:39,960 - LiteLLM - INFO - Wrapper: Completed Call, calling success_handler
2025-04-03 09:34:40,014 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:34:40,051 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:34:40,081 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:34:40,107 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:34:40,141 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:34:40,163 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"
2025-04-03 09:34:40,187 - httpx - INFO - HTTP Request: POST http://localhost:11434/api/embeddings "HTTP/1.1 200 OK"


2025-04-03 09:34:40,210 - structsense.app - INFO - Returning {'output': {'judge_ner_terms': {'1': [{'entity': 'hippocampus', 'label': 'ANATOMICAL_REGION', 'ontology_id': 'UBERON:0001950', 'ontology_label': 'hippocampus', 'sentence': 'Among regions associated with navigation, the hippocampus is the most well studied.', 'start': 42, 'end': 52, 'paper_location': 'introduction', 'paper_title': 'Independent Continuous Tracking of Multiple Agents in the Human Hippocampus', 'judge_score': 1.0}, {'entity': 'place cells', 'label': 'CELL_TYPE', 'ontology_id': 'CL:0000613', 'ontology_label': 'place cell', 'sentence': 'The hippocampus contains place cells that track the allocentric position of the self in physical space.', 'start': 19, 'end': 30, 'paper_location': 'introduction', 'paper_title': 'Independent Continuous Tracking of Multiple Agents in the Human Hippocampus', 'judge_score': 1.0}, {'entity': 'posterior hippocampus', 'label': 'ANATOMICAL_REGION', 'ontology_id': 'UBERON:0002791', 'ontolo

In [14]:
# Pretty-print `result` as JSON with a 2-space indent so the nested structure is easy to inspect.
print(json.dumps(obj=result, indent=2))

{
  "output": {
    "judge_ner_terms": {
      "1": [
        {
          "entity": "hippocampus",
          "label": "ANATOMICAL_REGION",
          "ontology_id": "UBERON:0001950",
          "ontology_label": "hippocampus",
          "sentence": "Among regions associated with navigation, the hippocampus is the most well studied.",
          "start": 42,
          "end": 52,
          "paper_location": "introduction",
          "paper_title": "Independent Continuous Tracking of Multiple Agents in the Human Hippocampus",
          "judge_score": 1.0
        },
        {
          "entity": "place cells",
          "label": "CELL_TYPE",
          "ontology_id": "CL:0000613",
          "ontology_label": "place cell",
          "sentence": "The hippocampus contains place cells that track the allocentric position of the self in physical space.",
          "start": 19,
          "end": 30,
          "paper_location": "introduction",
          "paper_title": "Independent Continuous Tracking o