Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
a82a822
Enhance invoke method to return reasoning traces alongside tool history
bkb2135 Sep 2, 2025
d359051
Bump project version to 3.0.5
bkb2135 Sep 2, 2025
336b09c
Add reasoning_traces list to Pipeline class for enhanced data handling
bkb2135 Sep 2, 2025
e488cee
Fix typo in LoggerWandb class: change "reasoning_traces" to "reasoning_trace"
bkb2135 Sep 2, 2025
0b65290
Update LoggerWandb class to log reasoning traces directly instead of …
bkb2135 Sep 2, 2025
0d41e31
Initial Commit
bkb2135 Sep 3, 2025
639f83d
Research Report
bkb2135 Sep 3, 2025
9d2d52e
Refactor deep_research_langchain.py to improve code readability by fo…
bkb2135 Sep 3, 2025
be994cb
Refactor deep_research_langchain.py and update tests to enhance clari…
bkb2135 Sep 3, 2025
8026063
Add PythonREPL support to DeepResearchLangchain for executing Python …
bkb2135 Sep 3, 2025
e7ac5bc
Enhance DeepResearchLangchain by adding max_tokens parameters for var…
bkb2135 Sep 3, 2025
2ab4a93
Suppress failed web search
dbobrenko Sep 4, 2025
376ad71
Lower exception level
dbobrenko Sep 4, 2025
d263a60
Add retries, debug logs
dbobrenko Sep 5, 2025
2b4562b
Lower warning to debug
dbobrenko Sep 5, 2025
807698e
Merge pull request #813 from macrocosm-os/fix/suppress-web-fail
dbobrenko Sep 5, 2025
21f51f0
Merge pull request #812 from macrocosm-os/features/initial-code-execu…
bkb2135 Sep 5, 2025
033b63e
Align early stop prompt with final
dbobrenko Sep 5, 2025
3356cae
Merge pull request #814 from macrocosm-os/feature/align-final-prompts
dbobrenko Sep 5, 2025
c1d29ee
Remove redundant stdouts
dbobrenko Sep 5, 2025
a8ce12a
Merge pull request #815 from macrocosm-os/fix/suppress-code-exec
dbobrenko Sep 5, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apex/services/deep_research/deep_research_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@
class DeepResearchBase(LLMBase):
async def invoke(
self, messages: list[dict[str, str]], body: dict[str, Any] | None = None
) -> tuple[str, list[dict[str, str]]]:
) -> tuple[str, list[dict[str, str]], list[dict[str, Any]]]:
raise NotImplementedError
391 changes: 340 additions & 51 deletions apex/services/deep_research/deep_research_langchain.py

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions apex/services/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def __init__(self, base_url: str, model: str, key: str):

async def invoke(
self, messages: list[dict[str, str]], body: dict[str, Any] | None = None
) -> tuple[str, list[dict[str, str]]]:
) -> tuple[str, list[dict[str, str]], list[dict[str, Any]]]:
headers = {
"Authorization": "Bearer " + self._key,
"Content-Type": "application/json",
Expand All @@ -35,7 +35,8 @@ async def invoke(

data = await response.json()
content = data.get("choices", [{}])[0].get("message", {}).get("content", "")
return str(content), []
# This base LLM does not build multi-step chains; return empty reasoning_traces
return str(content), [], []

def __str__(self) -> str:
return f"{self.__class__.__name__}({self._base_url}, {self._model})"
2 changes: 1 addition & 1 deletion apex/services/llm/llm_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
class LLMBase:
async def invoke(
self, messages: list[dict[str, str]], body: dict[str, Any] | None = None
) -> tuple[str, list[dict[str, str]]]:
) -> tuple[str, list[dict[str, str]], list[dict[str, Any]]]:
raise NotImplementedError
12 changes: 8 additions & 4 deletions apex/validator/generate_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@
async def generate_query(llm: LLMBase, websearch: WebSearchBase) -> str:
random_words = " ".join(random.sample(get_english_words(), 3))
# Perform a lightweight search and pick a single result as context.
search_results = await websearch.search(random_words, max_results=5)
search_website = random.choice(search_results)
search_content = search_website.content
try:
search_results = await websearch.search(random_words, max_results=5)
search_website = random.choice(search_results)
search_content = search_website.content
except Exception as exc:
logger.debug(f"Error during web search: {exc}")
search_content = ""
query = QUERY_PROMPT_TEMPLATE.format(context=search_content)
query_response, _ = await llm.invoke([{"role": "user", "content": query}])
query_response, _, _ = await llm.invoke([{"role": "user", "content": query}])
logger.debug(f"Generated query.\nPrompt: '{query}'\nResponse: '{query_response}'")
return query_response
16 changes: 8 additions & 8 deletions apex/validator/generate_reference.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from typing import Any

from loguru import logger

from apex.services.deep_research.deep_research_base import DeepResearchBase


async def generate_reference(llm: DeepResearchBase, query: str) -> tuple[str, list[dict[str, str]]]:
async def generate_reference(
llm: DeepResearchBase, query: str
) -> tuple[str, list[dict[str, str]], list[dict[str, Any]]]:
"""Generate a reference response for the given prompt.

Args:
Expand All @@ -22,13 +26,9 @@ async def generate_reference(llm: DeepResearchBase, query: str) -> tuple[str, li
}
user_message: dict[str, str] = {
"role": "user",
"content": (
f"Research Question: {query}\n\n"
"Please think through the answer carefully, annotate each step with citations like [1], [2], etc., "
'and conclude with a "References:" list mapping each [n] to its source URL or title.'
),
"content": query,
}

response, tool_history = await llm.invoke([system_message, user_message])
response, tool_history, reasoning_traces = await llm.invoke([system_message, user_message])
logger.debug(f"Generated reference.\nPrompt: '{user_message}'\nResponse: '{response}'")
return response, tool_history
return response, tool_history, reasoning_traces
2 changes: 2 additions & 0 deletions apex/validator/logger_wandb.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,15 @@ async def log(
reference: str | None = None,
discriminator_results: MinerDiscriminatorResults | None = None,
tool_history: list[dict[str, str]] | None = None,
reasoning_traces: list[dict[str, Any]] | None = None,
) -> None:
"""Log an event to wandb."""
if self.run:
if discriminator_results:
processed_event = self.process_event(discriminator_results.model_dump())
processed_event["reference"] = reference
processed_event["tool_history"] = tool_history
processed_event["reasoning_trace"] = reasoning_traces
self.run.log(processed_event)

def process_event(self, event: Mapping[str, Any]) -> dict[str, Any]:
Expand Down
14 changes: 11 additions & 3 deletions apex/validator/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,15 @@ async def run_single(self, task: QueryTask) -> str:

reference = None
tool_history: list[dict[str, str]] = []
reasoning_traces: list[dict[str, Any]] = []
if random.random() < self.reference_rate:
try:
generator_results = None
ground_truth = 0
logger.debug(f"Generating task reference for query: {query[:20]}..")
reference, tool_history = await generate_reference(llm=self.deep_research, query=query)
reference, tool_history, reasoning_traces = await generate_reference(
llm=self.deep_research, query=query
)
except BaseException as exc:
logger.exception(f"Failed to generate reference: {exc}")

Expand All @@ -100,7 +103,9 @@ async def run_single(self, task: QueryTask) -> str:
if random.random() < self.redundancy_rate:
try:
logger.debug(f"Generating redundant task reference for query: {query[:20]}..")
reference, tool_history = await generate_reference(llm=self.deep_research, query=query)
reference, tool_history, reasoning_traces = await generate_reference(
llm=self.deep_research, query=query
)
except BaseException as exc:
logger.warning(f"Failed to generate redundant reference: {exc}")

Expand All @@ -111,7 +116,10 @@ async def run_single(self, task: QueryTask) -> str:

if self.logger_wandb:
await self.logger_wandb.log(
reference=reference, discriminator_results=discriminator_results, tool_history=tool_history
reference=reference,
discriminator_results=discriminator_results,
tool_history=tool_history,
reasoning_traces=reasoning_traces,
)

if self._debug:
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "apex"
version = "3.0.4"
version = "3.0.5"
description = "Bittensor Subnet 1: Apex"
readme = "README.md"
requires-python = "~=3.11"
Expand Down Expand Up @@ -36,6 +36,7 @@ dependencies = [
"pytest-mock>=3.14.1",
"wandb>=0.21.1",
"ruff>=0.12.5",
"langchain-experimental>=0.3.4",
]


Expand Down
Loading