Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions prompting/base/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ def run(self):
self.loop.run_until_complete(
asyncio.wait_for(task, timeout=forward_timeout)
)
except torch.cuda.OutOfMemoryError as e:
bt.logging.error(f"Out of memory error: {e}")
continue
except MaxRetryError as e:
bt.logging.error(f"MaxRetryError: {e}")
continue
Expand Down
29 changes: 13 additions & 16 deletions prompting/forward.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,30 @@
# DEALINGS IN
# THE SOFTWARE.

import asyncio
import random
import sys
import time
import random
import asyncio
import traceback
from dataclasses import dataclass
from typing import Awaitable, Dict, List

import bittensor as bt
import numpy as np
import torch

import bittensor as bt
from typing import List, Dict, Awaitable
from prompting.agent import HumanAgent
from prompting.conversation import create_task
from prompting.dendrite import DendriteResponseEvent
from prompting.conversation import create_task
from prompting.protocol import StreamPromptingSynapse
from prompting.rewards import RewardResult
from prompting.tasks import QuestionAnsweringTask
from prompting.utils.uids import get_random_uids
from prompting.utils.logging import log_event
from prompting.utils.misc import async_log, serialize_exception_to_string
from prompting.utils.uids import get_random_uids
from dataclasses import dataclass

@async_log
async def generate_reference(agent):
    """Generate the reference for ``agent``'s task off the event loop.

    Calls ``agent.task.generate_reference(agent.llm_pipeline)`` through the
    running loop's ``run_in_executor`` and returns whatever that call yields.

    Args:
        agent: Object exposing ``task.generate_reference`` and
            ``llm_pipeline``.

    Returns:
        The value returned by ``agent.task.generate_reference``.
    """
    running_loop = asyncio.get_running_loop()
    return await running_loop.run_in_executor(
        None,  # None selects the loop's default executor
        agent.task.generate_reference,
        agent.llm_pipeline,
    )

@async_log
async def execute_dendrite_call(dendrite_call):
Expand Down Expand Up @@ -165,6 +167,7 @@ def log_stream_results(stream_results: List[StreamResult]):
f"Failed response for uid {failed_response.uid}: {formatted_exception}"
)


async def run_step(
self, agent: HumanAgent, roles: List[str], messages: List[str], k: int, timeout: float, exclude: list = None
):
Expand Down Expand Up @@ -271,8 +274,6 @@ async def forward(self):
"""
Encapsulates a full conversation between the validator and miners. Contains one or more rounds of request-response.

Raises:
torch.cuda.OutOfMemoryError: CUDA out of memory error.
"""
bt.logging.info("🚀 Starting forward loop...")
forward_start_time = time.time()
Expand Down Expand Up @@ -353,10 +354,6 @@ async def forward(self):
messages.append(agent.challenge)
turn += 1

except torch.cuda.OutOfMemoryError as err:
bt.logging.error("CUDA out of memory", str(err))
raise err

except BaseException as e:
unexpected_errors = serialize_exception_to_string(e)
bt.logging.error(
Expand Down
22 changes: 14 additions & 8 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,28 +1,34 @@
angle_emb
bittensor==6.10.1
bs4
angle_emb==0.4.4
bittensor==7.0.0
bs4==0.0.2
click==8.1.3
datasets==2.14.6
deprecation==2.1.0
torch==2.1.1
torch==2.3.0
torchmetrics
pre-commit==3.3.2
git+https://github.com/synapse-alpha/mathgenerator.git@main#egg=mathgenerator
numpy==1.22.0
rouge
rouge==1.0.1
scipy==1.10.1
sentencepiece
wandb==0.15.10
tenacity
antlr4-python3-runtime==4.11
wikipedia
wikipedia==1.4.0
openai==1.9.0
langchain==0.1.5
langchain==0.2.1
langchain_core==0.2.3
langchainhub==0.1.14
python-dotenv==1.0.1
wikipedia_sections==2.0.0
vllm==0.4.3
loguru==0.7.2
argostranslate==1.9.6
python-dotenv
wikipedia_sections
vllm
loguru
argostranslate
transformers==4.41.1
transformers==4.41.2
autoawq==0.2.5