Merged
80 commits
9b33454
clean up legacy prompting synapse
p-ferreira May 16, 2024
5d3e1b9
initial adjustments to protocol + chunks logging
p-ferreira May 16, 2024
b6b7e22
fix chunk processing + mock
p-ferreira May 16, 2024
f3e5466
adjust dendrite response
p-ferreira May 16, 2024
36344f0
refactors base pipeline apply function
p-ferreira May 16, 2024
b269365
adds tokenizer to flow + dendrite refactor
p-ferreira May 17, 2024
7227814
implements streaming reward function
p-ferreira May 17, 2024
dfe2de7
Merge pull request #225 from opentensor/main
p-ferreira May 17, 2024
a47bdf1
adds tokens_per_chunk in forward flow + adds global penalties
p-ferreira May 17, 2024
c5d9d06
Increase default max memory to 80 GB
dbobrenko May 18, 2024
02cd837
Change example run to 1 GPU
dbobrenko May 18, 2024
107a4d6
Fix validator hotkeys IndexError
dbobrenko May 19, 2024
807f162
Revert vllm changes
dbobrenko May 19, 2024
77c913e
Remove redundant validator code
dbobrenko May 19, 2024
8c2cfce
Fix comment style
dbobrenko May 19, 2024
a908392
Restore validator changes
dbobrenko May 19, 2024
6c9eed9
Restore config.py
dbobrenko May 19, 2024
44ec400
Remove clean function
dbobrenko May 19, 2024
07ff31a
Update comments
dbobrenko May 19, 2024
154a5ef
Update miner requirements, increase minimum storage
dbobrenko May 20, 2024
919780d
drops synapse final data out of accumulated chunks
p-ferreira May 20, 2024
28fe4bc
Set maximum GPU mem to 65
dbobrenko May 21, 2024
43862f6
Set maximum GPU mem to 62
dbobrenko May 21, 2024
5337dde
Decrease min memory to 62
dbobrenko May 21, 2024
dcb891a
Merge pull request #236 from dbobrenko/hotfix/vllm-optimize
p-ferreira May 21, 2024
afdf322
Merge pull request #237 from dbobrenko/feature/llama-requirements
p-ferreira May 21, 2024
a737825
Merge branch 'main' into features/stream-adjustments
p-ferreira May 21, 2024
4b862e7
adds unit tests for streaming reward model
p-ferreira May 21, 2024
da4a066
Properly update removed or replaced hotkeys
dbobrenko May 21, 2024
4d292f5
Set scores device
dbobrenko May 21, 2024
aa61f86
Optimize device setting
dbobrenko May 21, 2024
2dd0174
Merge with staging
dbobrenko May 22, 2024
0b8dc10
Revert vllm_llm to staging
dbobrenko May 22, 2024
b4fd266
Update README with 62 GB VRAM
dbobrenko May 22, 2024
6c85189
Change readme description to Llama3
dbobrenko May 22, 2024
9d67963
adjust reward calculation to include global penalties
p-ferreira May 22, 2024
369166d
Revert validator changes
dbobrenko May 22, 2024
03e37bd
Update miner deps
dbobrenko May 23, 2024
ffeb58d
Fix install script
dbobrenko May 23, 2024
c8ef7a4
Edit install.sh comments
dbobrenko May 23, 2024
34b52f8
Simplify setup
dbobrenko May 23, 2024
241437d
Add load_in_4bits flag to miner
dbobrenko May 23, 2024
921e858
Exit on CUDA OOM
dbobrenko May 28, 2024
854b7ad
remove unit test todo comment
p-ferreira May 28, 2024
7bbb698
Merge branch 'main' into staging
p-ferreira May 28, 2024
590a7c5
Merge branch 'staging' into features/stream-adjustments
p-ferreira May 28, 2024
d1b73eb
updates versioning
p-ferreira May 28, 2024
3808f4e
drops deprecated prompting synapse from mock code
p-ferreira May 28, 2024
bf61e97
fix mock pipeline
p-ferreira May 28, 2024
68b9a28
Update README.md
bkb2135 May 29, 2024
0a78c48
Update prompting/forward.py
dbobrenko May 30, 2024
dacfc6a
Address Pedro's comments
dbobrenko May 30, 2024
6840d1f
Bittensor upgrade to 7.0.0
dbobrenko May 31, 2024
e62c99f
Merge with staging
dbobrenko May 31, 2024
425ab9b
Add bit about discord
bkb2135 Jun 3, 2024
86bdbbb
Update README.md
bkb2135 Jun 3, 2024
596e687
Upgrade all requirements for vllm 0.4.2, specify versions
dbobrenko Jun 3, 2024
4306237
Merge pull request #247 from macrocosm-os/main
bkb2135 Jun 3, 2024
ff0de98
fix stream synapse import
p-ferreira Jun 4, 2024
1d3e6bc
fix mock dendrite call
p-ferreira Jun 4, 2024
e88e694
Sample all available uids
bkb2135 Jun 4, 2024
ee72240
Do not exclude uids that were just queried
bkb2135 Jun 4, 2024
db3ef35
Merge pull request #243 from macrocosm-os/hotfix/oom-repeated-tasks
p-ferreira Jun 5, 2024
1be9848
Merge pull request #235 from dbobrenko/hotfix/hotkeys-index-error
p-ferreira Jun 5, 2024
7925dcd
Merge branch 'staging' into features/discourage-base-miners
p-ferreira Jun 5, 2024
51e11dd
Merge branch 'staging' into feature/bittensor-7.0.0
p-ferreira Jun 5, 2024
e77eeef
Merge pull request #244 from macrocosm-os/features/discourage-base-mi…
p-ferreira Jun 5, 2024
dfb1c76
Merge branch 'staging' into features/stream-adjustments
p-ferreira Jun 5, 2024
4e05207
Merge pull request #246 from macrocosm-os/feature/bittensor-7.0.0
p-ferreira Jun 5, 2024
1961f6c
updates versioning
p-ferreira Jun 5, 2024
41d1c67
Merge branch 'staging' into features/stream-adjustments
p-ferreira Jun 5, 2024
54932c7
Update config.py
bkb2135 Jun 5, 2024
ec2b764
Update forward.py
steffencruz Jun 5, 2024
b1cbef3
Merge pull request #251 from macrocosm-os/hotfix/reduce-conversation-…
p-ferreira Jun 5, 2024
2aa77c8
Merge pull request #249 from macrocosm-os/feature/remove-random-uid-s…
p-ferreira Jun 5, 2024
834d70c
Merge pull request #234 from macrocosm-os/features/stream-adjustments
p-ferreira Jun 5, 2024
8d38f12
update bittensor requirements
p-ferreira Jun 5, 2024
794d3f7
Merge pull request #252 from macrocosm-os/hotfix/bittensor-package-up…
p-ferreira Jun 5, 2024
ea0843d
fix tokenizer issue, fix logging issue, adapt mock miner for unit test
p-ferreira Jun 5, 2024
29002ea
Merge pull request #253 from macrocosm-os/hotfix/tokenizer-issue
p-ferreira Jun 5, 2024
16 changes: 10 additions & 6 deletions README.md
@@ -47,9 +47,12 @@ bash install.sh

# Compute Requirements

1. To run a **validator**, you will need at least 24GB of VRAM.
2. To run the default huggingface **miner**, you will need at least 18GB of VRAM.
1. To run a **validator**, you will need at least 62GB of VRAM.
2. To run the default huggingface **miner**, you will need at least 62GB of VRAM.


**It is important to note that the base miners are not recommended for main, and exist purely as examples. Running a base miner on main will result in no emissions and a loss of your registration fee.**
If you have any questions please reach out in the SN1 channel in the Bittensor Discord.
</div>

# How to Run
@@ -77,10 +80,11 @@ For ease of use, you can run the scripts as well with PM2. Installation of PM2 i
sudo apt update && sudo apt install jq && sudo apt install npm && sudo npm install pm2 -g && pm2 update
```

Example of running a SOLAR miner:
Example of running a Llama3 miner:

```bash
pm2 start neurons/miners/huggingface/miner.py --interpreter python3 --name solar_miner -- --netuid 1 --subtensor.network finney --wallet.name my_wallet --wallet.hotkey m1 --neuron.model_id casperhansen/llama-3-70b-instruct-awq --axon.port 21988 --logging.debug
```
pm2 start neurons/miners/huggingface/miner.py --interpreter python3 --name llama3_miner -- --netuid 1 --subtensor.network finney --wallet.name my_wallet --wallet.hotkey m1 --neuron.model_id casperhansen/llama-3-70b-instruct-awq --neuron.load_in_4bit True --axon.port 21988 --logging.debug
```

# Testnet
We highly recommend that you run your miners on testnet before deploying on main. This will give you an opportunity to debug your systems and ensure that you will not lose valuable immunity time. The SN1 testnet is **netuid 61**.
@@ -90,7 +94,7 @@ In order to run on testnet, you will need to go through the same hotkey registra
To run:

```bash
pm2 start neurons/miners/huggingface/miner.py --interpreter python3 --name solar_miner -- --netuid 61 --subtensor.network test --wallet.name my_test_wallet --wallet.hotkey m1 --neuron.model_id casperhansen/llama-3-70b-instruct-awq --axon.port 21988 --logging.debug
pm2 start neurons/miners/huggingface/miner.py --interpreter python3 --name llama3_miner -- --netuid 61 --subtensor.network test --wallet.name my_test_wallet --wallet.hotkey m1 --neuron.model_id casperhansen/llama-3-70b-instruct-awq --neuron.load_in_4bit True --axon.port 21988 --logging.debug
```

# Limitations
12 changes: 6 additions & 6 deletions min_compute.yml
@@ -22,12 +22,12 @@ compute_spec:

gpu:
required: True # Does the application require a GPU?
min_vram: 20 # Minimum GPU VRAM (GB)
recommended_vram: 24 # Recommended GPU VRAM (GB)
min_vram: 62 # Minimum GPU VRAM (GB)
recommended_vram: 80 # Recommended GPU VRAM (GB)
cuda_cores: 1024 # Minimum number of CUDA cores (if applicable)
min_compute_capability: 6.0 # Minimum CUDA compute capability
recommended_compute_capability: 7.0 # Recommended CUDA compute capability
recommended_gpu: "NVIDIA A10" # Recommended GPU to purchase/rent
recommended_gpu: "NVIDIA A100" # Recommended GPU to purchase/rent

memory:
min_ram: 16 # Minimum RAM (GB)
@@ -36,7 +36,7 @@ compute_spec:
ram_type: "DDR4" # RAM type (e.g., DDR4, DDR3, etc.)

storage:
min_space: 24 # Minimum free storage space (GB)
min_space: 60 # Minimum free storage space (GB)
recommended_space: 100 # Recommended free storage space (GB)
type: "SSD" # Preferred storage type (e.g., SSD, HDD)
min_iops: 1000 # Minimum I/O operations per second (if applicable)
@@ -57,7 +57,7 @@ compute_spec:

gpu:
required: True # Does the application require a GPU?
min_vram: 80 # Minimum GPU VRAM (GB)
min_vram: 62 # Minimum GPU VRAM (GB)
recommended_vram: 80 # Recommended GPU VRAM (GB)
cuda_cores: 1024 # Minimum number of CUDA cores (if applicable)
min_compute_capability: 6.0 # Minimum CUDA compute capability
@@ -71,7 +71,7 @@ compute_spec:
ram_type: "DDR4" # RAM type (e.g., DDR4, DDR3, etc.)

storage:
min_space: 40 # Minimum free storage space (GB)
min_space: 60 # Minimum free storage space (GB)
recommended_space: 100 # Recommended free storage space (GB)
type: "SSD" # Preferred storage type (e.g., SSD, HDD)
min_iops: 1000 # Minimum I/O operations per second (if applicable)
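The min_compute.yml changes raise the VRAM floor to 62 GB and the storage floor to 60 GB for both roles. As a hedged illustration only (the constant and function names below are hypothetical and do not exist in the repository), a node could self-check against these floors before registering:

```python
# Hypothetical self-check against the floors set in min_compute.yml above.
# MIN_VRAM_GB / MIN_STORAGE_GB mirror the values in this diff; the function
# name is illustrative and not part of the repo.
MIN_VRAM_GB = 62
MIN_STORAGE_GB = 60


def check_compute(vram_gb: float, free_storage_gb: float) -> list:
    """Return human-readable failures; an empty list means the node qualifies."""
    failures = []
    if vram_gb < MIN_VRAM_GB:
        failures.append(f"GPU VRAM {vram_gb} GB < minimum {MIN_VRAM_GB} GB")
    if free_storage_gb < MIN_STORAGE_GB:
        failures.append(f"free storage {free_storage_gb} GB < minimum {MIN_STORAGE_GB} GB")
    return failures


print(check_compute(80, 100))  # [] -- an A100 80 GB node with 100 GB free passes
print(check_compute(24, 100))  # one failure: 24 GB VRAM is below the new floor
```

This is why the README diff above bumps both the validator and miner requirements from 24/18 GB to 62 GB: the Llama3 70B AWQ model no longer fits the old minimums.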
2 changes: 1 addition & 1 deletion prompting/__init__.py
@@ -16,7 +16,7 @@
# DEALINGS IN THE SOFTWARE.

# Define the version of the template module.
__version__ = "2.3.1"
__version__ = "2.4.0"
version_split = __version__.split(".")
__spec_version__ = (
(10000 * int(version_split[0]))
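The version bump above also changes the derived `__spec_version__`. Only the first term of the packing, `10000 * int(version_split[0])`, is visible in this diff; assuming the conventional `10000*major + 100*minor + patch` scheme (an assumption, since the remaining terms are truncated), the computation is:

```python
# Sketch of the spec-version packing, assuming the common
# 10000*major + 100*minor + patch scheme. Only the first term is
# visible in the diff; the rest is an assumption.
def spec_version(version: str) -> int:
    major, minor, patch = (int(part) for part in version.split("."))
    return 10000 * major + 100 * minor + patch


print(spec_version("2.4.0"))  # 20400
```

Under that scheme, the bump from "2.3.1" to "2.4.0" moves the spec version from 20301 to 20400.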
63 changes: 41 additions & 22 deletions prompting/dendrite.py
@@ -1,45 +1,59 @@
import torch
import bittensor as bt
from typing import List
from dataclasses import dataclass
from prompting.protocol import StreamPromptingSynapse
from prompting.utils.misc import serialize_exception_to_string


@dataclass
class SynapseStreamResult:
exception: BaseException = None
uid: int = None
accumulated_chunks: List[str] = None
accumulated_chunks_timings: List[float] = None
tokens_per_chunk: List[int] = None
synapse: StreamPromptingSynapse = None


class DendriteResponseEvent:
def __init__(
self, responses: List[bt.Synapse], uids: torch.LongTensor, timeout: float
self, stream_results: List[SynapseStreamResult], uids: torch.LongTensor, timeout: float
):
self.uids = uids
self.completions = []
self.status_messages = []
self.status_codes = []
self.timings = []
self.stream_results_uids = []
self.stream_results_exceptions = []
self.stream_results_all_chunks = []
self.stream_results_all_chunks_timings = []
self.stream_results_all_tokens_per_chunk = []

for stream_result in stream_results:
synapse = stream_result.synapse

for synapse in responses:
self.completions.append(synapse.completion)
self.status_messages.append(synapse.dendrite.status_message)
status_code = synapse.dendrite.status_code

if len(synapse.completion) == 0 and synapse.dendrite.status_code == 200:
synapse.dendrite.status_code = 204
if len(synapse.completion) == 0 and status_code == 200:
status_code = 204

self.status_codes.append(synapse.dendrite.status_code)

if (synapse.dendrite.process_time) and (
synapse.dendrite.status_code == 200
or synapse.dendrite.status_code == 204
):
self.timings.append(synapse.dendrite.process_time)
elif synapse.dendrite.status_code == 408:
self.status_codes.append(status_code)
process_time = synapse.dendrite.process_time or 0
if status_code == 200 or status_code == 204:
self.timings.append(process_time)
elif status_code == 408:
self.timings.append(timeout)
else:
self.timings.append(0) # situation where miner is not alive
self.timings.append(0)

self.completions = [synapse.completion for synapse in responses]
self.timings = [
synapse.dendrite.process_time or timeout for synapse in responses
]
self.status_messages = [
synapse.dendrite.status_message for synapse in responses
]
self.status_codes = [synapse.dendrite.status_code for synapse in responses]
self.stream_results_uids.append(stream_result.uid)
self.stream_results_exceptions.append(serialize_exception_to_string(stream_result.exception))
self.stream_results_all_chunks.append(stream_result.accumulated_chunks)
self.stream_results_all_chunks_timings.append(stream_result.accumulated_chunks_timings)
self.stream_results_all_tokens_per_chunk.append(stream_result.tokens_per_chunk)

def __state_dict__(self):
return {
@@ -48,6 +62,11 @@ def __state_dict__(self):
"timings": self.timings,
"status_messages": self.status_messages,
"status_codes": self.status_codes,
"stream_results_uids": self.stream_results_uids,
"stream_results_exceptions": self.stream_results_exceptions,
"stream_results_all_chunks": self.stream_results_all_chunks,
"stream_results_all_chunks_timings": self.stream_results_all_chunks_timings,
"stream_results_all_tokens_per_chunk": self.stream_results_all_tokens_per_chunk,
}

def __repr__(self):
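The dendrite.py refactor folds the status and timing bookkeeping into a single per-stream loop instead of the old trailing list comprehensions. A minimal, dependency-free sketch of that status/timing logic (the stub classes stand in for `bittensor` objects and are illustrative only):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class StubDendrite:
    """Stand-in for bt.Dendrite response metadata (illustrative only)."""
    status_code: int
    status_message: str = ""
    process_time: Optional[float] = None


@dataclass
class StubSynapse:
    """Stand-in for a StreamPromptingSynapse carrying a completion."""
    completion: str
    dendrite: StubDendrite


def resolve_status_and_timing(synapse: StubSynapse, timeout: float):
    """Mirrors the per-response logic in DendriteResponseEvent.__init__."""
    status_code = synapse.dendrite.status_code
    # An empty completion with HTTP 200 is downgraded to 204 (No Content).
    if len(synapse.completion) == 0 and status_code == 200:
        status_code = 204
    process_time = synapse.dendrite.process_time or 0
    if status_code in (200, 204):
        timing = process_time
    elif status_code == 408:
        timing = timeout  # timed out: charge the full timeout budget
    else:
        timing = 0  # miner unreachable or errored
    return status_code, timing


print(resolve_status_and_timing(StubSynapse("", StubDendrite(200, process_time=1.5)), 10.0))  # (204, 1.5)
```

Note how the refactor reads `status_code` once into a local and mutates that, rather than writing the downgraded 204 back onto `synapse.dendrite.status_code` as the removed code did.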