In [1]:
import bittensor as bt
import pydantic
from starlette.types import Send
from starlette.responses import Response, StreamingResponse
from functools import partial
from typing import Callable, Awaitable, List, Tuple
import asyncio
from transformers import GPT2Tokenizer

# Presumably switches bittensor logging to debug level so that the
# `bt.logging.debug(...)` messages emitted while streaming are displayed
# (the captured cell output below shows DEBUG records) — confirm against the
# bittensor logging documentation.
bt.debug()


# This is a subclass of StreamingSynapse for prompting network functionality
class StreamPrompting(bt.StreamingSynapse):
    """
    StreamPrompting is a subclass of StreamingSynapse that is specifically designed for prompting network functionality.
    It provides concrete implementations for processing streaming responses, deserializing the response,
    and extracting JSON data from the response headers.

    Attributes:
        roles: List of roles associated with the prompt.
        messages: List of messages to be processed.
        completion: A string that accumulates the streamed completion result.
    """

    roles: List[str] = pydantic.Field(
        ...,
        title="Roles",
        description="A list of roles in the Prompting scenario. Immutable.",
        allow_mutation=False,
    )

    messages: List[str] = pydantic.Field(
        ...,
        title="Messages",
        description="A list of messages in the Prompting scenario. Immutable.",
        allow_mutation=False,
    )

    completion: str = pydantic.Field(
        "",
        title="Completion",
        description="Completion status of the current Prompting object. This attribute is mutable and can be updated.",
    )

    async def process_streaming_response(self, response):
        """
        Asynchronously consumes the chunks of a streaming response, decodes them from utf-8 and appends
        the decoded text to the `completion` attribute in arrival order.

        Decoding is done incrementally: the boundaries of the chunks yielded by the transport are
        arbitrary, so a multi-byte utf-8 character may be split across two chunks. Decoding each chunk
        independently would raise on such a split; the incremental decoder buffers the partial bytes
        until the rest of the character arrives.

        Newline characters in the stream are dropped, i.e. the completion is the concatenation of the
        non-empty newline-separated pieces of the decoded text.

        Args:
            response: The response object from which the streamed content is fetched. It must expose
                    `response.content.iter_any()` as an asynchronous iterator of `bytes` chunks.

        Raises:
            UnicodeDecodeError: (a `ValueError` subclass) if the stream contains bytes that are not valid
                    utf-8, or ends in the middle of a multi-byte character.

        Note:
            This method is designed for utf-8 encoded strings. If the streamed content has a different encoding,
            it may need to be adjusted accordingly.
        """
        # Function-scope import keeps this block self-contained; `codecs` is standard library.
        import codecs

        if self.completion is None:
            self.completion = ""

        decoder = codecs.getincrementaldecoder("utf-8")()
        async for chunk in response.content.iter_any():
            # May return fewer characters than the chunk holds if it ends mid-character;
            # the remainder is emitted together with the next chunk.
            text = decoder.decode(chunk)
            if text:
                # Removing "\n" is equivalent to concatenating the non-empty pieces of split("\n").
                self.completion += text.replace("\n", "")

        # Flush the decoder: raises if the stream ended with an incomplete character.
        tail = decoder.decode(b"", final=True)
        if tail:
            self.completion += tail.replace("\n", "")

    def deserialize(self):
        """
        Deserializes the response by returning the completion attribute.

        Returns:
            str: The completion result.
        """
        return self.completion

    def extract_response_json(self, response):
        """
        Extracts components of the response object's headers, together with this synapse's own fields,
        into a plain dictionary.

        The raw headers are read from the response's `_raw_headers` attribute and decoded from utf-8.
        Headers whose names start with `bt_header_dendrite` / `bt_header_axon` are grouped under the
        `dendrite` / `axon` keys; elsewhere in this notebook the dendrite is the client-side object and
        the axon is the server that listens for requests.

        Args:
            response: The response object, typically an instance of an HTTP response, whose
                    `_raw_headers` attribute holds an iterable of `(name, value)` byte pairs.

        Returns:
            dict: A dictionary with the keys `name`, `timeout` (float, 0 if the header is absent),
                `total_size` and `header_size` (int, 0 if absent), `dendrite`, `axon`, and the
                synapse's current `roles`, `messages` and `completion`.

        Raises:
            KeyError: If the response object has no `_raw_headers` attribute.
            ValueError: If the `timeout`, `total_size` or `header_size` header is not numeric.

        Note:
            This method assumes a certain structure and naming convention for the headers. If the server
            changes its header naming convention or structure, this method may need adjustments.
        """
        headers = {k.decode('utf-8'): v.decode('utf-8') for k, v in response.__dict__["_raw_headers"]}

        def extract_info(prefix):
            # Keyed by the LAST underscore-separated segment of the header name.
            # NOTE(review): a header whose suffix itself contains an underscore (a hypothetical
            # `<prefix>_status_code`) would be keyed as just `code` — confirm this is intended.
            return {key.split('_')[-1]: value for key, value in headers.items() if key.startswith(prefix)}

        return {
            "name": headers.get('name', ''),
            "timeout": float(headers.get('timeout', 0)),
            "total_size": int(headers.get('total_size', 0)),
            "header_size": int(headers.get('header_size', 0)),
            "dendrite": extract_info('bt_header_dendrite'),
            "axon": extract_info('bt_header_axon'),
            "roles": self.roles,
            "messages": self.messages,
            "completion": self.completion,
        }

# This should encapsulate all the logic for generating a streaming response
def prompt(synapse: StreamPrompting) -> StreamPrompting:
    """
    Generates a streaming response for the provided synapse.

    This function serves as the main entry point for handling streaming prompts. It takes
    the first message of the incoming synapse, tokenizes it with the GPT-2 tokenizer, and uses a
    simulated "model" (which simply decodes each token id back to text) to stream the message
    back to the client a few tokens at a time.

    Args:
        synapse (StreamPrompting): The incoming StreamPrompting instance containing the messages to be
                        processed. Only `synapse.messages[0]` is used.

    Returns:
        The object returned by `synapse.create_streaming_response(...)`, which is handed back to the
        caller to stream the response to the client. (The signature annotates this as
        `StreamPrompting`; the annotation is kept unchanged for compatibility.)

    Raises:
        IndexError: If `synapse.messages` is empty.

    Usage:
        This function can be extended and customized based on specific requirements of the
        miner. Developers can swap out the tokenizer, model, or adjust how streaming responses
        are generated to suit their specific applications.
    """
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    batch_size = 4  # Number of tokens to send back to the client at a time

    # Simulated function to decode token IDs into strings. In a real-world scenario,
    # this can be replaced with an actual model inference step.
    def model(ids):
        return (tokenizer.decode(token_id) for token_id in ids)

    async def _send_tokens(send: Send, tokens, more_body: bool):
        """Join `tokens`, send them as one utf-8 encoded body message, and log what was sent."""
        joined_buffer = "".join(tokens)
        await send(
            {
                "type": "http.response.body",
                "body": joined_buffer.encode("utf-8"),
                "more_body": more_body,
            }
        )
        bt.logging.debug(f"Streamed tokens: {joined_buffer}")

    async def _prompt(text: str, send: Send):
        """
        Asynchronously processes the input text and sends back tokens as a streaming response.

        The text is tokenized with the GPT-2 tokenizer, each token id is decoded back to a string by
        the simulated model, and the strings are sent to the client in batches of `batch_size`
        tokens, with a short delay before each full batch to make the streaming visible.

        Args:
            text (str): The input text message to be processed.
            send (Send): An asynchronous function that allows sending back the streaming response.

        Usage:
            This function can be adjusted based on the streaming requirements, speed of
            response, or the model being used. Developers can also introduce more sophisticated
            processing steps or modify how tokens are sent back to the client.
        """
        # Take row 0 of the (1, n) batch instead of squeeze(): squeeze() collapses a
        # single-token input to a 0-d tensor, which cannot be iterated.
        input_ids = tokenizer(text, return_tensors="pt").input_ids[0]
        buffer = []
        for token in model(input_ids):
            buffer.append(token)
            # If buffer has `batch_size` tokens, send them back to the client.
            if len(buffer) == batch_size:
                await asyncio.sleep(0.3)  # Sleep just to show streaming effect
                await _send_tokens(send, buffer, more_body=True)
                buffer = []  # Start a fresh buffer for the next batch of tokens

        # Send any remaining tokens in the buffer.
        # NOTE(review): only this remainder message carries more_body=False; when the token
        # count is an exact multiple of `batch_size` (or zero) this function emits no
        # terminating message. Presumably the streaming response created below finalizes
        # the stream itself — confirm against the StreamingSynapse implementation.
        if buffer:
            await _send_tokens(send, buffer, more_body=False)

    message = synapse.messages[0]
    token_streamer = partial(_prompt, message)
    return synapse.create_streaming_response(token_streamer)

def blacklist(synapse: StreamPrompting) -> Tuple[bool, str]:
    """
    Decide whether the incoming synapse must be rejected.

    This implementation never rejects anything; the argument is not inspected.

    Args:
        synapse: A StreamPrompting instance.

    Returns:
        Tuple[bool, str]: A `(blacklisted, reason)` pair, always `(False, "")`.
    """
    is_blacklisted = False
    reason = ""
    return is_blacklisted, reason

def priority(synapse: StreamPrompting) -> float:
    """
    Compute the priority assigned to the incoming synapse.

    Every synapse gets the same value; the argument is not inspected.

    Args:
        synapse: A StreamPrompting instance.

    Returns:
        float: Always 0.0, the default priority.
    """
    default_priority = 0.0
    return default_priority


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Create an Axon instance (the server side of this demo) on port 8099.
axon = bt.axon(port=8099)

# Attach the handlers defined in the previous cell to the Axon.
# forward_fn: `prompt`, which builds the streaming response for a request.
# blacklist_fn: `blacklist`, which decides whether a request is rejected (never, here).
# priority_fn: `priority`, which returns the priority of a request (always 0.0, here).
axon.attach(
    forward_fn=prompt,
    blacklist_fn=blacklist,
    priority_fn=priority
)

# Start the Axon to begin listening for requests.
axon.start()

# Create a Dendrite instance to handle client-side communication.
d = bt.dendrite()
# Bare expression as the last line of the cell: the notebook displays its repr.
d

dendrite(5C86aJ2uQawR6P6veaJQXNK9HaWh6NMbUhTiLs65kq4ZW3NH)

In [3]:
# Send a request to the Axon using the Dendrite, passing in a StreamPrompting instance with roles and messages.
# The response is awaited, as the Dendrite communicates asynchronously with the Axon.
resp = await d(
    [axon],
    StreamPrompting(roles=["user"], messages=["hello this is a test of a streaming response. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."])
)

# The response object contains the result of the streaming operation.
resp

[34m2023-09-28 20:17:08.264[0m | [34m[1m     DEBUG      [0m | dendrite | --> | 4377 B | StreamPrompting | 5C86aJ2uQawR6P6veaJQXNK9HaWh6NMbUhTiLs65kq4ZW3NH | 149.137.225.62:8099 | 0 | Success


[34m2023-09-28 20:17:08.470[0m | [34m[1m     DEBUG      [0m | axon     | <-- | 1334 B | StreamPrompting | 5C86aJ2uQawR6P6veaJQXNK9HaWh6NMbUhTiLs65kq4ZW3NH | 127.0.0.1:36262 | 200 | Success 
[34m2023-09-28 20:17:08.670[0m | [34m[1m     DEBUG      [0m | axon     | --> | -1 B | StreamPrompting | 5C86aJ2uQawR6P6veaJQXNK9HaWh6NMbUhTiLs65kq4ZW3NH | 127.0.0.1:36262  | 200 | Success
[34m2023-09-28 20:17:09.109[0m | [34m[1m     DEBUG      [0m | Streamed tokens: hello this is a
[34m2023-09-28 20:17:09.611[0m | [34m[1m     DEBUG      [0m | Streamed tokens:  test of a streaming
[34m2023-09-28 20:17:10.114[0m | [34m[1m     DEBUG      [0m | Streamed tokens:  response. Lorem
[34m2023-09-28 20:17:10.614[0m | [34m[1m     DEBUG      [0m | Streamed tokens:  ipsum d     
[34m2023-09-28 20:17:11.115[0m | [34m[1m     DEBUG      [0m | Streamed tokens: olor sit amet
[34m2023-09-28 20:17:11.616[0m | [34m[1m     DEBUG      [0m | Streamed tokens: , consectet  
[34m2023-09

['hello this is a test of a streaming response. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis a']

[34m2023-09-28 20:17:20.654[0m | [34m[1m     DEBUG      [0m | Streamed tokens: ute irure d  
[34m2023-09-28 20:17:21.155[0m | [34m[1m     DEBUG      [0m | Streamed tokens: olor in repre
[34m2023-09-28 20:17:21.658[0m | [34m[1m     DEBUG      [0m | Streamed tokens: henderit in  
[34m2023-09-28 20:17:22.161[0m | [34m[1m     DEBUG      [0m | Streamed tokens:  voluptate vel
[34m2023-09-28 20:17:22.663[0m | [34m[1m     DEBUG      [0m | Streamed tokens: it esse c    
[34m2023-09-28 20:17:23.165[0m | [34m[1m     DEBUG      [0m | Streamed tokens: illum dol    
[34m2023-09-28 20:17:23.667[0m | [34m[1m     DEBUG      [0m | Streamed tokens: ore eu fug   
[34m2023-09-28 20:17:24.169[0m | [34m[1m     DEBUG      [0m | Streamed tokens: iat nulla par
[34m2023-09-28 20:17:24.670[0m | [34m[1m     DEBUG      [0m | Streamed tokens: iatur. Except
[34m2023-09-28 20:17:25.173[0m | [34m[1m     DEBUG      [0m | Streamed tokens: eur sint occ 
[34m2023-09-28 20: