In [1]:
! pip install -e .

Obtaining file:///Users/const/Workspace/bittensor
Collecting argparse
  Using cached argparse-1.4.0-py2.py3-none-any.whl (23 kB)




Installing collected packages: argparse, bittensor
  Attempting uninstall: bittensor
    Found existing installation: bittensor 1.0.4
    Uninstalling bittensor-1.0.4:
      Successfully uninstalled bittensor-1.0.4
  Running setup.py develop for bittensor
Successfully installed argparse-1.4.0 bittensor


In [2]:
import bittensor
import os
import torch
import time
import torch.multiprocessing as mp 
import queue
import nest_asyncio 
nest_asyncio.apply()

In [6]:
# WALLET: Keys for running your miner and controlling funds.

# *****
# IMPORTANT: Store the mnemonic for **both** your hot and coldkey 
# you will need these if you want to recover the files off this machine.
# ******

# Fill in below to name your wallet and keys.
YOUR_WALLET_NAME = 'colab'
YOUR_HOTKEY_NAME = 'colab_hot'

# Fill in below if your need to regenerate your keys.
use_mnemonic = True # Set to true for key regeneration.
coldkey_mnemonic = "fancy catalog grant scatter summer minute luxury gym spot taxi theme initial"
hotkey_mnemonic = "away canvas cost drip soldier retreat match choice inject envelope grit asset"

# Create the wallet object.
wallet = bittensor.wallet.Wallet(
    path = "~/.bittensor/wallets/",
    name = YOUR_WALLET_NAME,
    hotkey = YOUR_HOTKEY_NAME
)

# Optionally regens/creates your wallet keys.
if not wallet.has_coldkeypub:
    if use_mnemonic:
        wallet.regenerate_coldkey(mnemonic = coldkey_mnemonic, use_password = True)
    else:
        wallet.create_new_coldkey(n_words = 12, use_password = True )
if not wallet.has_hotkey:
    if use_mnemonic:
        wallet.regenerate_hotkey(mnemonic = hotkey_mnemonic, use_password = True)
    else:
        wallet.create_new_hotkey(n_words = 12)


[32mLoaded coldkey.pub: 0x3c9cd1679888e5660b0c8e4b8a17a1719c0cb7f05b5c624a856b421b52290515[0m
[32mLoaded hotkey: 0x80cacfbdf7b155b39de22680a7cb14c61a8f95df702c92f5f142d25cca37c545[0m


In [12]:
# Subtensor: Connection to the blockchain
# Needed for token transfers and querying network state.
subtensor = bittensor.subtensor.Subtensor(
    wallet = wallet, # Used for signing transactions.
    network = 'kusanagi' # 'akira' for testnet, 'kusanagi' for mainnet
)

# Metagraph: Chain state object.
metagraph = bittensor.metagraph.Metagraph(
    wallet = wallet, # Identifies yourself.
    subtensor = subtensor # Chain connection for polling state. 
)

# Dendrite: Tensor RPC client.
dendrite = bittensor.dendrite.Dendrite(
    wallet = wallet, # Used for signing RPC requests.
    metagraph = metagraph, # Used for identifying peers.
)

# Axon: Tensor RPC server.
axon = bittensor.axon.Axon(
    wallet = wallet, # Used for signing RPC responses.
    metagraph = metagraph, # Used for identifying peers.
)

neuron = bittensor.neuron.Neuron(
    wallet = wallet,
    metagraph = metagraph,
    dendrite = dendrite,
    axon = axon,
)


INFO    |bittensor.axon:check_config:198 - UPNPC: OFF
INFO    |bittensor.axon:check_config:201 - Using external endpoint: 132.191.3.149:8091
INFO    |bittensor.axon:check_config:202 - Using local endpoint: 127.0.0.1:8091
INFO    |bittensor.neuron:stop:181 - Shutting down the Axon server ...
INFO    |bittensor.neuron:stop:184 - Axon server stopped


In [24]:
# The Model.
import argparse
import random
import torch
import torch.nn.functional as F

from transformers import GPT2Config, GPT2Model
from torch import nn
from munch import Munch
from types import SimpleNamespace

import bittensor
from routers.pkm import PKMRouter

class GPT2Pooler(nn.Module):

    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.n_embd, config.n_embd)
        self.activation = nn.Tanh()

    def forward(self, hidden_states):
        # We "pool" the model by simply taking the hidden state corresponding
        # to the first token.
        first_token_tensor = hidden_states[:, 0]
        pooled_output = self.dense(first_token_tensor)
        pooled_output = self.activation(pooled_output)
        return pooled_output

class GPT2LMNucleus(bittensor.nucleus.Nucleus):
    """ A Bittensor Synapse training GPT2 with Causal Language Modelling (CLM)
    """
    def __init__(self, config: Munch = None, **kwargs):
        r""" Init a new GPT2 synapse module.

            Args:
                config (:obj:`munch.Munch`, `required`): 
                    munched config class.
        """
        super(GPT2LMNucleus, self).__init__(config = config, **kwargs)
        if config == None:
            config = GPT2LMNucleus.default_config()
        bittensor.config.Config.update_with_kwargs(config.synapse, kwargs) 
        GPT2LMNucleus.check_config(config)
        self.config = config

        # Build huggingface config.
        huggingface_config = GPT2Config(
                vocab_size=bittensor.__vocab_size__, 
                n_embd=bittensor.__network_dim__,
                n_layer=config.synapse.n_layer,
                n_head=config.synapse.n_head, 
                n_inner=config.synapse.n_inner, 
                activation_function=config.synapse.activation_function, 
                resid_pdrop=config.synapse.resid_pdrop, 
                embd_pdrop=config.synapse.embd_pdrop, 
                attn_pdrop=config.synapse.attn_pdrop, 
                layer_norm_epsilon=config.synapse.layer_norm_epsilon, 
                initializer_range=config.synapse.initializer_range, 
                summary_type=config.synapse.summary_type, 
                summary_use_proj=config.synapse.summary_use_proj, 
                summary_activation=config.synapse.summary_activation, 
                summary_proj_to_labels=config.synapse.summary_proj_to_labels, 
                summary_first_dropout=config.synapse.summary_first_dropout, 
        )

        # encoder_layer: encodes tokenized sequences to network dim.
        # [batch_size, sequence_len] -> [batch_size, sequence_len, bittensor.__network_dim__]
        self.transformer = GPT2Model(huggingface_config)

        # pooler_layer: pools the hidden units for use by the pkm dendrite rpc query.
        # [batch_size, bittensor.__network_dim__, sequence_len] -> [batch_size, bittensor.__network_dim__]
        self.pooler = GPT2Pooler(huggingface_config)

        # router: (PKM layer) queries network using pooled embeddings as context.
        # [batch_size, bittensor.__network_dim__] -> topk * [batch_size, bittensor.__network_dim__]
        self.router = PKMRouter(config, query_dim = bittensor.__network_dim__)

        # hidden_layer: transforms context and encoding to network_dim hidden units.
        # [batch_size, sequence_dim, 2 * bittensor.__network_dim__] -> [batch_size, sequence_len, bittensor.__network_dim__]
        self.hidden_layer = nn.Linear( bittensor.__network_dim__, bittensor.__network_dim__ )

        # target_layer: maps from hidden layer to vocab dimension for each token. Used by MLM loss.
        # [batch_size, sequence_len, bittensor.__network_dim__] -> [batch_size, sequence_len, bittensor.__vocab_size__]
        self.target_layer = nn.Linear( bittensor.__network_dim__, bittensor.__vocab_size__, bias=False )
        
        # Loss function: MLM cross-entropy loss.
        # predicted: [batch_size, sequence_len, 1], targets: [batch_size, sequence_len, 1] -> [1]
        self.loss_fct = nn.CrossEntropyLoss()

        self.to(self.device)

    @staticmethod   
    def default_config() -> Munch:
        parser = argparse.ArgumentParser(); 
        GPT2LMNucleus.add_args(parser) 
        config = bittensor.config.Config.to_config(parser); 
        return config

    @staticmethod
    def add_args(parser: argparse.ArgumentParser):    
        r""" Add custom params to the parser.
        """
        parser.add_argument('--synapse.n_head', default=1, type=int, 
                            help='Number of attention heads for each attention layer in the Transformer encoder.')
        parser.add_argument('--synapse.n_layer', default=2, type=int, 
                            help='Number of hidden layers in the Transformer encoder.')
        parser.add_argument('--synapse.n_inner', default=8, type=int, 
                            help='The dimensionality of the inner feed-forward layers. :obj:`None` will set it to 4 times n_embd')
        parser.add_argument('--synapse.activation_function', default='gelu_new', type=str, 
                            help='Activation function, to be selected in the list :obj:`["relu", "silu", "gelu", "tanh", "gelu_new"]')
        parser.add_argument('--synapse.resid_pdrop', default=0.1, type=float, 
                            help='GPT residual dropout probabilit.')
        parser.add_argument('--synapse.embd_pdrop', default=0.1, type=float, 
                            help='GPT embedding dropout probability.')
        parser.add_argument('--synapse.attn_pdrop', default=0.1, type=float, 
                            help='GPT attention dropout probability.')
        parser.add_argument('--synapse.layer_norm_epsilon', default=1e-05, type=float, 
                            help='GPT the epsilon to use in the layer normalization layers')
        parser.add_argument('--synapse.summary_type', default='cls_index', type=str, 
                            help='Supply a Tensor of classification token position (like GPT/GPT-2).')
        parser.add_argument('--synapse.initializer_range', default=0.02, type=float, 
                            help='The standard deviation of the truncated_normal_initializer for initializing all weight matrices.')
        parser.add_argument('--synapse.summary_use_proj', default=True, type=bool, 
                            help='Whether or not to add a projection after the vector extraction.')
        parser.add_argument('--synapse.summary_activation', type=str, 
                            help='Pass "tanh" for a tanh activation to the output, any other value will result in no activation.')
        parser.add_argument('--synapse.summary_proj_to_labels', default=True, type=bool, 
                            help='Whether the projection outputs should have config.num_labels or config.hidden_size classes.')
        parser.add_argument('--synapse.summary_first_dropout', default=0.1, type=float, 
                            help='The dropout ratio to be used after the projection and activation.')
        parser.add_argument('--synapse.n_block_filter', default=100, type=int, help='Stale neurons are filtered after this many blocks.')
        PKMRouter.add_args(parser)

    @staticmethod
    def check_config(config: Munch):
        pass

    def forward_text(self, inputs: torch.LongTensor):
        """ Local forward inputs through the MLM GPT Synapse.

            Args:
                inputs (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_len)`, `required`): 
                    Batch_size length list of tokenized sentences.
            
            Returns:
                hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_len, bittensor.__network_dim__)`, `required`): 
                    Hidden layer representation produced using the local_context.
        """
        hidden = self.local_forward(inputs=inputs.to(self.device), training = False).local_hidden
        return hidden

    def local_forward(self, inputs: torch.LongTensor, training: bool = True) -> SimpleNamespace:
        r""" Forward pass through GPT synapse.

            Args:
                inputs (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_len)`, `required`): 
                    Batch_size length list of text sentences.

                training (:obj:`bool')`, `optional`, defaults to True):
                    Switch to True if this forward pass computes an MLM loss.

            SimpleNamespace {
                    local_context (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_len, bittensor.__network_dim__)`, `required`):
                        Hidden layer context.

                    local_hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_len, bittensor.__network_dim__)`, `required`):
                        Hidden layer encoding produced using local_context.

                    local_target (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_len, bittensor.__vocab_size__)`, `optional`):
                        GPT MLM Target predictions produced using local_context. 

                    local_target_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`): 
                        GPT MLM loss using local_context.
                }
        """
        inputs = torch.clamp(inputs, 0, bittensor.__vocab_size__) # Filter out of range tokens.

        # Return vars to be filled.
        output = SimpleNamespace()
        
        # local_context: distilled version of remote_context.
        # local_context.shape = [batch_size, sequence_len, bittensor.__network_dim__]
        output.local_context = self.transformer(input_ids=inputs, return_dict=True).last_hidden_state

        # local_hidden: hidden layer encoding of sequence with local_context.
        # local_hidden.shape = [batch_size, sequence_len, bittensor.__network_dim__]
        output.local_hidden = self.hidden_layer(output.local_context)

        if training:
            # local_target: projection of local_hidden onto target dimension.
            # local_target.shape = [batch_size, sequence_len, bittensor.__vocab_size__]
            output.local_target = self.target_layer(output.local_hidden)

            # local_target_loss: MLM loss between local_target and passed targets.
            # local_target_loss.shape = [1]
            shift_logits = output.local_target[..., :-1, :].contiguous()
            shift_labels = inputs[..., 1:].contiguous()
            output.local_target_loss = self.loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
                   
        return output

    def remote_forward(self, neuron: bittensor.neuron.Neuron, inputs: torch.LongTensor, training: bool) -> SimpleNamespace:
        """ Forward pass inputs and labels through the GPT2 module.


        Args:
            neuron (:obj: `bittensor.neuron.Neuron`, `required`):
                    Bittensor neuron, used for making queries to the remote network.

            inputs (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_len)`, `required`): 
                    Batch_size length list of text sentences.

            training (:obj:`bool')`, `optional`, defaults to True):
                Switch to True if this forward pass computes an MLM loss.

        Returns:
            self.local_forward() + SimpleNamespace ( 

                    remote_hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_len, bittensor.__network_dim__)`, `optional`): 
                        Hidden layer encoding produced using the remote_context.

                    remote_target (:obj:`torch.FloatTensor` of shape :obj:`(batch_size,  bittensor.__vocab_size__)`, `optional`):
                        GPT MLM Target predictions using the remote_context.

                    remote_target_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`):
                        GPT MLM loss using the remote_context.

                    distillation_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`): 
                        Distillation loss between local_context and remote_context.

                    router (:obj:`SimpleNamespace`, `required`): 
                        Outputs from the pkm dendrite.
            )
        """
        inputs = torch.clamp(inputs, 0, bittensor.__vocab_size__) # Filter out of range tokens.

        # Run the local model.
        # output = SimpleNamespace
        output = self.local_forward(inputs, training)

        # pooled: pooled hidden layer from local run, used as our query context.
        # pooled.shape = [batch_size, bittensor.__network_dim__]
        pooled = self.pooler(output.local_hidden.detach())

        # remote_context: joined responses from a dendrite.forward_text call.
        # remote_context.shape = [batch_size, sequence_len, bittensor.__network_dim__]
        output.router = self.router.forward_text(neuron, inputs.to(self.device), pooled)
        remote_context = output.router.response

        # distillation_loss: distillation loss between local_context and remote_context
        # distillation_loss.shape = [1]
        output.distillation_loss = F.mse_loss(output.local_context, remote_context.detach())

        # remote_hidden: hidden layer encoding using remote_context.
        # remote_hidden.shape = [batch_size, sequence_len, bittensor.__network_dim__]
        output.remote_hidden = self.hidden_layer(remote_context)

        if training:
            # remote_target: projection of remote_hidden onto target dimension.
            # remote_target.shape = [batch_size, sequence_len, bittensor.__vocab_size__]
            output.remote_target = self.target_layer(output.remote_hidden)

            # remote_target_loss: MLM loss between remote_target and passed targets.
            # remote_target_loss.shape = [1]
            shift_logits = output.remote_target[..., :-1, :].contiguous()
            shift_labels = inputs[..., 1:].contiguous()
            output.remote_target_loss = self.loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
        
        return output




In [19]:
def forward_process( 
        running_event: mp.Event, 
        model: 'bittensor.synapse.Synapse',
    ):
    # Forward process: Answers Forward queries on this endpoint. 
    while not running_event.is_set():
        try:
            # Pull next query
            print ('got entry')
            pong, pubkey, inputs, modality = axon.forward_queue.get(block=True, timeout=5.0)
            print ('got request', pubkey, inputs.shape)

            # Process request:
            try:
                outputs = model.forward_text( inputs )
                pong.send( outputs )
                print ('sent response', pubkey, outputs.shape)
            except:
                pong.send(None)
        except queue.Empty:
            continue

In [25]:
# Subscribing your endpoint.
success = subtensor.subscribe(
                axon.config.axon.external_ip, 
                axon.config.axon.external_port,
                neuron.config.neuron.modality,
                wallet.coldkeypub,
                wait_for_finalization = True,
                timeout = 4 * bittensor.__blocktime__,
)

[32mAlready subscribed with [ip: 132.191.3.149, port: 8091, modality: 0, coldkey: 0x3c9cd1679888e5660b0c8e4b8a17a1719c0cb7f05b5c624a856b421b52290515][0m


In [26]:
axon.start()
model = GPT2LMNucleus()

ERROR   |bittensor.axon:_serve:496 - Cannot start already-started server!


NameError: name 'GPT2LMSynapse' is not defined

In [22]:
print ('Constumer: Started...')
event = mp.Event()
x = mp.Process( target=forward_process, args=(event, model), daemon=False)
x.start()

Constumer: Started...
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry
got entry


In [24]:
event.set()
x.join()

In [None]:
# Simple training architecture.
from typing import Tuple
import random

def train( model: 'torch.nn.Module' )

# Training params.
n_steps = 1000
batch_size = 100
learning_rate = 0.01
momentum = 0.99

# Model and optimizer.
tokenizer = bittensor.__tokenizer__()
model = PoemSentimentClassifier()
optimizer = torch.optim.SGD( model.parameters(), lr = learning_rate, momentum = momentum)
loss_function = torch.nn.CrossEntropyLoss(ignore_index=-1)

# Batch iterator: Produces random tokenized batches from the poem dataset.
def next_batch(batch_size: int, dataset, tokenizer) -> Tuple[torch.LongTensor, torch.LongTensor]:
  inputs = []
  targets = []
  for i in range(batch_size):
    random_index = random.randint(0, len(dataset)-1)
    inputs.append( dataset[random_index]['verse_text'] )
    targets.append( dataset[random_index]['label'] )
  inputs = tokenizer(inputs, return_tensors='pt', padding=True, truncation=True)['input_ids']
  targets = torch.tensor( targets, dtype=torch.int64 )
  return inputs, targets
  
# Training loop:
for batch_index in range(n_steps):
  inputs, targets = next_batch(batch_size, dataset['train'], tokenizer)
  logits = model( inputs )
  loss = loss_function( logits.view(-1, 4), targets )
  loss.backward()
  optimizer.step()
  optimizer.zero_grad()
  print ('step: ', batch_index, ' loss: ', loss.item())

            

In [None]:
neuron = bittensor.proto.Neuron(
    address = '127.0.0.1',
    port = 8091,
    public_key = wallet.hotkey.public_key
)
print ('Endpoint: ', neuron)


def producer():
    while True:
        print ('Procducer: Sending message...')
        tensor = torch.tensor([[1,2]], dtype=torch.int64)

        responses, codes = dendrite.forward_text(
            [neuron],
            [tensor]
        )
        print ('Procducer: Response code:', codes[0])

def consumer():
    while True:
        try:
            print ('Consumer: waiting on axon queue')
            pong, pubkey, inputs, modality = axon.forward_queue.get(block=True, timeout=120.0)
            print ('Consumer: inputs', type(inputs))
            print (inputs.shape)
            pong.send( torch.zeros([1,2, 512]) )
        except queue.Empty:
            print ('Consumer: done waiting')

print ('Constumer: Started...')
x = mp.Process( target=consumer, daemon=False)
x.start()

print ('Prodcuer: Started...')
y = mp.Process( target=producer, daemon=False)
y.start()

print ('Consumer: Waiting for join.')
x.join()
print ('Consumer: Joined')

print ('Producer: Waiting for join')
y.join()
print ('Producer: Joined')

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from transformers import GPT2Config, GPT2Model
from types import SimpleNamespace


class Pooler(nn.Module):
    def __init__(self):
        super(Pooler, self).__init__()
        self.dense = nn.Linear(bittensor.__network_dim__, bittensor.__network_dim__)
        self.activation = nn.Tanh()

    def forward(self, x: torch.FloatTensor):
        # Take last sequence encoding as the sentence's representation.
        first_representation = x[:, -1]
        pooled_output = self.dense(first_representation)
        pooled_output = self.activation(pooled_output)
        return pooled_output

class PoemSentimentClassifier(nn.Module):
    def __init__(self):
        super().__init__()

        # For projecting sequences of representations into a single represenation.
        self.pooler = Pooler()

        # A Feedforward dense layer.
        self.hidden = nn.Linear(bittensor.__network_dim__, bittensor.__network_dim__)
        
        # For projecting our learned feature space onto the target dimension.
        self.target = nn.Linear(bittensor.__network_dim__, 4)
      
        # GPT-distillation model for extracting knowledge from the network.
        huggingface_config = GPT2Config(
                vocab_size=bittensor.__vocab_size__, 
                n_embd=bittensor.__network_dim__,
        )
        self.student = GPT2Model(huggingface_config)
        
    def forward_text(self, x: torch.LongTensor) -> torch.FloatTensor:
        
        # Return local hidden output.
        return self.local_forward( x ).local_hidden
    
        
    def remote_forward(self, x: torch.LongTensor) -> SimpleNamespace:
        output = SimpleNamespace()
        
        # Remote model.
        network_query = [ x for _ in metagraph.neurons]
        responses, _ = dendrite.forward_text( metagraph.neurons, network_query )
        output.remote_context = torch.mean(torch.stack(responses, dim=2), dim=2) # Average responses.
 
        # Distillation model.
        output.local_context = self.student( x )
        
        # Pooling layer:
        # context -> pooled
        # [batch_size, sequence_len, network_dim] -> [batch_size, network_dim]
        output.local_pooled = self.pooler( local_context )
        output.remote_pooled = self.pooler( remote_context )
        output.distillation_loss = F.mse_loss(output.local_pooled, output.remote_pooled.detach())
        
        # Hidden layer
        # [batch_size, network_dim] -> [batch_size, network_dim]
        output.local_hidden = self.hidden( output.local_pooled )
        output.remote_hidden = self.hidden( output.remote_pooled )
        
        # Target Layer
        output.local_logits = self.target( output.local_hidden )
        output.remote_logits = self.target( output.remote_hidden )
        
        # Softmax output
        output.local_outputs = F.softmax( output.local_logits, dim=1 )
        output.remote_outputs = F.softmax( output.remote_logits, dim=1 )
        
        return output       
    
    
    def local_forward(self, x: torch.LongTensor) -> SimpleNamespace:

        # Distillation model.
        output.local_context = self.student( x )
        
        # Pooling layer.
        output.local_pooled = self.pooler( output.local_context )
        
        # Hidden layer
        output.local_hidden = self.hidden( output.local_pooled )
        
        # Target Layer
        output.local_logits = self.target( output.local_hidden )
        
        # Softmax output
        output.local_outputs = F.softmax( output.local_logits, dim=1 )
        
        return output 

    