# Folding Demo

This demo shows how a registered validator can:
 1) Query the top miner uid 
 2) Demonstrate each reward/penalty mechanism/Scoring of the top miner response
 3) Queries the API and/or links to a frontend(if applicable)

In order to do this we perform the following steps:
1. Checks wandb for currently active pdbs (preferably owned by the users’ hotkey)
2. Uses a registered key to initialize a validator neuron
3. Queries the miner hotkeys with the specified pdb
4. Demonstrates how the responses are scores
5. Plots the best configuration


## Requirements
In order to run this notebook you must meet the following requirements:
1. Have a registered key on SN25 (we use opentensor as an example)
2. Have a wandb account 
3. Have a GROMACS 2024 installed


In [18]:
import os
import wandb
import argparse
import pandas as pd
import bittensor as bt
from pprint import pprint
from inspect import signature

from neurons.validator import Validator
from folding.store import Job
from folding.validators.protein import Protein
from folding.protocol import FoldingSynapse
from folding.validators.reward import get_energies
from folding.utils.ops import get_response_info

WALLET_NAME = 'opentensor'
HOTKEY_NAME = 'main'
SUBTENSOR_NETWORK = 'finney'

parser = argparse.ArgumentParser()
parser.add_argument('--wallet.name', type=str, default=WALLET_NAME)
parser.add_argument('--wallet.hotkey', type=str, default=HOTKEY_NAME)
parser.add_argument('--neuron.axon_off', type=bool, default=True)
config = bt.config(parser=parser)

## Setup the desired wallet

In [19]:
validator = Validator(config=config)

subtensor = validator.subtensor
metagraph = validator.metagraph
wallet = validator.wallet

wallet

full path: /root/.bittensor/miners/opentensor/main/netuid25/validator
[34m2024-06-24 15:10:47.658[0m | [1m      INFO      [0m | 
no_prompt: false
wallet:
  name: opentensor
  hotkey: main
  path: ~/.bittensor/wallets/
subtensor:
  network: finney
  chain_endpoint: wss://entrypoint-finney.opentensor.ai:443
  _mock: false
logging:
  debug: false
  trace: false
  record_log: false
  logging_dir: ~/.bittensor/miners
axon:
  port: 8091
  ip: '[::]'
  external_port: null
  external_ip: null
  max_workers: 10
netuid: 25
neuron:
  device: cpu
  metagraph_resync_length: 100
  epoch_length: 500
  mock: false
  events_retention_size: 2 GB
  dont_save_events: false
  name: validator
  timeout: 45
  update_interval: 300
  num_concurrent_forwards: 1
  queue_size: 10
  sample_size: 10
  disable_set_weights: false
  moving_average_alpha: 0.1
  axon_off: true
  vpermit_tao_limit: 4096
  full_path: /root/.bittensor/miners/opentensor/main/netuid25/validator
mock: false
protein:
  pdb_id: null
  ff: n

wallet(opentensor, main, ~/.bittensor/wallets/)

## Extract information from Wandb --> Pandas.DataFrame

In [29]:
api = wandb.Api(timeout=120)

def load_run(run_path):

    print('Loading run:', run_path)
    run = api.run(run_path)
    df = pd.DataFrame(list(run.scan_history()))
    for col in ['updated_at', 'best_loss_at', 'created_at']:
        if col in df.columns:
            df[col] = pd.to_datetime(df[col])
    print(f'+ Loaded {len(df)} records')
    return df


wandb_project = os.path.join(validator.config.wandb.entity, validator.config.wandb.project_name)
netuid = validator.config.netuid
max_runs = 10
min_steps = 10
hotkey = wallet.hotkey.ss58_address
filters = {'state': 'running', 'config.netuid': netuid, "tags": {"$in": [hotkey]}}

print(f'Searching for runs with filters: {filters}')

# Grab runs on folding wandb
for i, run in enumerate(api.runs(wandb_project, filters=filters)):


    if i >= max_runs:
        break

    num_steps = run.summary.get("_step")
    print(f'run {run}, id: {run.id}, steps: {num_steps}, tags: {run.tags}')
    
    if num_steps is None or num_steps < min_steps:
        continue

    df = load_run('/'.join(run.path))
    version, spec_version, hotkey, netuid_tag, *_ = run.tags
    df['version'] = version
    df['spec_version'] = spec_version
    df['vali_hotkey'] = hotkey
    df['netuid_tag'] = netuid_tag
    df['run_id'] = run.id
    # break # I have this break here to only get 1 run for now

df = df[(df.best_hotkey != 'nan') & (df.best_hotkey != 'None')]
df.reset_index(drop=True, inplace=True)

print(df)

Searching for runs with filters: {'state': 'running', 'config.netuid': 25, 'tags': {'$in': ['5F4tQyWrhfGVcNhoqeiNsR6KjD4wMZ2kfhLj4oHYuyHbZAc3']}}
run <Run opentensor-dev/folding-validators/v7ofulgt (running)>, id: v7ofulgt, steps: 19, tags: ['0.1.0', '100', '5F4tQyWrhfGVcNhoqeiNsR6KjD4wMZ2kfhLj4oHYuyHbZAc3', 'netuid_25']
Loading run: opentensor-dev/folding-validators/v7ofulgt
+ Loaded 20 records
run <Run opentensor-dev/folding-validators/cgw6x0wa (running)>, id: cgw6x0wa, steps: 117, tags: ['0.2.0', '200', '5F4tQyWrhfGVcNhoqeiNsR6KjD4wMZ2kfhLj4oHYuyHbZAc3', 'disable_set_weights', 'netuid_25']
Loading run: opentensor-dev/folding-validators/cgw6x0wa
+ Loaded 118 records
             updated_at                                       best_hotkey  \
0   2024-06-24 12:23:42  5CFRBdz6D3Jcg42btePVkm57DNmF65ksfjaRVH2hXebH1TXw   
1   2024-06-24 12:24:53  5CPsYbVUgggMPnzMJrQFAQi3uBHUKWUhiFBRsSkmuUkjiXkS   
2   2024-06-24 12:25:49  5Dd4x1fQEbbm2qcUSbci9mH3G7PXAQPrE5h3EkxLiz1c7cF3   
3   2024-06-24 

In [30]:
# get most recent event log for my validator hotkey
last_event = df.loc[df._step.argmax()]
print(last_event)

updated_at                                                     2024-06-24 15:11:50
best_hotkey                       5GbPQBNQvBBurei71U8K8fXsvb3zunk64pebphP8x1qHaet3
response_returned_files          [[], [nvt.mdp, nvt.tpr, nvt.log, nvt.xtc, nvt....
_step                                                                          117
is_valid                         [False, True, False, True, True, False, True, ...
validator_search_status                                                       True
pdb_complexity                   [{'REMARK': 350, 'MASTER': 1, 'ANISOU': 3000, ...
active                                                                        True
hotkeys                          [5H8WFHpjYZtfofKB83mQu4x5LfMqwAq73fyAYbimNHdd3...
response_status_codes            [503, 200, 200, 200, 200, 503, 200, 408, 503, ...
reported_energy                  [0, -891420.5, 0, -813456.6875, -813173.0625, ...
md_inputs_sizes                  [2825401, 32637, 32637, 575919, 1172, 575919, ...
chec

In [31]:
# in particular, we want the pdb_id of the last event and the hotkeys assigned to that job
# NOTE: We cannot guarantee that the top miner is actually assigned to the last job

pdb_id = last_event.pdb_id
hotkeys = last_event.hotkeys

rankings = metagraph.I.argsort(descending=True)

uids = [metagraph.hotkeys.index(hotkey) if hotkey in metagraph.hotkeys else None for hotkey in hotkeys]
incentives = [metagraph.I[uid].item() if uid is not None else None for uid in uids]
rankings = [rankings[uid].item() if uid is not None else None for uid in uids]

df_hotkeys = pd.DataFrame({'hotkey': hotkeys, 'uid': uids, 'incentive': incentives, 'ranking': rankings})

print(f'Miners assigned to protein {pdb_id} job:')
df_hotkeys.sort_values('incentive', ascending=False)


Miners assigned to protein 3ohe job:


Unnamed: 0,hotkey,uid,incentive,ranking
1,5GbPQBNQvBBurei71U8K8fXsvb3zunk64pebphP8x1qHaet3,218,0.041428,126
3,5CLys6do8hXvwy1w3k1eiqzNc4Cbr9DgRnsYrEt96UAyFuqS,192,0.002792,28
4,5CcTUwicisy1eJusqPzXPxQVMbicc92GTRboy7xWWSnrSMmQ,127,0.001984,170
5,5HmvXC5okhfaUd6XHsn8Bv5jyC565zj6mid5NZNqa8c2FSPX,101,9.2e-05,140
0,5H8WFHpjYZtfofKB83mQu4x5LfMqwAq73fyAYbimNHdd3ebt,172,0.0,8
2,5EZ7raEJoz6xJPCGDjH1YZ5B5fNXyerZMPTK7hKFfikdb9j7,128,0.0,169
6,5ELSJgvJQGJRTWw5Pzp8wXcuAgcNoEzD4ARwc9YnXU8d9FZo,209,0.0,117
7,5GCd56UXT5KTun8nJgnDCWXZdaET3sYhGc5wQkvuYLmFwrXv,7,0.0,196
8,5GNsBBB3MBT2mLpvTtdHGFGL2JB3uVnMZh28yRQ2QmWHuQGx,134,0.0,164
9,5Dsug8DoFAUqcgDrAoaXwiYApBV3jZDuw6KA5Astx3fD7Mha,0,0.0,254


In [32]:
# Reconstruct the job object from the event log
job = Job(**{k: last_event[k] for k in signature(Job).parameters.keys() if k in last_event})
job


Job(pdb='3ohe', ff='amber03', box='cubic', water='tip3p', hotkeys=['5H8WFHpjYZtfofKB83mQu4x5LfMqwAq73fyAYbimNHdd3ebt', '5GbPQBNQvBBurei71U8K8fXsvb3zunk64pebphP8x1qHaet3', '5EZ7raEJoz6xJPCGDjH1YZ5B5fNXyerZMPTK7hKFfikdb9j7', '5CLys6do8hXvwy1w3k1eiqzNc4Cbr9DgRnsYrEt96UAyFuqS', '5CcTUwicisy1eJusqPzXPxQVMbicc92GTRboy7xWWSnrSMmQ', '5HmvXC5okhfaUd6XHsn8Bv5jyC565zj6mid5NZNqa8c2FSPX', '5ELSJgvJQGJRTWw5Pzp8wXcuAgcNoEzD4ARwc9YnXU8d9FZo', '5GCd56UXT5KTun8nJgnDCWXZdaET3sYhGc5wQkvuYLmFwrXv', '5GNsBBB3MBT2mLpvTtdHGFGL2JB3uVnMZh28yRQ2QmWHuQGx', '5Dsug8DoFAUqcgDrAoaXwiYApBV3jZDuw6KA5Astx3fD7Mha'], created_at=Timestamp('2024-06-24 13:38:06'), updated_at=Timestamp('2024-06-24 15:11:50'), active=True, best_loss=-891420.5, best_loss_at=Timestamp('2024-06-24 14:02:42'), best_hotkey='5GbPQBNQvBBurei71U8K8fXsvb3zunk64pebphP8x1qHaet3', commit_hash='', gro_hash='', update_interval=600.0, updated_count=6.0, max_time_no_improvement=3600.0, min_updates=10.0, epsilon=33935.56529943, event=None)

In [33]:
# Ensure that you have the protein in your local database:
from folding.utils.ops import check_and_download_pdbs

pdb_id = job.pdb + ".pdb"
check_and_download_pdbs(
    pdb_directory=f"./folding/data/{pdb_id}", pdb_id = pdb_id, force = True
)

True

In [34]:
# Reconstruct the protein object from the job
protein = Protein.from_job(job, config=None)
protein

100%|██████████| 1/1 [00:00<00:00, 70.90it/s]


Protein(pdb_id=3ohe, ff=amber03, box=cubic

In [35]:

# Create a synapse to query the network
synapse = FoldingSynapse(
    pdb_id=protein.pdb_id, md_inputs=protein.md_inputs, mdrun_args=""
)

axons = [metagraph.axons[uid] for uid in [40]]

# Make a synchronous to the network with the reconstructed protein
responses = validator.dendrite.query(
    axons=axons,
    synapse=synapse,
    timeout=45,
    deserialize=True,  # decodes the bytestream response inside of md_outputs.
)
responses


[FoldingSynapse(pdb_id='3ohe', md_inputs={'em.gro': 'HISTIDINE TRIAD (HIT) PROTEIN in water\n62785\n    0GLY      N    1   2.376   2.853   3.693\n    0GLY     H1    2   2.287   2.863   3.740\n    0GLY     H2    3   2.408   2.758   3.703\n    0GLY     H3    4   2.439   2.911   3.747\n    0GLY     CA    5   2.375   2.902   3.547\n    0GLY    HA1    6   2.328   3.000   3.547\n    0GLY    HA2    7   2.307   2.834   3.496\n    0GLY      C    8   2.519   2.906   3.473\n    0GLY      O    9   2.596   2.829   3.525\n    2PHE      N   10   2.560   2.976   3.358\n    2PHE      H   11   2.519   2.935   3.275\n    2PHE     CA   12   2.675   3.077   3.318\n    2PHE     HA   13   2.663   3.168   3.377\n    2PHE     CB   14   2.826   3.033   3.332\n    2PHE    HB1   15   2.842   2.944   3.271\n    2PHE    HB2   16   2.847   3.006   3.436\n    2PHE     CG   17   2.935   3.137   3.287\n    2PHE    CD1   18   2.965   3.246   3.371\n    2PHE    HD1   19   2.913   3.253   3.465\n    2PHE    CE1   20   3.0

In [36]:

# For now we just want to get the losses, we are not rewarding yet
energies, energy_event  = get_energies(protein=protein, responses=responses, uids=uids)
response_info = get_response_info(responses=responses)


In [37]:
pprint(f"energies: {energies.tolist()}")
pprint(f"energy_event: {energy_event}")
pprint(f"response_info: {response_info}")

'energies: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]'
("energy_event: {'is_valid': [False, False, False, False, False, False, False, "
 "False, False, False], 'checked_energy': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "
 "'reported_energy': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'rmsds': [0, 0, 0, 0, 0, "
 '0, 0, 0, 0, 0]}')
("response_info: {'response_times': [1.2498421669006348], "
 '\'response_status_messages\': ["Internal server error with error: [Errno 2] '
 'No such file or directory: '
 '\'/root/folding/miner-data/5DRY6eG3/1uhk/1uhk_state.txt\'"], '
 "'response_status_codes': ['500'], 'response_returned_files': [[]], "
 "'response_returned_files_sizes': [[]]}")
