# Overview of the SN25 pipelines for Product

In [115]:
import bittensor as bt
from typing import List, Dict
import requests

import pandas as pd 


# This is a plot of the last part of their simulation but we will need the entire curve. We don't have this yet. 
import plotly.graph_objects as go
import numpy as np

In [9]:
# necessary preprocessing pipelines 

def response_to_dict(response) -> List[Dict]:
    """Convert a GJP ``/db/query`` HTTP response into a list of row dicts.

    Args:
        response: Object exposing ``.json()`` whose payload looks like
            ``{"results": [{"columns": [...], "values": [[...], ...]}]}``.

    Returns:
        One dict per row, mapping column name to value. An empty list is
        returned when the first result has no ``"values"`` key.

    Raises:
        ValueError: If the first result contains an ``"error"`` key.
    """
    result = response.json()["results"][0]
    if "error" in result:
        raise ValueError(f"Failed to get all PDBs: {result['error']}")
    if "values" not in result:
        # Return an empty list (not a dict) so the result always matches List[Dict].
        return []
    columns = result["columns"]
    return [dict(zip(columns, row)) for row in result["values"]]

# Names of the data columns that are present in the GJP.
data_columns = [
    "id", "pdb_id", "system_config", "s3_links",
    "priority", "hotkeys", "is_organic", "active",
    "update_interval", "max_time", "epsilon", "min_updates",
    "updated_at", "best_loss", "best_loss_at", "best_hotkey",
    "updated", "created_at", "best_cpt_links", "job_type",
    "event", "validator_hotkey", "job_id", "computed_rewards",
]

## Things have changed 

Things have changed since the last dashboard, but the data is more or less the same; it just comes in a different format. We should rely entirely on the Global Job Pool (GJP), which is a centralized db that automatically syncs across all miners and validators. 

## The Global Job Pool
Properties: 
1. Centrally hosted by us

    a. Production db: "174.138.3.61:8030"

    b. Test db: "167.99.209.27:4001"

2. Queryable using simple HTTP requests 

I will break down the major sections on Tizi's dashboard with the code that is necessary to get the representative data. 


![title](dashboard/proteins_folded.png)

In [5]:
# Here you can get all jobs from the GJP, but you just need to choose the right db.
GJP_PROD_ADDRESS = "174.138.3.61:8030"
GJP_TEST_ADDRESS = "167.99.209.27:4001"
# Explicit selection instead of two consecutive assignments where the second
# silently overwrote the first. Point this at GJP_PROD_ADDRESS for production data.
GJP_ADDRESS = GJP_TEST_ADDRESS

response = requests.get(
    f"http://{GJP_ADDRESS}/db/query",
    params={"q": "SELECT * FROM jobs"},
    # Seconds. Without a timeout, requests can wait indefinitely on an unresponsive host.
    timeout=30,
)


In [7]:
# Turn the raw HTTP response into a list of row dicts and report how many jobs came back.
data = response_to_dict(response)
print(len(data))

134


In [11]:
# Display the first job record to see the columns returned by the query.
data[0]

{'id': 1,
 'pdb_id': '6u1u',
 'system_config': '{"ff": "amber14-all.xml", "box": "cube", "water": "amber14/tip3pfb.xml", "system_kwargs": {"friction": 1.04, "temperature": 208.61}}',
 's3_links': '{"cpt": "https://ams3.digitaloceanspaces.com/sn25-vali-testnet-bucket/inputs/6u1u/5GRDsru2/2025-03-27_10-10-05/em.cpt", "pdb": "https://ams3.digitaloceanspaces.com/sn25-vali-testnet-bucket/inputs/6u1u/5GRDsru2/2025-03-27_10-10-05/6u1u.pdb"}',
 'priority': '1',
 'hotkeys': '["5FuqdmiQbEGpNY22x7S9M1ZhBXXRxjikt7MaZCrXddccA2mM", "5GH9ndDk587mMsyNx67v8ErS6gtPEdefqnDrh24JGwbUfFDg", "5FvhZkPjC4QCxEZ2NU3GkDLvU7VVxinEbQesXFBy7RUsidjt", "5EPNDHukxfa8RAut9sGy1DF9H8mG1REbiDjLpRwoqKb2eAEq", "5CB3BU3HJKc3ZsVsT7ek3w2s2465tsokFmXceREMCwTmq9xW", "5HL6MGoFnwigReCwKTEg2eYE4TFhtASYCWEPTwpE6wikct45", "5HTNkgyjiNgLu4EmpyztN3pReoTbvx2enx4bo74BytACH5DR", "5C7MN3iuvFu8FG8h5Y9v7vi93SffcDvS33eRQJ2tGuL8ZZtH", "5G69cpZV9gAp6YmK6Bep68QzJHb91wtkxYRZ1jJV4MNx3JdN", "5EjqaAQfxp7vo3ss9Gpovp2nnguu1fVmGw5QfW3Ri8TWfCQZ", "5FeMHoD

As you can see, there are some time columns that you need: 
1. created_at 
2. updated_at --> this is the time that the job was actually closed, since RIGHT NOW we only do one update. 

In practice, we should also have a "time_closed" param 

But using the above information, you should have what you need to make the above plot. 

![title](dashboard/gjp.png)

You're going to use the same query as above, but you could segment them by job status and merge if you want.

In [16]:
def _query_gjp(sql):
    """Send one SQL query to the GJP ``/db/query`` endpoint and return the HTTP response."""
    return requests.get(
        f"http://{GJP_ADDRESS}/db/query",
        params={"q": sql},
        # Seconds. Without a timeout, requests can wait indefinitely on an unresponsive host.
        timeout=30,
    )


# Same jobs table as before, split by the `active` flag.
response_active = _query_gjp("SELECT * FROM jobs WHERE active = 1")
response_inactive = _query_gjp("SELECT * FROM jobs WHERE active = 0")

data_active = response_to_dict(response_active)
data_inactive = response_to_dict(response_inactive)



In [21]:
# Importantly, the KEY column for telling the job TYPE apart (organic or synthetic) is job_type.
# Right now, we only have SyntheticMD in the GJP, but eventually we will have more (OrganicMD).

print(data_active[0]["job_type"])

SyntheticMD


![title](dashboard/completed_tasks.png)

In [30]:
# You can access the pdb files through the s3_links column.
# The column holds a JSON-encoded string, so decode it before indexing.
import json

s3_links = json.loads(data_inactive[0]["s3_links"])
print(s3_links)
print(s3_links["pdb"]) # link to the pdb file; this is the one you need.

{'cpt': 'https://ams3.digitaloceanspaces.com/sn25-vali-testnet-bucket/inputs/6u1u/5GRDsru2/2025-03-27_10-10-05/em.cpt', 'pdb': 'https://ams3.digitaloceanspaces.com/sn25-vali-testnet-bucket/inputs/6u1u/5GRDsru2/2025-03-27_10-10-05/6u1u.pdb'}
https://ams3.digitaloceanspaces.com/sn25-vali-testnet-bucket/inputs/6u1u/5GRDsru2/2025-03-27_10-10-05/6u1u.pdb


In [38]:
# Display the raw "event" value of the first inactive job (a JSON-encoded string, decoded with json.loads in a later cell).
data_inactive[0]["event"]

'{"best_cpt": [""], "block": 4188999, "box": "cube", "checked_energy": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, [-28057.26301985058, -28150.81920642503, -28265.945266885163, -28118.301797870983, -28110.014946367188, -28249.517962750306, -28214.037254985906, -28088.555596295784, -28067.463264985265, -28160.964403695823, -28214.59234161269, -28125.743353925616, -28172.919390140072, -28113.192224432943, -28229.541239654016, -28124.20001942312, -28121.407950755733, -28193.003879669384, -28246.449667384262, -28209.514537802745, -28162.436353715428, -28199.768814600087, -28294.79144273494, -28238.282856187863, -28179.191655975745, -28116.607310905016, -28115.70854973504, -28357.542939426887, -28157.57872018868, -28136.19577448955, -28086.21098727105, -28155.37212247244, -28127.496515344938, -28087.277638466803, -

In [39]:
# Miner energies can be found inside the JSON-encoded "event" column.
event = json.loads(data_inactive[0]["event"])
event.keys()
# NOTE(review): "checked_loss" is not among the keys in this cell's recorded output; confirm before re-enabling.
# zip(event["checked_energy"], event["checked_loss"])

# You can also access the pdb files through the s3_links column (decoded in an earlier cell).


dict_keys(['best_cpt', 'block', 'box', 'checked_energy', 'energies', 'epsilon', 'ff', 'hp_sample_time', 'hp_tries', 'init_energy', 'input_source', 'is_duplicate', 'is_run_valid_time', 'is_valid', 'job_type', 'md_inputs', 'md_inputs_sizes', 'miner_energy', 'ns_computed', 'pdb_complexity', 'pdb_id', 'process_md_output_time', 'reason', 'reported_energy', 'response_returned_files', 'response_returned_files_sizes', 'response_status_codes', 'response_status_messages', 'response_times', 'rmsds', 's3_links', 'seed', 'step_length', 'system_kwargs', 'uid_search_time', 'uids', 'validator_search_status', 'water'])

In [72]:
# Display the first job record again for reference.
data[0]

{'id': 1,
 'pdb_id': '6u1u',
 'system_config': '{"ff": "amber14-all.xml", "box": "cube", "water": "amber14/tip3pfb.xml", "system_kwargs": {"friction": 1.04, "temperature": 208.61}}',
 's3_links': '{"cpt": "https://ams3.digitaloceanspaces.com/sn25-vali-testnet-bucket/inputs/6u1u/5GRDsru2/2025-03-27_10-10-05/em.cpt", "pdb": "https://ams3.digitaloceanspaces.com/sn25-vali-testnet-bucket/inputs/6u1u/5GRDsru2/2025-03-27_10-10-05/6u1u.pdb"}',
 'priority': '1',
 'hotkeys': '["5FuqdmiQbEGpNY22x7S9M1ZhBXXRxjikt7MaZCrXddccA2mM", "5GH9ndDk587mMsyNx67v8ErS6gtPEdefqnDrh24JGwbUfFDg", "5FvhZkPjC4QCxEZ2NU3GkDLvU7VVxinEbQesXFBy7RUsidjt", "5EPNDHukxfa8RAut9sGy1DF9H8mG1REbiDjLpRwoqKb2eAEq", "5CB3BU3HJKc3ZsVsT7ek3w2s2465tsokFmXceREMCwTmq9xW", "5HL6MGoFnwigReCwKTEg2eYE4TFhtASYCWEPTwpE6wikct45", "5HTNkgyjiNgLu4EmpyztN3pReoTbvx2enx4bo74BytACH5DR", "5C7MN3iuvFu8FG8h5Y9v7vi93SffcDvS33eRQJ2tGuL8ZZtH", "5G69cpZV9gAp6YmK6Bep68QzJHb91wtkxYRZ1jJV4MNx3JdN", "5EjqaAQfxp7vo3ss9Gpovp2nnguu1fVmGw5QfW3Ri8TWfCQZ", "5FeMHoD

In [94]:
# Each tuple built below holds, in order:
# 1. energies: the miner's final energy
# 2. miner_energy: the energy of the miner at each step (miner reported)
# 3. checked_energy: the energy of the miner at each step (validator reprod)
# 4. computed_rewards: the reward of the miner (validator distributed)

sorted_data = list(
    zip(event["energies"], event["miner_energy"], event["checked_energy"], json.loads(data_inactive[0]["computed_rewards"]))
)
# Order by computed rewards (tuple position 3), highest first.
sorted_data.sort(key=lambda miner_record: miner_record[3], reverse=True)

# The ordering matters: another panel needs the top K results.
sorted_data

[(-28196.950248104404,
  [-28057.263019850587,
   -28150.819206425043,
   -28265.94526688516,
   -28118.30179787098,
   -28110.0149463672,
   -28249.51796275031,
   -28214.03725498589,
   -28088.555596295795,
   -28067.463264985272,
   -28160.964403695834,
   -28214.5923416127,
   -28125.743353925616,
   -28172.919390140065,
   -28113.19222443293,
   -28229.541239654023,
   -28124.20001942313,
   -28121.407950755733,
   -28193.003879669384,
   -28246.449667384262,
   -28209.514537802737,
   -28162.436353715428,
   -28199.76881460008,
   -28294.79144273493,
   -28238.282856187878,
   -28179.19165597574,
   -28116.60731090504,
   -28115.708549735053,
   -28357.542939426887,
   -28157.578720188674,
   -28136.195774489544,
   -28086.210987271035,
   -28155.372122472447,
   -28127.49651534494,
   -28087.27763846679,
   -28014.41306516008,
   -28102.806353967542,
   -28096.46580235456,
   -28113.984094553707,
   -28248.235918761853,
   -28195.605887818543,
   -28184.11065940623,
   -28167.34

In [126]:
# sorted_data is ordered by computed rewards, highest first, so index 0 is the best miner.
print(sorted_data[0][0]) # THIS IS THE FINAL MINER ENERGY FOR THE BEST MINER. 
print(sorted_data[1][0]) # THIS IS THE FINAL MINER ENERGY FOR THE SECOND BEST MINER. 
# ...and so on for the remaining miners.

-28196.950248104404
0.0


![title](dashboard/energy_vs_time.png)

In [129]:
# This is an example of plotting the miner and checked energy curves for the FIRST (best) miner. 
example_label_data = {key:sorted_data[0][ii]  for ii, key in enumerate(["final_miner_energy", "miner_energy_curve", "checked_energy_curve", "computed_rewards"])}

fig = go.Figure()
# Add each line as a separate trace
fig.add_trace(go.Line(x = np.arange(len(example_label_data["miner_energy_curve"])), y = example_label_data["miner_energy_curve"], name='Miner Energy'))
fig.add_trace(go.Line(x = np.arange(len(example_label_data["checked_energy_curve"])), y = example_label_data["checked_energy_curve"], name='Checked Energy'))
fig.update_layout(title="Energy Curves for a SMALL SEGMENT OF THE TOTAL MINER ENERGY CURVE", xaxis_title="Step", yaxis_title="Energy")


## important: The above energy curve is simply a short REPRODUCED segment vs the miner's self-reported energy. These are the reproduced energy lines that Tizi has.

### We need to start saving the miner energy logs... I thought we did, but it looks like I'm mistaken.