## FABlib API References Examples

- [fablib.show_config](https://fabric-fablib.readthedocs.io/en/latest/fablib.html#fabrictestbed_extensions.fablib.fablib.FablibManager.show_config)
- [fablib.list_sites](https://fabric-fablib.readthedocs.io/en/latest/fablib.html#fabrictestbed_extensions.fablib.fablib.FablibManager.list_sites)
- [fablib.list_hosts](https://fabric-fablib.readthedocs.io/en/latest/fablib.html#fabrictestbed_extensions.fablib.fablib.FablibManager.list_hosts)
- [fablib.new_slice](https://fabric-fablib.readthedocs.io/en/latest/fablib.html#fabrictestbed_extensions.fablib.fablib.FablibManager.new_slice)
- [slice.add_node](https://fabric-fablib.readthedocs.io/en/latest/slice.html#fabrictestbed_extensions.fablib.slice.Slice.add_node)
- [slice.submit](https://fabric-fablib.readthedocs.io/en/latest/slice.html#fabrictestbed_extensions.fablib.slice.Slice.submit)
- [slice.get_nodes](https://fabric-fablib.readthedocs.io/en/latest/slice.html#fabrictestbed_extensions.fablib.slice.Slice.get_nodes)
- [slice.list_nodes](https://fabric-fablib.readthedocs.io/en/latest/slice.html#fabrictestbed_extensions.fablib.slice.Slice.list_nodes)
- [slice.show](https://fabric-fablib.readthedocs.io/en/latest/slice.html#fabrictestbed_extensions.fablib.slice.Slice.show)
- [node.execute](https://fabric-fablib.readthedocs.io/en/latest/node.html#fabrictestbed_extensions.fablib.node.Node.execute)
- [slice.delete](https://fabric-fablib.readthedocs.io/en/latest/slice.html#fabrictestbed_extensions.fablib.slice.Slice.delete) 

In [1]:
import datetime
import json
import asyncio

from fabrictestbed_extensions.fablib.fablib import FablibManager as fablib_manager

# Instantiate the FABlib manager; every later cell talks to FABRIC through `fablib`.
fablib = fablib_manager()

# Display the active FABlib configuration. The trailing ';' keeps Jupyter from
# echoing the call's return value as a second output.
fablib.show_config();

User: m.r.dijk@student.vu.nl bastion key is valid!
Configuration is valid


0,1
Orchestrator,orchestrator.fabric-testbed.net
Credential Manager,cm.fabric-testbed.net
Core API,uis.fabric-testbed.net
Artifact Manager,artifacts.fabric-testbed.net
Token File,/home/fabric/.tokens.json
Project ID,49f65ad7-d8a2-4ab9-8ca0-ba777a2e0ea2
Bastion Host,bastion.fabric-testbed.net
Bastion Username,m_r_dijk_0000315553
Bastion Private Key File,/home/fabric/work/fabric_config/fabric_bastion_key
Slice Public Key File,/home/fabric/work/fabric_config/slice_key.pub


In [2]:
# Name of the FABRIC slice and the OS image used for every node.
slice_name = 'DYNAMOS-on-FABRIC'
image = "default_ubuntu_24"

# Please adhere to the following regex for naming: /[a-z][a-z0-9]+/

# One entry per node. "control" and "dynamos" entries get their name from
# their type (see create_node); every other entry needs an explicit "name".
node_configurations = [
    {
        "type": "control",
        "cores": 2,
        "ram": 8,
        "disk": 20,
        "site": "AMST",
        "host": "amst-w3.fabric-testbed.net",
    },
    {
        "type": "dynamos",
        "cores": 4,
        "ram": 16,
        "disk": 20,
        "site": "AMST",
        "host": "amst-w3.fabric-testbed.net",
    },
    {
        "type": "agent",
        "name": "server",
        "cores": 4,
        "ram": 16,
        "disk": 20,
        "site": "AMST",
        "host": "amst-w3.fabric-testbed.net",
    },
    {
        "type": "agent",
        "name": "clientone",
        "cores": 4,
        "ram": 16,
        "disk": 20,
        "site": "LOSA",
        "host": "losa-w3.fabric-testbed.net",
    },
    {
        "type": "agent",
        "name": "clienttwo",
        "cores": 4,
        "ram": 16,
        "disk": 20,
        "site": "AMST",
        "host": "amst-w3.fabric-testbed.net",
    },
    {
        "type": "agent",
        "name": "clientthree",
        "cores": 4,
        "ram": 16,
        "disk": 20,
        "site": "TOKY",
        "host": "toky-w3.fabric-testbed.net",
    }
]

# Distinct sites in a deterministic (sorted) order. A plain list(set(...)) can
# come out in a different order after every kernel restart because string
# hashing is randomised, which made the per-site lists built later irreproducible.
sites = sorted({configuration["site"] for configuration in node_configurations})
# Names of the nodes acting as DYNAMOS agents / third parties (in configuration order).
agents = [configuration["name"] for configuration in node_configurations if configuration["type"] == "agent"]
thirdparties = [configuration["name"] for configuration in node_configurations if configuration["type"] == "thirdparty"]

def create_node(slice, configuration):
    """Add the node described by ``configuration`` to ``slice`` and return it.

    Entries of type ``"control"`` or ``"dynamos"`` are named after their type;
    that name is written back into ``configuration``. Any other entry must
    already contain a ``"name"`` key. The OS image is taken from the
    notebook-level ``image`` variable.

    The node is requested with ``validate=True`` and ``raise_exception=True``.
    """
    node_type = configuration["type"]
    if node_type in ("control", "dynamos"):
        configuration["name"] = node_type

    # Forward the sizing/placement fields under the same keyword names.
    node_spec = {
        field: configuration[field]
        for field in ("name", "site", "host", "cores", "ram", "disk")
    }
    return slice.add_node(validate=True, raise_exception=True, image=image, **node_spec)
    

In [7]:
# Start from a new, empty slice; nodes, NICs and networks are attached below
# and only requested from FABRIC by the final submit() call.
slice = fablib.new_slice(name=slice_name)

# One node per configuration entry. create_node asks FABlib to validate each
# request and to raise an exception if the node cannot be created.
print('Adding nodes...')
nodes = [create_node(slice, node_configuration) for node_configuration in node_configurations]
nodes_per_site = [
    (site_name, [candidate for candidate in nodes if candidate.get_site() == site_name])
    for site_name in sites
]

# Every node gets one NIC_Basic component; its first interface is collected
# under the node's site.
print('Adding network interfaces...')
interfaces_per_site = [
    (
        site_name,
        [
            site_node.add_component(model='NIC_Basic', name='NIC').get_interfaces()[0]
            for site_node in site_nodes
        ],
    )
    for (site_name, site_nodes) in nodes_per_site
]

# One IPv4 L3 network per site, connecting that site's interfaces.
print('Adding network...')
networks = [
    slice.add_l3network(name=f'Network-{site_name}', interfaces=site_interfaces, type="IPv4")
    for (site_name, site_interfaces) in interfaces_per_site
]

print(
    networks,
    [l3_network.get_gateway() for l3_network in networks],
    [l3_network.get_subnet() for l3_network in networks],
)

# Timezone-aware (UTC) lease end, two weeks from now. Two weeks is the current
# maximum lease time, so the slice stays usable for as long as possible.
lease_end_time = datetime.datetime.now(tz=datetime.timezone.utc) + datetime.timedelta(days=14)

# Submit and block until the slice is ready: poll every 20 s for up to an hour
# and show textual progress (creating the slice and nodes can take a while).
print('Creating slice...')
slice.submit(
    wait=True,
    wait_timeout=3600,
    wait_interval=20,
    progress=True,
    wait_jupyter='text',
    lease_end_time=lease_end_time,
);


Retry: 11, Time: 308 sec


0,1
ID,54d59c33-9747-4ac2-87fb-6c77d71988b7
Name,DYNAMOS-on-FABRIC
Lease Expiration (UTC),2025-10-13 08:46:01 +0000
Lease Start (UTC),2025-09-29 08:46:01 +0000
Project ID,49f65ad7-d8a2-4ab9-8ca0-ba777a2e0ea2
State,StableOK
Email,m.r.dijk@student.vu.nl
UserId,562513f3-8663-474a-9a7c-9a9d4ace8429


ID,Name,Cores,RAM,Disk,Image,Image Type,Host,Site,Username,Management IP,State,Error,SSH Command,Public SSH Key File,Private SSH Key File
d2b0e044-22a1-4d21-8ee9-e4dd61d45a92,clientone,4,16,100,default_ubuntu_24,qcow2,losa-w3.fabric-testbed.net,LOSA,ubuntu,2001:400:a100:3070:f816:3eff:fe12:9a78,Active,,ssh -i /home/fabric/work/fabric_config/slice_key -F /home/fabric/work/fabric_config/ssh_config ubuntu@2001:400:a100:3070:f816:3eff:fe12:9a78,/home/fabric/work/fabric_config/slice_key.pub,/home/fabric/work/fabric_config/slice_key
b81f34b6-7f34-4812-ab70-478355aaec0e,clientthree,4,16,100,default_ubuntu_24,qcow2,toky-w3.fabric-testbed.net,TOKY,ubuntu,133.69.160.244,Active,,ssh -i /home/fabric/work/fabric_config/slice_key -F /home/fabric/work/fabric_config/ssh_config ubuntu@133.69.160.244,/home/fabric/work/fabric_config/slice_key.pub,/home/fabric/work/fabric_config/slice_key
b8751a17-5456-4805-bbb5-16ca274503c7,clienttwo,4,16,100,default_ubuntu_24,qcow2,amst-w3.fabric-testbed.net,AMST,ubuntu,2001:610:2d0:fabc:f816:3eff:fe14:5f51,Active,,ssh -i /home/fabric/work/fabric_config/slice_key -F /home/fabric/work/fabric_config/ssh_config ubuntu@2001:610:2d0:fabc:f816:3eff:fe14:5f51,/home/fabric/work/fabric_config/slice_key.pub,/home/fabric/work/fabric_config/slice_key
bde63f32-7253-4676-b5a1-0786ebff3249,control,2,8,100,default_ubuntu_24,qcow2,amst-w3.fabric-testbed.net,AMST,ubuntu,2001:610:2d0:fabc:f816:3eff:fed8:fa83,Active,,ssh -i /home/fabric/work/fabric_config/slice_key -F /home/fabric/work/fabric_config/ssh_config ubuntu@2001:610:2d0:fabc:f816:3eff:fed8:fa83,/home/fabric/work/fabric_config/slice_key.pub,/home/fabric/work/fabric_config/slice_key
5cb448e8-0dc7-453e-8a72-ebfa4fecbb44,dynamos,4,16,100,default_ubuntu_24,qcow2,amst-w3.fabric-testbed.net,AMST,ubuntu,2001:610:2d0:fabc:f816:3eff:fe99:c618,Active,,ssh -i /home/fabric/work/fabric_config/slice_key -F /home/fabric/work/fabric_config/ssh_config ubuntu@2001:610:2d0:fabc:f816:3eff:fe99:c618,/home/fabric/work/fabric_config/slice_key.pub,/home/fabric/work/fabric_config/slice_key
01018402-f3fe-4e5a-b003-78000ed8f521,server,4,16,100,default_ubuntu_24,qcow2,amst-w3.fabric-testbed.net,AMST,ubuntu,2001:610:2d0:fabc:f816:3eff:fe4f:b7fc,Active,,ssh -i /home/fabric/work/fabric_config/slice_key -F /home/fabric/work/fabric_config/ssh_config ubuntu@2001:610:2d0:fabc:f816:3eff:fe4f:b7fc,/home/fabric/work/fabric_config/slice_key.pub,/home/fabric/work/fabric_config/slice_key


ID,Name,Layer,Type,Site,Subnet,Gateway,State,Error
5e6563f0-abc6-40a7-9878-801a96f587e9,Network-AMST,L3,FABNetv4,AMST,10.145.6.0/24,10.145.6.1,Active,
43abb4c9-5c77-464e-b483-b1f0ffc74991,Network-LOSA,L3,FABNetv4,LOSA,10.137.6.0/24,10.137.6.1,Active,
d1f08ad7-ce2e-4322-a5a2-7d81c9f95871,Network-TOKY,L3,FABNetv4,TOKY,10.146.4.0/24,10.146.4.1,Active,


KeyboardInterrupt: 

In [3]:
# Re-attach to the already-provisioned slice (e.g. after a kernel restart).
# Uses `slice_name` from the configuration cell instead of repeating the
# literal, so renaming the slice only has to happen in one place.
slice = fablib.get_slice(name=slice_name);
nodes = slice.get_nodes();

# Handles to the two infrastructure nodes that later cells address directly.
control_node = slice.get_node(name='control')
dynamos_node = slice.get_node(name='dynamos')

In [9]:
# Requires `slice`, `nodes` and `sites` from earlier cells. After a kernel
# restart, re-fetch the first two with fablib.get_slice(...) / slice.get_nodes().

# (site, nodes located at that site, that site's "Network-<site>" network)
nodes_and_network_per_site = [
    (
        site_name,
        [member for member in nodes if member.get_site() == site_name],
        slice.get_network(name=f"Network-{site_name}"),
    )
    for site_name in sites
]
# Flat list of the per-site networks; assign_ip reads it to add cross-site routes.
networks = [entry[2] for entry in nodes_and_network_per_site]

# Same tuples, extended with as many available IPs as the site has nodes.
nodes_network_ips_per_site = [
    (site_name, site_nodes, site_network, site_network.get_available_ips(len(site_nodes)))
    for (site_name, site_nodes, site_network) in nodes_and_network_per_site
]

In [10]:
# Show the table of nodes in the slice; the trailing ';' keeps the return value from being echoed.
slice.list_nodes();

ID,Name,Cores,RAM,Disk,Image,Image Type,Host,Site,Username,Management IP,State,Error,SSH Command,Public SSH Key File,Private SSH Key File
d2b0e044-22a1-4d21-8ee9-e4dd61d45a92,clientone,4,16,100,default_ubuntu_24,qcow2,losa-w3.fabric-testbed.net,LOSA,ubuntu,2001:400:a100:3070:f816:3eff:fe12:9a78,Active,,ssh -i /home/fabric/work/fabric_config/slice_key -F /home/fabric/work/fabric_config/ssh_config ubuntu@2001:400:a100:3070:f816:3eff:fe12:9a78,/home/fabric/work/fabric_config/slice_key.pub,/home/fabric/work/fabric_config/slice_key
b81f34b6-7f34-4812-ab70-478355aaec0e,clientthree,4,16,100,default_ubuntu_24,qcow2,toky-w3.fabric-testbed.net,TOKY,ubuntu,133.69.160.244,Active,,ssh -i /home/fabric/work/fabric_config/slice_key -F /home/fabric/work/fabric_config/ssh_config ubuntu@133.69.160.244,/home/fabric/work/fabric_config/slice_key.pub,/home/fabric/work/fabric_config/slice_key
b8751a17-5456-4805-bbb5-16ca274503c7,clienttwo,4,16,100,default_ubuntu_24,qcow2,amst-w3.fabric-testbed.net,AMST,ubuntu,2001:610:2d0:fabc:f816:3eff:fe14:5f51,Active,,ssh -i /home/fabric/work/fabric_config/slice_key -F /home/fabric/work/fabric_config/ssh_config ubuntu@2001:610:2d0:fabc:f816:3eff:fe14:5f51,/home/fabric/work/fabric_config/slice_key.pub,/home/fabric/work/fabric_config/slice_key
bde63f32-7253-4676-b5a1-0786ebff3249,control,2,8,100,default_ubuntu_24,qcow2,amst-w3.fabric-testbed.net,AMST,ubuntu,2001:610:2d0:fabc:f816:3eff:fed8:fa83,Active,,ssh -i /home/fabric/work/fabric_config/slice_key -F /home/fabric/work/fabric_config/ssh_config ubuntu@2001:610:2d0:fabc:f816:3eff:fed8:fa83,/home/fabric/work/fabric_config/slice_key.pub,/home/fabric/work/fabric_config/slice_key
5cb448e8-0dc7-453e-8a72-ebfa4fecbb44,dynamos,4,16,100,default_ubuntu_24,qcow2,amst-w3.fabric-testbed.net,AMST,ubuntu,2001:610:2d0:fabc:f816:3eff:fe99:c618,Active,,ssh -i /home/fabric/work/fabric_config/slice_key -F /home/fabric/work/fabric_config/ssh_config ubuntu@2001:610:2d0:fabc:f816:3eff:fe99:c618,/home/fabric/work/fabric_config/slice_key.pub,/home/fabric/work/fabric_config/slice_key
01018402-f3fe-4e5a-b003-78000ed8f521,server,4,16,100,default_ubuntu_24,qcow2,amst-w3.fabric-testbed.net,AMST,ubuntu,2001:610:2d0:fabc:f816:3eff:fe4f:b7fc,Active,,ssh -i /home/fabric/work/fabric_config/slice_key -F /home/fabric/work/fabric_config/ssh_config ubuntu@2001:610:2d0:fabc:f816:3eff:fe4f:b7fc,/home/fabric/work/fabric_config/slice_key.pub,/home/fabric/work/fabric_config/slice_key


In [11]:
# Connectivity smoke test: run a trivial command on every node of the slice.
for node in slice.get_nodes():
    # The backticks make the remote shell substitute `hostname -s`, so each
    # node reports its own short host name. stdout/stderr are not used further.
    stdout, stderr = node.execute('echo Hello, FABRIC from node `hostname -s`')

Hello, FABRIC from node control
Hello, FABRIC from node dynamos
Hello, FABRIC from node server
Hello, FABRIC from node clientone
Hello, FABRIC from node clienttwo
Hello, FABRIC from node clientthree


In [12]:
def assign_ip(site, network, available_ips, node, all_networks=None):
    """Give ``node`` an address on its site network and route to every site.

    Parameters
    ----------
    site : str
        Site name; the node's interface is looked up on ``Network-<site>``.
    network :
        The L3 network of ``site`` (provides gateway and subnet).
    available_ips : list
        Pool of free addresses for ``network``. The first entry is removed
        from this list and assigned to the node.
    node :
        The node to configure.
    all_networks : iterable, optional
        Every per-site network that should be reachable from ``node``.
        Defaults to the notebook-level ``networks`` list.

    Returns
    -------
    The address that was assigned.

    Raises
    ------
    IndexError
        If ``available_ips`` is empty.
    """
    if not available_ips:
        raise IndexError(f"no available IP address left for site {site!r}")
    if all_networks is None:
        # Fall back to the list built by the per-site preparation cell.
        all_networks = networks

    interface = node.get_interface(network_name=f"Network-{site}")
    address = available_ips.pop(0)
    network_gateway = network.get_gateway()
    network_subnet = network.get_subnet()

    network.allocate_ip(address)
    interface.ip_addr_add(addr=address, subnet=network_subnet)
    node.ip_route_add(subnet=network_subnet, gateway=network_gateway)

    # For the multisite IPv4 connection: reach the other sites' subnets through
    # this site's gateway. The node's own subnet already got its route above,
    # so it is skipped here instead of being added a second time.
    for other_network in all_networks:
        other_subnet = other_network.get_subnet()
        if other_subnet == network_subnet:
            continue
        node.ip_route_add(subnet=other_subnet, gateway=network_gateway)

    return address

# Configure every node, site by site. Each call takes the next free address
# from that site's pool; the assigned addresses are collected in `ips`.
ips = [
    assign_ip(site_name, site_network, free_ips, site_node)
    for (site_name, site_nodes, site_network, free_ips) in nodes_network_ips_per_site
    for site_node in site_nodes
];

In [15]:
# Print one SSH command per node, rewritten to use a key under ~/.ssh and no
# FABRIC ssh_config (presumably for use outside this JupyterHub — confirm).
for node in nodes:
    node_node = node.get_name()
    ssh_command = node.get_ssh_command()
    # Swap the key path for ~/.ssh/slice_key.
    ssh_command = ssh_command.replace("-i /home/fabric/work/fabric_config/slice_key", "-i ~/.ssh/slice_key")
    # Drop the "-F <ssh_config>" option; the pattern includes its trailing space.
    ssh_command = ssh_command.replace("-F /home/fabric/work/fabric_config/ssh_config ", "")

    print(node_node, ": ", ssh_command);

control :  ssh -i ~/.ssh/slice_key ubuntu@2001:610:2d0:fabc:f816:3eff:fed8:fa83
dynamos :  ssh -i ~/.ssh/slice_key ubuntu@2001:610:2d0:fabc:f816:3eff:fe99:c618
server :  ssh -i ~/.ssh/slice_key ubuntu@2001:610:2d0:fabc:f816:3eff:fe4f:b7fc
clientone :  ssh -i ~/.ssh/slice_key ubuntu@2001:400:a100:3070:f816:3eff:fe12:9a78
clienttwo :  ssh -i ~/.ssh/slice_key ubuntu@2001:610:2d0:fabc:f816:3eff:fe14:5f51
clientthree :  ssh -i ~/.ssh/slice_key ubuntu@133.69.160.244


In [16]:
# Requires `nodes`; after a kernel restart re-fetch it via
# fablib.get_slice(...) and slice.get_nodes().

# Start one upload thread per node, then wait for all of them.
print("Uploading the node setup...")
threads = [
    target.upload_file_thread(local_file_path="node_scripts/node_setup.sh", remote_file_path="setup.sh")
    for target in nodes
]
[upload.result() for upload in threads]

# Run the setup script on every node in parallel.
print("Executing the node setup...")
threads = [target.execute_thread("chmod +x setup.sh && ./setup.sh") for target in nodes]
# Last expression of the cell: waits for every run and echoes the collected results.
[run.result() for run in threads]

Uploading the node setup...
Executing the node setup...


[('Hit:1 http://security.ubuntu.com/ubuntu noble-security InRelease\nHit:2 http://nova.clouds.archive.ubuntu.com/ubuntu noble InRelease\nHit:3 http://nova.clouds.archive.ubuntu.com/ubuntu noble-updates InRelease\nHit:4 http://nova.clouds.archive.ubuntu.com/ubuntu noble-backports InRelease\nHit:5 https://prod-cdn.packages.k8s.io/repositories/isv:/kubernetes:/core:/stable:/v1.30/deb  InRelease\nReading package lists...\nReading package lists...\nBuilding dependency tree...\nReading state information...\nCalculating upgrade...\nThe following packages have been kept back:\n  linux-headers-generic linux-headers-virtual linux-image-virtual\n  linux-virtual\n0 upgraded, 0 newly installed, 0 to remove and 4 not upgraded.\nHit:2 http://security.ubuntu.com/ubuntu noble-security InRelease\nHit:3 http://nova.clouds.archive.ubuntu.com/ubuntu noble InRelease\nHit:4 http://nova.clouds.archive.ubuntu.com/ubuntu noble-updates InRelease\nHit:5 http://nova.clouds.archive.ubuntu.com/ubuntu noble-backports

In [17]:
# Upload the k9s install script to the control node, then make it executable and run it.
control_node.upload_file(local_file_path="node_scripts/install_k9s.sh", remote_file_path="k9s.sh")
control_node.execute(f"chmod +x k9s.sh && ./k9s.sh");

[31m--2025-09-29 09:06:19--  https://github.com/derailed/k9s/releases/download/v0.32.5/k9s_linux_amd64.deb
[0m[31mResolving github.com (github.com)... [0m[31m2600:2701:5000:5001::8c52:7104, 140.82.113.4
Connecting to github.com (github.com)|2600:2701:5000:5001::8c52:7104|:443... [0m[31mconnected.
[0m[31mHTTP request sent, awaiting response... [0m[31m302 Found
Location: https://release-assets.githubusercontent.com/github-production-release-asset/167596393/7cc41638-6a22-4598-9b02-646efaaa1053?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-09-29T09%3A56%3A14Z&rscd=attachment%3B+filename%3Dk9s_linux_amd64.deb&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-09-29T08%3A56%3A03Z&ske=2025-09-29T09%3A56%3A14Z&sks=b&skv=2018-11-09&sig=N1EMaZP2ZT3fWZ4JsMTWJCa3UwzHNjWRQo4O9q%2FM8vs%3D&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmVsZWFzZS1hc3NldHMuZ2l0aHVidXNlcmNvbnRlbnQuY29tIiwia2

In [4]:
def get_ip(node):
    """Return the IP address of ``node``'s interface on its site network.

    The interface is looked up by network name ``Network-<site>``, where
    ``<site>`` is whatever ``node.get_site()`` returns.
    """
    site_network_name = "Network-{}".format(node.get_site())
    return node.get_interface(network_name=site_network_name).get_ip_addr()

# Site-network address and name of every node, in the order of `nodes`,
# plus the two lists paired up as (name, ip) tuples.
ips = list(map(get_ip, nodes));
names = [each_node.get_name() for each_node in nodes];
names_and_ips = list(zip(names, ips));

In [5]:
# Build the Kubespray inventory from the (name, ip) pairs.
# The control-plane and dynamos addresses are looked up by node name rather
# than by list position, so the file stays correct even if the node order
# returned by the slice changes. A missing node fails loudly with KeyError.
ip_by_name = dict(names_and_ips)
control_ip = ip_by_name["control"]
dynamos_ip = ip_by_name["dynamos"]

inventory_lines = [
    "[kube_control_plane]",
    f"control ansible_host={control_ip} ip={control_ip} etcd_member_name=etcd1",
    "",
    "[etcd:children]",
    "kube_control_plane",
    "",
    "[kube_node]",
    f"dynamos ansible_host={dynamos_ip} ip={dynamos_ip}",
]
# Every remaining node joins as a regular worker, in its original order.
inventory_lines += [
    f"{name} ansible_host={ip} ip={ip}"
    for (name, ip) in names_and_ips
    if name not in ("control", "dynamos")
]
# Each line, including the last, is newline-terminated.
inventory = "\n".join(inventory_lines) + "\n"

with open('kubespray/inventory.ini', 'w') as f:
    f.write(inventory)

In [6]:
# Prepare Kubespray on the control node: upload and run the setup script.
control_node.upload_file(local_file_path="node_scripts/control_kubespray_setup.sh", remote_file_path="kubespray_setup.sh");
control_node.execute("chmod +x kubespray_setup.sh && ./kubespray_setup.sh");

# These uploads target the kubespray/ directory on the node, so they must come
# after the setup script above has run.
control_node.upload_file(local_file_path="kubespray/inventory.ini", remote_file_path="kubespray/inventory/dynamos/inventory.ini");
control_node.upload_file(local_file_path="kubespray/ansible.cfg", remote_file_path="kubespray/ansible.cfg");
control_node.upload_file(local_file_path="node_scripts/dot_kube.sh", remote_file_path="dot_kube.sh");
control_node.execute("chmod +x ./dot_kube.sh");

# NOTE(security): this copies the slice *private* key onto the control node
# (presumably so it can SSH into the other nodes — confirm). The key is
# restricted to its owner (mode 600) right after the upload.
control_node.upload_file(local_file_path="/home/fabric/work/fabric_config/slice_key", remote_file_path="/home/ubuntu/.ssh/slice_key");
control_node.execute("chmod 600 /home/ubuntu/.ssh/slice_key");

[31mCloning into 'kubespray'...
[0m[31mSwitched to a new branch 'release-2.27'
[0mbranch 'release-2.27' set up to track 'origin/release-2.27'.
Collecting ansible==9.13.0 (from -r requirements.txt (line 1))
  Downloading ansible-9.13.0-py3-none-any.whl.metadata (8.0 kB)
Collecting cryptography==44.0.2 (from -r requirements.txt (line 3))
  Downloading cryptography-44.0.2-cp39-abi3-manylinux_2_34_x86_64.whl.metadata (5.7 kB)
Collecting jmespath==1.0.1 (from -r requirements.txt (line 5))
  Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)
Collecting netaddr==1.3.0 (from -r requirements.txt (line 7))
  Downloading netaddr-1.3.0-py3-none-any.whl.metadata (5.0 kB)
Collecting ansible-core~=2.16.14 (from ansible==9.13.0->-r requirements.txt (line 1))
  Downloading ansible_core-2.16.14-py3-none-any.whl.metadata (6.9 kB)
Collecting cffi>=1.12 (from cryptography==44.0.2->-r requirements.txt (line 3))
  Downloading cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.wh

In [7]:
# Upload the Kubespray start script to the control node, then make it executable and run it.
control_node.upload_file(local_file_path="node_scripts/start_kubespray.sh", remote_file_path="start.sh");
control_node.execute(f"chmod +x start.sh && ./start.sh");

/home/ubuntu/kubespray/roles/bootstrap-os/tasks/main.yml, line 29, column 7,
found a duplicate dict key (paths). Using last defined value only.
[0mUsing /home/ubuntu/kubespray/ansible.cfg as config file

PLAY [Check Ansible version] ***************************************************
Monday 29 September 2025  09:16:39 +0000 (0:00:00.008)       0:00:00.008 ****** 

TASK [Check 2.16.4 <= Ansible version < 2.17.0] ********************************
ok: [dynamos] => {
    "changed": false,
    "msg": "All assertions passed"
}
Monday 29 September 2025  09:16:39 +0000 (0:00:00.019)       0:00:00.027 ****** 

TASK [Check that python netaddr is installed] **********************************
ok: [dynamos] => {
    "changed": false,
    "msg": "All assertions passed"
}
Monday 29 September 2025  09:16:39 +0000 (0:00:00.089)       0:00:00.117 ****** 

TASK [Check that jinja is not too old (install via pip)] ***********************
ok: [dynamos] => {
    "changed": false,
    "msg": "All assertions p

In [15]:
# This is for resetting the kubespray cluster. 
# Use this if you are troubleshooting your Kubernetes cluster
# and you want to redeploy fresh.

# nodes[0].upload_file(local_file_path="node_scripts/reset_kubespray.sh", remote_file_path="reset.sh");
# nodes[0].execute(f"chmod +x reset.sh && ./reset.sh");

In [9]:
# Add the relevant etcd data for DYNAMOS.
# NOTE(review): this comment used to say "to the dynamos node", but the script
# is uploaded to and executed on control_node — confirm which is intended.
control_node.upload_file(local_file_path="node_scripts/define_etcd_data.sh", remote_file_path="define_etcd_data.sh");
control_node.execute(f"chmod +x define_etcd_data.sh && ./define_etcd_data.sh");

[31mCloning into 'DYNAMOS'...
[0m

In [10]:
# Preconfigure Helm for DYNAMOS
control_node.upload_file(local_file_path="node_scripts/install_dynamos.sh", remote_file_path="dynamos.sh");
control_node.execute(f"chmod +x dynamos.sh && ./dynamos.sh");

# Configure DYNAMOS for the FABRIC nodes
# Agent and third-party node names are passed to the script as two
# comma-separated positional arguments.
# NOTE(review): when `thirdparties` is empty the second argument is an empty
# string, so the script effectively receives one argument — confirm the
# script copes with that.
agents_string = ",".join(agents)
thirdparties_string = ",".join(thirdparties)

control_node.upload_file(local_file_path="node_scripts/configure_dynamos.sh", remote_file_path="configure_dynamos.sh");
control_node.execute(f"chmod +x configure_dynamos.sh && ./configure_dynamos.sh {agents_string} {thirdparties_string}");

Hit:1 http://security.ubuntu.com/ubuntu noble-security InRelease
Hit:3 http://nova.clouds.archive.ubuntu.com/ubuntu noble InRelease
Hit:4 http://nova.clouds.archive.ubuntu.com/ubuntu noble-updates InRelease
Hit:5 http://nova.clouds.archive.ubuntu.com/ubuntu noble-backports InRelease
Hit:2 https://prod-cdn.packages.k8s.io/repositories/isv:/kubernetes:/core:/stable:/v1.30/deb  InRelease
Reading package lists...
Reading package lists...
Building dependency tree...
Reading state information...
curl is already the newest version (8.5.0-2ubuntu10.6).
gpg is already the newest version (2.4.4-2ubuntu17.3).
apt-transport-https is already the newest version (2.8.3).
The following packages were automatically installed and are no longer required:
  bridge-utils dns-root-data dnsmasq-base pigz ubuntu-fan
Use 'sudo apt autoremove' to remove them.
0 upgraded, 0 newly installed, 0 to remove and 4 not upgraded.
deb [signed-by=/usr/share/keyrings/helm.gpg] https://packages.buildkite.com/helm-linux/helm-

In [11]:
# Start the DYNAMOS configuration.

# Optional (disabled): upload a custom datasets.json before running the configuration script.
# control_node.upload_file(local_file_path="/home/fabric/work/Scattered-Directive/configuration/etcd_launch_files/datasets.json", 
#                          remote_file_path="DYNAMOS/configuration/etcd_launch_files/datasets.json")
# There is no trailing ';' here, so the value returned by execute() is echoed as the cell result.
control_node.execute(f"~/DYNAMOS/configuration/dynamos-configuration.sh")

Setting up paths...
definitions_example.json copied over definitions.json to ensure a clean file
Generating RabbitMQ password...
Replacing tokens...
Installing namespaces...
Release "namespaces" does not exist. Installing it now.
NAME: namespaces
LAST DEPLOYED: Mon Sep 29 10:00:48 2025
NAMESPACE: default
STATUS: deployed
REVISION: 1
TEST SUITE: None
Preparing PVC
pod/temp-pod created
pod/temp-pod-orch created
Waiting for temp-pod to be Running...
pod/temp-pod condition met
pod/temp-pod-orch condition met
./
./datasets.json
./agreements.json
./requestType.json
./archetype.json
./microservices.json
./optional_microservices.json
./
./datasets.json
./agreements.json
./requestType.json
./archetype.json
./microservices.json
./optional_microservices.json
pod "temp-pod" deleted
pod "temp-pod-orch" deleted
Installing Prometheus...
"prometheus-community" has been added to your repositories
Hang tight while we grab the latest from your chart repositories...
...Successfully got an update from the 

('Setting up paths...\ndefinitions_example.json copied over definitions.json to ensure a clean file\nGenerating RabbitMQ password...\nReplacing tokens...\nInstalling namespaces...\nRelease "namespaces" does not exist. Installing it now.\nNAME: namespaces\nLAST DEPLOYED: Mon Sep 29 10:00:48 2025\nNAMESPACE: default\nSTATUS: deployed\nREVISION: 1\nTEST SUITE: None\nPreparing PVC\npod/temp-pod created\npod/temp-pod-orch created\nWaiting for temp-pod to be Running...\npod/temp-pod condition met\npod/temp-pod-orch condition met\n./\n./datasets.json\n./agreements.json\n./requestType.json\n./archetype.json\n./microservices.json\n./optional_microservices.json\n./\n./datasets.json\n./agreements.json\n./requestType.json\n./archetype.json\n./microservices.json\n./optional_microservices.json\npod "temp-pod" deleted\npod "temp-pod-orch" deleted\nInstalling Prometheus...\n"prometheus-community" has been added to your repositories\nHang tight while we grab the latest from your chart repositories...\n

In [12]:
# Install monitoring namespace
# Optional (disabled): remove an existing prometheus release first.
# control_node.execute(f"helm uninstall prometheus")
# Upload the local monitoring chart directory into DYNAMOS/charts on the control node,
# then upload and run the monitoring install script.
control_node.upload_directory("/home/fabric/work/Scattered-Directive/charts/monitoring", "DYNAMOS/charts");
control_node.upload_file(local_file_path="node_scripts/monitoring.sh", remote_file_path="monitoring.sh");
control_node.execute(f"chmod +x monitoring.sh && ./monitoring.sh");

Setting up paths...
namespace/monitoring created
"prometheus-community" already exists with the same configuration, skipping
Hang tight while we grab the latest from your chart repositories...
...Successfully got an update from the "prometheus-community" chart repository
Update Complete. ⎈Happy Helming!⎈
Release "prometheus" does not exist. Installing it now.
NAME: prometheus
LAST DEPLOYED: Mon Sep 29 10:05:00 2025
NAMESPACE: monitoring
STATUS: deployed
REVISION: 1
NOTES:
kube-prometheus-stack has been installed. Check its status by running:
  kubectl --namespace monitoring get pods -l "release=prometheus"

Visit https://github.com/prometheus-operator/kube-prometheus for instructions on how to create & configure Alertmanager and Prometheus instances using the Operator.
NAME                                                   READY   STATUS    RESTARTS   AGE
prometheus-kube-prometheus-operator-857648bfd5-b5psx   1/1     Running   0          68s
prometheus-kube-state-metrics-6f5574c548-66j

In [None]:
# Switch the Prometheus and Grafana services to type NodePort.
# The JSON patch lives in a plain (non-f) string: written inline inside an
# f-string its braces are parsed as replacement fields and its double quotes
# clash with the string delimiters, so the original cell could not run.
NODEPORT_PATCH = '{"spec": {"type": "NodePort"}}'
control_node.execute(f"kubectl patch svc prometheus-kube-prometheus-prometheus -n monitoring -p '{NODEPORT_PATCH}'")
control_node.execute(f"kubectl patch svc prometheus-grafana -n monitoring -p '{NODEPORT_PATCH}'")

In [18]:
# Replace dynamos-configs.sh on the control node with the local copy and make it executable.
control_node.upload_file("/home/fabric/work/Scattered-Directive/configuration/dynamos-configs.sh", "DYNAMOS/configuration/dynamos-configs.sh")
control_node.execute(f"chmod +x DYNAMOS/configuration/dynamos-configs.sh");

```sh
# IPs and ports for services
clientone       10.139.1.2
clientthree     10.146.1.2
clienttwo       10.143.3.5
control         10.143.3.2
dynamos         10.143.3.3
server          10.143.3.4

nginx           80:31141
prometheus      9090:32535
grafana         80:31022

# SSH tunnel for prometheus and Grafana
ssh -i ~/.ssh/slice_key -F ssh_config -L 9090:10.145.6.3:32379 ubuntu@2001:610:2d0:fabc:f816:3eff:fe28:721a
ssh -i ~/.ssh/slice_key -F ssh_config -L 3000:10.145.6.3:31616 ubuntu@2001:610:2d0:fabc:f816:3eff:fed8:fa83

# Address for nodes
control :  ssh -i ~/.ssh/slice_key ubuntu@2001:400:a100:3090:f816:3eff:fe5d:6c58
dynamos :  ssh -i ~/.ssh/slice_key ubuntu@2001:400:a100:3090:f816:3eff:feb6:b5fa
server :  ssh -i ~/.ssh/slice_key ubuntu@2001:400:a100:3090:f816:3eff:fea8:a7c4
clientone :  ssh -i ~/.ssh/slice_key ubuntu@2001:400:a100:3080:f816:3eff:fe87:97fd
clienttwo :  ssh -i ~/.ssh/slice_key ubuntu@2001:400:a100:3090:f816:3eff:fe31:f1ac
clientthree :  ssh -i ~/.ssh/slice_key ubuntu@133.69.160.227


# Request to invoke training
curl  -H "Host: api-gateway.api-gateway.svc.cluster.local" http://10.139.1.2:31141/api/v1/requestApproval --header 'Content-Type: application/json' --data-raw '{
        "type": "hflTrainModelRequest",
        "user": {
                "id": "1234",
                "userName": "maurits.dijk@student.uva.nl@student.uva.nl"
        },
        "dataProviders": ["server", "clientone", "clienttwo", "clientthree"],
        "data_request": {
        "data": {
                "learning_rate": 0.05,
                "cycles": 10,
                "change_policies": -1
            },
                "type": "hflTrainModelRequest",
                "requestMetadata": {}
        }
}'


# Default values in the api-gateway
learning_rate: 0.05,
cycles: 10,
change_policies: -1
                                 




```

In [25]:
# Optional (disabled): upload and run the node preparation script.
# control_node.upload_file(local_file_path="/home/fabric/work/Scattered-Directive/fabric/energy_experiments/prepare_node.sh", 
#                          remote_file_path="DYNAMOS/scripts/prepare_node.sh");
# control_node.execute(f"chmod +x DYNAMOS/scripts/prepare_node.sh && ./DYNAMOS/scripts/prepare_node.sh")

# Upload the experiment test script into ./experiments on the control node.
control_node.upload_file(local_file_path="/home/fabric/work/Scattered-Directive/fabric/energy_experiments/test.py", 
                         remote_file_path="./experiments/test.py");

In [37]:
# `traceback` is needed by the error handler below. It was never imported
# anywhere in the notebook, so a failure used to end in a NameError instead of
# a traceback. Imported here so this cell works on its own.
import traceback

# Positional arguments passed to execute_experiments_fl.py.
NUMBER_OF_NODES = 4
CYCLES = 10
# CHANGE_POLICIES = 10

try:
    # Get slice by name: https://fabric-fablib.readthedocs.io/en/latest/fablib.html#fabrictestbed_extensions.fablib.fablib.FablibManager.get_slice
    slice = fablib.get_slice(name=slice_name)
    # Get the correct node to run the experiment on: k8s-control-plane node, since we execute all the scripts there, and dynamos-core is required for the rest.
    control_node = slice.get_node(name='control')

    # Upload required files for the experiments:
    control_node.upload_file(local_file_path="energy_experiments/constants.py", remote_file_path="./experiments/constants.py")
    control_node.upload_file(local_file_path="energy_experiments/execute_experiments_fl.py", remote_file_path="./experiments/execute_experiments_fl.py")

    # Run the experiment. This needs to run the python script to allow the output to be added in the notebook output, with a separate script that did not happen
    stdout, stderr = control_node.execute(
        (
            # Make the script executable
            f"chmod +x ./experiments/execute_experiments_fl.py && "
            # Go to the corresponding location
            f"cd ~/experiments && "
            # Activate the venv
            f"source dynamos-env/bin/activate && "
            # Execute the actual experiments. Use -u to use unbuffered mode for stdout, stderr, and stdin, 
            # so that print() calls and logs from inside the python script appear live (or at least flush immediately after each action)
            # f"python3 -u execute_experiments.py {LEARNING_RATE} {CYCLES} {CHANGE_POLICIES}"
            f"python3 -u execute_experiments_fl.py {NUMBER_OF_NODES} {CYCLES}"
        )
    )

except Exception as e:
    # Report the failure and the full traceback without aborting the notebook run.
    print(f"Fail: {e}")
    traceback.print_exc()


Starting experiment
Waiting for idle period...
Idle Energy: {'kernel_processes': '8555.947999996444', 'system_processes': '35323.87599999069', 'policy': '233.55600000027817', 'sidecar': '5689.491999999999', 'api-gateway': '145.93600000000006', 'rabbitmq': '590.9359999999997', 'policy-enforcer': '147.92799999999994', 'orchestrator': '169.948'} (in J)
Wait for VFL to run
[31mTraceback (most recent call last):
  File "/home/ubuntu/experiments/execute_experiments_fl.py", line 257, in <module>
    run_experiment(output_dir, exp_cycles)
  File "/home/ubuntu/experiments/execute_experiments_fl.py", line 136, in run_experiment
    logs = get_logs()
           ^^^^^^^^^^
  File "/home/ubuntu/experiments/execute_experiments_fl.py", line 55, in get_logs
    logs = v1.read_namespaced_pod_log(name=pod_name, namespace=namespace, container=container_name)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/experiments/dynamos-env/l

In [None]:
│ hfl-train-model 2025-10-28 12:21:47,555 - main.py - main.py:67 - DEBUG - Loaded server dataset successfully.                                                                            
│ hfl-train-model 2025-10-28 12:21:47,557 - main.py - ms_init.py:189 - DEBUG - NewConfiguration hfl-train-model,                                                                         
│ hfl-train-model job_name: maurits-dijk-df3be075server1                                                                                                                                  
│ hfl-train-model port: 50053,                                                                                                                                                            
│ hfl-train-model first_service: 1,                                                                                                                                                       
│ hfl-train-model last_service: 0                                                                                                                                                         
│ hfl-train-model 2025-10-28 12:21:47,566 - main.py - grpc_server.py:162 - INFO - gRPC server started on localhost:50053                                                                  
│ hfl-train-model 2025-10-28 12:21:47,567 - main.py - grpc_client.py:72 - DEBUG - Try connecting to: localhost:50051                                                                      
│ hfl-train-model 2025-10-28 12:21:47,569 - main.py - grpc_client.py:81 - WARNING - Could not check: failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:50051: Failed to connect to remote host: connect: Connection refused (111)                                                                                                                          │
│ hfl-train-model 2025-10-28 12:21:47,569 - main.py - grpc_client.py:83 - INFO - Sleep 1 second                                                                                           
│ hfl-train-model 2025-10-28 12:21:48,570 - main.py - grpc_client.py:78 - INFO - Successfully connected to gRPC server at localhost:50051                                                 
│ hfl-train-model 2025-10-28 12:21:48,572 - main.py - grpc_client.py:72 - DEBUG - Try connecting to: localhost:50054                                                                      
│ hfl-train-model Attempting to instrument while already instrumented                                                                                                                     
│ hfl-train-model 2025-10-28 12:21:48,574 - main.py - grpc_client.py:81 - WARNING - Could not check: failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:50054: Failed to connect to remote host: connect: Connection refused (111)                                                                                                                          │
│ hfl-train-model 2025-10-28 12:21:48,574 - main.py - grpc_client.py:83 - INFO - Sleep 1 second                                                                                           
│ hfl-train-model 2025-10-28 12:21:49,574 - main.py - grpc_client.py:81 - WARNING - Could not check: failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:50054: Failed to connect to remote host: connect: Connection refused (111)                                                                                                                          
│ hfl-train-model 2025-10-28 12:21:49,574 - main.py - grpc_client.py:83 - INFO - Sleep 1 second                                                                                           
│ Stream closed EOF for server/maurits-dijk-df3be075server1-595dg (hfl-train)                                                                                                             
│ hfl-train-model 2025-10-28 12:21:50,575 - main.py - grpc_client.py:81 - WARNING - Could not check: failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:50054: Failed to connect to remote host: connect: Connection refused (111)                                                                                                                          │
│ hfl-train-model 2025-10-28 12:21:50,575 - main.py - grpc_client.py:83 - INFO - Sleep 1 second                                                                                           
│ hfl-train-model 2025-10-28 12:21:51,576 - main.py - grpc_client.py:81 - WARNING - Could not check: failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:50054: Failed to connect to remote host: connect: Connection refused (111)                                                                                                                          │
│ hfl-train-model 2025-10-28 12:21:51,576 - main.py - grpc_client.py:83 - INFO - Sleep 1 second                                                                                           
│ hfl-train-model 2025-10-28 12:21:52,577 - main.py - grpc_client.py:81 - WARNING - Could not check: failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:50054: Failed to connect to remote host: connect: Connection refused (111)                                                                                                                          │
│ hfl-train-model 2025-10-28 12:21:52,577 - main.py - grpc_client.py:83 - INFO - Sleep 1 second                                                                                           
│ hfl-train-model 2025-10-28 12:21:53,578 - main.py - grpc_client.py:81 - WARNING - Could not check: failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:50054: Failed to connect to remote host: connect: Connection refused (111)                                                                                                                          │
│ hfl-train-model 2025-10-28 12:21:53,578 - main.py - grpc_client.py:83 - INFO - Sleep 1 second                                                                                           
│ hfl-train-model 2025-10-28 12:21:54,579 - main.py - grpc_client.py:81 - WARNING - Could not check: failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:50054: Failed to connect to remote host: connect: Connection refused (111)                                                                                                                          │
│ hfl-train-model 2025-10-28 12:21:54,579 - main.py - grpc_client.py:83 - INFO - Sleep 1 second                                                                                           
│ hfl-train-model Traceback (most recent call last):                                                                                                                                      
│ hfl-train-model   File "/app/hfl-train-model/main.py", line 261, in <module>                                                                                                            
│ hfl-train-model     main()                                                                                                                                                              
│ hfl-train-model   File "/app/hfl-train-model/main.py", line 241, in main                                                                                                                
│ hfl-train-model     ms_config = NewConfiguration(                                                                                                                                       
│ hfl-train-model                 ^^^^^^^^^^^^^^^^^                                                                                                                                       
│ hfl-train-model   File "/usr/local/lib/python3.12/site-packages/dynamos/ms_init.py", line 210, in NewConfiguration                                                                      
│ hfl-train-model     conf.next_client = GRPCClient(                                                                                                                                      
│ hfl-train-model                        ^^^^^^^^^^^                                                                                                                                      
│ hfl-train-model   File "/usr/local/lib/python3.12/site-packages/dynamos/grpc_client.py", line 43, in __init__                                                                           
│ hfl-train-model     self.channel = self.get_grpc_connection(grpc_addr)                                                                                                                  
│ hfl-train-model                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^                                                                                                                  
│ hfl-train-model   File "/usr/local/lib/python3.12/site-packages/dynamos/grpc_client.py", line 86, in get_grpc_connection                                                                
│ hfl-train-model     raise Exception(f"Could not connect with gRPC {grpc_addr} after {i} tries")                                                                                         
│ hfl-train-model Exception: Could not connect with gRPC localhost:50054 after 7 tries