# Project Notebook

## General Imports

In [None]:
import os
import json
import traceback

## Import the fablib Library

In [None]:
# Create the fablib manager that later cells use to look up sites and slices,
# then show its configuration. The return value of show_config() is kept in `conf`.
from fabrictestbed_extensions.fablib.fablib import FablibManager as fablib_manager
fablib = fablib_manager() 
conf = fablib.show_config()

## MFLib Imports

In [None]:
# Import MFLib and print the installed version so it is recorded in the notebook output.
import mflib 
print(f"MFLib version  {mflib.__version__} " )

# MFLib is the class used below to add the measurement node and to drive it.
from mflib.mflib import MFLib

In [None]:
from fabrictestbed_extensions.fablib.fablib import FablibManager as fablib_manager

# Build the fablib manager and show its configuration; any failure is
# reported as a message instead of stopping the notebook.
try:
    fablib = fablib_manager()
    fablib.show_config()
except Exception as err:
    print(f"Exception: {err}")

## Establish the sets of nodes

"node_conf" holds the collection of L3 nodes.
"net_conf" holds the L2 networks connecting the L3 nodes.
"route_config" holds the static routes configured between the nodes.

In [None]:
# Needs polish
# The slice name is made per-user by appending the bastion username.
slice_name = "topology-test-" + fablib.get_bastion_username()

# Sites excluded from the random draw of five sites (one per node).
avoided_sites = [
    "TOKY", "FIU", "CERN", "DALL", "GPN", "LBNL", "RENC", "SALT", "TACC", "UKY",
    "WASH", "NCSA", "LOSA", "GATECH", "INDI", "MAX", "MASS", "NEWY", "SRI", "UCSD",
]
site1, site2, site3, site4, site5 = fablib.get_random_sites(count=5, avoid=avoided_sites)

# --- node_conf: one entry per VM -------------------------------------------
# Every node shares the same sizing and image; only name, site and packages differ.
endpoint_packages = ['net-tools', 'iperf3', 'moreutils']
router_packages = ['net-tools']
node_layout = [
    ("consumer1", site1, endpoint_packages),
    ("consumer2", site2, endpoint_packages),
    ("server", site3, endpoint_packages),
    ("router1", site4, router_packages),
    ("router2", site5, router_packages),
]
node_conf = [
    {
        'name': node_name,
        'site': node_site,
        'cores': 2,
        'ram': 4,
        'disk': 10,
        'image': 'default_ubuntu_20',
        'packages': list(node_packages),  # each node gets its own list object
    }
    for node_name, node_site, node_packages in node_layout
]

# --- net_conf: one entry per L2 network -------------------------------------
# (network name, subnet, (node, address), (node, address))
link_layout = [
    ("net_c1r1", "10.10.1.0/24", ("consumer1", "10.10.1.100"), ("router1", "10.10.1.10")),
    ("net_c1r2", "10.10.2.0/24", ("consumer1", "10.10.2.200"), ("router2", "10.10.2.20")),
    ("net_c2r2", "10.10.3.0/24", ("consumer2", "10.10.3.200"), ("router2", "10.10.3.20")),
    ("net_sr1", "10.10.4.0/24", ("server", "10.10.4.100"), ("router1", "10.10.4.10")),
    ("net_sr2", "10.10.5.0/24", ("server", "10.10.5.200"), ("router2", "10.10.5.20")),
]
net_conf = [
    {
        "name": net_name,
        "subnet": net_subnet,
        "nodes": [{"name": host, "addr": addr} for host, addr in endpoints],
    }
    for net_name, net_subnet, *endpoints in link_layout
]

# --- route_config: static routes, grouped by the node that installs them ----
# Each pair is (destination subnet, gateway address).
routes_by_node = {
    # consumer 1 links
    "consumer1": [
        ("10.10.4.0/24", "10.10.1.10"),  # server subnet via router1
        ("10.10.3.0/24", "10.10.2.20"),  # consumer2 subnet via router2
        ("10.10.5.0/24", "10.10.2.20"),  # server subnet via router2
    ],
    # consumer 2 (no forwarding link to router 1 is set up here)
    "consumer2": [
        ("10.10.2.0/24", "10.10.3.20"),  # consumer1 subnet via router2
        ("10.10.5.0/24", "10.10.3.20"),  # server subnet via router2
    ],
    # server links
    "server": [
        ("10.10.1.0/24", "10.10.4.10"),  # consumer1 subnet via router1
        ("10.10.2.0/24", "10.10.5.20"),  # consumer1 subnet via router2
        ("10.10.3.0/24", "10.10.5.20"),  # consumer2 subnet via router2
    ],
}
route_config = [
    {"addr": destination, "gw": gateway, "nodes": [owner]}
    for owner, owner_routes in routes_by_node.items()
    for destination, gateway in owner_routes
]

print(f"Setting up slice {slice_name}")
print(f"Using sites {site1}, {site2}, {site3}, {site4}, {site5}")

## Check resources

Check if you have a pre-existing slice by this name already running

Don't attempt to submit the slice if you've already defined one by this name

In [None]:
# Look for an existing slice with this name. A failed lookup is treated as
# "no such slice", in which case a new (not yet submitted) slice is created.
try:
    slice = fablib.get_slice(slice_name)
    print("You already have a slice by this name!")
    print("If you previously reserved resources, skip to the 'log in to resources' section.")
except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit (e.g. interrupting the kernel) are not
    # mistaken for "slice not found" and do not trigger slice creation.
    print("You don't have a slice named %s yet." % slice_name)
    print("Continue to the next step to make one.")
    slice = fablib.new_slice(name=slice_name)

## Node Set-up

Following example code, this will set-up the L2 & L3 networks per the configured lists

In [None]:
# Add one node to the slice for every entry in node_conf.
# The 'packages' key is not passed here; it is used later by the install cell.
node_fields = ('name', 'site', 'cores', 'ram', 'disk', 'image')
for spec in node_conf:
    node_kwargs = {field: spec[field] for field in node_fields}
    slice.add_node(**node_kwargs)

In [None]:
# For every net_conf entry: give each attached node a NIC_Basic component
# named after the network, take that NIC's first interface, and join all of
# the collected interfaces in one L2 network.
for net_spec in net_conf:
    net_name = net_spec["name"]
    ifaces = [
        slice.get_node(member["name"])
             .add_component(model="NIC_Basic", name=net_name)
             .get_interfaces()[0]
        for member in net_spec['nodes']
    ]
    slice.add_l2network(name=net_name, interfaces=ifaces)

## Set-up the MFLib monitoring node
Create a node that will monitor the status of the other nodes

This step GREATLY increases slice requisition time. If debugging node links, do not add the MFLib monitoring tool.

In [None]:
# Add the measurement node to the topology through MFLib's static method.
MFLib.addMeasNode(
    slice,
    disk=100,
    image='docker_ubuntu_20',
)
print("MFLib Node added")

## Submit the slice for use

In [None]:
# Submit the slice that was assembled in the cells above (nodes, L2 networks
# and, optionally, the measurement node).
slice.submit()

This step will pend until the above slice is ready for use. Helps prevent trying to configure nodes that don't exist.

In [None]:
# Show the current slice state. The original call discarded the value, and
# because it was not the cell's last expression nothing was displayed.
print(f"Slice state: {slice.get_state()}")

# Wait (with a progress display) for SSH on the slice before configuring nodes.
slice.wait_ssh(progress=True)

## Configure Resources

In [None]:
# Re-fetch the slice by name so the configuration cells below can also be run
# from a fresh kernel against an already-submitted slice.
slice = fablib.get_slice(name=slice_name)

In [None]:
# Install packages.
# This takes a while; execute_thread() is used so the work runs in the
# background while the following steps continue.
for spec in node_conf:
    wanted = spec['packages']
    if not wanted:
        # nothing requested for this node
        continue
    target = slice.get_node(spec['name'])
    target.execute_thread("sudo apt update; sudo apt -y install " + " ".join(wanted))

In [None]:
# Bring every experiment interface up, then either assign its configured
# address or, when no address is configured, flush whatever it has.
from ipaddress import ip_address, IPv4Address, IPv4Network

for net_spec in net_conf:
    for member in net_spec['nodes']:
        # Interface names are looked up as <node>-<network>-p1.
        iface = slice.get_interface(f"{member['name']}-{net_spec['name']}-p1")
        iface.ip_link_up()
        if not member['addr']:
            iface.get_node().execute("sudo ip addr flush dev %s" % iface.get_device_name())
            continue
        iface.ip_addr_add(addr=member['addr'], subnet=IPv4Network(net_spec['subnet']))

In [None]:
# Build "address<TAB>name" lines for every addressed endpoint in net_conf,
# then append each line to /etc/hosts on every node in the slice.
hosts_txt = []
for net_spec in net_conf:
    for member in net_spec['nodes']:
        if type(member) is dict and member['addr']:
            hosts_txt.append("%s\t%s" % (member['addr'], member['name']))

for target in slice.get_nodes():
    print("")
    for entry in hosts_txt:
        target.execute("echo %s | sudo tee -a /etc/hosts" % entry)

In [None]:
# Turn on IPv4 forwarding on every node in the slice.
forwarding_cmd = "sudo sysctl -w net.ipv4.ip_forward=1"
for target in slice.get_nodes():
    target.execute(forwarding_cmd)

In [None]:
# Install every static route from route_config on the node(s) that own it.
for route in route_config:
    destination = IPv4Network(route['addr'])
    for owner in route['nodes']:
        slice.get_node(name=owner).ip_route_add(subnet=destination, gateway=route['gw'])

# For initialization purposes, force consumer1 to route through router2
# by removing its route through router1.
slice.get_node(name="consumer1").ip_route_del(
    subnet=IPv4Network("10.10.4.0/24"),
    gateway="10.10.1.10",
)

## Optional: Draw the Topology

Draw the configuration based on the list arguments declared towards the start of this Notebook.

Note -- This does not draw from what FABRIC actually created. It draws what we wanted FABRIC to create. If methods aren't called correctly there will be a disconnect between these two.

In [None]:
# Collect graph nodes (L2 networks, L3 networks, hosts) and graph edges
# (interfaces) from the slice for the drawing cell that follows.
l2_nets = []
l3_nets = []
hosts = []

# (print prefix, getter, drawing color, list that receives the entries)
# The getters are called lazily so the print/collect order is unchanged.
for prefix, getter, color, bucket in (
    ("L2 ", slice.get_l2networks, 'lavender', l2_nets),
    ("L3 ", slice.get_l3networks, 'pink', l3_nets),
    ("Node ", slice.get_nodes, 'lightblue', hosts),
):
    for member in getter():
        member_name = member.get_name()
        print(prefix + member_name)
        bucket.append((member_name, {'color': color}))

nodes = l2_nets + l3_nets + hosts

# One edge per interface, from its network to its node, labelled with the
# device name, IP address and MAC address on separate lines.
edges = []
for iface in slice.get_interfaces():
    info = iface.toDict()
    network_name = info['network']
    node_name = info['node']
    edge_label = '\n'.join([info['physical_dev'], info['ip_addr'], info['mac']])
    edges.append((network_name, node_name, {'label': edge_label}))
     

In [None]:
import networkx as nx
import matplotlib.pyplot as plt

# The figure is square and grows with the number of graph nodes.
side = len(nodes)
plt.figure(figsize=(side, side))

G = nx.Graph()
G.add_nodes_from(nodes)
G.add_edges_from(edges)
pos = nx.spring_layout(G)

# Per-node styling: color comes from the node attributes, box size from the label length.
node_colors = [attrs['color'] for _, attrs in nodes]
node_sizes = [len(label) * 400 for label, _ in nodes]
nx.draw(G, pos, node_shape='s', node_color=node_colors, node_size=node_sizes, with_labels=True);

edge_text = nx.get_edge_attributes(G, 'label')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_text, font_color='gray', font_size=8, rotate=False);

## Log into nodes
Get the ssh commands for the Consumers, Routers, and Server

In [None]:
# SSH command for consumer1
ssh_command = slice.get_node("consumer1").get_ssh_command()
print(ssh_command)

In [None]:
# SSH command for consumer2
ssh_command = slice.get_node("consumer2").get_ssh_command()
print(ssh_command)

In [None]:
# SSH command for router1
ssh_command = slice.get_node("router1").get_ssh_command()
print(ssh_command)

In [None]:
# SSH command for router2
ssh_command = slice.get_node("router2").get_ssh_command()
print(ssh_command)

In [None]:
# SSH command for the server
ssh_command = slice.get_node("server").get_ssh_command()
print(ssh_command)

In [None]:
# SSH command for the MFLib measurement node
ssh_command = slice.get_node("meas-node").get_ssh_command()
print(ssh_command)

## Initialize MFLib object

In [None]:
%%time
# Create the MFLib object for this slice (looked up by name). `mf` is used by
# the instrumentize, Grafana tunnel and info cells below.
mf = MFLib(slice_name)

## Install Prometheus and Grafana

Install the 2 tools and then get the access information

In [None]:
%%time
# Instrument the slice with the "prometheus" service; the call's result is
# kept in a variable so it can be inspected afterwards.
instrumetize_results = mf.instrumentize( ["prometheus"] )

In [None]:
# Grafana SSH Tunnel Command
# mf.grafana_tunnel_local_port = 10010 # optionally change the port
print(mf.grafana_tunnel)

# With the tunnel open, Grafana is reached on the tunnel's local port.
grafana_port = mf.grafana_tunnel_local_port
print(f"Browse to https://localhost:{grafana_port}/grafana/dashboards?query=%2A")

In [None]:
# The grafana_manager service was created by the mf.instrumentize call.
# Ask that service for the Grafana admin password: the request is a dictionary
# whose "get" entry lists the items wanted.
# NOTE: the password is printed into the cell output; clear the output before sharing.
data = {"get": ["admin_password"]}
info_results = mf.info("grafana_manager", data)
print(info_results)

## Formulate Prometheus Queries
The following steps help establish a means to query the Prometheus data set from the Command Line

In [None]:
# Create a PrometheusExporter for this slice (by name); `prom_tools` is used
# in the next cell to fetch the Prometheus credentials.
from mflib.data_transfer import PrometheusExporter
prom_tools = PrometheusExporter(slice_name=slice_name)

In [None]:
# Get the Prometheus admin credentials (user and password); they are used
# below to authenticate the curl queries.
# NOTE: the credentials are printed into the cell output; clear it before sharing.
data = {"get": ["ht_user", "ht_password"]}
prom_credentials = prom_tools.info("prometheus", data)
print(prom_credentials)

We will use the credentials from above in order to form queries to run in Prometheus.

Below is an example query that will not be used in practice for this Notebook.

In [None]:
# Example only: a curl command that asks Prometheus for the 1-minute receive
# packet rate of one interface on one node.
promUser, promPass = prom_credentials["ht_user"], prom_credentials["ht_password"]
nodeToMeasure = "router2"
linkToMonitor = "enp9s0"

# PromQL expression sent as the 'query' parameter.
promQuery = (
    'rate(node_network_receive_packets_total{instance="' + nodeToMeasure
    + '",job="node", device="' + linkToMonitor + '"}[1m])'
)

sampleGetCmd = "".join([
    "curl -G -k --user '", promUser, "':'", promPass, "'",
    " --data-urlencode 'query=", promQuery, "'",
    " https://localhost:9090/api/v1/query",
])

print(sampleGetCmd)

The queries (populating the 'query=<data>' section of the curl command) in the following sections were derived from reviewing the Grafana dashboard. By opening a specific chart and reviewing the JSON file there is a section labeled "targets[ ... expr = "" ...]". The expr contains the generic syntax to get the data plotted by Grafana. To get it for a specific node and connection you simply need to reverse engineer which fields of the graph are populating which variable in the expr. 

### Command to monitor Router 2

The above "curl" command will return the number of packets being routed through Router 2 along a specific network interface (not necessarily an interface we care about).

It has some weaknesses that need to be addressed. The first is it's monitoring an arbitrary interface in the "linkToMonitor" argument. SSH into Router 2 if you haven't already. Run an `ifconfig` in the terminal and look for the device with IP "10.10.5.20" which is the interface to the Server from Router 2. Replace the "linkToMonitor" with the name of the device with the corresponding IP (it should be enp#s#)

The command below also differs from the generic in that it trims the excess response information off the return. The generic returns all the HTTP confirmation of successful command as well as the time stamp of the measurement. Since our interest is solely the number of packets being routed through here, we will trim all the excess information to get just that value.

In [None]:
# Command that returns only the 1-minute receive packet rate seen on
# router2's interface; the shell pipeline after curl strips everything in the
# response except the final value.
promUser, promPass = prom_credentials["ht_user"], prom_credentials["ht_password"]
nodeToMeasure = "router2"
linkToMonitor = "enp7s0" #may need updating

# PromQL expression sent as the 'query' parameter.
promQuery = (
    'rate(node_network_receive_packets_total{instance="' + nodeToMeasure
    + '",job="node", device="' + linkToMonitor + '"}[1m])'
)

router2ToServerPacketsCmd = "".join([
    "curl -G -k --user '", promUser, "':'", promPass, "'",
    " --data-urlencode 'query=", promQuery, "'",
    " https://localhost:9090/api/v1/query",
    " | xargs echo | sed 's/.*value//g' | tr -d \"[]{}:\" |sed 's/.*,//g'",
])

print(router2ToServerPacketsCmd)

Since we also intend to monitor the router's bandwidth, the following command will return the measured router speed.

In [None]:
# Command that returns only the reported network speed (bytes) for router2;
# the shell pipeline after curl strips everything except the final value.
promUser, promPass = prom_credentials["ht_user"], prom_credentials["ht_password"]
nodeToMeasure = "router2"

# PromQL expression sent as the 'query' parameter (no device filter).
promQuery = 'node_network_speed_bytes{instance="' + nodeToMeasure + '",job="node"}'

router2ToServerBandwidthCmd = "".join([
    "curl -G -k --user '", promUser, "':'", promPass, "'",
    " --data-urlencode 'query=", promQuery, "'",
    " https://localhost:9090/api/v1/query",
    "  | xargs echo | sed 's/.*value//g' | tr -d \"[]{}:\" |sed 's/.*,//g'",
])

print(router2ToServerBandwidthCmd)

### Command to monitor Router 1

We'll need to do some similar work to get a measurement command set-up for Router 1

SSH into Router 1 if you haven't already. Run an `ifconfig` in the terminal and look for the device with IP "10.10.4.10" which is the interface to the Server from Router 1. Replace the "linkToMonitor" with the name of the device with the corresponding IP (it should be enp#s#)

In [None]:
# Command that returns only the 1-minute receive packet rate seen on
# router1's interface; the shell pipeline after curl strips everything in the
# response except the final value.
promUser, promPass = prom_credentials["ht_user"], prom_credentials["ht_password"]
nodeToMeasure = "router1"
linkToMonitor = "enp8s0" #may need updating

# PromQL expression sent as the 'query' parameter.
promQuery = (
    'rate(node_network_receive_packets_total{instance="' + nodeToMeasure
    + '",job="node", device="' + linkToMonitor + '"}[1m])'
)

router1ToServerPacketsCmd = "".join([
    "curl -G -k --user '", promUser, "':'", promPass, "'",
    " --data-urlencode 'query=", promQuery, "'",
    " https://localhost:9090/api/v1/query",
    "  | xargs echo | sed 's/.*value//g' | tr -d \"[]{}:\" |sed 's/.*,//g'",
])

print(router1ToServerPacketsCmd)

In [None]:
# Command that returns only the reported network speed (bytes) for router1;
# the shell pipeline after curl strips everything except the final value.
promUser, promPass = prom_credentials["ht_user"], prom_credentials["ht_password"]
nodeToMeasure = "router1"

# PromQL expression sent as the 'query' parameter (no device filter).
promQuery = 'node_network_speed_bytes{instance="' + nodeToMeasure + '",job="node"}'

router1ToServerBandwidthCmd = "".join([
    "curl -G -k --user '", promUser, "':'", promPass, "'",
    " --data-urlencode 'query=", promQuery, "'",
    " https://localhost:9090/api/v1/query",
    "  | xargs echo | sed 's/.*value//g' | tr -d \"[]{}:\" |sed 's/.*,//g'",
])

print(router1ToServerBandwidthCmd)

## Route Switching

This section takes everything that's been configured and runs the path switching logic. It will iterate a few times, and the environment can be reconfigured at any point between executions.

It is advised to be SSH'd into Consumer 2 manually and not have that run as part of this loop.

The expected execution order is to
1) Run this loop once and confirm traffic flows through Router 2
2) Login to Consumer 2 and start an indefinite ping of the Server (`ping server`)
3) Re-run this loop and observe Consumer 1 flowing its traffic through Router 1
4) Optionally stop Consumer 2 and see Consumer 1 return to pinging through Router 2

In [None]:
# Adaptive path selection for consumer1 -> server.
#
# Uses the four commands built in the earlier cells:
#   router1ToServerPacketsCmd, router1ToServerBandwidthCmd,
#   router2ToServerPacketsCmd, router2ToServerBandwidthCmd
#
# Each iteration:
#   1. runs the four commands on the measurement node to read each router's
#      packet rate (used as a "connections" figure) and link speed,
#   2. estimates the bandwidth share consumer1 would get through each router,
#   3. swaps consumer1's static route if the other router gives a larger share,
#   4. pings the server from consumer1 over the selected path.

measNode = slice.get_node(name="meas-node")
con1Node = slice.get_node(name="consumer1")

MAX_LOOPS = 10

# Routers 1 & 2 are configured to identical bandwidths, so router 1's measured
# bandwidth is artificially lowered to create a difference in forwarding.
ROUTER1_BANDWIDTH_FACTOR = 0.9

# The static route consumer1 uses to reach the server through each router.
ROUTES = {
    1: {"subnet": IPv4Network("10.10.4.0/24"), "gateway": "10.10.1.10"},
    2: {"subnet": IPv4Network("10.10.5.0/24"), "gateway": "10.10.2.20"},
}


def _measure(cmd, previous):
    """Run `cmd` on the measurement node and return its stdout as an int.

    If the command or the parsing fails, the exception is printed and
    `previous` (the last known raw measurement) is returned unchanged.
    """
    try:
        stdout, _stderr = measNode.execute(cmd, quiet=True)
        return int(float(stdout))
    except Exception as e:
        print(f"Exception: {e}")
        return previous


def _other_connections(measured, consumer1_on_this_router):
    """Return `measured` minus consumer1's own traffic when it uses this router.

    The count is only reduced when it is greater than 1, matching the
    original guard.
    """
    if consumer1_on_this_router and measured > 1:
        return measured - 1
    return measured


def _switch_path(current, target):
    """Remove consumer1's route via router `current`, add the one via `target`.

    Returns `target` so the caller can record the new path.
    """
    con1Node.ip_route_del(**ROUTES[current])
    con1Node.ip_route_add(**ROUTES[target])
    return target


# Raw measurements. They are never modified in place, so a failed measurement
# re-uses the last raw value instead of a value that was already adjusted
# (previously the 0.9 factor and the "-1" correction compounded on stale data).
router1Bandwidth = 0
router1Connections = 0
router2Bandwidth = 0
router2Connections = 0

# An earlier step disabled the path through router 1, so router 2 is the default.
routerPath = 2

loopCnt = 0
for loopCnt in range(1, MAX_LOOPS + 1):
    print("\nLoop " + str(loopCnt) + "/" + str(MAX_LOOPS))

    # get the router stats
    router1Connections = _measure(router1ToServerPacketsCmd, router1Connections)
    router1Bandwidth = _measure(router1ToServerBandwidthCmd, router1Bandwidth)
    router2Connections = _measure(router2ToServerPacketsCmd, router2Connections)
    router2Bandwidth = _measure(router2ToServerBandwidthCmd, router2Bandwidth)

    router1AdjBandwidth = int(router1Bandwidth * ROUTER1_BANDWIDTH_FACTOR)
    print(" Router 1 : " + str(int(router1AdjBandwidth / (1024*1024))) + " Mps / " + str(router1Connections))
    print(" Router 2 : " + str(int(router2Bandwidth / (1024*1024))) + " Mps / " + str(router2Connections))

    # Estimated share = bandwidth / (other connections + 1); the +1 accounts
    # for consumer1, which must not also be counted among the connections of
    # the router it is currently using.
    router1Others = _other_connections(router1Connections, routerPath == 1)
    router2Others = _other_connections(router2Connections, routerPath == 2)
    router1EstBandwidth = router1AdjBandwidth / (router1Others + 1)
    router2EstBandwidth = router2Bandwidth / (router2Others + 1)

    # pick forwarding; ties go to router 2
    if router1EstBandwidth > router2EstBandwidth:
        print("  Forwarding through Router 1")
        # a route change is only needed if not already using router 1
        if routerPath != 1:
            routerPath = _switch_path(routerPath, 1)
    else:
        print("  Forwarding through Router 2")
        # a route change is only needed if not already using router 2
        if routerPath != 2:
            routerPath = _switch_path(routerPath, 2)

    # execute some pings on this path
    print("<<<<<PING CMDS>>>>>")
    try:
        stdout, stderr = con1Node.execute('ping -c 10 server')
    except Exception as e:
        print(f"Exception: {e}")
    print("===================")

print(" LOOPS FINISHED ")

# Restore the initial state (router 1 disabled, router 2 enabled) so this
# cell can be re-run cleanly.
if routerPath != 2:
    routerPath = _switch_path(routerPath, 2)

## Route Switching (MANUAL DEBUG)

The following are steps that can be executed to support a manual debug of any anomalous behaviors that may occur when path switching. They are not part of the scope of the Notebook & can be skipped to proceed to the clean-up step.

In [None]:
# Manual debug: remove consumer1's route to 10.10.5.0/24 via router2 (10.10.2.20).
slice.get_node(name="consumer1").ip_route_del(
    subnet=IPv4Network("10.10.5.0/24"),
    gateway="10.10.2.20",
)

In [None]:
# Manual debug: remove consumer1's route to 10.10.4.0/24 via router1 (10.10.1.10).
slice.get_node(name="consumer1").ip_route_del(
    subnet=IPv4Network("10.10.4.0/24"),
    gateway="10.10.1.10",
)

In [None]:
# Manual debug: add consumer1's route to 10.10.5.0/24 via router2 (10.10.2.20).
slice.get_node(name="consumer1").ip_route_add(
    subnet=IPv4Network("10.10.5.0/24"),
    gateway="10.10.2.20",
)

In [None]:
# Manual debug: add consumer1's route to 10.10.4.0/24 via router1 (10.10.1.10).
slice.get_node(name="consumer1").ip_route_add(
    subnet=IPv4Network("10.10.4.0/24"),
    gateway="10.10.1.10",
)

In [None]:
# Debug aid: confirms route switching works without enabling all of MFLib by
# flipping consumer1 between the two routers on every iteration and pinging
# the server each time.
# (the output has been demonstrated to swap paths as intended)

con1Node = slice.get_node(name="consumer1")

# consumer1's server route through each router, keyed by router number
route_via = {
    1: dict(subnet=IPv4Network("10.10.4.0/24"), gateway="10.10.1.10"),
    2: dict(subnet=IPv4Network("10.10.5.0/24"), gateway="10.10.2.20"),
}

# starting state: consumer1 forwards through router 2
routerPath = 2
loopCnt = 0
MAX_LOOPS = 10

for loopCnt in range(1, MAX_LOOPS + 1):
    print(f"\nLoop {loopCnt}/{MAX_LOOPS}")

    # Alternate the forwarding path: drop the route currently in use and
    # install the one through the other router.
    nextPath = 1 if routerPath == 2 else 2
    print(f"  Switching to Router {nextPath}")
    con1Node.ip_route_del(**route_via[routerPath])
    con1Node.ip_route_add(**route_via[nextPath])
    routerPath = nextPath

    # execute some pings on this path
    print("<<<<<PING CMDS>>>>>")
    try:
        stdout, stderr = con1Node.execute('ping -c 3 server')
    except Exception as err:
        print(f"Exception: {err}")
    print("===================")

print(" LOOPS FINISHED ")

## Cleanup
Step to delete the slice

In [None]:
# Look the slice up again by name (so this cell also works from a fresh
# kernel) and delete it.
slice = fablib.get_slice(name=slice_name)
slice.delete()