From f34be22930cff2caef70fe3081dfa9e5c89c1d6e Mon Sep 17 00:00:00 2001 From: Pete Savage Date: Wed, 27 Nov 2019 09:46:00 +0000 Subject: [PATCH] Massive refactor to using the affinity framework for tests * Tests are now moved into pytest (as well they should be) * Greatly simplifies the GH actions run * Moves many more functions into the affinity framework * per node pinging * per node debug dot files * topo start now by default waits for stabilization --- .github/workflows/tester.yml | 50 +----------- test/perf/affinity.py | 143 ++++++++++++++++++++++++++++----- test/perf/node_utils.py | 124 ++-------------------------- test/perf/requirements.txt | 1 + test/perf/test_ping.py | 31 +++++++ test/perf/test_route.py | 61 ++++++++++++++ test/perf/topology-flat.yaml | 2 +- test/perf/topology-random.yaml | 2 +- test/perf/topology-tree.yaml | 2 +- test/perf/utils.py | 34 ++++++++ 10 files changed, 263 insertions(+), 187 deletions(-) create mode 100644 test/perf/test_ping.py create mode 100644 test/perf/test_route.py diff --git a/.github/workflows/tester.yml b/.github/workflows/tester.yml index a4ab17b1..14b47d6b 100644 --- a/.github/workflows/tester.yml +++ b/.github/workflows/tester.yml @@ -16,54 +16,12 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install python-dateutil + pip install python-dateutil pytest pip install . 
pip install -r ./test/perf/requirements.txt - - name: Perform perf test 1 - Random + - name: Perform integration tests run: | - python ./test/perf/node_utils.py file ./test/perf/topology-random.yaml& - python ./test/perf/node_utils.py dot-compare graph_controller.dot .last-topology-graph.dot --wait 40 - python ./test/perf/node_utils.py ping ./test/perf/topology-random.yaml --count 100 --validate 0.1 - python ./test/perf/node_utils.py check-stats .last-topology.yaml - kill `pidof python` - rm -Rf /tmp/receptor - rm graph_controller.dot - rm .last-topology-graph.dot - - name: Upload artifact for Perf - Random - uses: actions/upload-artifact@v1 - with: - name: RandomPerfTest - path: results.yaml - - name: Perform perf test 2 - Flat - run: | - python ./test/perf/node_utils.py file ./test/perf/topology-flat.yaml& - python ./test/perf/node_utils.py dot-compare graph_controller.dot .last-topology-graph.dot --wait 40 - python ./test/perf/node_utils.py ping ./test/perf/topology-flat.yaml --count 100 --validate 0.1 - python ./test/perf/node_utils.py check-stats .last-topology.yaml - kill `pidof python` - rm -Rf /tmp/receptor - rm graph_controller.dot - rm .last-topology-graph.dot - - name: Upload artifact for Perf - Flat - uses: actions/upload-artifact@v1 - with: - name: FlatPerfTest - path: results.yaml - - name: Perform perf test 3 - Tree - run: | - python ./test/perf/node_utils.py file ./test/perf/topology-tree.yaml& - python ./test/perf/node_utils.py dot-compare graph_controller.dot .last-topology-graph.dot --wait 40 - python ./test/perf/node_utils.py ping ./test/perf/topology-tree.yaml --count 100 --validate 0.1 - python ./test/perf/node_utils.py check-stats .last-topology.yaml - kill `pidof python` - rm -Rf /tmp/receptor - rm graph_controller.dot - rm .last-topology-graph.dot - - name: Upload artifact for Perf - Tree - uses: actions/upload-artifact@v1 - with: - name: TreePerfTest - path: results.yaml + pytest ./test/perf/ unit: name: Unit Tests runs-on: ubuntu-latest 
@@ -81,4 +39,4 @@ jobs: pip install . - name: pytest unit tests run: | - pytest + pytest test/integration diff --git a/test/perf/affinity.py b/test/perf/affinity.py index b175a0ed..99fb37aa 100644 --- a/test/perf/affinity.py +++ b/test/perf/affinity.py @@ -1,11 +1,18 @@ +import atexit +import os import random -from collections import defaultdict import subprocess -import attr -import atexit -from utils import random_port +import time import uuid +from collections import defaultdict +from test.perf.utils import random_port +from test.perf.utils import read_and_parse_dot +import signal + +import attr import yaml +from pyparsing import ParseException +from wait_for import wait_for procs = {} @@ -69,7 +76,7 @@ def _construct_run_command(self): for pnode in self.connections ] ) - st.extend(["-d", self.data_path, "--node-id", self.name, "node"]) + st.extend(["--debug", "-d", self.data_path, "--node-id", self.name, "node"]) st.extend([f"--listen-port={self.listen_port}", peer_string]) if self.stats_enable: @@ -78,12 +85,69 @@ def _construct_run_command(self): return st def start(self): - op = subprocess.Popen(" ".join(self._construct_run_command()), shell=True) + try: + os.remove(f"graph_{self.name}.dot") + os.sync() + except FileNotFoundError: + print(f"DIND'T FIND IT graph_{self.name}.dot") + print(f"{time.time()} starting {self.name}({self.uuid})") + op = subprocess.Popen(" ".join(self._construct_run_command()), shell=True, preexec_fn=os.setsid) procs[self.uuid] = op def stop(self): - print(f"killing {self.name}({self.uuid})") - procs[self.uuid].kill() + print(f"{time.time()} killing {self.name}({self.uuid})") + try: + os.killpg(os.getpgid(procs[self.uuid].pid), signal.SIGTERM) + except ProcessLookupError: + print("Couldn't kill the process {procs[self.uuid].pid}") + procs[self.uuid].wait() + print(f"Service was kill {procs[self.uuid].returncode}") + + def get_debug_dot(self): + try: + with open(f"graph_{self.name}.dot") as f: + dot_data = f.read() + #print(f"FILE 
FOUND: graph_{self.name}.dot") + return dot_data + except FileNotFoundError: + #print(f"FILE NOT FOUND: graph_{self.name}.dot") + return "" + + def validate_routes(self): + dot1 = self.get_debug_dot() + dot2 = self.topology.generate_dot() + if dot1 and dot2: + return self.topology.compare_dot(dot1, dot2) + else: + return False + + def ping(self, count): + socket_path = self.topology.find_controller()[0].socket_path + + if self.controller: + # TODO Remove this once a controller is pingable + return True + + starter = [ + "time", + "receptor", + "ping", + "--socket-path", + socket_path, + self.name, + "--count", + str(count), + ] + start = time.time() + op = subprocess.Popen(" ".join(starter), shell=True, stdout=subprocess.PIPE) + op.wait() + duration = time.time() - start + cmd_output = op.stdout.readlines() + print(cmd_output) + if b"Failed" in cmd_output[0]: + return "Failed" + else: + return duration / count @attr.s @@ -109,9 +173,7 @@ def remove_node(self, node_or_name): del self.nodes[node_name] @staticmethod - def generate_mesh( - controller_port, node_count, conn_method, profile=False, socket_path=None - ): + def generate_mesh(controller_port, node_count, conn_method, profile=False, socket_path=None): topology = Topology() topology.add_node( Node( @@ -170,7 +232,6 @@ def peer_function(*args): ) return topology - def dump_yaml(self, filename=".last-topology.yaml"): with open(filename, "w") as f: data = {"nodes": {}} @@ -192,19 +253,28 @@ def dump_yaml(self, filename=".last-topology.yaml"): def dump_dot(self, filename=".last-topology-graph.dot"): with open(filename, "w") as f: - f.write("graph {") - for node, node_data in self.nodes.items(): - for conn in node_data.connections: - f.write(f"{node} -- {conn}; ") - f.write("}") + f.write(self.generate_dot()) - def start(self): + def generate_dot(self): + dot_data = "graph {" + for node, node_data in self.nodes.items(): + for conn in node_data.connections: + dot_data += f"{node} -- {conn}; " + dot_data += "}" + 
return dot_data + + def start(self, wait=True): self.dump_yaml() self.dump_dot() for k, node in self.nodes.items(): node.start() + if wait: + wait_for(self.validate_all_node_routes, delay=6, num_sec=30) + #for name, node in self.nodes.items(): + # wait_for(lambda: node.validate_routes) + def stop(self): for k, node in self.nodes.items(): node.stop() @@ -212,7 +282,9 @@ def stop(self): @staticmethod def load_topology_from_file(filename): - data = yaml.safe_load(filename) + with open(filename) as f: + data = yaml.safe_load(f) + topology = Topology() for node_name, definition in data["nodes"].items(): node = Node.create_from_config(definition) @@ -222,3 +294,36 @@ def load_topology_from_file(filename): def find_controller(self): return list(filter(lambda o: o.controller, self.nodes.values())) + + def ping(self, count=10, socket_path=None): + results = {} + for _, node in self.nodes.items(): + results[node.name] = node.ping(count) + return results + + @staticmethod + def validate_ping_results(results, threshold=0.1): + valid = True + for node in results: + print(f"Asserting node {node} was under {threshold} threshold") + print(f" {results[node]}") + if results[node] == "Failed" or float(results[node]) > float(threshold): + valid = False + return valid + + @staticmethod + def compare_dot(dot1, dot2): + try: + ds1 = read_and_parse_dot(dot1) + ds2 = read_and_parse_dot(dot2) + if ds1 != ds2: + print(f"MATCH FAIL") + print(ds1) + print(ds2) + return False + return True + except ParseException: + return False + + def validate_all_node_routes(self): + return all(node.validate_routes() for _, node in self.nodes.items()) diff --git a/test/perf/node_utils.py b/test/perf/node_utils.py index bb15e26f..ab060d9b 100644 --- a/test/perf/node_utils.py +++ b/test/perf/node_utils.py @@ -1,27 +1,10 @@ - -import logging -import random - -import subprocess import sys -import time -from affinity import Topology, Node - -from collections import defaultdict +from test.perf.affinity import 
Topology from time import sleep - import click import requests -import yaml from prometheus_client.parser import text_string_to_metric_families -from pyparsing import alphanums -from pyparsing import Group -from pyparsing import OneOrMore -from pyparsing import ParseException -from pyparsing import Suppress -from pyparsing import Word - DEBUG = False @@ -34,25 +17,6 @@ ) -class Conn: - def __init__(self, a, b): - self.a = a - self.b = b - - def __eq__(self, other): - if self.a == other.a and self.b == other.b or self.a == other.b and self.b == other.a: - return True - return False - - def __hash__(self): - enps = [self.a, self.b] - sorted_enps = sorted(enps) - return hash(tuple(sorted_enps)) - - def __repr__(self): - return f"{self.a} -- {self.b}" - - def do_loop(topology): topology.start() try: @@ -97,9 +61,7 @@ def flat(controller_port, node_count, debug, profile, socket_path): global DEBUG DEBUG = True - topology = Topology.generate_flat_mesh( - controller_port, node_count, profile, socket_path - ) + topology = Topology.generate_flat_mesh(controller_port, node_count, profile, socket_path) print(topology) do_loop(topology) @@ -119,85 +81,9 @@ def file(filename, debug): @click.argument("filename", type=click.File("r")) def ping(filename, count, validate, socket_path): topology = Topology.load_topology_from_file(filename) - if not socket_path: - socket_path = topology.find_controller()[0].socket_path - - results = {} - for name, node in topology.nodes.items(): - starter = [ - "time", - "receptor", - "ping", - "--socket-path", - socket_path, - node.name, - "--count", - str(count), - ] - start = time.time() - op = subprocess.Popen(" ".join(starter), shell=True, stdout=subprocess.PIPE) - op.wait() - duration = time.time() - start - cmd_output = op.stdout.readlines() - print(cmd_output) - if b"Failed" in cmd_output[0]: - results[node.name] = "Failed" - else: - results[node.name] = duration / count - with open("results.yaml", "w") as f: - yaml.dump(results, f) - if 
validate: - valid = True - for node in results: - if topology.nodes[node].controller: - continue - print(f"Asserting node {node} was under {validate} threshold") - print(f" {results[node]}") - if results[node] == "Failed" or float(results[node]) > float(validate): - valid = False - print(" FAILED!") - else: - print(" PASSED!") - if not valid: - sys.exit(127) - - -def read_and_parse_dot(filename): - group = Group(Word(alphanums) + Suppress("--") + Word(alphanums)) + Suppress(";") - dot = Suppress("graph {") + OneOrMore(group) + Suppress("}") - - with open(filename) as f: - raw_data = f.read() - data = dot.parseString(raw_data).asList() - return {Conn(c[0], c[1]) for c in data} - - -@main.command("dot-compare") -@click.option("--wait", default=None) -@click.argument("filename_one") -@click.argument("filename_two") -def dot_compare(filename_one, filename_two, wait): - - if wait: - start = time.time() - while True: - try: - assert read_and_parse_dot(filename_one) == read_and_parse_dot(filename_two) - sys.stderr.write("Matched\n") - sys.exit(0) - except (AssertionError, ParseException, FileNotFoundError) as e: - if time.time() < start + float(wait): - time.sleep(1) - else: - sys.stderr.write("Failed match\n") - raise e - else: - try: - assert read_and_parse_dot(filename_one) == read_and_parse_dot(filename_two) - sys.stderr.write("Matched\n") - except AssertionError: - sys.stderr.write("Failed match\n") - sys.exit(127) + + results = topology.ping(count, socket_path=socket_path) + Topology.validate_ping_results(results, validate) @main.command("check-stats") diff --git a/test/perf/requirements.txt b/test/perf/requirements.txt index e95e9ab8..f7e314ed 100644 --- a/test/perf/requirements.txt +++ b/test/perf/requirements.txt @@ -3,3 +3,4 @@ click pyparsing pyyaml requests +wait_for diff --git a/test/perf/test_ping.py b/test/perf/test_ping.py new file mode 100644 index 00000000..5e11b538 --- /dev/null +++ b/test/perf/test_ping.py @@ -0,0 +1,31 @@ +from test.perf.affinity 
import Topology +from wait_for import TimedOutError +import time +import pytest + + +@pytest.yield_fixture( + scope="function", + params=[ + "test/perf/topology-flat.yaml", + "test/perf/topology-tree.yaml", + "test/perf/topology-random.yaml", + ], + ids=["flat", "tree", "random"], +) +def topology(request): + topo = Topology.load_topology_from_file(request.param) + try: + topo.start(wait=True) + yield topo + except TimedOutError: + raise + finally: + print(f"{time.time()} - Stopping current topo") + print(topo.nodes['controller']) + topo.stop() + + +def test_pings_perf(topology): + results = topology.ping() + topology.validate_ping_results(results) diff --git a/test/perf/test_route.py b/test/perf/test_route.py new file mode 100644 index 00000000..8635ee49 --- /dev/null +++ b/test/perf/test_route.py @@ -0,0 +1,61 @@ +from test.perf.affinity import Node +from test.perf.affinity import Topology + +import pytest +from wait_for import wait_for, TimedOutError + + +@pytest.fixture(scope="function") +def random_topology(): + topo = Topology.load_topology_from_file("test/perf/topology-random.yaml") + try: + topo.start(wait=True) + yield topo + except TimedOutError: + pass + finally: + topo.stop() + + +@pytest.fixture(scope="function") +def tree_topology(): + topo = Topology.load_topology_from_file("test/perf/topology-tree.yaml") + try: + topo.start(wait=True) + yield topo + except TimedOutError: + pass + finally: + topo.stop() + + +def test_default_routes_validate(random_topology): + assert random_topology.validate_all_node_routes() + + +def test_add_remove_node(random_topology): + nodeX = Node("nodeX", connections=["controller"]) + random_topology.add_node(nodeX) + nodeX.start() + wait_for(random_topology.validate_all_node_routes, num_sec=30) + assert nodeX.ping(1) != "Failed" + assert "nodeX" in random_topology.nodes["controller"].get_debug_dot() + assert random_topology.validate_all_node_routes() + + +def test_alternative_route(tree_topology): + nodeX = Node("nodeX", 
connections=["node4", "node3"]) + tree_topology.add_node(nodeX) + nodeX.start() + wait_for(tree_topology.validate_all_node_routes, num_sec=30) + assert nodeX.ping(1) != "Failed" + assert "nodeX" in tree_topology.nodes["controller"].get_debug_dot() + assert tree_topology.validate_all_node_routes() + tree_topology.nodes["node3"].stop() + # TODO make ping return quicker if it can't ping then reenable to ensure node3 is dead + # assert tree_topology.nodes['node3'].ping() != "Failed" + assert nodeX.ping(1) != "Failed" + tree_topology.nodes["node3"].start() + wait_for(tree_topology.validate_all_node_routes, num_sec=30) + tree_topology.nodes["node4"].stop() + assert nodeX.ping(1) != "Failed" diff --git a/test/perf/topology-flat.yaml b/test/perf/topology-flat.yaml index 399d917e..6454daec 100644 --- a/test/perf/topology-flat.yaml +++ b/test/perf/topology-flat.yaml @@ -11,7 +11,7 @@ nodes: connections: - controller controller: false - listen_port: null + listen_port: 11111 name: node1 stats_enable: true stats_port: null diff --git a/test/perf/topology-random.yaml b/test/perf/topology-random.yaml index 6d6cb205..c8e62408 100644 --- a/test/perf/topology-random.yaml +++ b/test/perf/topology-random.yaml @@ -11,7 +11,7 @@ nodes: connections: - controller controller: false - listen_port: null + listen_port: 11111 name: node1 stats_enable: true stats_port: null diff --git a/test/perf/topology-tree.yaml b/test/perf/topology-tree.yaml index ee22d550..71272169 100644 --- a/test/perf/topology-tree.yaml +++ b/test/perf/topology-tree.yaml @@ -11,7 +11,7 @@ nodes: connections: - controller controller: false - listen_port: null + listen_port: 11111 name: node1 stats_enable: true stats_port: null diff --git a/test/perf/utils.py b/test/perf/utils.py index 0bfe6d74..f847a03b 100644 --- a/test/perf/utils.py +++ b/test/perf/utils.py @@ -1,5 +1,39 @@ import socket +from pyparsing import alphanums +from pyparsing import Group +from pyparsing import OneOrMore +from pyparsing import Suppress +from 
pyparsing import Word + + +class Conn: + def __init__(self, a, b): + self.a = a + self.b = b + + def __eq__(self, other): + if self.a == other.a and self.b == other.b or self.a == other.b and self.b == other.a: + return True + return False + + def __hash__(self): + enps = [self.a, self.b] + sorted_enps = sorted(enps) + return hash(tuple(sorted_enps)) + + def __repr__(self): + return f"{self.a} -- {self.b}" + + +def read_and_parse_dot(raw_data): + group = Group(Word(alphanums) + Suppress("--") + Word(alphanums)) + Suppress(";") + dot = Suppress("graph {") + OneOrMore(group) + Suppress("}") + + data = dot.parseString(raw_data).asList() + return {Conn(c[0], c[1]) for c in data} + + def random_port(tcp=True): """Get a random port number for making a socket