In [None]:
# endpoints.txt  (format: "<name> <hostname> <path>")
# auth auth.example.com /health
# users users.example.com /health
# orders orders.example.com /health
# cdn cdn.example.com /health
# search search.example.com /health
# billing billing.example.com /health
# profile profile.example.com /health
# admin admin.example.com /health
# reports reports.example.com /health
# metrics metrics.example.com /health


# Assume the real-world results are:
# auth -> 200
# users -> timeout
# orders -> 503
# cdn -> DNS failure
# search -> 301 (redirect to /healthz) [treat as OK for this exercise]
# billing -> 200
# profile -> 500
# admin -> SSL error
# reports -> 200
# metrics -> 200
#
# With threshold_fail_pct=30 (>= 30% => ALERT):
# failing endpoints = users, orders, cdn, profile, admin  => 5 failing out of 10 = 50%
#
# Expected output:
# ALERT: failing=5 total=10 fail_pct=50
# - users reason=timeout
# - orders reason=http_503
# - cdn reason=dns
# - profile reason=http_500
# - admin reason=ssl

In [None]:
# USE THIS FOR REVISION. NO SOLUTION FOR THIS BELOW.

import sys
import urllib.request
import urllib.error
import socket
import ssl

def classify_failure(exc):
    """Map a connection-level exception to a short failure reason.

    Args:
        exc: The exception raised while contacting an endpoint.

    Returns:
        One of "timeout", "dns", "ssl" or "other".
    """
    msg = str(exc).lower()
    # urllib.error.URLError keeps the underlying socket/SSL error in
    # ``.reason``; inspect it too so the decision is based on exception types
    # rather than on platform-specific message wording.
    causes = (exc, getattr(exc, "reason", None))

    def caused_by(*types):
        return any(isinstance(cause, types) for cause in causes)

    # Message matching is kept as a fallback for errors that arrive only as
    # text (e.g. a URLError whose reason is a plain string).
    dns_markers = (
        "name or service not known",
        "nodename nor servname provided",
        "temporary failure in name resolution",
        "no address associated with hostname",
        "getaddrinfo failed",
    )

    # Precedence is timeout, then dns, then ssl.
    if caused_by(socket.timeout, TimeoutError) or "timed out" in msg:
        return "timeout"
    if caused_by(socket.gaierror) or any(marker in msg for marker in dns_markers):
        return "dns"
    if caused_by(ssl.SSLError) or "ssl" in msg:
        return "ssl"
    return "other"

def check_one(hostname, path="/health", timeout=2):
    """Probe ``https://<hostname><path>`` once and report whether it is healthy.

    Args:
        hostname: Host to contact; the URL is always built with ``https://``.
        path: Request path appended to the hostname.
        timeout: Timeout in seconds. Numeric strings such as "2" are accepted
            and converted; ``None`` is passed through to ``urlopen`` unchanged.

    Returns:
        A tuple ``(ok, reason)``. ``reason`` is "ok" for 2xx/3xx responses,
        "http_4xx" for any 4xx response, "http_<code>" for every other HTTP
        status (e.g. "http_503"), or the value returned by
        ``classify_failure`` for connection-level errors.

    Raises:
        ValueError: If ``timeout`` is a string that is not a number.
        TypeError: If ``timeout`` is of a type ``float()`` cannot convert.
    """
    # urlopen needs a numeric timeout. Convert outside the try block so a bad
    # value is reported to the caller instead of being swallowed and turned
    # into an "other" failure for every endpoint.
    if timeout is not None:
        timeout = float(timeout)

    url = "https://" + hostname + path
    try:
        # The context manager closes the response on the success path.
        with urllib.request.urlopen(url, timeout=timeout) as resp:
            code = resp.getcode()
    except urllib.error.HTTPError as e:
        # HTTPError is raised for non-success statuses and carries .code
        code = e.code
    except Exception as e:
        return False, classify_failure(e)

    # Redirects are treated as healthy, same as 2xx.
    if 200 <= code < 400:
        return True, "ok"
    # 4xx keeps one generic token so callers comparing against "http_4xx"
    # keep working.
    if 400 <= code < 500:
        return False, "http_4xx"
    return False, "http_" + str(code)

def parse_targets(lines):
    """Parse endpoint definitions of the form ``<name> <hostname> <path>``.

    Blank lines and lines whose first non-blank character is ``#`` are
    skipped.

    Args:
        lines: Iterable of text lines (an open file works).

    Returns:
        A list of ``(name, hostname, path)`` tuples in input order.

    Raises:
        ValueError: If a non-comment line does not contain exactly three
            whitespace-separated fields; the message includes the 1-based
            line number.
    """
    targets = []
    for lineno, raw in enumerate(lines, start=1):
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        fields = line.split()
        if len(fields) != 3:
            raise ValueError(
                f"line {lineno}: expected '<name> <hostname> <path>', got {line!r}"
            )
        name, host, path = fields
        targets.append((name, host, path))
    return targets

def main(argv):
    """Check every endpoint in a file and print an ALERT or OK summary.

    Args:
        argv: Two items: the endpoints file path and the failure threshold
            in percent. ALERT is printed when the failing percentage is
            greater than or equal to the threshold.

    Raises:
        ValueError: If the argument count is wrong or the threshold is not
            a number.
    """
    if len(argv) != 2:
        raise ValueError("usage: netmon.py <endpoints_file> <threshold_fail_pct>")

    path = argv[0]
    # Command-line arguments are strings; convert up front so the comparison
    # below is numeric and a bad value fails before any network access.
    try:
        threshold_pct = float(argv[1])
    except ValueError:
        raise ValueError(
            f"threshold_fail_pct must be a number, got {argv[1]!r}"
        ) from None

    with open(path) as f:
        targets = parse_targets(f)

    failing = []
    for name, host, p in targets:
        ok, reason = check_one(host, p)
        # 4xx responses are not counted as failures. The prefix test matches
        # both the generic "http_4xx" token and specific codes like "http_404".
        if not ok and not reason.startswith("http_4"):
            failing.append((name, reason))

    total = len(targets)
    # An empty endpoints file would otherwise divide by zero.
    fail_pct = (len(failing) / total) * 100 if total else 0.0

    # ":g" prints 50.0 as "50" while keeping fractional percentages readable.
    summary = f"failing={len(failing)} total={total} fail_pct={fail_pct:g}"

    # With no targets there is nothing to alert on, even at threshold 0.
    if total and fail_pct >= threshold_pct:
        print(f"ALERT: {summary}")
        for name, reason in failing:
            print(f"- {name} reason={reason}")
    else:
        print(f"OK: {summary}")

if __name__ == "__main__":
    # Script entry point: hand main() the CLI arguments without the program name.
    cli_args = sys.argv[1:]
    main(cli_args)