In [19]:
import requests
import lxml.html

# Seconds to wait on the server before giving up, unless the caller overrides it.
_DEFAULT_REQUEST_TIMEOUT = 30


def requests_get(url, func, **kwargs):
    """Fetch ``url`` with ``requests.get`` and post-process the response.

    Args:
        url: Address to request.
        func: Callable invoked with the response object; its result is returned.
        **kwargs: Forwarded to ``requests.get``. When ``timeout`` is not
            supplied, ``_DEFAULT_REQUEST_TIMEOUT`` seconds is used, because
            ``requests`` otherwise waits for the server indefinitely.

    Returns:
        ``func(resp)`` when the response body is non-empty, otherwise ``None``.
    """
    # setdefault keeps an explicit caller choice, including timeout=None.
    kwargs.setdefault("timeout", _DEFAULT_REQUEST_TIMEOUT)
    resp = requests.get(url, **kwargs)
    if not resp.content:
        return None
    return func(resp)

def get_html(url, **kwargs):
    """Download ``url`` and return its body parsed by ``lxml.html.fromstring``.

    Keyword arguments are passed through to ``requests_get``.
    """
    def parse_body(resp):
        return lxml.html.fromstring(resp.content)

    return requests_get(url, parse_body, **kwargs)

def get_json(url, **kwargs):
    """Download ``url`` and return the response body decoded via ``resp.json()``.

    Keyword arguments are passed through to ``requests_get``.
    """
    def decode_body(resp):
        return resp.json()

    return requests_get(url, decode_body, **kwargs)

In [17]:
from collections import defaultdict
from datetime import date, timedelta

def group(lst, key, value=None):
    """Bucket the items of ``lst`` by a key.

    Args:
        lst: Iterable of items.
        key: Callable computing the bucket key from an item, or a subscript
            used as ``item[key]``.
        value: What to store for each item: a callable applied to the item,
            a subscript used as ``item[value]``, or ``None`` (the default)
            to store the item itself.

    Returns:
        A ``defaultdict(list)`` mapping each bucket key to the stored values,
        in input order.
    """
    result = defaultdict(list)
    for item in lst:
        bucket = key(item) if callable(key) else item[key]
        if value is None:
            # Test against None rather than truthiness so that falsy
            # subscripts such as index 0 are still honoured.
            stored = item
        elif callable(value):
            stored = value(item)
        else:
            stored = item[value]
        result[bucket].append(stored)
    return result

def scrape_cve(cve_id):
    """Scrape the NVD detail page of a single CVE.

    Args:
        cve_id: CVE identifier; it is inserted into the NVD detail URL and
            appended to ``microsoft_cve_url_prefix``.

    Returns:
        dict with the keys ``id``, ``urls`` (NVD URL first, then the
        Microsoft one), ``description``, ``aka`` (the text captured by
        ``, aka '...'`` in the description, or ``None`` when the description
        has no such part) and ``cvss_score`` (float).

    Raises:
        IndexError: if the page has no element for one of the expected
            ``data-testid`` values.
        AttributeError: if the CVSS panel text does not start with a decimal
            number.
    """
    nvd_cve_url = f"https://nvd.nist.gov/vuln/detail/{cve_id}"
    cve_doc = get_html(nvd_cve_url)
    extracted = {
        test_id: cve_doc.xpath(f"//*[@data-testid='{test_id}']/text()")[0]
        for test_id in ("vuln-description", "vuln-cvss3-panel-score")
    }
    description = extracted["vuln-description"]
    # Not every description carries an "aka" alias, so a miss is not an error.
    aka_match = re.search(", aka '(.*)'", description)
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    score_match = re.match(r"(\d+\.\d+).*", extracted["vuln-cvss3-panel-score"])
    return {
        "id": cve_id,
        "urls": [nvd_cve_url, microsoft_cve_url_prefix + cve_id],
        "description": description,
        "aka": aka_match.group(1) if aka_match else None,
        "cvss_score": float(score_match.group(1)),
    }

# Plot colour for each CVSS severity label; "None" has no colour assigned.
cvss_severity_colors = {
    "None": None,
    "Low": "green",
    "Medium": "#FFA53F",
    "High": "#FF4F4F",
    "Critical": "mediumorchid",
}

# One (label, lowest score, highest score) entry per severity band,
# ordered from least to most severe.
cvss_severity_matrix = [
    ("None", 0.0, 0.0),
    ("Low", 0.1, 3.9),
    ("Medium", 4.0, 6.9),
    ("High", 7.0, 8.9),
    ("Critical", 9.0, 10.0),
]

# Pie-chart wedge offset per severity; "Critical" is pulled out further
# than the other bands.
explode_severities = {
    label: 0.2 if label == "Critical" else 0.1
    for label, _lowest, _highest in cvss_severity_matrix
}

def cvss_severity(score):
    """Look up the severity label for a CVSS score in ``cvss_severity_matrix``.

    The score and the band limits are compared as truncated tenths
    (``int(x * 10)``). Returns the label of the first band that contains the
    score, or ``None`` when no band does.
    """
    matching_labels = (
        label
        for label, lowest, highest in cvss_severity_matrix
        if int(lowest * 10) <= int(score * 10) <= int(highest * 10)
    )
    return next(matching_labels, None)
        
def months_backwards(months, start_date=None):
    """Yield the first day of consecutive months, walking backwards in time.

    Args:
        months: How many month starts to yield.
        start_date: Any date inside the first month to yield. Defaults to
            today's date, looked up when iteration begins rather than when
            the function was defined, so a long-lived kernel never works
            from a stale "today".

    Yields:
        The first day of the month of ``start_date``, then of each
        preceding month.
    """
    if start_date is None:
        start_date = date.today()
    month_start = start_date
    for _ in range(months):
        month_start = month_start.replace(day=1)
        yield month_start
        # One day before the 1st always falls in the previous month.
        month_start -= timedelta(days=1)

In [3]:
from datetime import datetime

def parse_text_content(texts):
    """Strip each fragment in ``texts``, join them with single spaces and strip the result."""
    stripped_fragments = [str.strip(fragment) for fragment in texts]
    joined = " ".join(stripped_fragments)
    return joined.strip()


def parse_text(element):
    """Return the cleaned-up text of ``element`` and everything nested in it.

    The text nodes are selected with the XPath ``.//text()`` and handed to
    ``parse_text_content``.
    """
    text_nodes = element.xpath(".//text()")
    return parse_text_content(text_nodes)


def parse_date(cell):
    """Read the text of ``cell`` as a ``%m/%d/%Y`` date and return it as a ``datetime``."""
    cell_text = parse_text(cell)
    return datetime.strptime(cell_text, "%m/%d/%Y")

def bisect(lst, predicate):
    """Split ``lst`` into two lists according to ``predicate``.

    Args:
        lst: Iterable of items.
        predicate: Callable returning a truthy value for items that belong
            in the first list.

    Returns:
        ``(matching, rest)`` - the items for which ``predicate`` was truthy
        and the remaining ones, each in input order.
    """
    matching = []
    rest = []
    for item in lst:
        target = matching if predicate(item) else rest
        target.append(item)
    return matching, rest


def transpose(matrix):
    """Swap the rows and columns of a list-of-rows matrix.

    Args:
        matrix: Sequence of equally long rows.

    Returns:
        A new list of lists where element ``[c][r]`` is ``matrix[r][c]``.
        An empty matrix yields an empty list.

    Raises:
        Exception: if the rows do not all have the same length.
    """
    if len(matrix) == 0:
        # Nothing to transpose; max()/min() over no rows would fail.
        return []
    row_lengths = {len(row) for row in matrix}
    if len(row_lengths) > 1:
        raise Exception("This is not a matrix - it has varying number of columns")
    return [list(column) for column in zip(*matrix)]


def count(lst, item):
    """Return how many consecutive elements at the start of ``lst`` equal ``item``.

    Counting stops at the first element that differs, so later occurrences
    are not included.
    """
    run_length = 0
    for element in lst:
        if not (element == item):
            return run_length
        run_length += 1
    return run_length


def rstrip(lst, item):
    """Return ``lst`` without its trailing run of elements equal to ``item``.

    When there is nothing to remove, ``lst`` itself is returned, not a copy.
    """
    trailing = count(reversed(lst), item)
    if not trailing:
        return lst
    return lst[:-trailing]


def parse_table(table, parsers=None):
    """Convert an HTML table element into a list of (possibly nested) dicts.

    Header cells name the columns. Headers stacked over several rows through
    ``colspan``/``rowspan`` produce nested dicts: a column under
    "Score" > "Max" is stored as ``row["Score"]["Max"]``.

    Args:
        table: Table element. Accepted layouts are ``thead`` + ``tbody``, a
            lone ``tbody``, or bare ``tr`` children; in the latter two, rows
            made up only of ``th`` cells are taken as header rows.
        parsers: Optional mapping from a column to a callable that turns the
            cell element into a value. Single-level columns are keyed by
            their header text, nested ones by the tuple of header texts.
            Columns without an entry are read with ``parse_text``.

    Returns:
        One dict per body row.

    Raises:
        Exception: if the children of ``table`` match none of the layouts.
        ValueError: if the table has no header rows to name the columns.
    """
    parsers = parsers or {}

    def get_parser(path):
        lookup = path[0] if len(path) == 1 else tuple(path)
        return parsers.get(lookup, parse_text)

    if len(table) == 2 and table[0].tag == "thead" and table[1].tag == "tbody":
        # Proper table with thead and tbody
        header_rows = list(table[0])
        body_rows = list(table[1])
    else:
        if len(table) == 1 and table[0].tag == "tbody":
            # Only tbody - weird, but some people apparently do that
            rows = list(table[0])
        elif all(child.tag == "tr" for child in table):
            # Old-style table, just rows
            rows = list(table)
        else:
            raise Exception("No idea what's going on with this table")
        header_rows, body_rows = bisect(rows, lambda tr: all(cell.tag == "th" for cell in tr))

    if not header_rows:
        raise ValueError("Table has no header rows, so its columns cannot be named")

    column_count = max(
        sum(int(th.attrib.get("colspan", 1)) for th in tr) for tr in header_rows
    )
    # None marks a free slot; anything else (header text or the sentinel
    # below) marks a slot that is already taken.
    header_matrix = [[None] * column_count for _ in header_rows]
    # Placeholder for slots covered by a rowspan from a row above.
    empty = object()

    for r, header_row in enumerate(header_rows):
        c = 0
        for th in header_row:
            # Skip slots already claimed. The test is "is not None" so that
            # headers with empty text still count as occupied.
            while c < column_count and header_matrix[r][c] is not None:
                c += 1
            if c >= column_count:
                break
            rowspan = int(th.attrib.get("rowspan", 1))
            colspan = int(th.attrib.get("colspan", 1))
            text = parse_text(th)
            # Clamp the spans so an oversized attribute cannot index past
            # the header grid.
            row_end = min(r + rowspan, len(header_rows))
            column_end = min(c + colspan, column_count)
            for cc in range(c, column_end):
                header_matrix[r][cc] = text
                for rr in range(r + 1, row_end):
                    header_matrix[rr][cc] = empty

    # Each column of the grid, minus trailing placeholders, is the key path
    # under which that column's values are stored.
    paths = [rstrip(column, empty) for column in transpose(header_matrix)]

    result_rows = []
    for tr in body_rows:
        result_row = {}
        for c, td in enumerate(tr):
            path = paths[c]
            target = result_row
            for name in path[:-1]:
                target = target.setdefault(name, {})
            target[path[-1]] = get_parser(path)(td)
        result_rows.append(result_row)

    return result_rows

In [22]:
from itertools import chain

def table(rows, columns=None):
    """Render ``rows`` as an HTML ``<table>`` string.

    Args:
        rows: Iterable of row objects (dicts when ``columns`` is omitted).
        columns: Optional list of column specs. Each spec is a dict with:
            ``key`` - a subscript into the row, a callable taking the row,
            or a falsy value to use the row itself;
            ``title`` - the header text;
            ``processor`` (optional) - callable applied to the value before
            it is rendered;
            ``attrs`` (optional) - mapping of attribute name to either a
            fixed value or a callable taking the row. Callables are skipped
            for header cells and for falsy rows.
            When omitted or empty, one column is created per key found in
            ``rows``, in the order the keys are first seen.

    Returns:
        The table markup as a string.

    Note:
        Titles, values and attribute values are interpolated as-is, without
        HTML escaping, so processors can emit markup. Escape untrusted text
        in a processor before rendering it.
    """
    # Materialise once: rows is walked for column discovery and again for
    # the body, which a one-shot iterator would not survive.
    rows = list(rows)
    if not columns:
        # dict.fromkeys de-duplicates while keeping first-seen order; a set
        # would shuffle the columns from one run to the next.
        discovered = dict.fromkeys(chain.from_iterable(row.keys() for row in rows))
        columns = [{"key": name, "title": name} for name in discovered]

    def table_cell_attrs(column, row=None):
        attrs = {}
        for name, spec in column.get("attrs", {}).items():
            if not callable(spec):
                attrs[name] = spec
            elif row:
                attrs[name] = spec(row)
        if not attrs:
            return ""
        return " " + " ".join(f'{k}="{v}"' for k, v in attrs.items())

    def table_header_cells():
        return [f"<th{table_cell_attrs(column)}>{column['title']}</th>" for column in columns]

    def table_body_cells(row):
        def cell(column):
            processor = column.get("processor", lambda x: x)
            key = column["key"]
            if not key:
                value = row
            elif callable(key):
                value = key(row)
            else:
                value = row[key]
            return f"<td{table_cell_attrs(column, row)}>{processor(value)}</td>"
        return map(cell, columns)

    def table_row(cells):
        return f"<tr>{''.join(cells)}</tr>"

    header_html = table_row(table_header_cells())
    body_html = "".join(table_row(table_body_cells(row)) for row in rows)
    return (
        "<table>\n"
        f"    <thead>{header_html}</thead>\n"
        f"    <tbody>{body_html}</tbody>\n"
        "</table>"
    )

In [15]:
import matplotlib.pyplot as plotter

def show_severity_breakdown(grouped_data):
    """Draw a pie chart and a horizontal bar chart of vulnerability counts per severity.

    ``grouped_data`` maps severity labels to collections of vulnerabilities.
    Only labels present in it are plotted, ordered from the last entry of
    ``cvss_severity_matrix`` to the first. Colours come from
    ``cvss_severity_colors`` and pie offsets from ``explode_severities``.
    """
    present_labels = [entry[0] for entry in cvss_severity_matrix if entry[0] in grouped_data]
    severities = present_labels[::-1]
    severity_counts = [len(grouped_data.get(label, [])) for label in severities]

    # Share of each severity as a pie chart.
    _, pie_axes = plotter.subplots()
    pie_axes.pie(
        severity_counts,
        explode=[explode_severities[label] for label in severities],
        labels=severities,
        autopct='%1.2f',
        colors=[cvss_severity_colors[label] for label in severities],
        startangle=90,
    )
    pie_axes.axis('equal')
    plotter.title('Vulnerability Severity Distribution%')
    plotter.show()

    # Absolute counts as horizontal bars, recoloured per severity afterwards.
    _, bar_axes = plotter.subplots()
    bars = bar_axes.barh(
        severities,
        severity_counts,
        0.8,
        alpha=0.7,
        color='blue',
        label='Vulns',
    )
    for bar, label in zip(bars, severities):
        bar.set_color(cvss_severity_colors[label])
    bar_axes.set_xlabel('Vuln Count')
    plotter.title('No. Vulnerabilities By Severity')
    plotter.show()