In [5]:
import requests
from collections import defaultdict
import lxml.html
from datetime import date, timedelta

def group(lst, key, value=None):
    """Group the items of ``lst`` into lists keyed by ``key``.

    Args:
        lst: Iterable of items to group.
        key: Either a callable applied to each item, or a subscript
            (dict key, sequence index, ...) looked up as ``item[key]``.
        value: What to store for each item. A callable is applied to the
            item, any other non-``None`` object is used as a subscript
            (``item[value]``), and ``None`` stores the item itself.

    Returns:
        A ``defaultdict(list)`` mapping each group key to the list of stored
        values, in the order the items were encountered.
    """
    result = defaultdict(list)
    for item in lst:
        group_key = key(item) if callable(key) else item[key]
        # Compare against None rather than relying on truthiness so that
        # falsy subscripts such as index 0 are honoured, like they are for key.
        if value is None:
            entry = item
        elif callable(value):
            entry = value(item)
        else:
            entry = item[value]
        result[group_key].append(entry)
    return result

def scrape_cve(cve_id):
    """Scrape description and CVSS v3 score for ``cve_id`` from its NVD page.

    Fetches ``https://nvd.nist.gov/vuln/detail/<cve_id>`` and reads the first
    text node of the elements whose ``data-testid`` is ``vuln-description``
    and ``vuln-cvss3-panel-score``.

    Args:
        cve_id: CVE identifier, interpolated into the URLs as-is.

    Returns:
        A dict with the keys ``id``, ``urls``, ``description``, ``aka`` (the
        text captured from the ``, aka '...'`` clause of the description) and
        ``cvss_score`` (float parsed from the start of the score text).

    Raises:
        IndexError: If one of the page elements is missing, or the description
            contains no ``, aka '...'`` clause.
        AttributeError: If the score text does not start with a decimal number.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    # Imported locally so the function does not depend on `re` having been
    # imported by some other notebook cell.
    import re

    data_ids = ["vuln-description", "vuln-cvss3-panel-score"]
    nvd_cve_url = f"https://nvd.nist.gov/vuln/detail/{cve_id}"
    # Bound the request so an unresponsive server cannot hang the caller.
    response = requests.get(nvd_cve_url, timeout=30)
    cve_doc = lxml.html.fromstring(response.content)
    extracted = {
        test_id: cve_doc.xpath(f"//*[@data-testid='{test_id}']/text()")[0]
        for test_id in data_ids
    }
    description = extracted["vuln-description"]
    return {
        "id": cve_id,
        # NOTE(review): microsoft_cve_url_prefix is not defined in this part of
        # the file; it is assumed to be a module-level string set elsewhere.
        "urls": [nvd_cve_url, microsoft_cve_url_prefix + cve_id],
        "description": description,
        "aka": re.findall(", aka '(.*)'", description)[0],
        "cvss_score": float(
            re.match(r"(\d+\.\d+).*", extracted["vuln-cvss3-panel-score"]).group(1)
        ),
    }

# Colour associated with each CVSS severity label; "None" has no colour.
# NOTE(review): presumably consumed by plotting code elsewhere - confirm.
cvss_severity_colors = {
    "None": None,
    "Low": "green",
    "Medium": "#FFA53F",
    "High": "#FF4F4F",
    "Critical": "mediumorchid",
}
# One (label, lowest score, highest score) tuple per severity band, ordered
# from least to most severe. cvss_severity() treats both bounds as inclusive.
cvss_severity_matrix = [
    ("None", 0.0, 0.0),
    ("Low", 0.1, 3.9),
    ("Medium", 4.0, 6.9),
    ("High", 7.0, 8.9),
    ("Critical", 9.0, 10.0),
]

# Per-severity numeric offset; only "Critical" differs from the others.
# NOTE(review): presumably the pie-chart "explode" fraction per wedge - confirm
# against the plotting code that reads this mapping.
explode_severities = {
    "None": 0.1,
    "Low": 0.1,
    "Medium": 0.1,
    "High": 0.1,
    "Critical": 0.2,
}

def cvss_severity(score):
    """Return the severity label of the band in ``cvss_severity_matrix`` holding ``score``.

    The score and both band bounds are multiplied by ten and truncated with
    ``int`` before the inclusive comparison, so the comparison happens on
    whole tenths. The first matching band wins; ``None`` is returned when no
    band matches.
    """
    tenths = int(score * 10)
    matching_labels = (
        label
        for label, lowest, highest in cvss_severity_matrix
        if int(lowest * 10) <= tenths <= int(highest * 10)
    )
    return next(matching_labels, None)
        
def months_backwards(months, start_date=None):
    """Yield the first day of ``months`` consecutive months, newest first.

    Args:
        months: Number of month starts to yield.
        start_date: Date inside the first (most recent) month. Defaults to
            today's date, resolved when the generator starts running.

    Yields:
        ``start_date`` moved to day 1, then day 1 of each preceding month.
    """
    # Resolve "today" per call: a `date.today()` default would be evaluated
    # once at definition time and go stale in a long-lived session.
    current = date.today() if start_date is None else start_date
    for _ in range(months):
        current = current.replace(day=1)
        yield current
        # One day before the 1st always falls in the previous month.
        current -= timedelta(days=1)

In [None]:
from datetime import datetime

def parse_text_content(texts):
    """Strip every string in ``texts``, join them with single spaces, and strip the result."""
    stripped_parts = [str.strip(part) for part in texts]
    joined = " ".join(stripped_parts)
    return joined.strip()


def parse_text(element):
    """Return the combined text of every text node at or below ``element``.

    The text nodes are collected with the XPath ``.//text()`` and passed to
    ``parse_text_content`` for stripping and joining.
    """
    text_nodes = element.xpath(".//text()")
    return parse_text_content(text_nodes)


def parse_date(cell):
    """Parse the text of ``cell`` as a ``%m/%d/%Y`` date and return it as a ``datetime``."""
    cell_text = parse_text(cell)
    return datetime.strptime(cell_text, "%m/%d/%Y")


def parse_table(table, parsers):
    """Convert an HTML table element into a list of row dicts.

    Args:
        table: Table element exposing ``xpath()`` whose row elements iterate
            over their cells (as lxml elements do).
        parsers: Mapping from header text to a callable that converts the
            cell element of that column; columns without an entry are
            converted with ``parse_text``.

    Returns:
        One dict per data row, mapping header text to the parsed cell value.
        Cells are paired with headers positionally, and pairing stops at the
        shorter of the two.
    """
    # The path must be relative (".//"): an absolute "//tr/th" is evaluated
    # from the document root and would also collect the header cells of every
    # other table in the document.
    header = [parse_text(th) for th in table.xpath(".//tr/th")]
    if table.xpath("tbody/tr/th"):  # Headers in tbody
        # The first tbody row is skipped as the header row.
        rows = table.xpath("tbody/tr[position() > 1]")
    else:
        rows = table.xpath("tbody/tr")
        if not rows:  # Missing tbody
            rows = table.xpath("tr")
    return [
        {
            column: parsers.get(column, parse_text)(cell)
            for column, cell in zip(header, row)
        }
        for row in rows
    ]