diff --git a/sig-security-tooling/cve-feed/hack/fetch-official-cve-feed.py b/sig-security-tooling/cve-feed/hack/fetch-official-cve-feed.py index b708e81..db541d1 100755 --- a/sig-security-tooling/cve-feed/hack/fetch-official-cve-feed.py +++ b/sig-security-tooling/cve-feed/hack/fetch-official-cve-feed.py @@ -16,9 +16,31 @@ import copy import json +import re import requests from datetime import datetime +# CVE ID Format: CVE-YYYY-NNNN+ (NNNN+ at least 4 digits) +CVE_ID_PATTERN = r"CVE-\d{4}-\d{4,}" + +# Match leading CVEs with optional separators (anchored to start, using ^) +LEADING_CVE_BLOCK_PATTERN = rf"^(?:{CVE_ID_PATTERN}[\s,:-]*)+" + +def parse_cve_title(title: str): + match = re.match(LEADING_CVE_BLOCK_PATTERN, title) + if not match: + raise LookupError(f"Title does not start with CVE block: {title}") + + leading_cve_block = match.group(0) + # Extract CVEs only from that leading block - handling issues which contain other CVE references + # ex: CVE-2019-11249: Incomplete fixes for CVE-2019-1002101 and CVE-2019-11246, kubectl [...] 
+ # ref: https://github.com/kubernetes/kubernetes/issues/80984 + cve_ids = re.findall(CVE_ID_PATTERN, leading_cve_block) + + # Remove the leading CVE block from the title to get the description + description = re.sub(LEADING_CVE_BLOCK_PATTERN, "", title).strip() + return cve_ids, description + def getCVEStatus(state, state_reason): if state == "open": if state_reason == "reopened": @@ -68,6 +90,7 @@ def getCVEStatus(state, state_reason): feed_envelope['_kubernetes_io'] = root_kubernetes_io cve_list = [] +non_parsable_cve_list = [] for item in gh_items: # These keys respects the item jsonfeed spec https://www.jsonfeed.org/version/1.1/ cve = {'content_text': None, 'date_published': None, 'external_url': None, @@ -81,32 +104,31 @@ def getCVEStatus(state, state_reason): cve['content_text'] = item['body'] cve['date_published'] = item['created_at'] cve['status'] = getCVEStatus(item['state'], item['state_reason']) - # This is because some CVEs were titled "CVE-XXXX-XXXX - Something" instead of - # "CVE-XXXX-XXXX: Something" on GitHub (see https://github.com/kubernetes/kubernetes/issues/60813). - title = item['title'].replace(' -', ':') - # This splits the CVE into its ID and the description/name, however some are in the following forms: - # - CVE-2019-11245: v1.14.2, v1.13.6: container uid [...] (see https://github.com/kubernetes/kubernetes/issues/78308) - # - CVE-2019-11250: TOB-K8S-001: Bearer tokens [...] (see https://github.com/kubernetes/kubernetes/issues/81114) - # We don't know if there are going to be version numbers and/or vendor IDs but the description should be last. 
- title = title.split(': ') - if len(title) > 0: - cve['summary'] = title[-1] - if len(title) > 1: - cve_ids = [cve_id.strip() for cve_id in title[0].split(',')] - first_cve_id = cve_ids[0] - cve['id'] = first_cve_id - cve['external_url'] = f'https://www.cve.org/cverecord?id={first_cve_id}' - cve['_kubernetes_io']['google_group_url'] = f'https://groups.google.com/g/kubernetes-announce/search?q={first_cve_id}' - - # Add additional entries for any remaining CVE IDs - for additional_cve_id in cve_ids[1:]: - additional_cve = copy.deepcopy(cve) - additional_cve['id'] = additional_cve_id - additional_cve['external_url'] = f'https://www.cve.org/cverecord?id={additional_cve_id}' - additional_cve['_kubernetes_io']['google_group_url'] = f'https://groups.google.com/g/kubernetes-announce/search?q={additional_cve_id}' - cve_list.append(additional_cve) - cve_list.append(cve) + + try: + cve_ids, description = parse_cve_title(item['title']) + cve['summary'] = description + + first_cve_id = cve_ids[0] + cve['id'] = first_cve_id + cve['external_url'] = f'https://www.cve.org/cverecord?id={first_cve_id}' + cve['_kubernetes_io']['google_group_url'] = f'https://groups.google.com/g/kubernetes-announce/search?q={first_cve_id}' + + # Add additional entries for any remaining CVE IDs + for additional_cve_id in cve_ids[1:]: + additional_cve = copy.deepcopy(cve) + additional_cve['id'] = additional_cve_id + additional_cve['external_url'] = f'https://www.cve.org/cverecord?id={additional_cve_id}' + additional_cve['_kubernetes_io']['google_group_url'] = f'https://groups.google.com/g/kubernetes-announce/search?q={additional_cve_id}' + cve_list.append(additional_cve) + + cve_list.append(cve) + except LookupError: + non_parsable_cve_list.append(item['title']) feed_envelope['items'] = cve_list json_feed = json.dumps(feed_envelope, sort_keys=False, indent=4) print(json_feed) + +if len(non_parsable_cve_list) != 0: + exit(7)