Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 47 additions & 25 deletions sig-security-tooling/cve-feed/hack/fetch-official-cve-feed.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,31 @@

import copy
import json
import re
import sys
from datetime import datetime

import requests

# CVE ID Format: CVE-YYYY-NNNN+ (NNNN+ at least 4 digits)
CVE_ID_PATTERN = r"CVE-\d{4}-\d{4,}"

# Match leading CVEs with optional separators (anchored to start, using ^)
LEADING_CVE_BLOCK_PATTERN = rf"^(?:{CVE_ID_PATTERN}[\s,:-]*)+"

def parse_cve_title(title: str):
    """Split an issue title into its leading CVE IDs and the description.

    The title must start with one or more CVE IDs, optionally separated by
    whitespace, commas, colons or dashes. Whitespace before the first CVE ID
    is ignored.

    Args:
        title: The issue title, e.g. "CVE-2020-8551, CVE-2020-8552: apiserver DoS".

    Returns:
        A ``(cve_ids, description)`` tuple: the list of CVE IDs found in the
        leading block (in order of appearance) and the remaining title text
        with surrounding whitespace removed.

    Raises:
        LookupError: If the title does not start with a CVE ID.
    """
    # Tolerate stray whitespace ahead of the first CVE ID, mirroring the
    # strip() already applied to the description below.
    candidate = title.lstrip()
    match = re.match(LEADING_CVE_BLOCK_PATTERN, candidate)
    if not match:
        raise LookupError(f"Title does not start with CVE block: {title}")

    # Extract CVEs only from the leading block, so that CVEs merely referenced
    # in the description are not treated as IDs of this issue.
    # ex: CVE-2019-11249: Incomplete fixes for CVE-2019-1002101 and CVE-2019-11246, kubectl [...]
    # ref: https://github.com/kubernetes/kubernetes/issues/80984
    cve_ids = re.findall(CVE_ID_PATTERN, match.group(0))

    # Everything after the leading CVE block is the description; slicing at
    # the end of the match avoids running the anchored pattern a second time.
    description = candidate[match.end():].strip()
    return cve_ids, description

def getCVEStatus(state, state_reason):
if state == "open":
if state_reason == "reopened":
Expand Down Expand Up @@ -68,6 +90,7 @@ def getCVEStatus(state, state_reason):
feed_envelope['_kubernetes_io'] = root_kubernetes_io

cve_list = []
non_parsable_cve_list = []
for item in gh_items:
# These keys respect the item jsonfeed spec https://www.jsonfeed.org/version/1.1/
cve = {'content_text': None, 'date_published': None, 'external_url': None,
Expand All @@ -81,32 +104,31 @@ def getCVEStatus(state, state_reason):
cve['content_text'] = item['body']
cve['date_published'] = item['created_at']
cve['status'] = getCVEStatus(item['state'], item['state_reason'])
# This is because some CVEs were titled "CVE-XXXX-XXXX - Something" instead of
# "CVE-XXXX-XXXX: Something" on GitHub (see https://github.com/kubernetes/kubernetes/issues/60813).
title = item['title'].replace(' -', ':')
# This splits the CVE into its ID and the description/name, however some are in the following forms:
# - CVE-2019-11245: v1.14.2, v1.13.6: container uid [...] (see https://github.com/kubernetes/kubernetes/issues/78308)
# - CVE-2019-11250: TOB-K8S-001: Bearer tokens [...] (see https://github.com/kubernetes/kubernetes/issues/81114)
# We don't know if there are going to be version numbers and/or vendor IDs but the description should be last.
title = title.split(': ')
if len(title) > 0:
cve['summary'] = title[-1]
if len(title) > 1:
cve_ids = [cve_id.strip() for cve_id in title[0].split(',')]
first_cve_id = cve_ids[0]
cve['id'] = first_cve_id
cve['external_url'] = f'https://www.cve.org/cverecord?id={first_cve_id}'
cve['_kubernetes_io']['google_group_url'] = f'https://groups.google.com/g/kubernetes-announce/search?q={first_cve_id}'

# Add additional entries for any remaining CVE IDs
for additional_cve_id in cve_ids[1:]:
additional_cve = copy.deepcopy(cve)
additional_cve['id'] = additional_cve_id
additional_cve['external_url'] = f'https://www.cve.org/cverecord?id={additional_cve_id}'
additional_cve['_kubernetes_io']['google_group_url'] = f'https://groups.google.com/g/kubernetes-announce/search?q={additional_cve_id}'
cve_list.append(additional_cve)
cve_list.append(cve)

try:
cve_ids, description = parse_cve_title(item['title'])
cve['summary'] = description

first_cve_id = cve_ids[0]
cve['id'] = first_cve_id
cve['external_url'] = f'https://www.cve.org/cverecord?id={first_cve_id}'
cve['_kubernetes_io']['google_group_url'] = f'https://groups.google.com/g/kubernetes-announce/search?q={first_cve_id}'

# Add additional entries for any remaining CVE IDs
for additional_cve_id in cve_ids[1:]:
additional_cve = copy.deepcopy(cve)
additional_cve['id'] = additional_cve_id
additional_cve['external_url'] = f'https://www.cve.org/cverecord?id={additional_cve_id}'
additional_cve['_kubernetes_io']['google_group_url'] = f'https://groups.google.com/g/kubernetes-announce/search?q={additional_cve_id}'
cve_list.append(additional_cve)

cve_list.append(cve)
except LookupError:
non_parsable_cve_list.append(item['title'])

# Emit the assembled feed as pretty-printed JSON on stdout.
feed_envelope['items'] = cve_list
json_feed = json.dumps(feed_envelope, sort_keys=False, indent=4)
print(json_feed)

# Fail the run when any issue title could not be parsed. The offending titles
# go to stderr so the JSON already written to stdout stays valid, and
# sys.exit() is used because the bare exit() helper is injected by the `site`
# module for interactive use and is not guaranteed to exist in scripts.
if non_parsable_cve_list:
    for unparsed_title in non_parsable_cve_list:
        print(f"Unable to parse CVE title: {unparsed_title}", file=sys.stderr)
    sys.exit(7)