In [1]:
import uuid
import re
import json
import requests
from bs4 import BeautifulSoup

In [2]:
# получаем html код страницы, если она доступна, иначе Error
def get_html_code(url: str) -> str:
    headers = {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
    }
    try:
        req = requests.get(url, headers=headers)
        # The HTTP 200 OK success status response code indicates that the request has succeeded.
        if req.status_code == 200:
            return req.text
        else:
            return "Error"
        
    except Exception as e:
        print(e)
        return "Error"

# переводим html код в объект soup класса BeautifulSoup
def html_to_soup(html: str) -> BeautifulSoup:
    soup = BeautifulSoup(html, "html.parser")
    return soup

# вытаскивает из текста совпадения, используя регулярные выражения
def find_by_regex(text: str, regular_expression: re.Pattern) -> list[str]:
    matches = re.findall(regular_expression, text)
    matches = list(set(matches))
    return matches

# обогащает CVE описанием с mitre.org
def enrich_cve(cve: str) -> str:
    try:
        mitre_url = "https://cve.mitre.org/cgi-bin/cvename.cgi?name="
        cve_url = mitre_url + cve

        html_code = get_html_code(cve_url)
        soup = html_to_soup(html_code)

        description = soup.find("th", string="Description")
        description = description.find_next("td", colspan="2").text
    except Exception as e:
        print(e)
        return "Error"

    return description

# записывает данные в json файл
def write_to_json(filename: str, data: list[dict]):
    with open(filename, 'w') as f:
        json.dump(data,
                  f,
                  indent=4,
                  ensure_ascii=True)

# читает данные из json файла
def read_from_json(filename: str):
    with open(filename, 'r') as f:
        data = json.load(f)
    return data

In [3]:
adobe_url = "https://helpx.adobe.com/security/Home.html"
html_code = get_html_code(adobe_url)
soup = html_to_soup(html_code)

table = soup.find_all("table")[0]

final_data = []

for link in table.find_all("a"):
  information_link = "https://helpx.adobe.com" + link['href']
  product = link.text

  html_code = get_html_code(information_link)
  soup = html_to_soup(html_code)
  clean_text = soup.text

  CVEs = find_by_regex(clean_text, r"CVE-\d{4}-\d{2,7}")

  

  dictionary = {
    "id": str(uuid.uuid4()),
    "href": information_link,
    "product": product,
    "cve": {

    }
  }

  for cve in CVEs:
    description = enrich_cve(cve)
    dictionary["cve"][cve] = description

  final_data.append(dictionary)

final_data

[{'id': 'f6783f69-bb95-4f71-b4bf-99e2e2baa45b',
  'href': 'https://helpx.adobe.com/security/products/connect/apsb23-33.html',
  'product': 'APSB23-33\xa0: Security update available for Adobe\xa0Connect',
  'cve': {'CVE-2023-29305': "Adobe Connect versions 12.3 and earlier are affected by a reflected Cross-Site Scripting (XSS) vulnerability. If an attacker is able to convince a victim to visit a URL referencing a vulnerable page, malicious JavaScript content may be executed within the context of the victim's browser.\n\n",
   'CVE-2023-29306': "Adobe Connect versions 12.3 and earlier are affected by a reflected Cross-Site Scripting (XSS) vulnerability. If an attacker is able to convince a victim to visit a URL referencing a vulnerable page, malicious JavaScript content may be executed within the context of the victim's browser.\n\n"}},
 {'id': 'e1e55143-c254-4c9d-ba9d-99567cf72226',
  'href': 'https://helpx.adobe.com/security/products/acrobat/apsb23-34.html',
  'product': 'APSB23-34\xa0

In [4]:
write_to_json("cve.json", final_data)

In [5]:
data = read_from_json("cve.json")
data

[{'id': 'f6783f69-bb95-4f71-b4bf-99e2e2baa45b',
  'href': 'https://helpx.adobe.com/security/products/connect/apsb23-33.html',
  'product': 'APSB23-33\xa0: Security update available for Adobe\xa0Connect',
  'cve': {'CVE-2023-29305': "Adobe Connect versions 12.3 and earlier are affected by a reflected Cross-Site Scripting (XSS) vulnerability. If an attacker is able to convince a victim to visit a URL referencing a vulnerable page, malicious JavaScript content may be executed within the context of the victim's browser.\n\n",
   'CVE-2023-29306': "Adobe Connect versions 12.3 and earlier are affected by a reflected Cross-Site Scripting (XSS) vulnerability. If an attacker is able to convince a victim to visit a URL referencing a vulnerable page, malicious JavaScript content may be executed within the context of the victim's browser.\n\n"}},
 {'id': 'e1e55143-c254-4c9d-ba9d-99567cf72226',
  'href': 'https://helpx.adobe.com/security/products/acrobat/apsb23-34.html',
  'product': 'APSB23-34\xa0

In [6]:
data[2].get("cve").get('CVE-2023-25690')

'Some mod_proxy configurations on Apache HTTP Server versions 2.4.0 through 2.4.55 allow a HTTP Request Smuggling attack. Configurations are affected when mod_proxy is enabled along with some form of RewriteRule or ProxyPassMatch in which a non-specific pattern matches some portion of the user-supplied request-target (URL) data and is then re-inserted into the proxied request-target using variable substitution. For example, something like: RewriteEngine on RewriteRule "^/here/(.*)" "http://example.com:8080/elsewhere?$1"; [P] ProxyPassReverse /here/ http://example.com:8080/ Request splitting/smuggling could result in bypass of access controls in the proxy server, proxying unintended URLs to existing origin servers, and cache poisoning. Users are recommended to update to at least version 2.4.56 of Apache HTTP Server.\n\n'