In [1]:
import requests
from bs4 import BeautifulSoup
import time
import csv
from random import randint
from fake_useragent import UserAgent

# Set a User-Agent header so requests are less likely to be blocked by
# anti-scraping checks.
ua = UserAgent()
headers = {
    # `ua.random` is evaluated once, here, so every request that passes this
    # `headers` dict sends the same User-Agent string.
    "User-Agent": ua.random,
}

# Scraper: download the CVE keyword-search page and extract its table rows.
def fetch_cve_data(keyword="Node.js", timeout=10):
    """Fetch the CVE keyword-search results page and parse its rows.

    Args:
        keyword: Search term sent as the ``keyword`` query parameter.
            Defaults to ``"Node.js"``, the term the URL used to hard-code.
        timeout: Seconds to wait for the server. Without a timeout,
            ``requests.get`` may block indefinitely on an unresponsive host.

    Returns:
        A list of ``{"Name": ..., "Description": ...}`` dicts, one for each
        ``<tr>`` (after the first) that has at least two ``<td>`` cells and
        contains a link; or ``None`` if the page could not be retrieved.
    """
    url = "https://cve.mitre.org/cgi-bin/cvekey.cgi"

    try:
        response = requests.get(
            url,
            params={"keyword": keyword},
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts take the same "report and return
        # None" path as a non-200 response instead of crashing the caller.
        print(f"Failed to retrieve data: {exc}")
        return None

    if response.status_code != 200:
        print("Failed to retrieve data")
        return None

    # Parse the page content with BeautifulSoup.
    soup = BeautifulSoup(response.text, 'html.parser')

    cve_data = []

    # The first <tr> is treated as the table header and skipped.
    for entry in soup.find_all('tr')[1:]:
        td_tags = entry.find_all('td')

        # Rows need at least two cells: the ID cell and the description cell.
        if len(td_tags) < 2:
            continue

        # The first link in the row carries the CVE ID text.
        name_tag = entry.find('a', href=True)
        if name_tag is None:
            continue

        cve_data.append({
            "Name": name_tag.text.strip(),
            "Description": td_tags[1].text.strip(),
        })

    return cve_data

# Main program: fetch the CVE data and save it to a CSV file.
def main():
    """Fetch the CVE rows and write them to ``nodejs_cve_data.csv``.

    The CSV is only written when at least one record was fetched. Opening
    the file in ``'w'`` mode truncates it, so writing after a failed fetch
    would replace a previously saved export with a header-only file.
    """
    cve_data = fetch_cve_data()

    # `fetch_cve_data` returns None on failure and may return an empty list;
    # in both cases there is nothing worth saving.
    if not cve_data:
        print("No CVE data fetched; 'nodejs_cve_data.csv' was not written.")
        return

    # newline='' lets the csv module control line endings itself.
    with open('nodejs_cve_data.csv', 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=["Name", "Description"])
        writer.writeheader()
        writer.writerows(cve_data)

    print(f"Data saved to 'nodejs_cve_data.csv'. Total records fetched: {len(cve_data)}")

# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()


Data saved to 'nodejs_cve_data.csv'. Total records fetched: 415
