In [None]:
import requests
from bs4 import BeautifulSoup
import csv
import time
from google.colab import drive

# Mount Google Drive at /content/drive; the CSV produced by this script is written under this mount point.
drive.mount('/content/drive')

# Function to extract description and vector from NVD
def extract_description_and_vector(url, retries=3, delay=3):
    """Download a vulnerability detail page and extract its description and vector.

    Args:
        url: Address of the page to fetch.
        retries: Maximum number of GET attempts.
        delay: Base wait in seconds after a failed attempt; the wait doubles
            with each further failure.

    Returns:
        A ``(description, vector)`` tuple. An element falls back to the
        placeholder ``"No description found"`` / ``"No vector found"`` when
        its tag is missing from the page. ``(None, None)`` is returned when
        the server answers 404, when every attempt raises
        ``requests.RequestException``, or when ``retries`` is less than 1.
    """
    for attempt in range(retries):
        try:
            response = requests.get(url, timeout=10)
            if response.status_code == 404:
                return None, None  # Skip non-existent CVEs
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Attempt {attempt + 1}: Error fetching {url}: {e}")
            if attempt < retries - 1:
                # Exponential backoff: delay, 2*delay, 4*delay, ...
                # (same waits as before for the default retries=3).
                time.sleep(delay * 2 ** attempt)
            continue

        soup = BeautifulSoup(response.content, "html.parser")

        # Extract description
        description_tag = soup.find("p", {"data-testid": "vuln-description"})
        description = description_tag.text.strip() if description_tag else "No description found"

        # Extract vector from the tooltipCvss3CnaMetrics class
        vector_tag = soup.find("span", {"class": "tooltipCvss3CnaMetrics"})
        vector = vector_tag.text.strip() if vector_tag else "No vector found"

        return description, vector

    # All attempts failed, or retries < 1: always hand back a 2-tuple so
    # callers can unpack the result unconditionally.
    return None, None

# Function to save descriptions and vectors to CSV in Google Drive
def save_descriptions_and_vectors_to_csv(
    start,
    end,
    max_entries=1000,
    *,
    year=2024,
    file_path='/content/drive/My Drive/cve_descriptionns_and_vectors.csv',
):
    """Scrape a range of CVE pages and write URL, description and vector rows to a CSV.

    For each number from ``start`` to ``end`` (inclusive) the NVD detail URL
    ``CVE-<year>-<number>`` is built, with the number zero-padded to at least
    four digits, and passed to ``extract_description_and_vector``. A row is
    written whenever both returned values are truthy.

    Args:
        start: First CVE sequence number to try.
        end: Last CVE sequence number to try (inclusive).
        max_entries: Stop once this many rows have been written.
        year: Year component of the CVE identifier.
        file_path: Destination CSV; it is overwritten. The default keeps the
            original file name (including its spelling) so existing runs keep
            writing to the same place.
    """
    with open(file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['URL', 'Description', 'Vector'])  # Header

        valid_entries = 0

        for i in range(start, end + 1):
            if valid_entries >= max_entries:
                break

            url = f"https://nvd.nist.gov/vuln/detail/CVE-{year}-{i:04d}"
            print(f"Processing {url}...")

            description, vector = extract_description_and_vector(url)

            if description and vector:
                writer.writerow([url, description, vector])
                # Push the row to disk right away so an interrupted run keeps
                # everything scraped so far.
                file.flush()
                valid_entries += 1
                print(f"Processed {valid_entries}/{max_entries}")

            # Pause between requests to avoid rate limiting; skip the pause
            # when no further request will be made.
            if i < end and valid_entries < max_entries:
                time.sleep(3)

    print(f"Completed! Saved {valid_entries} distinct CVE entries.")

# Example usage: try CVE-2024-20000 through CVE-2024-30000 and stop once 1000 rows are saved.
# Widen the range to pull more CVEs.
save_descriptions_and_vectors_to_csv(20000, 30000, max_entries=1000)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Processing https://nvd.nist.gov/vuln/detail/CVE-2024-20000...
Processed 1/1000
Processing https://nvd.nist.gov/vuln/detail/CVE-2024-20001...
Processed 2/1000
Processing https://nvd.nist.gov/vuln/detail/CVE-2024-20002...
Processed 3/1000
Processing https://nvd.nist.gov/vuln/detail/CVE-2024-20003...
Processed 4/1000
Processing https://nvd.nist.gov/vuln/detail/CVE-2024-20004...
Processed 5/1000
Processing https://nvd.nist.gov/vuln/detail/CVE-2024-20005...
Processed 6/1000
Processing https://nvd.nist.gov/vuln/detail/CVE-2024-20006...
Processed 7/1000
Processing https://nvd.nist.gov/vuln/detail/CVE-2024-20007...
Processed 8/1000
Processing https://nvd.nist.gov/vuln/detail/CVE-2024-20008...
Processed 9/1000
Processing https://nvd.nist.gov/vuln/detail/CVE-2024-20009...
Processed 10/1000
Processing https://nvd.nist.gov/vuln/detail/CVE-2024-20010...
Processed 11/1000
