# Fetch all existing CVE Information (Vulnerability)

In [None]:
import requests
import json
import time
import os

# Ensure the directory that holds the resume state and the output exists
os.makedirs("data", exist_ok=True)

# Files that persist progress between runs
last_index_file = "data/last_index.txt"  # holds a single start index
failed_batches_file = "data/failed_batches.txt"  # one failed start index per line


def read_saved_index(path, default=0):
    """Return the non-negative integer stored in *path*, else *default*.

    A missing, empty, non-numeric or negative value (for example a file
    truncated by an interrupted write) yields *default* instead of raising,
    so a damaged state file cannot stop the script at start-up.
    """
    try:
        with open(path, "r") as f:
            text = f.read().strip()
    except FileNotFoundError:
        return default
    try:
        value = int(text)
    except ValueError:
        return default
    return value if value >= 0 else default


def read_failed_batches(path):
    """Return the start indexes listed one per line in *path*.

    Blank and non-numeric lines are skipped and duplicates are dropped
    (first occurrence wins) so no batch is queued twice. A missing file
    yields an empty list.
    """
    try:
        with open(path, "r") as f:
            lines = f.readlines()
    except FileNotFoundError:
        return []
    indexes = []
    for line in lines:
        try:
            indexes.append(int(line.strip()))
        except ValueError:
            continue
    # dict.fromkeys keeps insertion order while removing repeats
    return list(dict.fromkeys(indexes))


# Resume from the saved start index if there is one, otherwise start from 0
start_index = read_saved_index(last_index_file)

# Batches that could not be fetched in an earlier run
failed_batches = read_failed_batches(failed_batches_file)

url = "https://services.nvd.nist.gov/rest/json/cves/2.0"
results_per_page = 1000  # Reduce batch size to avoid large response issues
total_results = 279615  # Total CVEs (hard-coded snapshot)
all_cves = []
max_retries = 5  # Number of retries per request
# Function to save failed batches
def save_failed_batches():
    """Overwrite ``failed_batches_file`` with the current ``failed_batches``.

    Every start index in the module-level ``failed_batches`` list is written
    on its own line; an empty list leaves an empty file.
    """
    serialized = "".join(f"{batch_start}\n" for batch_start in failed_batches)
    with open(failed_batches_file, "w") as out:
        out.write(serialized)

# Fetch every CVE page: batches that failed in an earlier run are retried
# first, then the crawl continues forward from the saved resume index.
# To start a completely fresh crawl, delete the files under data/ first.
output_file = "data/all_cves.json"
# True when this run continues earlier work instead of starting from scratch
resuming = start_index > 0 or bool(failed_batches)


def fetch_batch(batch_start):
    """Download one page of CVEs beginning at *batch_start*.

    Returns the decoded JSON payload, or ``None`` once ``max_retries``
    attempts have all failed. Waits between attempts but not after the
    last one, since nothing follows it.
    """
    params = {"startIndex": batch_start, "resultsPerPage": results_per_page}
    for attempt in range(max_retries):
        try:
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()  # Raise error for bad status codes
            return response.json()
        # ValueError covers JSON decode errors on requests versions where
        # they are not a RequestException subclass.
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Error fetching data (attempt {attempt+1}/{max_retries}): {e}")
            if attempt < max_retries - 1:
                time.sleep(5 * (attempt + 1))  # Linear backoff: 5s, 10s, 15s, ...
    return None


def save_all_cves():
    """Write ``all_cves`` to ``output_file``.

    When resuming, CVEs already stored in ``output_file`` are merged in
    first so that earlier progress is not overwritten by this run's partial
    list. Entries are keyed by CVE id and freshly fetched copies win.
    """
    if resuming and os.path.exists(output_file):
        try:
            with open(output_file, "r") as f:
                previous = json.load(f).get("vulnerabilities", [])
        except (OSError, ValueError, AttributeError) as e:
            print(f"Could not reuse existing {output_file}: {e}")
            previous = []
        merged = {}
        for position, item in enumerate(previous + all_cves):
            cve_id = item.get("cve", {}).get("id")
            # Entries without an id cannot be de-duplicated; keep them all
            merged[position if cve_id is None else cve_id] = item
        all_cves[:] = merged.values()

    # Write to a temporary file and swap it in, so an interrupted write
    # cannot leave a truncated output file behind.
    temp_file = output_file + ".tmp"
    with open(temp_file, "w") as f:
        json.dump({"vulnerabilities": all_cves}, f, indent=4)
    os.replace(temp_file, output_file)
    print(f"Saved {len(all_cves)} CVEs to data/all_cves.json")


completed = False
try:
    # Retry a snapshot of the queue: a batch that fails again stays recorded
    # for the next run instead of being re-queued and retried forever, and
    # the forward position in start_index is left untouched.
    for batch_start in list(dict.fromkeys(failed_batches)):
        payload = fetch_batch(batch_start)
        if payload is None:
            print(f"Failed to fetch batch starting at {batch_start}. Keeping it in failed batches.")
            continue
        all_cves.extend(payload.get("vulnerabilities", []))
        print(f"Re-fetched previously failed batch starting at {batch_start}.")
        # Drop the batch from the persisted list now that it succeeded
        failed_batches[:] = [b for b in failed_batches if b != batch_start]
        save_failed_batches()
        time.sleep(1)  # Avoid hitting rate limits

    while start_index < total_results:
        payload = fetch_batch(start_index)
        if payload is None:
            print(f"Failed to fetch batch starting at {start_index}. Adding to failed batches.")
            if start_index not in failed_batches:
                failed_batches.append(start_index)
            save_failed_batches()
        else:
            all_cves.extend(payload.get("vulnerabilities", []))
            # Prefer the live count reported by the API over the hard-coded one
            reported_total = payload.get("totalResults")
            if isinstance(reported_total, int):
                total_results = reported_total
            print(f"Fetched {start_index}/{total_results} CVEs...")
            # NOTE(review): NVD documents stricter limits for requests made
            # without an API key; confirm that a 1s pause is sufficient.
            time.sleep(1)  # Avoid hitting rate limits

        # Move to the next batch; a failed one is recorded above for later
        start_index += results_per_page

        # Persist the NEXT index to fetch so a resumed run neither repeats
        # nor skips a batch.
        with open(last_index_file, "w") as f:
            f.write(str(start_index))

    completed = True
finally:
    # Also runs when the crawl is interrupted, so downloaded data is kept.
    # An interrupted run that fetched nothing leaves existing output alone.
    if completed or all_cves:
        save_all_cves()


# Tabulate the fetched CVE details

In [None]:
import json
import pandas as pd

import os  # imported here so this cell also runs without the fetch cell

# Load CVE data from the JSON file. The fetch step writes
# data/all_cves.json, so prefer that location and fall back to the path
# this cell used originally.
input_path = "data/all_cves.json"
if not os.path.exists(input_path):
    input_path = "all_cves.json"

with open(input_path, "r") as f:
    cve_data = json.load(f)


def first_cvss_data(metrics):
    """Return the ``cvssData`` dict of the first CVSS metric that has entries.

    Tries v3.1, then v3.0, then v2, skipping versions whose list is missing
    or empty. Returns ``{}`` when no version has an entry.
    """
    for key in ("cvssMetricV31", "cvssMetricV30", "cvssMetricV2"):
        entries = metrics.get(key)
        if entries:
            return entries[0].get("cvssData", {})
    return {}


def first_cwe_id(weaknesses):
    """Return the first weakness description value found, or ``"N/A"``.

    Weaknesses whose ``description`` list is missing or empty are skipped
    rather than raising ``IndexError``.
    """
    for weakness in weaknesses:
        for entry in weakness.get("description", []):
            value = entry.get("value")
            if value:
                return value
    return "N/A"


# Extract relevant fields
cve_list = []
for item in cve_data.get("vulnerabilities", []):
    cve = item["cve"]
    cve_id = cve["id"]

    # Extract description (get English description if available)
    descriptions = cve.get("descriptions", [])
    description = next(
        (d["value"] for d in descriptions if d.get("lang") == "en"),
        "No description available",
    )

    # Extract CVSS details (try CVSS v3 first, then v2 if v3 is missing)
    cvss_data = first_cvss_data(cve.get("metrics", {}))
    base_score = cvss_data.get("baseScore", "N/A")
    confidentiality_impact = cvss_data.get("confidentialityImpact", "N/A")
    integrity_impact = cvss_data.get("integrityImpact", "N/A")
    availability_impact = cvss_data.get("availabilityImpact", "N/A")

    # Extract CWE ID if available
    cwe_id = first_cwe_id(cve.get("weaknesses", []))

    # Append data to list
    cve_list.append([cve_id, description, base_score, confidentiality_impact, integrity_impact, availability_impact, cwe_id])

# Create DataFrame
columns = ["CVE ID", "Description", "CVSS Base Score", "Confidentiality Impact", "Integrity Impact", "Availability Impact", "CWE ID"]
df = pd.DataFrame(cve_list, columns=columns)

# Keep only 'CVE ID' and 'Description' columns
df = df[["CVE ID", "Description"]]

# Save to Excel with updated filename
df.to_excel("../Data/CleanedUpdates2.xlsx", index=False)

print("Excel file saved: CleanedUpdates2.xlsx")



Excel file saved: all_cves.xlsx
