# CVE-Datensatz: Abruf und CSV-Export

> Dieses Notebook ruft über das Fetcher-Skript die CVE-Daten (NVD CVE API v2.0) ab, speichert sie zeilenbasiert (JSONL) und erzeugt daraus versionsspezifische CSV-Dateien mit CVSS-Metriken (v4.0 / v3.1 / v3.0 / v2).

In [None]:
from pathlib import Path
import os
import json
import datetime as dt

# Repository layout, resolved from the parent of the current working directory.
REPO_DIR = Path("..").resolve()
SCRIPTS_DIR = REPO_DIR / "scripts"
DATA_DIR = REPO_DIR / "data"
RAW_DIR = DATA_DIR / "raw"

# Helper scripts that the later cells launch as subprocesses.
FETCH_SCRIPT = SCRIPTS_DIR / "nvd_cve_fetcher" / "nvd_cve_fetcher.py"
CSV_SCRIPT = SCRIPTS_DIR / "cves_json_to_csv.py"

# CSV export target: the CSV_OUT_DIR environment variable wins, otherwise
# RAW_DIR is used. Kept as a string, exactly as read from the environment.
CSV_OUT_DIR = os.environ.get("CSV_OUT_DIR", str(RAW_DIR))

# Echo the resolved configuration and whether both scripts are present.
for _label, _value in (
    ("REPO_DIR:", REPO_DIR),
    ("RAW_DIR:", RAW_DIR),
    ("CSV_OUT_DIR:", CSV_OUT_DIR),
    ("FETCH_SCRIPT exists:", FETCH_SCRIPT.exists()),
    ("CSV_SCRIPT exists:", CSV_SCRIPT.exists()),
):
    print(_label, _value)

In [None]:
# Inspect credentials in .env – optional, used for a higher rate limit.
# Only reports which keys are present; the values themselves are never printed.
from dotenv import dotenv_values

ENV_PATH = REPO_DIR / ".env"
if not ENV_PATH.exists():
    print(".env nicht gefunden – es wird ohne API Key gearbeitet (langsameres Rate-Limit).")
else:
    env_vals = dotenv_values(str(ENV_PATH))
    print(".env gefunden – Keys:")
    for key_name in ("NVD_API_KEY", "CONTACT_EMAIL"):
        # A missing key and an empty value are both reported as "not set".
        status = "gesetzt" if env_vals.get(key_name) else "—"
        print(f"  {key_name}: {status}")

In [None]:
# Run the fetcher script with the same interpreter that runs this notebook.
import os
import subprocess
import sys

print("Starte Fetcher … dies kann je nach Netzwerk/Rate-Limit dauern.")
# The script is started with the repository root as its working directory.
ret = subprocess.run([sys.executable, str(FETCH_SCRIPT)], cwd=str(REPO_DIR))
print("Fetcher Exit-Code:", ret.returncode)
# Exit code 130 is tolerated alongside 0 — presumably a run interrupted with
# Ctrl+C whose partial result should still be usable; TODO confirm against
# the fetcher script.
# An explicit raise replaces `assert` because assert statements are removed
# when Python runs with -O, which would let a failed fetch pass silently.
if ret.returncode not in (0, 130):
    raise RuntimeError("Fetcher fehlgeschlagen")

In [None]:
# Preprocessing / CSV export: run the conversion script as a subprocess.
import subprocess
import sys

print("Starte CSV-Export …")
# The script uses its own internal config (INPUT/OUT_DIR). If a different
# directory is wanted, adjust the config inside the script.
ret_csv = subprocess.run([sys.executable, str(CSV_SCRIPT)], cwd=str(REPO_DIR))
print("CSV Exit-Code:", ret_csv.returncode)
# An explicit raise replaces `assert` because assert statements are removed
# when Python runs with -O, which would let a failed export pass silently.
if ret_csv.returncode != 0:
    raise RuntimeError("CSV-Export fehlgeschlagen")

In [None]:
# Show a small sample (first five rows) of each exported CSV file.
import pandas as pd
from pathlib import Path

# NOTE(review): CSV_OUT_DIR comes from the environment (default RAW_DIR), while
# the export cell states that the script uses its own internal output config —
# confirm both point to the same directory.
out_dir = Path(CSV_OUT_DIR)
for name in ("cves_v40.csv", "cves_v31.csv", "cves_v30.csv", "cves_v2.csv"):
    csv_path = out_dir / name
    if not csv_path.exists():
        # A missing file is reported and skipped, not treated as an error.
        print("Nicht gefunden:", csv_path)
        continue
    print("Vorschau:", name)
    # `display` is not imported here; it is expected to be provided by the
    # notebook (IPython) session.
    display(pd.read_csv(csv_path, nrows=5))