In [3]:
import pandas as pd

# Replace 'output.parquet' with the path to your Parquet file
df = pd.read_parquet('output.parquet', engine='pyarrow')

# Display the first few rows of the DataFrame
df.head()

Unnamed: 0,rule_id,message,cwe,cve,artifact_location,start_line,end_line,start_column,end_column,author,language,application,sarif_file
0,CWE-79,Improper Neutralization of Input During Web Pa...,CWE-79,CVE-2006-6808,wp-admin/templates.php,109,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149031-...
1,CWE-94,Improper Control of Generation of Code ('Code ...,CWE-94,CVE-2007-0233,wp-trackback.php,34,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/231383-...
2,CWE-211,Externally-Generated Error Message Containing ...,CWE-211,CVE-2006-3389,wp-includes/classes.php,605,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149040-...
3,CWE-211,Externally-Generated Error Message Containing ...,CWE-211,CVE-2006-3389,wp-includes/classes.php,612,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149040-...
4,CWE-211,Externally-Generated Error Message Containing ...,CWE-211,CVE-2006-3389,wp-includes/classes.php,639,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149040-...


In [4]:
import requests
import time
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import logging
from typing import Dict, Any

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

API_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"

def get_nvd_data(cve_id: str) -> Dict[str, Any]:
    """Fetches CVSS 3.1 score and CVSS v4 metrics and severity from NVD API.

    Args:
        cve_id: The CVE ID to query.

    Returns:
        A dictionary with CVSS 3.1 score, CVSS v4 metrics, and CVSS v4 severity.
    """
    url = f"{API_URL}?cveId={cve_id}"
    try:
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()

        # Initialize return values
        cvss_31_score = None
        cvss_v4_metrics = None
        cvss_v4_severity = None

        if "vulnerabilities" in data and len(data["vulnerabilities"]) > 0:
            vuln_data = data["vulnerabilities"][0]
            metrics = vuln_data.get("metrics", {})

            # Extract CVSS 3.1 score
            if "cvssMetricV31" in metrics:
                cvss_31_score = metrics["cvssMetricV31"][0]["cvssData"]["baseScore"]

            # Extract CVSS v4 metrics and severity
            if "cvssMetricV40" in metrics:
                cvss_v4_metrics = metrics["cvssMetricV40"][0]["cvssData"]["vectorString"]
                cvss_v4_severity = metrics["cvssMetricV40"][0]["cvssData"]["baseSeverity"]

        return {
            "cvss_31_score": cvss_31_score,
            "cvss_v4_metrics": cvss_v4_metrics,
            "cvss_v4_severity": cvss_v4_severity,
        }
    
    except requests.exceptions.RequestException as e:
        logging.error(f"Failed to fetch data for CVE {cve_id}: {e}")
        return {
            "cvss_31_score": None,
            "cvss_v4_metrics": None,
            "cvss_v4_severity": None,
        }


def enrich_parquet_with_nvd_data(parquet_file: str, output_file: str) -> None:
    """Enriches Parquet file with CVSS 3.1 score, CVSS v4 metrics, and severity from NVD.

    Args:
        parquet_file: Path to the input Parquet file.
        output_file: Path to the output Parquet file with enriched data.
    """
    # Load the existing Parquet file
    df = pd.read_parquet(parquet_file, engine='pyarrow')

    # Lists to store new columns
    cvss_31_scores = []
    cvss_v4_metrics_list = []
    cvss_v4_severities = []

    # Iterate through each row to enrich data
    for index, row in df.iterrows():
        cve_id = row.get("cve")
        if cve_id:
            logging.info(f"Processing CVE {cve_id} (index {index})")
            nvd_data = get_nvd_data(cve_id)
            cvss_31_scores.append(nvd_data["cvss_31_score"])
            cvss_v4_metrics_list.append(nvd_data["cvss_v4_metrics"])
            cvss_v4_severities.append(nvd_data["cvss_v4_severity"])

            # Sleep for 6 seconds to avoid rate limiting
            time.sleep(6)
        else:
            logging.warning(f"No CVE ID found for index {index}")
            cvss_31_scores.append(None)
            cvss_v4_metrics_list.append(None)
            cvss_v4_severities.append(None)

    # Add the new columns to the DataFrame
    df["cvss_31_score"] = cvss_31_scores
    df["cvss_v4_metrics"] = cvss_v4_metrics_list
    df["cvss_v4_severity"] = cvss_v4_severities

    # Save the enriched DataFrame back to a Parquet file
    table = pa.Table.from_pandas(df)
    pq.write_table(table, output_file)

    logging.info(f"Enriched data saved to {output_file}")

# Example usage
enrich_parquet_with_nvd_data("output.parquet", "enriched_output.parquet")


2024-08-26 23:11:51,804 - INFO - Processing CVE CVE-2006-6808 (index 0)
2024-08-26 23:11:54,254 - INFO - Processing CVE CVE-2007-0233 (index 1)
2024-08-26 23:11:56,639 - INFO - Processing CVE CVE-2006-3389 (index 2)
2024-08-26 23:11:59,020 - INFO - Processing CVE CVE-2006-3389 (index 3)
2024-08-26 23:12:01,397 - INFO - Processing CVE CVE-2006-3389 (index 4)
2024-08-26 23:12:03,774 - INFO - Processing CVE CVE-2007-0233 (index 5)
2024-08-26 23:12:04,142 - ERROR - Failed to fetch data for CVE CVE-2007-0233: 403 Client Error: Forbidden for url: https://services.nvd.nist.gov/rest/json/cves/2.0?cveId=CVE-2007-0233
2024-08-26 23:12:06,148 - INFO - Processing CVE CVE-2006-1263 (index 6)
2024-08-26 23:12:06,506 - ERROR - Failed to fetch data for CVE CVE-2006-1263: 403 Client Error: Forbidden for url: https://services.nvd.nist.gov/rest/json/cves/2.0?cveId=CVE-2006-1263
2024-08-26 23:12:08,514 - INFO - Processing CVE CVE-2006-1263 (index 7)
2024-08-26 23:12:08,872 - ERROR - Failed to fetch data f

In [6]:
import pandas as pd

# Replace 'output.parquet' with the path to your Parquet file
df = pd.read_parquet('enriched_output.parquet', engine='pyarrow')

# Display the first few rows of the DataFrame
df.head(50)

Unnamed: 0,rule_id,message,cwe,cve,artifact_location,start_line,end_line,start_column,end_column,author,language,application,sarif_file,cvss_31_score,cvss_v4_metrics,cvss_v4_severity
0,CWE-79,Improper Neutralization of Input During Web Pa...,CWE-79,CVE-2006-6808,wp-admin/templates.php,109,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149031-...,,,
1,CWE-94,Improper Control of Generation of Code ('Code ...,CWE-94,CVE-2007-0233,wp-trackback.php,34,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/231383-...,,,
2,CWE-211,Externally-Generated Error Message Containing ...,CWE-211,CVE-2006-3389,wp-includes/classes.php,605,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149040-...,,,
3,CWE-211,Externally-Generated Error Message Containing ...,CWE-211,CVE-2006-3389,wp-includes/classes.php,612,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149040-...,,,
4,CWE-211,Externally-Generated Error Message Containing ...,CWE-211,CVE-2006-3389,wp-includes/classes.php,639,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149040-...,,,
5,CWE-94,Improper Control of Generation of Code ('Code ...,CWE-94,CVE-2007-0233,wp-trackback.php,65,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/231383-...,,,
6,CWE-79,Improper Neutralization of Input During Web Pa...,CWE-79,CVE-2006-1263,wp-register.php,30,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149033-...,,,
7,CWE-79,Improper Neutralization of Input During Web Pa...,CWE-79,CVE-2006-1263,wp-register.php,67,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149033-...,,,
8,CWE-79,Improper Neutralization of Input During Web Pa...,CWE-79,CVE-2006-1263,wp-register.php,69,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149033-...,,,
9,CWE-79,Improper Neutralization of Input During Web Pa...,CWE-79,CVE-2006-1263,wp-register.php,110,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149033-...,,,


In [22]:
import pandas as pd

# Load the Parquet file into a DataFrame
parquet_file_path = "enriched_output.parquet"  # Update this with your Parquet file path
df = pd.read_parquet(parquet_file_path, engine='pyarrow')

# Display the first few rows of the DataFrame
print("First 5 rows of the DataFrame:")
display(df.head())

# Display the DataFrame's information (columns, data types, etc.)
print("DataFrame Info:")
df.info()

# Describe the numeric columns (e.g., for understanding distributions)
print("Descriptive Statistics for Numeric Columns:")
display(df.describe())

# Check for missing values in the DataFrame
print("Missing Values in Each Column:")
display(df.isnull().sum())

# Display the unique values in key columns (e.g., CVEs, CVSS scores)
print("Unique Values in CVE Column:")
display(df['cve'].unique())

print("Unique Values in CVSS 3.1 Scores:")
display(df['cvss_31_score'].unique())

print("Unique Values in CVSS v4 Severity:")
display(df['cvss_v4_severity'].unique())

First 5 rows of the DataFrame:


Unnamed: 0,rule_id,message,cwe,cve,artifact_location,start_line,end_line,start_column,end_column,author,language,application,sarif_file,cvss_version,base_score,base_severity
0,CWE-79,Improper Neutralization of Input During Web Pa...,CWE-79,CVE-2006-6808,wp-admin/templates.php,109,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149031-...,2.0,6.8,
1,CWE-94,Improper Control of Generation of Code ('Code ...,CWE-94,CVE-2007-0233,wp-trackback.php,34,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/231383-...,2.0,7.5,
2,CWE-211,Externally-Generated Error Message Containing ...,CWE-211,CVE-2006-3389,wp-includes/classes.php,605,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149040-...,2.0,5.0,
3,CWE-211,Externally-Generated Error Message Containing ...,CWE-211,CVE-2006-3389,wp-includes/classes.php,612,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149040-...,2.0,5.0,
4,CWE-211,Externally-Generated Error Message Containing ...,CWE-211,CVE-2006-3389,wp-includes/classes.php,639,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149040-...,2.0,5.0,


DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   rule_id            41 non-null     object 
 1   message            41 non-null     object 
 2   cwe                41 non-null     object 
 3   cve                33 non-null     object 
 4   artifact_location  41 non-null     object 
 5   start_line         41 non-null     int64  
 6   end_line           1 non-null      float64
 7   start_column       0 non-null      object 
 8   end_column         0 non-null      object 
 9   author             41 non-null     object 
 10  language           41 non-null     object 
 11  application        41 non-null     object 
 12  sarif_file         41 non-null     object 
 13  cvss_version       33 non-null     object 
 14  base_score         33 non-null     float64
 15  base_severity      0 non-null      object 
dtypes: float64(2

Unnamed: 0,start_line,end_line,base_score
count,41.0,1.0,33.0
mean,411.780488,24.0,4.933333
std,694.703084,,0.982556
min,0.0,24.0,4.3
25%,30.0,24.0,4.3
50%,110.0,24.0,4.3
75%,605.0,24.0,5.0
max,2487.0,24.0,7.5


Missing Values in Each Column:


rule_id               0
message               0
cwe                   0
cve                   8
artifact_location     0
start_line            0
end_line             40
start_column         41
end_column           41
author                0
language              0
application           0
sarif_file            0
cvss_version          8
base_score            8
base_severity        41
dtype: int64

Unique Values in CVE Column:


array(['CVE-2006-6808', 'CVE-2007-0233', 'CVE-2006-3389', 'CVE-2006-1263',
       'CVE-2007-0540', 'CVE-2007-1894', 'CVE-2007-5106', 'CVE-2007-1622',
       'CVE-2013-7233', 'CVE-2007-0541', None, 'CVE-2006-1796',
       'CVE-2007-0109', 'CVE-2007-1049', 'CVE-2007-4893', 'CVE-2007-5105',
       'CVE-2006-0985'], dtype=object)

Unique Values in CVSS 3.1 Scores:


KeyError: 'cvss_31_score'

In [8]:
import requests
import time
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import logging
from typing import Dict, Any

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

API_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"

def get_nvd_data(cve_id: str) -> Dict[str, Any]:
    """Fetches CVSS 3.1 score and CVSS v4 metrics and severity from NVD API.

    Args:
        cve_id: The CVE ID to query.

    Returns:
        A dictionary with CVSS 3.1 score, CVSS v4 metrics, and CVSS v4 severity.
    """
    url = f"{API_URL}?cveId={cve_id}"
    retries = 3  # Number of retries in case of rate limit errors
    backoff_time = 30  # Initial backoff time in seconds for 403 errors

    for attempt in range(retries):
        try:
            response = requests.get(url)
            response.raise_for_status()
            data = response.json()

            # Initialize return values
            cvss_31_score = None
            cvss_v4_metrics = None
            cvss_v4_severity = None

            if "vulnerabilities" in data and len(data["vulnerabilities"]) > 0:
                vuln_data = data["vulnerabilities"][0]
                metrics = vuln_data.get("metrics", {})

                # Extract CVSS 3.1 score
                if "cvssMetricV31" in metrics:
                    cvss_31_score = metrics["cvssMetricV31"][0]["cvssData"]["baseScore"]

                # Extract CVSS v4 metrics and severity
                if "cvssMetricV40" in metrics:
                    cvss_v4_metrics = metrics["cvssMetricV40"][0]["cvssData"]["vectorString"]
                    cvss_v4_severity = metrics["cvssMetricV40"][0]["cvssData"]["baseSeverity"]

            return {
                "cvss_31_score": cvss_31_score,
                "cvss_v4_metrics": cvss_v4_metrics,
                "cvss_v4_severity": cvss_v4_severity,
            }

        except requests.exceptions.HTTPError as e:
            if response.status_code == 403:
                logging.error(f"Rate limit exceeded for CVE {cve_id}. Waiting {backoff_time} seconds before retrying...")
                time.sleep(backoff_time)
                backoff_time *= 2  # Exponential backoff
            else:
                logging.error(f"Failed to fetch data for CVE {cve_id}: {e}")
                return {
                    "cvss_31_score": None,
                    "cvss_v4_metrics": None,
                    "cvss_v4_severity": None,
                }
        except requests.exceptions.RequestException as e:
            logging.error(f"Failed to fetch data for CVE {cve_id}: {e}")
            return {
                "cvss_31_score": None,
                "cvss_v4_metrics": None,
                "cvss_v4_severity": None,
            }

    logging.error(f"Exceeded maximum retries for CVE {cve_id}.")
    return {
        "cvss_31_score": None,
        "cvss_v4_metrics": None,
        "cvss_v4_severity": None,
    }


def enrich_parquet_with_nvd_data(parquet_file: str, output_file: str) -> None:
    """Enriches Parquet file with CVSS 3.1 score, CVSS v4 metrics, and severity from NVD.

    Args:
        parquet_file: Path to the input Parquet file.
        output_file: Path to the output Parquet file with enriched data.
    """
    # Load the existing Parquet file
    df = pd.read_parquet(parquet_file, engine='pyarrow')

    # Initialize lists to store new columns
    cvss_31_scores = []
    cvss_v4_metrics_list = []
    cvss_v4_severities = []

    # Iterate through each row to enrich data
    for index, row in df.iterrows():
        cve_id = row.get("cve")
        if cve_id:
            logging.info(f"Processing CVE {cve_id} (index {index})")
            nvd_data = get_nvd_data(cve_id)
            cvss_31_scores.append(nvd_data["cvss_31_score"])
            cvss_v4_metrics_list.append(nvd_data["cvss_v4_metrics"])
            cvss_v4_severities.append(nvd_data["cvss_v4_severity"])

            # Sleep for 6 seconds to avoid rate limiting
            time.sleep(6)
        else:
            logging.warning(f"No CVE ID found for index {index}")
            cvss_31_scores.append(None)
            cvss_v4_metrics_list.append(None)
            cvss_v4_severities.append(None)

    # Add the new columns to the DataFrame
    df["cvss_31_score"] = cvss_31_scores
    df["cvss_v4_metrics"] = cvss_v4_metrics_list
    df["cvss_v4_severity"] = cvss_v4_severities

    # Save the enriched DataFrame back to a Parquet file
    table = pa.Table.from_pandas(df)
    pq.write_table(table, output_file)

    logging.info(f"Enriched data saved to {output_file}")

# Example usage
enrich_parquet_with_nvd_data("output.parquet", "enriched_output.parquet")


2024-08-26 23:21:34,932 - INFO - Processing CVE CVE-2006-6808 (index 0)
2024-08-26 23:21:41,437 - INFO - Processing CVE CVE-2007-0233 (index 1)
2024-08-26 23:21:47,816 - INFO - Processing CVE CVE-2006-3389 (index 2)
2024-08-26 23:21:54,195 - INFO - Processing CVE CVE-2006-3389 (index 3)
2024-08-26 23:22:00,570 - INFO - Processing CVE CVE-2006-3389 (index 4)
2024-08-26 23:22:07,067 - INFO - Processing CVE CVE-2007-0233 (index 5)
2024-08-26 23:22:13,449 - INFO - Processing CVE CVE-2006-1263 (index 6)
2024-08-26 23:22:19,859 - INFO - Processing CVE CVE-2006-1263 (index 7)
2024-08-26 23:22:26,234 - INFO - Processing CVE CVE-2006-1263 (index 8)
2024-08-26 23:22:32,606 - INFO - Processing CVE CVE-2006-1263 (index 9)
2024-08-26 23:22:38,979 - INFO - Processing CVE CVE-2006-1263 (index 10)
2024-08-26 23:22:45,353 - INFO - Processing CVE CVE-2006-1263 (index 11)
2024-08-26 23:22:51,761 - INFO - Processing CVE CVE-2006-1263 (index 12)
2024-08-26 23:22:58,426 - INFO - Processing CVE CVE-2006-1263

In [15]:
import requests
import time
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import logging
from typing import Dict, Any

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

API_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"

def get_nvd_data(cve_id: str) -> Dict[str, Any]:
    """Fetches available CVSS scores and severities from NVD API.

    Args:
        cve_id: The CVE ID to query.

    Returns:
        A dictionary with CVSS version, score, and severity.
    """
    url = f"{API_URL}?cveId={cve_id}"
    retries = 3  # Number of retries in case of rate limit errors
    backoff_time = 30  # Initial backoff time in seconds for 403 errors

    for attempt in range(retries):
        try:
            response = requests.get(url)
            response.raise_for_status()
            data = response.json()

            # Log the full response for debugging
            logging.debug(f"Response for {cve_id}: {data}")

            # Initialize return values
            cvss_version = None
            cvss_score = None
            cvss_severity = None

            if "vulnerabilities" in data and len(data["vulnerabilities"]) > 0:
                vuln_data = data["vulnerabilities"][0]
                metrics = vuln_data.get("metrics", {})

                # Check for CVSS v4.0 data
                if "cvssMetricV40" in metrics:
                    cvss_version = "4.0"
                    cvss_score = metrics["cvssMetricV40"][0]["cvssData"]["baseScore"]
                    cvss_severity = metrics["cvssMetricV40"][0]["cvssData"]["baseSeverity"]

                # Check for CVSS v3.1 data
                elif "cvssMetricV31" in metrics:
                    cvss_version = "3.1"
                    cvss_score = metrics["cvssMetricV31"][0]["cvssData"]["baseScore"]
                    cvss_severity = metrics["cvssMetricV31"][0]["cvssData"]["baseSeverity"]

                # Check for CVSS v3.0 data
                elif "cvssMetricV30" in metrics:
                    cvss_version = "3.0"
                    cvss_score = metrics["cvssMetricV30"][0]["cvssData"]["baseScore"]
                    cvss_severity = metrics["cvssMetricV30"][0]["cvssData"]["baseSeverity"]

                # Check for CVSS v2.0 data
                elif "cvssMetricV2" in metrics:
                    cvss_version = "2.0"
                    cvss_score = metrics["cvssMetricV2"][0]["cvssData"]["baseScore"]
                    cvss_severity = metrics["cvssMetricV2"][0]["baseSeverity"]

            return {
                "cvss_version": cvss_version,
                "cvss_score": cvss_score,
                "cvss_severity": cvss_severity,
            }

        except requests.exceptions.HTTPError as e:
            if response.status_code == 403:
                logging.error(f"Rate limit exceeded for CVE {cve_id}. Waiting {backoff_time} seconds before retrying...")
                time.sleep(backoff_time)
                backoff_time *= 2  # Exponential backoff
            else:
                logging.error(f"Failed to fetch data for CVE {cve_id}: {e}")
                return {
                    "cvss_version": None,
                    "cvss_score": None,
                    "cvss_severity": None,
                }
        except requests.exceptions.RequestException as e:
            logging.error(f"Failed to fetch data for CVE {cve_id}: {e}")
            return {
                "cvss_version": None,
                "cvss_score": None,
                "cvss_severity": None,
            }

    logging.error(f"Exceeded maximum retries for CVE {cve_id}.")
    return {
        "cvss_version": None,
        "cvss_score": None,
        "cvss_severity": None,
    }


def enrich_parquet_with_nvd_data(parquet_file: str, output_file: str) -> None:
    """Enriches Parquet file with CVSS version, score, and severity from NVD.

    Args:
        parquet_file: Path to the input Parquet file.
        output_file: Path to the output Parquet file with enriched data.
    """
    # Load the existing Parquet file
    df = pd.read_parquet(parquet_file, engine='pyarrow')

    # Initialize lists to store new columns
    cvss_versions = []
    cvss_scores = []
    cvss_severities = []

    # Iterate through each row to enrich data
    for index, row in df.iterrows():
        cve_id = row.get("cve")
        if cve_id:
            logging.info(f"Processing CVE {cve_id} (index {index})")
            nvd_data = get_nvd_data(cve_id)
            cvss_versions.append(nvd_data["cvss_version"])
            cvss_scores.append(nvd_data["cvss_score"])
            cvss_severities.append(nvd_data["cvss_severity"])

            # Print out the results
            print(f"CVE: {cve_id}")
            print(f"CVSS Version: {nvd_data['cvss_version']}")
            print(f"CVSS Score: {nvd_data['cvss_score']}")
            print(f"CVSS Severity: {nvd_data['cvss_severity']}")
            print("-" * 40)

            # Sleep for 6 seconds to avoid rate limiting
            time.sleep(6)
        else:
            logging.warning(f"No CVE ID found for index {index}")
            cvss_versions.append(None)
            cvss_scores.append(None)
            cvss_severities.append(None)

    # Add the new columns to the DataFrame
    df["cvss_version"] = cvss_versions
    df["cvss_score"] = cvss_scores
    df["cvss_severity"] = cvss_severities

    # Save the enriched DataFrame back to a Parquet file
    table = pa.Table.from_pandas(df)
    pq.write_table(table, output_file)

    logging.info(f"Enriched data saved to {output_file}")

# Example usage
enrich_parquet_with_nvd_data("output.parquet", "enriched_output.parquet")


2024-08-26 23:46:29,488 - INFO - Processing CVE CVE-2006-6808 (index 0)


CVE: CVE-2006-6808
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:46:31,882 - INFO - Processing CVE CVE-2007-0233 (index 1)


CVE: CVE-2007-0233
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:46:34,255 - INFO - Processing CVE CVE-2006-3389 (index 2)


CVE: CVE-2006-3389
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:46:36,631 - INFO - Processing CVE CVE-2006-3389 (index 3)


CVE: CVE-2006-3389
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:46:39,011 - INFO - Processing CVE CVE-2006-3389 (index 4)


CVE: CVE-2006-3389
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:46:41,402 - INFO - Processing CVE CVE-2007-0233 (index 5)
2024-08-26 23:46:41,758 - ERROR - Rate limit exceeded for CVE CVE-2007-0233. Waiting 30 seconds before retrying...


CVE: CVE-2007-0233
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:47:14,144 - INFO - Processing CVE CVE-2006-1263 (index 6)


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:47:16,528 - INFO - Processing CVE CVE-2006-1263 (index 7)


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


KeyboardInterrupt: 

In [21]:
import pandas as pd

# Replace 'output.parquet' with the path to your Parquet file
df = pd.read_parquet('enriched_output.parquet', engine='pyarrow')

# Display the first few rows of the DataFrame
df.head()

Unnamed: 0,rule_id,message,cwe,cve,artifact_location,start_line,end_line,start_column,end_column,author,language,application,sarif_file,cvss_version,base_score,base_severity
0,CWE-79,Improper Neutralization of Input During Web Pa...,CWE-79,CVE-2006-6808,wp-admin/templates.php,109,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149031-...,2.0,6.8,
1,CWE-94,Improper Control of Generation of Code ('Code ...,CWE-94,CVE-2007-0233,wp-trackback.php,34,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/231383-...,2.0,7.5,
2,CWE-211,Externally-Generated Error Message Containing ...,CWE-211,CVE-2006-3389,wp-includes/classes.php,605,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149040-...,2.0,5.0,
3,CWE-211,Externally-Generated Error Message Containing ...,CWE-211,CVE-2006-3389,wp-includes/classes.php,612,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149040-...,2.0,5.0,
4,CWE-211,Externally-Generated Error Message Containing ...,CWE-211,CVE-2006-3389,wp-includes/classes.php,639,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149040-...,2.0,5.0,


In [16]:
import requests
import time
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import logging
from typing import Dict, Any

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

API_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"

def get_nvd_data(cve_id: str) -> Dict[str, Any]:
    """Fetches available CVSS scores and severities from NVD API.

    Args:
        cve_id: The CVE ID to query.

    Returns:
        A dictionary with CVSS version, score, and severity.
    """
    url = f"{API_URL}?cveId={cve_id}"
    retries = 3  # Number of retries in case of rate limit errors
    backoff_time = 30  # Initial backoff time in seconds for 403 errors

    for attempt in range(retries):
        try:
            response = requests.get(url)
            response.raise_for_status()
            data = response.json()

            # Log the full response for debugging
            logging.debug(f"Response for {cve_id}: {data}")

            # Initialize return values
            cvss_version = None
            cvss_score = None
            cvss_severity = None

            if "vulnerabilities" in data and len(data["vulnerabilities"]) > 0:
                vuln_data = data["vulnerabilities"][0]
                metrics = vuln_data.get("metrics", {})

                # Check for CVSS v4.0 data
                if "cvssMetricV40" in metrics:
                    cvss_version = "4.0"
                    cvss_score = metrics["cvssMetricV40"][0]["cvssData"]["baseScore"]
                    cvss_severity = metrics["cvssMetricV40"][0]["cvssData"]["baseSeverity"]

                # Check for CVSS v3.1 data
                elif "cvssMetricV31" in metrics:
                    cvss_version = "3.1"
                    cvss_score = metrics["cvssMetricV31"][0]["cvssData"]["baseScore"]
                    cvss_severity = metrics["cvssMetricV31"][0]["cvssData"]["baseSeverity"]

                # Check for CVSS v3.0 data
                elif "cvssMetricV30" in metrics:
                    cvss_version = "3.0"
                    cvss_score = metrics["cvssMetricV30"][0]["cvssData"]["baseScore"]
                    cvss_severity = metrics["cvssMetricV30"][0]["cvssData"]["baseSeverity"]

                # Check for CVSS v2.0 data
                elif "cvssMetricV2" in metrics:
                    cvss_version = "2.0"
                    cvss_score = metrics["cvssMetricV2"][0]["cvssData"]["baseScore"]
                    cvss_severity = metrics["cvssMetricV2"][0]["baseSeverity"]

            return {
                "cvss_version": cvss_version,
                "cvss_score": cvss_score,
                "cvss_severity": cvss_severity,
            }

        except requests.exceptions.HTTPError as e:
            if response.status_code == 403:
                logging.error(f"Rate limit exceeded for CVE {cve_id}. Waiting {backoff_time} seconds before retrying...")
                time.sleep(backoff_time)
                backoff_time *= 2  # Exponential backoff
            else:
                logging.error(f"Failed to fetch data for CVE {cve_id}: {e}")
                return {
                    "cvss_version": None,
                    "cvss_score": None,
                    "cvss_severity": None,
                }
        except requests.exceptions.RequestException as e:
            logging.error(f"Failed to fetch data for CVE {cve_id}: {e}")
            return {
                "cvss_version": None,
                "cvss_score": None,
                "cvss_severity": None,
            }

    logging.error(f"Exceeded maximum retries for CVE {cve_id}.")
    return {
        "cvss_version": None,
        "cvss_score": None,
        "cvss_severity": None,
    }


def enrich_parquet_with_nvd_data(parquet_file: str, output_file: str) -> None:
    """Enriches Parquet file with CVSS version, score, and severity from NVD.

    Args:
        parquet_file: Path to the input Parquet file.
        output_file: Path to the output Parquet file with enriched data.
    """
    # Load the existing Parquet file
    df = pd.read_parquet(parquet_file, engine='pyarrow')

    # Initialize lists to store new columns
    cvss_versions = []
    cvss_scores = []
    cvss_severities = []

    # Iterate through each row to enrich data
    for index, row in df.iterrows():
        cve_id = row.get("cve")
        if cve_id:
            logging.info(f"Processing CVE {cve_id} (index {index})")
            nvd_data = get_nvd_data(cve_id)
            cvss_versions.append(nvd_data["cvss_version"])
            cvss_scores.append(nvd_data["cvss_score"])
            cvss_severities.append(nvd_data["cvss_severity"])

            # Print out the results
            print(f"CVE: {cve_id}")
            print(f"CVSS Version: {nvd_data['cvss_version']}")
            print(f"CVSS Score: {nvd_data['cvss_score']}")
            print(f"CVSS Severity: {nvd_data['cvss_severity']}")
            print("-" * 40)

            # Sleep for 6 seconds to avoid rate limiting
            time.sleep(6)
        else:
            logging.warning(f"No CVE ID found for index {index}")
            cvss_versions.append(None)
            cvss_scores.append(None)
            cvss_severities.append(None)

    # Add the new columns to the DataFrame
    df["cvss_version"] = cvss_versions
    df["cvss_score"] = cvss_scores
    df["cvss_severity"] = cvss_severities

    # Save the enriched DataFrame back to a Parquet file
    table = pa.Table.from_pandas(df)
    pq.write_table(table, output_file)

    logging.info(f"Enriched data saved to {output_file}")

# Example usage
enrich_parquet_with_nvd_data("output.parquet", "enriched_output.parquet")


2024-08-26 23:57:05,460 - INFO - Processing CVE CVE-2006-6808 (index 0)


CVE: CVE-2006-6808
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:57:11,856 - INFO - Processing CVE CVE-2007-0233 (index 1)


CVE: CVE-2007-0233
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:57:18,261 - INFO - Processing CVE CVE-2006-3389 (index 2)


CVE: CVE-2006-3389
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:57:24,719 - INFO - Processing CVE CVE-2006-3389 (index 3)


CVE: CVE-2006-3389
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:57:31,111 - INFO - Processing CVE CVE-2006-3389 (index 4)


CVE: CVE-2006-3389
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:57:37,507 - INFO - Processing CVE CVE-2007-0233 (index 5)


CVE: CVE-2007-0233
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:57:43,895 - INFO - Processing CVE CVE-2006-1263 (index 6)


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:57:50,274 - INFO - Processing CVE CVE-2006-1263 (index 7)


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:57:56,647 - INFO - Processing CVE CVE-2006-1263 (index 8)


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:58:03,031 - INFO - Processing CVE CVE-2006-1263 (index 9)


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:58:09,410 - INFO - Processing CVE CVE-2006-1263 (index 10)


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:58:15,783 - INFO - Processing CVE CVE-2006-1263 (index 11)


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:58:22,157 - INFO - Processing CVE CVE-2006-1263 (index 12)


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:58:28,536 - INFO - Processing CVE CVE-2006-1263 (index 13)


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:58:34,912 - INFO - Processing CVE CVE-2007-0540 (index 14)


CVE: CVE-2007-0540
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:58:41,281 - INFO - Processing CVE CVE-2007-1894 (index 15)


CVE: CVE-2007-1894
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:58:47,669 - INFO - Processing CVE CVE-2007-0540 (index 16)


CVE: CVE-2007-0540
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:58:54,065 - INFO - Processing CVE CVE-2007-5106 (index 17)


CVE: CVE-2007-5106
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:59:00,453 - INFO - Processing CVE CVE-2007-5106 (index 18)


CVE: CVE-2007-5106
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:59:06,833 - INFO - Processing CVE CVE-2007-1622 (index 19)


CVE: CVE-2007-1622
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:59:13,228 - INFO - Processing CVE CVE-2013-7233 (index 20)


CVE: CVE-2013-7233
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:59:19,649 - INFO - Processing CVE CVE-2007-0541 (index 21)


CVE: CVE-2007-0541
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:59:26,058 - INFO - Processing CVE CVE-2006-1796 (index 29)


CVE: CVE-2006-1796
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:59:32,485 - INFO - Processing CVE CVE-2007-0541 (index 30)


CVE: CVE-2007-0541
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:59:38,857 - INFO - Processing CVE CVE-2007-0109 (index 31)


CVE: CVE-2007-0109
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:59:45,239 - INFO - Processing CVE CVE-2007-1049 (index 32)


CVE: CVE-2007-1049
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:59:51,619 - INFO - Processing CVE CVE-2007-0109 (index 34)


CVE: CVE-2007-0109
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-26 23:59:58,007 - INFO - Processing CVE CVE-2007-0109 (index 35)


CVE: CVE-2007-0109
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:00:04,380 - INFO - Processing CVE CVE-2007-4893 (index 36)


CVE: CVE-2007-4893
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:00:10,778 - INFO - Processing CVE CVE-2007-4893 (index 37)


CVE: CVE-2007-4893
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:00:17,163 - INFO - Processing CVE CVE-2007-5105 (index 38)


CVE: CVE-2007-5105
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:00:23,628 - INFO - Processing CVE CVE-2007-5105 (index 39)


CVE: CVE-2007-5105
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:00:30,022 - INFO - Processing CVE CVE-2006-0985 (index 40)


CVE: CVE-2006-0985
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:00:36,406 - INFO - Enriched data saved to enriched_output.parquet


In [None]:
import requests
import time
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import logging
from typing import Dict, Any

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

API_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"

def get_nvd_data(cve_id: str) -> Dict[str, Any]:
    """Fetches available CVSS scores and severities from NVD API.

    Args:
        cve_id: The CVE ID to query.

    Returns:
        A dictionary with CVSS version, score, and severity.
    """
    url = f"{API_URL}?cveId={cve_id}"
    retries = 3  # Number of retries in case of rate limit errors
    backoff_time = 30  # Initial backoff time in seconds for 403 errors

    for attempt in range(retries):
        try:
            response = requests.get(url)
            response.raise_for_status()
            data = response.json()

            # Initialize return values
            cvss_version = None
            cvss_score = None
            cvss_severity = None

            if "vulnerabilities" in data and len(data["vulnerabilities"]) > 0:
                vuln_data = data["vulnerabilities"][0]
                metrics = vuln_data.get("metrics", {})

                # Check for CVSS v4.0 data
                if "cvssMetricV40" in metrics:
                    cvss_version = "4.0"
                    cvss_score = metrics["cvssMetricV40"][0]["cvssData"]["baseScore"]
                    cvss_severity = metrics["cvssMetricV40"][0]["cvssData"]["baseSeverity"]

                # Check for CVSS v3.1 data
                elif "cvssMetricV31" in metrics:
                    cvss_version = "3.1"
                    cvss_score = metrics["cvssMetricV31"][0]["cvssData"]["baseScore"]
                    cvss_severity = metrics["cvssMetricV31"][0]["cvssData"]["baseSeverity"]

                # Check for CVSS v3.0 data
                elif "cvssMetricV30" in metrics:
                    cvss_version = "3.0"
                    cvss_score = metrics["cvssMetricV30"][0]["cvssData"]["baseScore"]
                    cvss_severity = metrics["cvssMetricV30"][0]["cvssData"]["baseSeverity"]

                # Check for CVSS v2.0 data
                elif "cvssMetricV2" in metrics:
                    cvss_version = "2.0"
                    cvss_score = metrics["cvssMetricV2"][0]["cvssData"]["baseScore"]
                    cvss_severity = metrics["cvssMetricV2"][0]["baseSeverity"]

            return {
                "cvss_version": cvss_version,
                "cvss_score": cvss_score,
                "cvss_severity": cvss_severity,
            }

        except requests.exceptions.HTTPError as e:
            if response.status_code == 403:
                logging.error(f"Rate limit exceeded for CVE {cve_id}. Waiting {backoff_time} seconds before retrying...")
                time.sleep(backoff_time)
                backoff_time *= 2  # Exponential backoff
            else:
                logging.error(f"Failed to fetch data for CVE {cve_id}: {e}")
                return {
                    "cvss_version": None,
                    "cvss_score": None,
                    "cvss_severity": None,
                }
        except requests.exceptions.RequestException as e:
            logging.error(f"Failed to fetch data for CVE {cve_id}: {e}")
            return {
                "cvss_version": None,
                "cvss_score": None,
                "cvss_severity": None,
            }

    logging.error(f"Exceeded maximum retries for CVE {cve_id}.")
    return {
        "cvss_version": None,
        "cvss_score": None,
        "cvss_severity": None,
    }


def enrich_parquet_with_nvd_data(parquet_file: str, output_file: str) -> None:
    """Enriches Parquet file with CVSS version, score, and severity from NVD.

    Args:
        parquet_file: Path to the input Parquet file.
        output_file: Path to the output Parquet file with enriched data.
    """
    # Load the existing Parquet file
    df = pd.read_parquet(parquet_file, engine='pyarrow')

    # Initialize lists to store new columns
    cvss_versions = []
    cvss_scores = []
    cvss_severities = []

    # Iterate through each row to enrich data
    for index, row in df.iterrows():
        cve_id = row.get("cve")
        if cve_id:
            logging.info(f"Processing CVE {cve_id} (index {index})")
            nvd_data = get_nvd_data(cve_id)
            cvss_versions.append(nvd_data["cvss_version"])
            cvss_scores.append(nvd_data["cvss_score"])
            cvss_severities.append(nvd_data["cvss_severity"])

            # Print out the results
            print(f"CVE: {cve_id}")
            print(f"CVSS Version: {nvd_data['cvss_version']}")
            print(f"CVSS Score: {nvd_data['cvss_score']}")
            print(f"CVSS Severity: {nvd_data['cvss_severity']}")
            print("-" * 40)

            # Sleep for 2 seconds to avoid rate limiting
            time.sleep(2)
        else:
            logging.warning(f"No CVE ID found for index {index}")
            cvss_versions.append(None)
            cvss_scores.append(None)
            cvss_severities.append(None)

    # Add the new columns to the DataFrame
    df["cvss_version"] = cvss_versions
    df["cvss_score"] = cvss_scores
    df["cvss_severity"] = cvss_severities

    # Save the enriched DataFrame back to a Parquet file
    table = pa.Table.from_pandas(df)
    pq.write_table(table, output_file)

    logging.info(f"Enriched data saved to {output_file}")

# Example usage
enrich_parquet_with_nvd_data("output.parquet", "enriched_output.parquet")

2024-08-27 00:05:19,883 - INFO - Processing CVE CVE-2006-6808 (index 0)


CVE: CVE-2006-6808
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:05:22,291 - INFO - Processing CVE CVE-2007-0233 (index 1)


CVE: CVE-2007-0233
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


In [18]:
import requests

def get_latest_cvss_score(cve_id):
    # Define the NVD API URL
    nvd_api_url = f"https://services.nvd.nist.gov/rest/json/cves/2.0?cveId={cve_id}"
    
    # Make the request to the NVD API
    response = requests.get(nvd_api_url)
    
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data for {cve_id}. HTTP Status Code: {response.status_code}")
    
    # Parse the JSON response
    data = response.json()
    
    # Check if vulnerabilities data is present
    if not data.get("vulnerabilities"):
        raise Exception(f"No vulnerabilities data found for {cve_id}")
    
    vulnerability_data = data["vulnerabilities"][0]["cve"]["metrics"]
    
    # Check for available CVSS metrics and choose the highest version available
    if "cvssMetricV31" in vulnerability_data:
        latest_cvss = vulnerability_data["cvssMetricV31"][0]["cvssData"]
    elif "cvssMetricV30" in vulnerability_data:
        latest_cvss = vulnerability_data["cvssMetricV30"][0]["cvssData"]
    elif "cvssMetricV2" in vulnerability_data:
        latest_cvss = vulnerability_data["cvssMetricV2"][0]["cvssData"]
    else:
        raise Exception(f"No CVSS data available for {cve_id}")
    
    # Extract the base score and severity
    base_score = latest_cvss["baseScore"]
    base_severity = latest_cvss["baseSeverity"]

    return {
        "cve_id": cve_id,
        "cvss_version": latest_cvss["version"],
        "base_score": base_score,
        "base_severity": base_severity
    }

# Example usage
if __name__ == "__main__":
    cve_id = "CVE-2019-1010218"
    result = get_latest_cvss_score(cve_id)
    print(result)


{'cve_id': 'CVE-2019-1010218', 'cvss_version': '3.1', 'base_score': 7.5, 'base_severity': 'HIGH'}


In [None]:
import requests
import time
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import logging
from typing import Dict, Any

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

API_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"

def get_nvd_data(cve_id: str) -> Dict[str, Any]:
    """Fetches available CVSS scores and severities from NVD API.

    Args:
        cve_id: The CVE ID to query.

    Returns:
        A dictionary with CVSS version, score, and severity.
    """
    url = f"{API_URL}?cveId={cve_id}"
    retries = 3  # Number of retries in case of rate limit errors
    backoff_time = 30  # Initial backoff time in seconds for 403 errors

    for attempt in range(retries):
        try:
            response = requests.get(url)
            response.raise_for_status()
            data = response.json()

            if "vulnerabilities" not in data or not data["vulnerabilities"]:
                raise Exception(f"No vulnerabilities data found for {cve_id}")

            vulnerability_data = data["vulnerabilities"][0]["cve"]["metrics"]

            # Check for available CVSS metrics and choose the highest version available
            if "cvssMetricV31" in vulnerability_data:
                latest_cvss = vulnerability_data["cvssMetricV31"][0]["cvssData"]
            elif "cvssMetricV30" in vulnerability_data:
                latest_cvss = vulnerability_data["cvssMetricV30"][0]["cvssData"]
            elif "cvssMetricV2" in vulnerability_data:
                latest_cvss = vulnerability_data["cvssMetricV2"][0]["cvssData"]
            else:
                raise Exception(f"No CVSS data available for {cve_id}")

            # Extract the base score and severity
            base_score = latest_cvss["baseScore"]
            base_severity = latest_cvss["baseSeverity"]

            return {
                "cvss_version": latest_cvss["version"],
                "base_score": base_score,
                "base_severity": base_severity
            }

        except requests.exceptions.HTTPError as e:
            if response.status_code == 403:
                logging.error(f"Rate limit exceeded for CVE {cve_id}. Waiting {backoff_time} seconds before retrying...")
                time.sleep(backoff_time)
                backoff_time *= 2  # Exponential backoff
            else:
                logging.error(f"Failed to fetch data for CVE {cve_id}: {e}")
                return {
                    "cvss_version": None,
                    "base_score": None,
                    "base_severity": None
                }
        except requests.exceptions.RequestException as e:
            logging.error(f"Failed to fetch data for CVE {cve_id}: {e}")
            return {
                "cvss_version": None,
                "base_score": None,
                "base_severity": None
            }
        except Exception as e:
            logging.error(f"Failed to process data for CVE {cve_id}: {e}")
            return {
                "cvss_version": None,
                "base_score": None,
                "base_severity": None
            }

    logging.error(f"Exceeded maximum retries for CVE {cve_id}.")
    return {
        "cvss_version": None,
        "base_score": None,
        "base_severity": None
    }

def enrich_parquet_with_nvd_data(parquet_file: str, output_file: str) -> None:
    """Enriches Parquet file with CVSS version, score, and severity from NVD.

    Args:
        parquet_file: Path to the input Parquet file.
        output_file: Path to the output Parquet file with enriched data.
    """
    # Load the existing Parquet file
    df = pd.read_parquet(parquet_file, engine='pyarrow')

    # Initialize lists to store new columns
    cvss_versions = []
    base_scores = []
    base_severities = []

    # Iterate through each row to enrich data
    for index, row in df.iterrows():
        cve_id = row.get("cve")
        if cve_id:
            logging.info(f"Processing CVE {cve_id} (index {index})")
            nvd_data = get_nvd_data(cve_id)
            cvss_versions.append(nvd_data["cvss_version"])
            base_scores.append(nvd_data["base_score"])
            base_severities.append(nvd_data["base_severity"])

            # Print out the results
            print(f"CVE: {cve_id}")
            print(f"CVSS Version: {nvd_data['cvss_version']}")
            print(f"CVSS Score: {nvd_data['base_score']}")
            print(f"CVSS Severity: {nvd_data['base_severity']}")
            print("-" * 40)

            # Sleep for 6 seconds to avoid rate limiting
            time.sleep(6)
        else:
            logging.warning(f"No CVE ID found for index {index}")
            cvss_versions.append(None)
            base_scores.append(None)
            base_severities.append(None)

    # Add the new columns to the DataFrame
    df["cvss_version"] = cvss_versions
    df["base_score"] = base_scores
    df["base_severity"] = base_severities

    # Save the enriched DataFrame back to a Parquet file
    table = pa.Table.from_pandas(df)
    pq.write_table(table, output_file)

    logging.info(f"Enriched data saved to {output_file}")

# Example usage
enrich_parquet_with_nvd_data("output.parquet", "enriched_output.parquet")


2024-08-27 00:13:43,344 - INFO - Processing CVE CVE-2006-6808 (index 0)
2024-08-27 00:13:43,723 - ERROR - Failed to process data for CVE CVE-2006-6808: 'baseSeverity'


CVE: CVE-2006-6808
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:13:45,727 - INFO - Processing CVE CVE-2007-0233 (index 1)
2024-08-27 00:13:46,099 - ERROR - Failed to process data for CVE CVE-2007-0233: 'baseSeverity'


CVE: CVE-2007-0233
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:13:48,108 - INFO - Processing CVE CVE-2006-3389 (index 2)
2024-08-27 00:13:48,488 - ERROR - Failed to process data for CVE CVE-2006-3389: 'baseSeverity'


CVE: CVE-2006-3389
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:13:50,497 - INFO - Processing CVE CVE-2006-3389 (index 3)
2024-08-27 00:13:50,874 - ERROR - Failed to process data for CVE CVE-2006-3389: 'baseSeverity'


CVE: CVE-2006-3389
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:13:52,883 - INFO - Processing CVE CVE-2006-3389 (index 4)
2024-08-27 00:13:53,252 - ERROR - Failed to process data for CVE CVE-2006-3389: 'baseSeverity'


CVE: CVE-2006-3389
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:13:55,258 - INFO - Processing CVE CVE-2007-0233 (index 5)
2024-08-27 00:13:55,628 - ERROR - Failed to process data for CVE CVE-2007-0233: 'baseSeverity'


CVE: CVE-2007-0233
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:13:57,636 - INFO - Processing CVE CVE-2006-1263 (index 6)
2024-08-27 00:13:58,028 - ERROR - Failed to process data for CVE CVE-2006-1263: 'baseSeverity'


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:14:00,036 - INFO - Processing CVE CVE-2006-1263 (index 7)
2024-08-27 00:14:00,418 - ERROR - Failed to process data for CVE CVE-2006-1263: 'baseSeverity'


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:14:02,426 - INFO - Processing CVE CVE-2006-1263 (index 8)
2024-08-27 00:14:02,792 - ERROR - Rate limit exceeded for CVE CVE-2006-1263. Waiting 30 seconds before retrying...
2024-08-27 00:14:33,169 - ERROR - Failed to process data for CVE CVE-2006-1263: 'baseSeverity'


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:14:35,177 - INFO - Processing CVE CVE-2006-1263 (index 9)
2024-08-27 00:14:35,570 - ERROR - Failed to process data for CVE CVE-2006-1263: 'baseSeverity'


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:14:37,576 - INFO - Processing CVE CVE-2006-1263 (index 10)
2024-08-27 00:14:37,945 - ERROR - Failed to process data for CVE CVE-2006-1263: 'baseSeverity'


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:14:39,951 - INFO - Processing CVE CVE-2006-1263 (index 11)
2024-08-27 00:14:40,313 - ERROR - Rate limit exceeded for CVE CVE-2006-1263. Waiting 30 seconds before retrying...
2024-08-27 00:15:10,700 - ERROR - Failed to process data for CVE CVE-2006-1263: 'baseSeverity'


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:15:12,706 - INFO - Processing CVE CVE-2006-1263 (index 12)
2024-08-27 00:15:13,083 - ERROR - Failed to process data for CVE CVE-2006-1263: 'baseSeverity'


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:15:15,091 - INFO - Processing CVE CVE-2006-1263 (index 13)
2024-08-27 00:15:15,464 - ERROR - Failed to process data for CVE CVE-2006-1263: 'baseSeverity'


CVE: CVE-2006-1263
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:15:17,471 - INFO - Processing CVE CVE-2007-0540 (index 14)
2024-08-27 00:15:17,887 - ERROR - Failed to process data for CVE CVE-2007-0540: 'baseSeverity'


CVE: CVE-2007-0540
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:15:19,891 - INFO - Processing CVE CVE-2007-1894 (index 15)
2024-08-27 00:15:20,289 - ERROR - Failed to process data for CVE CVE-2007-1894: 'baseSeverity'


CVE: CVE-2007-1894
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:15:22,295 - INFO - Processing CVE CVE-2007-0540 (index 16)
2024-08-27 00:15:22,656 - ERROR - Failed to process data for CVE CVE-2007-0540: 'baseSeverity'


CVE: CVE-2007-0540
CVSS Version: None
CVSS Score: None
CVSS Severity: None
----------------------------------------


2024-08-27 00:15:24,666 - INFO - Processing CVE CVE-2007-5106 (index 17)
2024-08-27 00:15:25,029 - ERROR - Rate limit exceeded for CVE CVE-2007-5106. Waiting 30 seconds before retrying...


In [25]:
import pandas as pd

# Set Pandas options to display all columns and set display width
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', 1000)        # Set display width to a large number

# Load the Parquet file into a DataFrame
parquet_file_path = "enriched_output.parquet"  # Update this with your Parquet file path
df = pd.read_parquet(parquet_file_path, engine='pyarrow')

# Display the first few rows of the DataFrame
print("First 5 rows of the DataFrame:")
display(df.head())

# Display the DataFrame's information (columns, data types, etc.)
print("DataFrame Info:")
df.info()

# Describe the numeric columns (e.g., for understanding distributions)
print("Descriptive Statistics for Numeric Columns:")
display(df.describe())

# Check for missing values in the DataFrame
print("Missing Values in Each Column:")
display(df.isnull().sum())

# Display the unique values in key columns (e.g., CVEs, CVSS scores)
print("Unique Values in CVE Column:")
display(df['cve'].unique())

print("Unique Values in CVSS 3.1 Scores:")
display(df['cvss_31_score'].unique())

print("Unique Values in CVSS v4 Severity:")
display(df['cvss_v4_severity'].unique())


First 5 rows of the DataFrame:


Unnamed: 0,rule_id,message,cwe,cve,artifact_location,start_line,end_line,start_column,end_column,author,language,application,sarif_file,cvss_version,base_score,base_severity
0,CWE-79,Improper Neutralization of Input During Web Pa...,CWE-79,CVE-2006-6808,wp-admin/templates.php,109,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149031-...,2.0,6.8,MEDIUM
1,CWE-94,Improper Control of Generation of Code ('Code ...,CWE-94,CVE-2007-0233,wp-trackback.php,34,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/231383-...,2.0,7.5,HIGH
2,CWE-211,Externally-Generated Error Message Containing ...,CWE-211,CVE-2006-3389,wp-includes/classes.php,605,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149040-...,2.0,5.0,MEDIUM
3,CWE-211,Externally-Generated Error Message Containing ...,CWE-211,CVE-2006-3389,wp-includes/classes.php,612,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149040-...,2.0,5.0,MEDIUM
4,CWE-211,Externally-Generated Error Message Containing ...,CWE-211,CVE-2006-3389,wp-includes/classes.php,639,,,,WordPress Foundation,php,cpe:2.3:a:wordpress:wordpress:2.0:-:*:*:*:*:*:*,sard_dataset/2015-03-31-wordpress-v2-0/149040-...,2.0,5.0,MEDIUM


DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   rule_id            41 non-null     object 
 1   message            41 non-null     object 
 2   cwe                41 non-null     object 
 3   cve                33 non-null     object 
 4   artifact_location  41 non-null     object 
 5   start_line         41 non-null     int64  
 6   end_line           1 non-null      float64
 7   start_column       0 non-null      object 
 8   end_column         0 non-null      object 
 9   author             41 non-null     object 
 10  language           41 non-null     object 
 11  application        41 non-null     object 
 12  sarif_file         41 non-null     object 
 13  cvss_version       33 non-null     object 
 14  base_score         33 non-null     float64
 15  base_severity      33 non-null     object 
dtypes: float64(2

Unnamed: 0,start_line,end_line,base_score
count,41.0,1.0,33.0
mean,411.780488,24.0,4.933333
std,694.703084,,0.982556
min,0.0,24.0,4.3
25%,30.0,24.0,4.3
50%,110.0,24.0,4.3
75%,605.0,24.0,5.0
max,2487.0,24.0,7.5


Missing Values in Each Column:


rule_id               0
message               0
cwe                   0
cve                   8
artifact_location     0
start_line            0
end_line             40
start_column         41
end_column           41
author                0
language              0
application           0
sarif_file            0
cvss_version          8
base_score            8
base_severity         8
dtype: int64

Unique Values in CVE Column:


array(['CVE-2006-6808', 'CVE-2007-0233', 'CVE-2006-3389', 'CVE-2006-1263',
       'CVE-2007-0540', 'CVE-2007-1894', 'CVE-2007-5106', 'CVE-2007-1622',
       'CVE-2013-7233', 'CVE-2007-0541', None, 'CVE-2006-1796',
       'CVE-2007-0109', 'CVE-2007-1049', 'CVE-2007-4893', 'CVE-2007-5105',
       'CVE-2006-0985'], dtype=object)

Unique Values in CVSS 3.1 Scores:


KeyError: 'cvss_31_score'

In [24]:
import requests
import time
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import logging
from typing import Dict, Any

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

API_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"

def get_nvd_data(cve_id: str) -> Dict[str, Any]:
    """Fetches available CVSS scores and severities from NVD API.

    Args:
        cve_id: The CVE ID to query.

    Returns:
        A dictionary with CVSS version, score, and severity.
    """
    url = f"{API_URL}?cveId={cve_id}"
    retries = 3  # Number of retries in case of rate limit errors
    backoff_time = 30  # Initial backoff time in seconds for 403 errors

    for attempt in range(retries):
        try:
            response = requests.get(url)
            response.raise_for_status()
            data = response.json()

            if "vulnerabilities" not in data or not data["vulnerabilities"]:
                raise Exception(f"No vulnerabilities data found for {cve_id}")

            vulnerability_data = data["vulnerabilities"][0]["cve"]["metrics"]

            # Initialize return values
            latest_cvss = None
            base_severity = None

            # Check for available CVSS metrics and choose the highest version available
            if "cvssMetricV31" in vulnerability_data:
                latest_cvss = vulnerability_data["cvssMetricV31"][0]["cvssData"]
                base_severity = vulnerability_data["cvssMetricV31"][0]["cvssData"].get("baseSeverity")
            elif "cvssMetricV30" in vulnerability_data:
                latest_cvss = vulnerability_data["cvssMetricV30"][0]["cvssData"]
                base_severity = vulnerability_data["cvssMetricV30"][0]["cvssData"].get("baseSeverity")
            elif "cvssMetricV2" in vulnerability_data:
                latest_cvss = vulnerability_data["cvssMetricV2"][0]["cvssData"]
                base_severity = vulnerability_data["cvssMetricV2"][0].get("baseSeverity")
            else:
                raise Exception(f"No CVSS data available for {cve_id}")

            # Extract the base score
            base_score = latest_cvss["baseScore"]

            return {
                "cvss_version": latest_cvss["version"],
                "base_score": base_score,
                "base_severity": base_severity
            }

        except requests.exceptions.HTTPError as e:
            if response.status_code == 403:
                logging.error(f"Rate limit exceeded for CVE {cve_id}. Waiting {backoff_time} seconds before retrying...")
                time.sleep(backoff_time)
                backoff_time *= 2  # Exponential backoff
            else:
                logging.error(f"Failed to fetch data for CVE {cve_id}: {e}")
                return {
                    "cvss_version": None,
                    "base_score": None,
                    "base_severity": None
                }
        except requests.exceptions.RequestException as e:
            logging.error(f"Failed to fetch data for CVE {cve_id}: {e}")
            return {
                "cvss_version": None,
                "base_score": None,
                "base_severity": None
            }
        except Exception as e:
            logging.error(f"Failed to process data for CVE {cve_id}: {e}")
            return {
                "cvss_version": None,
                "base_score": None,
                "base_severity": None
            }

    logging.error(f"Exceeded maximum retries for CVE {cve_id}.")
    return {
        "cvss_version": None,
        "base_score": None,
        "base_severity": None
    }

def enrich_parquet_with_nvd_data(parquet_file: str, output_file: str) -> None:
    """Enriches Parquet file with CVSS version, score, and severity from NVD.

    Args:
        parquet_file: Path to the input Parquet file.
        output_file: Path to the output Parquet file with enriched data.
    """
    # Load the existing Parquet file
    df = pd.read_parquet(parquet_file, engine='pyarrow')

    # Initialize lists to store new columns
    cvss_versions = []
    base_scores = []
    base_severities = []

    # Iterate through each row to enrich data
    for index, row in df.iterrows():
        cve_id = row.get("cve")
        if cve_id:
            logging.info(f"Processing CVE {cve_id} (index {index})")
            nvd_data = get_nvd_data(cve_id)
            cvss_versions.append(nvd_data["cvss_version"])
            base_scores.append(nvd_data["base_score"])
            base_severities.append(nvd_data["base_severity"])

            # Print out the results
            print(f"CVE: {cve_id}")
            print(f"CVSS Version: {nvd_data['cvss_version']}")
            print(f"CVSS Score: {nvd_data['base_score']}")
            print(f"CVSS Severity: {nvd_data['base_severity']}")
            print("-" * 40)

            # Sleep for 6 seconds to avoid rate limiting
            time.sleep(6)
        else:
            logging.warning(f"No CVE ID found for index {index}")
            cvss_versions.append(None)
            base_scores.append(None)
            base_severities.append(None)

    # Add the new columns to the DataFrame
    df["cvss_version"] = cvss_versions
    df["base_score"] = base_scores
    df["base_severity"] = base_severities

    # Save the enriched DataFrame back to a Parquet file
    table = pa.Table.from_pandas(df)
    pq.write_table(table, output_file)

    logging.info(f"Enriched data saved to {output_file}")

# Example usage
enrich_parquet_with_nvd_data("output.parquet", "enriched_output.parquet")


2024-08-27 00:28:13,205 - INFO - Processing CVE CVE-2006-6808 (index 0)


CVE: CVE-2006-6808
CVSS Version: 2.0
CVSS Score: 6.8
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:28:19,607 - INFO - Processing CVE CVE-2007-0233 (index 1)


CVE: CVE-2007-0233
CVSS Version: 2.0
CVSS Score: 7.5
CVSS Severity: HIGH
----------------------------------------


2024-08-27 00:28:25,999 - INFO - Processing CVE CVE-2006-3389 (index 2)


CVE: CVE-2006-3389
CVSS Version: 2.0
CVSS Score: 5.0
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:28:32,395 - INFO - Processing CVE CVE-2006-3389 (index 3)


CVE: CVE-2006-3389
CVSS Version: 2.0
CVSS Score: 5.0
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:28:38,780 - INFO - Processing CVE CVE-2006-3389 (index 4)


CVE: CVE-2006-3389
CVSS Version: 2.0
CVSS Score: 5.0
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:28:45,166 - INFO - Processing CVE CVE-2007-0233 (index 5)


CVE: CVE-2007-0233
CVSS Version: 2.0
CVSS Score: 7.5
CVSS Severity: HIGH
----------------------------------------


2024-08-27 00:28:51,544 - INFO - Processing CVE CVE-2006-1263 (index 6)


CVE: CVE-2006-1263
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:28:57,919 - INFO - Processing CVE CVE-2006-1263 (index 7)


CVE: CVE-2006-1263
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:29:04,299 - INFO - Processing CVE CVE-2006-1263 (index 8)


CVE: CVE-2006-1263
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:29:10,684 - INFO - Processing CVE CVE-2006-1263 (index 9)


CVE: CVE-2006-1263
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:29:17,066 - INFO - Processing CVE CVE-2006-1263 (index 10)


CVE: CVE-2006-1263
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:29:23,449 - INFO - Processing CVE CVE-2006-1263 (index 11)


CVE: CVE-2006-1263
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:29:29,835 - INFO - Processing CVE CVE-2006-1263 (index 12)


CVE: CVE-2006-1263
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:29:36,252 - INFO - Processing CVE CVE-2006-1263 (index 13)


CVE: CVE-2006-1263
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:29:42,636 - INFO - Processing CVE CVE-2007-0540 (index 14)


CVE: CVE-2007-0540
CVSS Version: 2.0
CVSS Score: 5.0
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:29:49,005 - INFO - Processing CVE CVE-2007-1894 (index 15)


CVE: CVE-2007-1894
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:29:55,398 - INFO - Processing CVE CVE-2007-0540 (index 16)


CVE: CVE-2007-0540
CVSS Version: 2.0
CVSS Score: 5.0
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:30:01,776 - INFO - Processing CVE CVE-2007-5106 (index 17)


CVE: CVE-2007-5106
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:30:08,233 - INFO - Processing CVE CVE-2007-5106 (index 18)


CVE: CVE-2007-5106
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:30:14,617 - INFO - Processing CVE CVE-2007-1622 (index 19)


CVE: CVE-2007-1622
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:30:20,999 - INFO - Processing CVE CVE-2013-7233 (index 20)


CVE: CVE-2013-7233
CVSS Version: 2.0
CVSS Score: 6.8
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:30:27,384 - INFO - Processing CVE CVE-2007-0541 (index 21)


CVE: CVE-2007-0541
CVSS Version: 2.0
CVSS Score: 5.0
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:30:33,765 - INFO - Processing CVE CVE-2006-1796 (index 29)


CVE: CVE-2006-1796
CVSS Version: 2.0
CVSS Score: 6.8
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:30:40,166 - INFO - Processing CVE CVE-2007-0541 (index 30)


CVE: CVE-2007-0541
CVSS Version: 2.0
CVSS Score: 5.0
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:30:46,540 - INFO - Processing CVE CVE-2007-0109 (index 31)


CVE: CVE-2007-0109
CVSS Version: 2.0
CVSS Score: 5.0
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:30:52,919 - INFO - Processing CVE CVE-2007-1049 (index 32)


CVE: CVE-2007-1049
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:30:59,341 - INFO - Processing CVE CVE-2007-0109 (index 34)


CVE: CVE-2007-0109
CVSS Version: 2.0
CVSS Score: 5.0
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:31:05,740 - INFO - Processing CVE CVE-2007-0109 (index 35)


CVE: CVE-2007-0109
CVSS Version: 2.0
CVSS Score: 5.0
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:31:12,116 - INFO - Processing CVE CVE-2007-4893 (index 36)


CVE: CVE-2007-4893
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:31:18,516 - INFO - Processing CVE CVE-2007-4893 (index 37)


CVE: CVE-2007-4893
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:31:25,018 - INFO - Processing CVE CVE-2007-5105 (index 38)


CVE: CVE-2007-5105
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:31:31,396 - INFO - Processing CVE CVE-2007-5105 (index 39)


CVE: CVE-2007-5105
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:31:37,779 - INFO - Processing CVE CVE-2006-0985 (index 40)


CVE: CVE-2006-0985
CVSS Version: 2.0
CVSS Score: 4.3
CVSS Severity: MEDIUM
----------------------------------------


2024-08-27 00:31:44,154 - INFO - Enriched data saved to enriched_output.parquet
