In [4]:
!pip install openai==0.28.0


Collecting openai==0.28.0
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.52.2
    Uninstalling openai-1.52.2:
      Successfully uninstalled openai-1.52.2
Successfully installed openai-0.28.0


In [3]:
import pandas as pd
import openai
import time

# Set up your OpenAI API key
openai.api_key = ''

# Load the dataset
cve_file_path = '/content/cve_dataset_with_cvss_vector.tsv'
cve_df = pd.read_csv(cve_file_path, sep='\t')

# Limit to first 200 entries
cve_df = cve_df.head(200)

# Define a function to get CVSS prediction from GPT-4
def calculate_cvss_v3(entry_description, index):
    # Create the prompt based on the CVE description
    prompt = f"""
    Analyze the following CVE description and calculate the CVSS v3.1 Base Score. Determine the
    values for each base metric: AV, AC, PR, UI, S, C, I, and A. Summarize each metric’s value and
    provide the final CVSS v3.1 vector string.

    Valid options for each metric are as follows:
    - Attack Vector (AV): Network (N), Adjacent (A), Local (L), Physical (P)
    - Attack Complexity (AC): Low (L), High (H)
    - Privileges Required (PR): None (N), Low (L), High (H)
    - User Interaction (UI): None (N), Required (R)
    - Scope (S): Unchanged (U), Changed (C)
    - Confidentiality (C): None (N), Low (L), High (H)
    - Integrity (I): None (N), Low (L), High (H)
    - Availability (A): None (N), Low (L), High (H)

    Summarize each metric’s value and provide the final CVSS v3.1 vector string. Ensure the final line
    of your response contains only the CVSS v3 Vector String in the following format:

    Example format: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H

    CVEDescription: {entry_description}
    """

    # Call GPT-4 API
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are an expert in cybersecurity and CVSS scoring."},
                {"role": "user", "content": prompt}
            ]
        )
        # Extract the response content and isolate the final CVSS vector string
        cvss_vector = response.choices[0].message['content'].strip().splitlines()[-1]
        print(f"Processed entry {index + 1}/200")
        return cvss_vector
    except Exception as e:
        print(f"Error with GPT-4 API for entry {index + 1}: {e}")
        return None

# List to store CVSS vectors
cvss_vectors = []

# Loop over each entry and calculate the CVSS vector
for index, row in cve_df.iterrows():
    cvss_vector = calculate_cvss_v3(row['Description'], index)
    cvss_vectors.append(cvss_vector)
    time.sleep(1)  # Add delay to prevent rate-limiting issues

# Add the CVSS vectors to the DataFrame
cve_df['CVSS_Vector'] = cvss_vectors

# Save the updated dataset with the CVSS vectors
output_file_path = '/content/gpt4_cvss_predictions.tsv'
cve_df.to_csv(output_file_path, sep='\t', index=False)

print("CVSS predictions saved to:", output_file_path)


Processed entry 1/200
Processed entry 2/200
Processed entry 3/200
Processed entry 4/200
Processed entry 5/200
Processed entry 6/200
Processed entry 7/200
Processed entry 8/200
Processed entry 9/200
Processed entry 10/200
Processed entry 11/200
Processed entry 12/200
Processed entry 13/200
Processed entry 14/200
Processed entry 15/200
Processed entry 16/200
Processed entry 17/200
Processed entry 18/200
Processed entry 19/200
Processed entry 20/200
Processed entry 21/200
Processed entry 22/200
Processed entry 23/200
Processed entry 24/200
Processed entry 25/200
Processed entry 26/200
Processed entry 27/200
Processed entry 28/200
Processed entry 29/200
Processed entry 30/200
Processed entry 31/200
Processed entry 32/200
Processed entry 33/200
Processed entry 34/200
Processed entry 35/200
Processed entry 36/200
Processed entry 37/200
Processed entry 38/200
Processed entry 39/200
Processed entry 40/200
Processed entry 41/200
Processed entry 42/200
Processed entry 43/200
Processed entry 44/2

KeyboardInterrupt: 