Change Date Format

In [12]:
import json
from datetime import datetime

def convert_to_full_long_date(date_str, input_format='%d/%m/%Y', output_format='%d %B %Y'):
    """Reformat a date string, by default from ``dd/mm/YYYY`` to a long date.

    Args:
        date_str: Date text to parse, e.g. ``'05/03/2023'``.
        input_format: ``strptime`` pattern that ``date_str`` follows.
        output_format: ``strftime`` pattern used to render the result.

    Returns:
        The reformatted date, e.g. ``'05 March 2023'`` with the default
        patterns (``%B`` is the locale's full month name).

    Raises:
        ValueError: If ``date_str`` does not match ``input_format``.
    """
    date_obj = datetime.strptime(date_str, input_format)
    return date_obj.strftime(output_format)

# Read the raw CVE records. UTF-8 is requested explicitly so the read does not
# depend on the platform's default encoding (the later cell reads the output
# file as UTF-8 as well).
with open('combined_cve_data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Update the 'Published Date' in each record
for record in data:
    record['Published Date'] = convert_to_full_long_date(record['Published Date'])

# Convert back to JSON
updated_json = json.dumps(data, indent=4)

# Write the updated records to a separate file so the input stays untouched;
# the question-answer generation cell reads 'combined_cve_data_dated.json'.
with open('combined_cve_data_dated.json', 'w', encoding='utf-8') as file:
    file.write(updated_json)


Generate the question-answer pairs

In [None]:
# DATA IS TRUNCATED FROM THE START FOR SECOND ITERATION

import json
import os
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv
import time

# Load environment variables
load_dotenv()

def read_json_file(file_path):
    """Return the parsed JSON content of the UTF-8 text file at ``file_path``."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def clean_description(item):
    """Strip line breaks and surrounding whitespace from ``item["Description"]``.

    The record is modified in place and the same object is returned.
    """
    text = item["Description"]
    # Line-break characters are removed outright (not replaced by a space).
    for line_break in ('\n', '\r'):
        text = text.replace(line_break, '')
    item["Description"] = text.strip()
    return item

def clean_response(response):
    """Drop non-printable characters from ``response``, keeping tab, LF and CR."""
    kept_chars = []
    for char in response:
        # Tab / newline / carriage return are not "printable" but are valid
        # whitespace inside JSON text, so they are explicitly allowed through.
        if char.isprintable() or char in '\t\n\r':
            kept_chars.append(char)
    return ''.join(kept_chars)

# Lazily created OpenAI client shared by every request made from this cell.
_openai_client = None


def _get_openai_client():
    """Create the OpenAI client on first use and reuse it on later calls."""
    global _openai_client
    if _openai_client is None:
        _openai_client = OpenAI(api_key=os.getenv("API_KEY"))
    return _openai_client


def get_questions_answers(item, client=None, model="gpt-4-1106-preview"):
    """Ask the chat model for question-answer pairs about one CVE record.

    Args:
        item: JSON-serialisable CVE record; it is embedded in the prompt via
            ``json.dumps``.
        client: Optional object exposing ``chat.completions.create``. When
            omitted, a single shared ``OpenAI`` client (keyed by the
            ``API_KEY`` environment variable) is created once and reused
            instead of building a new client for every record.
        model: Name of the chat model to query.

    Returns:
        The model's response text with non-printable characters removed, or
        ``None`` if the request or response handling raised an exception
        (the error is printed).
    """
    if client is None:
        client = _get_openai_client()

    prompt = (
        f"The following is the information of a CVE:\n\n{json.dumps(item, indent=2)}\n\n"
        "Please generate question and answer pairs for this information. "
        "Supplement the provided information with your own knowledge. "
        "Add code examples for this vulnerability in the answers if possible. "
        "Please also discuss possible attack scenarios of this vulnerability. "
        "Don't mention about the cut-off date of your own training data in the questions and answers. "
        "Dont mention in the questions and answers that a content for this vulnerability was provided to you. "
        "Always mention the CVE id in your questions.\n\n"
        "The response should be in JSON format. "
        "Each set of question-answer pairs should be an object inside an array, "
        "with key-value pairs called 'question' and 'answer'. "
        "The parent key should be called 'data'."
    )

    try:
        completion = client.chat.completions.create(
            model=model,
            messages=[{'role': 'user', 'content': prompt}],
            response_format={"type": "json_object"}
        )
        response = completion.choices[0].message.content.strip()
        clean_resp = clean_response(response)
        print(f"Tokens used in GPT-4-Turbo Response: {completion.usage.total_tokens}")
        return clean_resp
    except Exception as e:
        # Deliberate best-effort: one failed record must not stop the batch.
        print(f"Error in GPT-4-Turbo API call: {e}")
        return None

def append_to_excel(qa_pairs, filename):
    """Add question-answer rows to the workbook at ``filename``.

    ``qa_pairs`` is turned into a two-column frame ('Question', 'Answer').
    If the file already exists its rows are read back and the new rows are
    placed after them; the whole sheet is then written out without an index.
    """
    new_rows = pd.DataFrame(qa_pairs, columns=['Question', 'Answer'])
    if not os.path.isfile(filename):
        # First write: nothing to merge with.
        new_rows.to_excel(filename, index=False)
        return
    existing_rows = pd.read_excel(filename)
    combined = pd.concat([existing_rows, new_rows], ignore_index=True)
    combined.to_excel(filename, index=False)

def _extract_qa_pairs(json_response):
    """Parse a model response into a list of (question, answer) tuples.

    Entries under the top-level 'data' key that are not objects carrying both
    'question' and 'answer' are reported and skipped. Raises
    ``json.JSONDecodeError`` when the response is not valid JSON.
    """
    response_data = json.loads(json_response)
    entries = response_data.get("data", []) if isinstance(response_data, dict) else []
    if not isinstance(entries, list):
        print(f"Unexpected 'data' value in response: {entries}")
        return []

    qa_pairs = []
    for entry in entries:
        if isinstance(entry, dict) and 'question' in entry and 'answer' in entry:
            qa_pairs.append((entry['question'], entry['answer']))
        else:
            print(f"Skipping malformed question-answer entry: {entry}")
    return qa_pairs


def main(json_file_path, excel_file_path, pause_seconds=5):
    """Generate question-answer pairs for every CVE record and save them.

    Args:
        json_file_path: Path of the JSON file holding the list of CVE records.
        excel_file_path: Workbook that receives the generated pairs.
        pause_seconds: Delay between consecutive records, in seconds.

    Each record is cleaned, sent to the model, and its pairs are appended to
    the workbook straight away, so pairs already written survive a later
    failure. A response that is invalid JSON or contains malformed entries is
    reported and skipped rather than aborting the run.
    """
    data = read_json_file(json_file_path)

    for item in data:
        cleaned_item = clean_description(item)
        json_response = get_questions_answers(cleaned_item)
        if json_response:
            try:
                qa_pairs = _extract_qa_pairs(json_response)
            except json.JSONDecodeError as e:
                print(f"JSON parsing error: {e}")
                print(f"Invalid JSON response: {json_response}")
            else:
                # Skip the read-and-rewrite of the workbook when there is
                # nothing new to add.
                if qa_pairs:
                    append_to_excel(qa_pairs, excel_file_path)
        print(f"Taking a break for {pause_seconds} seconds")
        time.sleep(pause_seconds)

if __name__ == "__main__":
    # Input: the date-normalised CVE records; output: the Q&A workbook.
    json_file_path, excel_file_path = 'combined_cve_data_dated.json', 'output.xlsx'
    main(json_file_path=json_file_path, excel_file_path=excel_file_path)


Tokens used in GPT-4-Turbo Response: 833
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 976
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 955
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1033
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 878
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 958
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 932
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 876
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1070
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 886
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 905
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 937
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 857
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 879
Taking a break for 5 seconds
Tokens used in GPT

Tokens used in GPT-4-Turbo Response: 892
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 842
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 941
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 899
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 788
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1009
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 898
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 752
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 914
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 925
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 763
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 920
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 870
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 891
Taking a break for 5 seconds
Tokens used in GPT-

Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1016
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 846
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1019
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 996
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 973
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1047
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1053
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 871
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1303
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 972
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1035
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1095
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1009
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 965
Taking a bre

Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 974
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 877
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 929
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 931
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1120
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 905
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1226
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1142
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 838
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1188
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 834
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 896
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1162
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 986
Taking a break 

Tokens used in GPT-4-Turbo Response: 1044
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1142
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 999
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 956
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1049
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 817
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 943
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 953
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 868
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 930
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 800
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 890
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1017
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 971
Taking a break for 5 seconds
Tokens used in G

Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 911
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 838
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 918
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 736
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1119
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 819
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 985
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 793
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 898
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 950
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 778
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 879
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 704
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 796
Taking a break for 

Tokens used in GPT-4-Turbo Response: 1150
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 930
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1130
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 864
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1191
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1259
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 992
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 966
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1065
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 906
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 924
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 836
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 881
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 892
Taking a break for 5 seconds
Tokens used in 

Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 957
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 814
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 965
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 921
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 765
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1078
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 744
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 753
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 923
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 752
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1014
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1005
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1100
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1032
Taking a break 

Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 691
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 738
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 661
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 776
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 729
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 928
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 978
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 755
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1006
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 665
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 791
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 906
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 912
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1627
Taking a break for

Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 874
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 944
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1068
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 824
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 852
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 960
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 881
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 833
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 913
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 957
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 894
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 853
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 807
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 761
Taking a break for 

Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 772
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 758
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 760
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 780
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 814
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 973
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 914
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 809
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 769
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 612
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 817
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 767
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 801
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 786
Taking a break for 5

Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 989
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1005
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1008
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 970
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1008
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 967
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 872
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1191
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 762
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 825
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 842
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 874
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 929
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1076
Taking a break 

Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 890
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1342
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 766
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1071
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1025
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 759
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1108
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 941
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1007
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1204
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 931
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 1020
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 695
Taking a break for 5 seconds
Tokens used in GPT-4-Turbo Response: 692
Taking a brea