In [222]:
from openai import OpenAI
import pandas as pd
import json

In [223]:
API_KEY = "API_KEY"
JOB_ID = "JOB_ID"

temperature = 0.1

In [224]:
client = OpenAI(api_key=API_KEY)
model_name_pre_object = client.fine_tuning.jobs.retrieve(JOB_ID)
model_name = model_name_pre_object.fine_tuned_model

# How to use pre-tuned model?

In [225]:
prompt = \
    "You are tasked with analyzing a C++, SQL, Java code language which focus in cloud security identify security vulnerabilities. "\
    "The input of the model will be stricly code text." \
    "Your response / output should be in the following json format:\n\n"\
    "title\n-----------\n$title_goes_here\n-----------\n\n "\
    "isFixNecessary\n-----------\n$boolean result (true / false)\n-----------\n\n "\
    "reasoning\n-----------\n#reasoning_goes_here\n-----------\n\n "\
    "ammended code\n-----------\n$ammended_code_goes_here\n-----------\n```\n\n "\
    "Follow this format strictly and order has to be the same. Do not say anything else. This is the code: "

system_message = \
  "Given a piece of C++, SQL, or Java code focused on cloud security, analyze the code for security vulnerabilities." \
  "If a fix is necessary, provide reasoning and suggest an amended code in the same programming language."\
  "Use json as output."

sample_code = \
  "#include<iostream>\nusing namespace std;\nint main() {\n    int password;\n    cout << \"Enter your password: \";\n    cin >> password;\n    if(password == 1234) {\n        cout << \"Access granted.\";\n    } else {\n        cout << \"Access denied.\";\n    }\n    return 0;\n}"

In [226]:
# THIS WILL INCUR COST
sample_message = [
{
  "role": "system",
  "content": prompt + system_message,
},
{
    "role": "user",
    "content": sample_code,
}
]
response = client.chat.completions.create(
    model=model_name,
    messages= sample_message,
    max_tokens=2000,
    response_format={ "type": "json_object" },
)

output = response.choices[0].message.content
print(sample_message)

[{'role': 'system', 'content': 'You are tasked with analyzing a C++, SQL, Java code language which focus in cloud security identify security vulnerabilities. The input of the model will be stricly code text.Your response / output should be in the following json format:\n\ntitle\n-----------\n$title_goes_here\n-----------\n\n isFixNecessary\n-----------\n$boolean result (true / false)\n-----------\n\n reasoning\n-----------\n#reasoning_goes_here\n-----------\n\n ammended code\n-----------\n$ammended_code_goes_here\n-----------\n```\n\n Follow this format strictly and order has to be the same. Do not say anything else. This is the code: Given a piece of C++, SQL, or Java code focused on cloud security, analyze the code for security vulnerabilities.If a fix is necessary, provide reasoning and suggest an amended code in the same programming language.Use json as output.'}, {'role': 'user', 'content': '#include<iostream>\nusing namespace std;\nint main() {\n    int password;\n    cout << "En

# How to parse output to json?

In [227]:
def parse_output_to_json(input_text, json_output, system_message):
    # Parse the JSON string output to a dictionary
    response_details = json.loads(json_output)

    # Create lists for prompts and responses
    prompts = [input_text]
    responses = [response_details]

    # Create a DataFrame from the prompts and responses
    df = pd.DataFrame({
        'prompt': prompts,
        'response': responses
    })

    print('There are ' + str(len(df)) + ' successfully-generated examples.')

    # Initialize list to store training examples
    outputs = []

    # Create training examples in the format required for GPT-3.5 fine-tuning
    for _, row in df.iterrows():
        output = {
            "messages": [
                {"role": "system", "content": system_message.strip()},
                {"role": "user", "content": row['prompt']},
                {"role": "assistant", "content": row['response']}
            ]
        }
        outputs.append(output)

    # Save training examples to a .jsonl file in a prettier format
    with open('output.jsonl', 'w') as f:
        for output in outputs:
            # Use indent=4 for pretty-printing JSON
            f.write(json.dumps(output, indent=4) + '\n')

In [228]:
parse_output_to_json(sample_code, output, system_message)

There are 1 successfully-generated examples.
