# Notebook to run GPT, Gemini, LLAMA models

### Install All Required Packages
Uncomment and run the cell below to install the required packages if they are not already installed.

In [None]:
# Install required packages
# %pip install openai replicate python-dotenv pandas numpy matplotlib google.genai transformers torch bitsandbytes llamaapi accelerate

# Notebook to run GPT, Gemini, LLAMA models

In [None]:
from openai import OpenAI
from google import genai
from google.genai import types
import pandas as pd
import numpy as np
import time
import re
import matplotlib.pyplot as plt
from llamaapi import LlamaAPI

from runningLLAMA import llama_local_generate




## Setup all APIs

In [None]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment,
# so the API keys read via os.getenv / os.environ.get in the following cells are available.
load_dotenv()

In [None]:
# Llama API: the hosted Llama models are reached through the OpenAI client
# pointed at a different base URL; the key comes from LLAMA_API_KEY.
llama_client = OpenAI(
    base_url="https://api.llmapi.com",
    api_key=os.getenv("LLAMA_API_KEY"),
)

In [None]:
# ChatGPT client; the key is read from the OPEN_AI_API_KEY environment variable.
openai_client = OpenAI(
    api_key=os.environ.get("OPEN_AI_API_KEY"),
)

In [None]:
# Gemini client; the key is read from the GOOGLE_API_KEY environment variable.
gemini_client = genai.Client(
    api_key=os.environ.get("GOOGLE_API_KEY"),
)


## Prediction Params & Method

In [None]:
# Sampling parameters shared by all backends, chosen for more deterministic output.
# temperature: passed as-is to the GPT and Gemini calls; both Llama paths use 0.01 instead.
temperature = 0
# top_p: forwarded unchanged to every backend.
top_p = 0
# seed: forwarded to the GPT, hosted-Llama and local-Llama calls; it is not part of the Gemini config.
seed = 42
# max_tokens: upper bound on generated tokens (sent to Gemini as max_output_tokens).
max_tokens = 2048

In [None]:
# System prompt used for every request. The GPT and hosted-Llama calls send it as a
# 'system' message; the Gemini call prepends it to the user prompt instead.
sys_prompt = 'You are a cybersecurity expert specializing in cyberthreat intelligence.'

In [None]:
# Maps the model names accepted by get_single_prediction (keys) to the model
# identifiers sent to the provider's API (values).
# 'llama-local' names are not listed: that branch calls llama_local_generate
# without looking anything up here.
model_mapping = {
    'api-llama3.1-8b': 'llama3.1-8b',
    'api-llama3.3-70b': 'llama3.3-70b',
    'gemini-2.0-flash': 'gemini-2.0-flash', 
    'gpt-4o-mini': 'gpt-4o-mini',
}

In [None]:
def _chat_completion(client, model, question, sampling_temperature):
    """Run one system+user chat completion on ``client`` and return the reply text."""
    response = client.chat.completions.create(
        model=model,
        messages=[
            {'role': 'system', 'content': sys_prompt},
            {'role': 'user', 'content': question}
        ],
        temperature=sampling_temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        seed=seed
    )
    return response.choices[0].message.content


def _gemini_generate(model, question, max_retries=5):
    """Query Gemini once per attempt, retrying only on rate-limit errors.

    Failures are reported in-band as an ``"Error: ..."`` string instead of
    being raised, so a single bad row does not abort a long evaluation run.
    """
    # The system prompt is prepended to the user prompt rather than sent separately.
    contents = [
        types.Content(role="user", parts=[types.Part(text=sys_prompt + " " + question)])
    ]

    # Same threshold for every harm category.
    safety_settings = [
        types.SafetySetting(category=category, threshold=types.HarmBlockThreshold.BLOCK_ONLY_HIGH)
        for category in (
            types.HarmCategory.HARM_CATEGORY_HARASSMENT,
            types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        )
    ]

    generation_config = types.GenerateContentConfig(
        temperature=temperature,
        top_p=top_p,
        max_output_tokens=max_tokens,
        safety_settings=safety_settings
    )

    # Every request is issued inside the retry loop. A request made outside it
    # would double the API usage and let a rate-limit error escape the handling below.
    for attempt in range(1, max_retries + 1):
        try:
            response = gemini_client.models.generate_content(
                model=model,
                contents=contents,
                config=generation_config,
            )
            output = response.candidates[0].content.parts[0].text
        except Exception as e:
            if "429 RESOURCE_EXHAUSTED" not in str(e):
                # Any other failure (including a response without usable text) is not retried.
                return f"Error: {str(e)}"
            print(f"Rate limit hit, retrying in 2 seconds... (attempt {attempt}/{max_retries})")
            time.sleep(2)  # Wait for 2 seconds as suggested by API
        else:
            time.sleep(1)  # Regular delay between requests
            return output

    return f"Error: Rate limit exceeded after {max_retries} attempts."


def get_single_prediction(question, model_name):
    """Send one prompt to the backend selected by ``model_name`` and return its text reply.

    The backend is chosen by the prefix of ``model_name``:

    - ``gpt``: chat completion through ``openai_client``.
    - ``gemini``: ``gemini_client``, retried up to 5 times on rate limits.
    - ``llama-local``: ``llama_local_generate`` (no ``model_mapping`` lookup).
    - ``api-llama``: chat completion through ``llama_client``.

    Args:
        question: The user prompt text.
        model_name: Name of the model to query; for all but ``llama-local`` it
            must also be a key of ``model_mapping``.

    Returns:
        The generated text. For Gemini, failures are returned as a string
        starting with ``"Error: "`` instead of being raised.

    Raises:
        ValueError: If ``model_name`` matches none of the supported prefixes.
        KeyError: If the prefix is supported but the name is missing from
            ``model_mapping``.
    """
    if model_name.startswith('gpt'):
        # ChatGPT
        output = _chat_completion(openai_client, model_mapping[model_name], question, temperature)
    elif model_name.startswith('gemini'):
        # Gemini
        output = _gemini_generate(model_mapping[model_name], question)
    elif model_name.startswith('llama-local'):
        # temperature must be strictly positive (> 0) or the call errors out, hence 0.01
        output = llama_local_generate(sys_prompt, question, max_tokens=max_tokens, temperature=0.01, top_p=top_p, seed=seed)
    elif model_name.startswith('api-llama'):
        # Uses the same small positive temperature as the local Llama path.
        output = _chat_completion(llama_client, model_mapping[model_name], question, 0.01)
    else:
        raise ValueError(f"Model '{model_name}' not supported or not found in model_mapping")

    return output


#### Test

In [None]:
# Sample prompt for a quick end-to-end check: it asks the model to derive the
# CVSS v3.1 base metrics for one CVE description and to end its reply with the
# vector string alone.
# NOTE(review): the "Summarize each metric's value..." sentence appears twice in
# the prompt; left unchanged here because editing the prompt changes model output.
question = (
    "Analyze the following CVE description and calculate the CVSS v3.1 Base Score. "
    "Determine the values for each base metric: AV, AC, PR, UI, S, C, I, and A. "
    "Summarize each metric's value and provide the final CVSS v3.1 vector string.   "
    "Valid options for each metric are as follows: \n"
    "- **Attack Vector (AV)**: Network (N), Adjacent (A), Local (L), Physical (P)\n"
    "- **Attack Complexity (AC)**: Low (L), High (H)\n"
    "- **Privileges Required (PR)**: None (N), Low (L), High (H)\n"
    "- **User Interaction (UI)**: None (N), Required (R)\n"
    "- **Scope (S)**: Unchanged (U), Changed (C)\n"
    "- **Confidentiality (C)**: None (N), Low (L), High (H)\n"
    "- **Integrity (I)**: None (N), Low (L), High (H)\n"
    "- **Availability (A)**: None (N), Low (L), High (H)\n"
    "Summarize each metric's value and provide the final CVSS v3.1 vector string. "
    "Ensure the final line of your response contains ONLY the CVSS v3 Vector String (no other text) "
    "in the following format:  \n"
    "Example format: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H\n\n"
    "CVE Description: In the Linux kernel through 6.7.1, there is a use-after-free "
    "in cec_queue_msg_fh, related to drivers/media/cec/core/cec-adap.c and "
    "drivers/media/cec/core/cec-api.c."
)

##### Are all the APIs working?

In [None]:
# Smoke test: sends the sample question to the hosted Llama 3.1 8B model
# (a real API request through llama_client, so LLAMA_API_KEY must be set).
# Swap the model name for any other key of model_mapping to check another backend.
print(get_single_prediction(question, 'api-llama3.1-8b'))

# Run Evaluation for a Dataset

### All formatting comes here
While this captures most of the output formats produced by the LLMs, some answers still have to be collected manually from the generated response file.

In [None]:
def format(text):
    """Extract the last CVSS v3.1 metric string found in ``text``.

    The match starts at ``AV:`` and runs through ``A:<letters>``, so a leading
    ``CVSS:3.1/`` prefix is not part of the returned value.

    Returns:
        ``(vector, True)`` when at least one vector is present, otherwise
        ``(text, False)`` with the input returned unchanged.
    """
    vector_regex = re.compile(
        r'AV:[A-Za-z]+/AC:[A-Za-z]+/PR:[A-Za-z]+/UI:[A-Za-z]+/S:[A-Za-z]+/C:[A-Za-z]+/I:[A-Za-z]+/A:[A-Za-z]+'
    )

    # Walk through every occurrence and remember only the most recent one.
    last_vector = None
    for hit in vector_regex.finditer(text):
        last_vector = hit.group(0)

    if last_vector is None:
        return text, False
    return last_vector, True

In [None]:
def run_evaluation(file_path, model_name):
    """Query ``model_name`` for every prompt in a TSV file and save the extracted answers.

    Each row's ``Prompt`` column is sent to ``get_single_prediction`` and the
    reply is passed through ``format``. One answer per row is written, in row
    order, to ``responses/individual-results/SENG402_<dataset>_<model_name>_result.txt``.

    Args:
        file_path: Path to a tab-separated file with a ``Prompt`` column.
        model_name: Model name understood by ``get_single_prediction``.

    Side effects:
        Prints per-row progress and summary statistics, creates the output
        directory if needed, and overwrites the result file.
    """
    # Keep track of time and total #chars generated
    start_time = time.time()
    count_chars = 0
    instructions_failed = 0

    data = pd.read_csv(file_path, encoding='utf-8', sep='\t')

    # Only keep results
    all_results = []

    for index, row in data.iterrows():
        prompt = row['Prompt']
        try:
            output = get_single_prediction(prompt, model_name)
            count_chars += len(output)
            answer, success = format(output)
            if not success:
                instructions_failed += 1
        except Exception as e:
            # A failing row is recorded as 'Error' so the run continues and
            # the result file still has one entry per dataset row.
            answer = 'Error'
            print('Exception at row ', index+1)
            print(e)

        # When no vector was extracted, `answer` is the raw (often multi-line)
        # response. Flatten it so the file keeps exactly one line per row;
        # otherwise every later line would be misaligned with the dataset.
        all_results.append(' '.join(answer.splitlines()))
        print(index+1, answer)

    time_taken = time.time() - start_time
    print('Time taken:', time_taken)
    print('#Characters generated:', count_chars)
    print('#Instructions failed:', instructions_failed)

    # Create responses directory if it doesn't exist
    output_dir = os.path.join('responses', 'individual-results')
    os.makedirs(output_dir, exist_ok=True)

    # Only save the results to the responses folder
    dataset_name = os.path.basename(file_path).split('.')[0]
    out_result = os.path.join(output_dir, 'SENG402_' + dataset_name + '_' + model_name + '_result.txt')
    with open(out_result, 'w', encoding='utf-8') as f:
        f.write('\n'.join(all_results))

    print('------- Done --------')

In [None]:
# Run the evaluation on the dataset file below with the hosted Llama 3.1 8B model.
# This issues one API request per row and writes the answers under
# responses/individual-results/.
run_evaluation('../datasets/cti-vsp-only-2024-and-2025-SMALL.tsv', 'api-llama3.1-8b')