# Choose Model gpt-4o-mini

In [1]:
import torch
# Query availability once, up front, so the CUDA-only calls below are skipped on CPU-only machines.
cuda_available = torch.cuda.is_available()
print("CUDA Available: ", cuda_available)
if cuda_available:
    # get_device_name raises when no CUDA device is present, so it is only called here.
    print("CUDA Device Name: ", torch.cuda.get_device_name(0))
    # Release cached GPU memory left over from earlier work in this kernel.
    torch.cuda.empty_cache()

# Use CUDA to speed up processing when it is available, otherwise fall back to the CPU.
device = 'cuda' if cuda_available else 'cpu'
print(f"Usando dispositivo: {device}")

CUDA Available:  True
CUDA Device Name:  NVIDIA GeForce RTX 3050 Ti Laptop GPU
Usando dispositivo: cuda


## Gpt-4o-mini

In [1]:
from openai import OpenAI

In [4]:
# completion = client.chat.completions.create(
#     model="gpt-4o-mini",
#     messages=[
#         {"role": "user", "content": "hello?"}
#     ]
# )


In [13]:
# # Test gpt-4o-mini
# response = completion.choices[0].message.content
# print(response)

Hello! How can I assist you today?


# Dataset TeleQnA for Inference

In [2]:
import json

# Relative path to the pre-processed TeleQnA subset for 3GPP Release 17 (JSON file).
rel17_200_questions_path = r"../../Files/rel17_200_questions.json"

# Read the file as UTF-8 and parse it; the result is kept in `rel17_200_questions`.
with open(rel17_200_questions_path, "r", encoding="utf-8") as file:
    rel17_200_questions = json.load(file)
# Sanity check: show how many questions were loaded.
print(len(rel17_200_questions))

200


In [4]:
# Display the first record to show the keys each question carries.
rel17_200_questions[0]

{'question': 'Which NGAP procedure is used for inter-system load balancing? [3GPP Release 17]',
 'option 1': 'eNB Configuration Transfer',
 'option 2': 'Downlink RAN Configuration Transfer',
 'option 3': 'Uplink RAN Configuration Transfer',
 'option 4': 'MME Configuration Transfer',
 'answer': 'option 3: Uplink RAN Configuration Transfer',
 'explanation': 'The NGAP procedure used for inter-system load balancing is Uplink RAN Configuration Transfer.',
 'category': 'Standards overview'}

# Import RAG Functions

In [3]:
import sys
import os

# Resolve the parent of the notebook's working directory; the `utils` package
# imported in the next cell is expected to be importable from there (TODO confirm layout).
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
# Guard the append so re-running this cell does not stack duplicate sys.path entries.
if project_root not in sys.path:
    sys.path.append(project_root)

In [4]:
from utils.rag_functions import load_faiss_index, search_faiss_index, search_RAG, load_chunks

  from tqdm.autonotebook import tqdm, trange
2025-01-09 15:30:39.597106: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-09 15:30:39.741830: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-09 15:30:39.807031: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-09 15:30:39.826671: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-09 15:30:39.940549

In [5]:
# Test RAG
query_text = "reception of a transparent l3 message in unacknowledged mode"
index_file_path = "../../Files/faiss_index.bin"
chunks_path = "../../Files/tspec_chunks_markdown.pkl"
top_k = 5

In [5]:
# Run the retrieval helper on the sample query and print whatever it returns.
result = search_RAG(query_text, index_file_path, chunks_path, top_k)
print(result)

Information 1:
BSC.  
Collision cases are treated as specified in 3GPPTS44.006.  
If BTS has repeated the DISC frame N200 times, BTS sends a RELease
INDication and an ERRor INDication message to BSC (cf. 3GPPTS44.006).  
![](media/image7.png){width="3.65625in" height="1.2083333333333333in"}  
3.5 Transmission of a transparent L3-Message in acknowledged mode
-----------------------------------------------------------------  
This procedure is used by BSC to request the sending of a L3 message to
MS in acknowledged mode.  
BSC sends a DATA REQuest message to BTS. The message contains the
complete L3 message to be sent in acknowledged mode.  
![](media/image8.png){width="3.6979166666666665in" height="1.0625in"}  
3.6 Reception of a transparent L3-Message in acknowledged mode
--------------------------------------------------------------  
This procedure is used by BTS to indicate the reception of a L3 message
in acknowledged mode.  
BTS sends a DATA INDication message to BSC. The message 

# Accuracy Evaluation

## Create prompt and Ask function for GPT-4o-mini with RAG

In [7]:

def ask_gpt4_RAG(question_data, top_k=5, index_file_path="../../Files/faiss_index.bin", chunks_path="../../Files/tspec_chunks_markdown.pkl", client=None, model="gpt-4o-mini"):
    """
    Answer one multiple-choice question with an OpenAI chat model, using RAG context.

    The question and its options are sent to `search_RAG`; whatever it returns is
    placed at the top of the prompt, followed by the question, the options and the
    answer-format instruction.

    Parameters:
    - question_data: Dictionary with a 'question' key and one or more keys that
      start with 'option' (e.g. 'option 1'); other keys are ignored.
    - top_k: Number of relevant chunks to retrieve from the search.
    - index_file_path: Path to the FAISS index file.
    - chunks_path: Path to the chunks file.
    - client: Optional, already constructed OpenAI client. When omitted a new
      `OpenAI()` client is created for this call.
    - model: Name of the chat model to query.

    Returns:
    - String: The model's response with surrounding whitespace removed, or an
      empty string when the API returns no text content.
    """
    # Reuse the caller's client when one is supplied; otherwise build a fresh one.
    if client is None:
        client = OpenAI()

    # Extract question and options (keys such as 'option 1', 'option 2', ...)
    question = question_data['question']
    options = [f"{key}: {value}" for key, value in question_data.items() if key.startswith('option')]

    # The retrieval query is the question followed by all options on one line.
    question_search = (
        f"{question}\n" +
        " ".join(options) + " "
    )

    # Perform RAG search using the question to retrieve relevant information
    rag_results = search_RAG(question_search, index_file_path=index_file_path, chunks_path=chunks_path, top_k=top_k)

    # The options are numbered ('option 1', ...), so ask for the option *number*;
    # the answer parser in this notebook looks for 'option <digits>'.
    prompt = (
        f"Relevant Information:\n{rag_results}\n"
        f"Question: {question}\n"
        f"Options:\n" + "\n".join(options) + "\n"
        "Think step by step and choose the correct option.\n"
        "You must respond in the format 'correct option: <X>', where <X> is the number of the correct option."
    )

    # NOTE(review): step-by-step reasoning may reach the max_tokens cap before the
    # final 'correct option' line is written - confirm 512 is enough for this task.
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,      # Sampling randomness
        max_tokens=512,       # Upper bound on the response length
        top_p=0.9,            # Nucleus sampling
        frequency_penalty=0,  # 0 = no repetition penalty applied
        presence_penalty=0    # 0 = no presence penalty applied
    )

    # `content` can be None (e.g. refusals); treat that as an empty answer
    # instead of failing on `.strip()`.
    content = completion.choices[0].message.content
    return (content or "").strip()

In [14]:
# Hand-written sample record in the TeleQnA layout, used to smoke-test the ask function.
question_data = {
    'question': 'Which physical channel informs the UE and the RN about the number of OFDM symbols used for the PDCCHs? [3GPP Release 17]',
    'option 1': 'PBCH',
    'option 2': 'PCFICH',
    'option 3': 'PDSCH',
    'option 4': 'PHICH',
    'answer': 'option 2: PCFICH',
    'explanation': 'The physical control format indicator channel (PCFICH) informs the UE and the RN about the number of OFDM symbols used for the PDCCHs.',
    'category': 'Standards specifications'
}

# Single live call to the model; `gpt4_response` is reused by the evaluation cell further down.
gpt4_response = ask_gpt4_RAG(question_data)
print(gpt4_response)

Relevant Information:
Information 1:
of OFDM symbols of the PUSCH, including all OFDM symbols used for DMRS;  
\- for any OFDM symbol that carries DMRS of the PUSCH,
$M_{\text{sc}}^{\text{UCI}}\left( l \right) = 0$;  
\- for any OFDM symbol that does not carry DMRS of the PUSCH,
$M_{\text{sc}}^{\text{UCI}}\left( l \right) = M_{\text{sc}}^{\text{PUSCH}} - \ M_{\text{sc}}^{PT - RS}\left( l \right)$;  
\- $\alpha$ is configured by higher layer parameter *scaling*;  
\- $l_{0}$ is the symbol index of the first OFDM symbol that does not
carry DMRS of the PUSCH, after the first DMRS symbol(s), in the PUSCH
transmission.  
For CG-UCI transmission on PUSCH with UL-SCH, and if
*numberOfSlotsTBoMS* is present in the resource allocation table and the
value of *numberOfSlotsTBoMS* in the row indicated by the Time domain
resource assignment field in DCI is larger than 1, the number of coded
modulation symbols per layer for CG-UCI transmission, denoted as
$Q_{CG - UCI}^{'}$, is determined as follows

## Create Function to Evaluate Question 

In [8]:
import re

def extract_option(answer):
    """
    Extract the chosen option from an answer string, ignoring case and punctuation.

    When several options are mentioned, the last one wins (the final verdict
    normally comes after the reasoning).

    Parameters:
    - answer: A string such as 'option 2: PCFICH' or a free-text model response
      ending in 'correct option: 2'. Non-string values (e.g. None) are accepted.

    Returns:
    - String: Normalised option (e.g., 'option 2'), or None if no option is found.
    """
    # A missing completion carries no option; avoid failing on `.lower()`.
    if not isinstance(answer, str):
        return None

    # Replace every run of punctuation/underscores/whitespace with ONE space.
    # Deleting punctuation instead would glue tokens together: 'option:2' would
    # become 'option2' (no match) and 'option 1:5G' would become 'option 15g'.
    cleaned_answer = re.sub(r'[\W_]+', ' ', answer.lower())
    # Collect the number of every "option N" mention
    matches = re.findall(r'option (\d+)', cleaned_answer)
    # Return the last mention in canonical form if any found, otherwise None
    return f"option {matches[-1]}" if matches else None

In [9]:
def evaluate_model_response(model_response, question_data):
    """
    Compare the model's response with the correct answer from the question data.

    Parameters:
    - model_response: The response string generated by the model.
    - question_data: Dictionary containing at least the 'answer' key with the
      correct answer (e.g. 'option 2: PCFICH').

    Returns:
    - 1 if the model chose the correct option; otherwise the option extracted
      from the model response (e.g. 'option 3'), or None when the response
      contains no option at all.
    """
    correct_option = extract_option(question_data['answer'])  # Extract correct option
    model_option = extract_option(model_response)  # Extract model's option

    # Both sides are None when neither string contains an option; that must not
    # be scored as a correct answer.
    if model_option is not None and model_option == correct_option:
        return 1
    return model_option


In [10]:
# Same sample question as in the earlier smoke test, redefined so the evaluation
# cell below has `question_data` available.
question_data = {
    'question': 'Which physical channel informs the UE and the RN about the number of OFDM symbols used for the PDCCHs? [3GPP Release 17]',
    'option 1': 'PBCH',
    'option 2': 'PCFICH',
    'option 3': 'PDSCH',
    'option 4': 'PHICH',
    'answer': 'option 2: PCFICH',
    'explanation': 'The physical control format indicator channel (PCFICH) informs the UE and the RN about the number of OFDM symbols used for the PDCCHs.',
    'category': 'Standards specifications'
}

In [41]:
# Score the sample response; relies on `gpt4_response` produced by the earlier
# ask_gpt4_RAG cell. Prints 1 when the extracted option matches the answer.
evaluation_result = evaluate_model_response(gpt4_response, question_data)
print(evaluation_result)

1


## Ask gpt-4o-mini the TeleQnA questions (200 per release group)

### Release 17 200 questions

In [11]:
def gpt4_evaluate_questions(questions):
    """
    Ask the model every question in turn and collect its raw responses.

    Parameters:
    - questions: List of question dictionaries. Each one is passed unchanged to
      `ask_gpt4_RAG` and must provide:
        - 'question': The question text.
        - 'answer': The correct answer string (e.g., 'option 2: PCFICH').

    Returns:
    - List: One dictionary per input question, in input order, with:
        - 'question': The question text.
        - 'answer': The correct answer string.
        - 'response': The model's generated response for that question.
    """
    question_count = len(questions)
    collected = []

    # Count from 1 so the progress line reads naturally.
    for position, item in enumerate(questions, start=1):
        model_reply = ask_gpt4_RAG(item)
        record = {
            "question": item['question'],
            "answer": item['answer'],
            "response": model_reply,
        }
        collected.append(record)

        # Progress report after each answered question
        print(f"Responded {position} of {question_count} questions...")

    return collected

In [12]:
# Query the model for every Release 17 question (one chat-completion request
# per question) and keep the responses.
gpt4_responses = gpt4_evaluate_questions(rel17_200_questions)

Responded 1 of 200 questions...
Responded 2 of 200 questions...
Responded 3 of 200 questions...
Responded 4 of 200 questions...
Responded 5 of 200 questions...
Responded 6 of 200 questions...
Responded 7 of 200 questions...
Responded 8 of 200 questions...
Responded 9 of 200 questions...
Responded 10 of 200 questions...
Responded 11 of 200 questions...
Responded 12 of 200 questions...
Responded 13 of 200 questions...
Responded 14 of 200 questions...
Responded 15 of 200 questions...
Responded 16 of 200 questions...
Responded 17 of 200 questions...
Responded 18 of 200 questions...
Responded 19 of 200 questions...
Responded 20 of 200 questions...
Responded 21 of 200 questions...
Responded 22 of 200 questions...
Responded 23 of 200 questions...
Responded 24 of 200 questions...
Responded 25 of 200 questions...
Responded 26 of 200 questions...
Responded 27 of 200 questions...
Responded 28 of 200 questions...
Responded 29 of 200 questions...
Responded 30 of 200 questions...
Responded 31 of 200

In [13]:
# Spot-check the second item: the question, its reference answer, and the model's full response.
print(rel17_200_questions[1]['question'])
print(rel17_200_questions[1]['answer'])
print(gpt4_responses[1]['response'])

What is covered by enhanced application layer support for V2X services? [3GPP Release 17]
option 2: Advanced V2X services
To determine what is covered by enhanced application layer support for V2X services in the context of 3GPP Release 17, let's analyze the provided information.

1. The conclusions of the technical report mention enhancements to application layer support specifically for V2X services enabled on 3GPP systems (EPS, 5GS).
2. The key issues identified include support for dynamic information for HD maps and enhancements to V2X group management and communication, which indicates a focus on improving various aspects of V2X services.
3. The document emphasizes architecture requirements and solutions for V2X communications, particularly highlighting features like QoS requirements, capability exposure, and support for advanced scenarios like tele-operated driving.

Now, evaluating the options:

- **Option 1: PC5 radio resource control** - This is more related to the radio acces

#### Save accuracy responses

In [12]:
def save_responses_to_json(responses, filename):
    """
    Save the model responses to a JSON file.

    Missing parent directories of `filename` are created first, so a fresh
    checkout without the output folder does not fail after a long, paid run.

    Parameters:
    - responses: List of responses to save (must be JSON-serialisable).
    - filename: Path of the JSON file to write; an existing file is overwritten.
    """
    import os  # local import keeps this cell independent of the import order above

    # dirname is '' for a bare file name; nothing to create in that case.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # json.dump escapes non-ASCII by default, so the file content is plain
    # ASCII; the explicit encoding just removes the platform-default dependency.
    with open(filename, "w", encoding="utf-8") as json_file:
        json.dump(responses, json_file, indent=4)

In [15]:
# save_responses_to_json(gpt4_responses,"../../Models_responses/Accuracy_larger_test/gpt4_responses_release_17_200_questions.json")

#### Evaluate responses from GPT-4o-mini

In [13]:
# Path to the previously saved model responses for the Release 17 run (JSON file).
gpt4_responses_path = r"../../Models_responses/Accuracy_larger_test/gpt4_responses_release_17_200_questions.json"

# Load the saved responses; this rebinds `gpt4_responses` to the file's content.
with open(gpt4_responses_path, "r", encoding="utf-8") as file:
    gpt4_responses = json.load(file)
# Sanity check: number of loaded responses.
print(len(gpt4_responses))

200


In [14]:
def evaluate_accuracy(model_responses, rel_questions):
    """
    Evaluate the model's responses, print every miss and the overall accuracy.

    Parameters:
    - model_responses: List of dictionaries with 'question', 'answer' and
      'response' keys (the structure produced by the question-asking loop).
    - rel_questions: List of the original question dictionaries, expected to be
      in the same order as `model_responses`; only used to print the options
      of wrongly answered questions.

    Returns:
    - None. Results are printed. An empty `model_responses` prints a notice
      instead of dividing by zero.
    """
    total = len(model_responses)
    if total == 0:
        # Nothing to score; the accuracy ratio below would divide by zero.
        print("No responses to evaluate.")
        return

    correct_count = 0  # Track the number of correct responses
    none_count = 0  # Track the number of 'None' responses

    for index, question_data in enumerate(model_responses):
        evaluation_result = evaluate_model_response(question_data['response'], question_data)

        if evaluation_result == 1:
            correct_count += 1
            continue

        # Options are only needed for the error report, so build them lazily and
        # tolerate a reference list that is shorter than the responses.
        if index < len(rel_questions):
            options = [f"{key}: {value}" for key, value in rel_questions[index].items() if 'option' in key]
        else:
            options = []

        print("\nWrong Answer")
        print(f"Question {index + 1}: {question_data['question']}")
        print("Options:\n" + "\n".join(options) + "\n")
        if evaluation_result is None:
            # No option could be extracted: show the full text for inspection.
            print(f"Full model response:\n{question_data['response']}")
            none_count += 1
        else:
            print(f"Model response: {evaluation_result}")
        print(f"Correct response: {question_data['answer']}")
        print("----------------------------------------------------------------------------------------")

    # Calculate and print accuracy
    accuracy = correct_count / total * 100
    print(f"\nAccuracy: {accuracy:.2f}%")
    print(f"Total 'None' responses: {none_count}")
    print("'None' responses means that the model did not give an option")


In [19]:
# Score the Release 17 responses; the question list supplies the options shown for each miss.
evaluate_accuracy(gpt4_responses, rel17_200_questions)


Wrong Answer
Question 1: Which NGAP procedure is used for inter-system load balancing? [3GPP Release 17]
Options:
option 1: eNB Configuration Transfer
option 2: Downlink RAN Configuration Transfer
option 3: Uplink RAN Configuration Transfer
option 4: MME Configuration Transfer

Model response: option 2
Correct response: option 3: Uplink RAN Configuration Transfer
----------------------------------------------------------------------------------------

Wrong Answer
Question 15: What functionality does Signalling Based Activation provide? [3GPP Release 17]
Options:
option 1: Activation of management services for UEs in a specified area
option 2: Deactivation of management services for UEs in a specified area
option 3: Temporary stop and restart of management services during RAN overload
option 4: Request for end user service performance information for one specific UE
option 5: Handling management services during handover

Model response: option 5
Correct response: option 4: Request for

### Release 18 200 questions

In [15]:
import json

# Relative path to the pre-processed TeleQnA questions for 3GPP Release 18 (JSON file).
rel18_questions_path = r"../../Files/rel18_questions.json"

# Load the Release 18 questions; the file is read as UTF-8.
with open(rel18_questions_path, "r", encoding="utf-8") as file:
    rel18_questions = json.load(file)
# Sanity check: number of loaded questions.
print(len(rel18_questions))

780


In [16]:
# Keep only the first 200 Release 18 questions so the test size matches the
# Release 17 run. Note: this rebinds `rel18_questions` to the shortened list.
rel18_test_size = 200
rel18_questions = rel18_questions[:rel18_test_size]
print(len(rel18_questions))

200


In [19]:
# Query the model for every selected Release 18 question; this overwrites the
# `gpt4_responses` variable from the Release 17 section.
gpt4_responses = gpt4_evaluate_questions(rel18_questions)

Responded 1 of 200 questions...
Responded 2 of 200 questions...
Responded 3 of 200 questions...
Responded 4 of 200 questions...
Responded 5 of 200 questions...
Responded 6 of 200 questions...
Responded 7 of 200 questions...
Responded 8 of 200 questions...
Responded 9 of 200 questions...
Responded 10 of 200 questions...
Responded 11 of 200 questions...
Responded 12 of 200 questions...
Responded 13 of 200 questions...
Responded 14 of 200 questions...
Responded 15 of 200 questions...
Responded 16 of 200 questions...
Responded 17 of 200 questions...
Responded 18 of 200 questions...
Responded 19 of 200 questions...
Responded 20 of 200 questions...
Responded 21 of 200 questions...
Responded 22 of 200 questions...
Responded 23 of 200 questions...
Responded 24 of 200 questions...
Responded 25 of 200 questions...
Responded 26 of 200 questions...
Responded 27 of 200 questions...
Responded 28 of 200 questions...
Responded 29 of 200 questions...
Responded 30 of 200 questions...
Responded 31 of 200

#### Save accuracy responses

In [20]:
# Persist the Release 18 responses so they can be re-scored without calling the model again.
save_responses_to_json(gpt4_responses,"../../Models_responses/Accuracy_larger_test/gpt4_responses_release_18_200_questions.json")

#### Evaluate responses from GPT-4o-mini

In [22]:
import json

# Load the saved Release 18 responses from the JSON file
with open("../../Models_responses/Accuracy_larger_test/gpt4_responses_release_18_200_questions.json", "r") as file:
    gpt4_responses = json.load(file)

# Confirm the load finished (only a status message is printed, not the data)
print("Responses loaded")
# Uncomment to dump every loaded response:
# for response in gpt4_responses:
#     print(response)


Responses loaded


In [23]:
# Score the Release 18 responses; the question list supplies the options shown for each miss.
evaluate_accuracy(gpt4_responses, rel18_questions)


Wrong Answer
Question 3: In online charging, how are chargeable events transformed into charging events? [3GPP Release 18]
Options:
option 1: By the CTF
option 2: By the OCF
option 3: By the CGF
option 4: By the CHF

Model response: option 1
Correct response: option 2: By the OCF
----------------------------------------------------------------------------------------

Wrong Answer
Question 8: What is the purpose of the TRP Measurement Grid? [3GPP Release 18]
Options:
option 1: To calculate the CDF of the EIRP/EIS distribution in 3D
option 2: To determine the total power radiated by the DUT in the TX beam peak direction
option 3: To determine the TX and RX beam peak direction
option 4: To perform TRP measurements taken on the sampling grid
option 5: To perform 3D Throughput/RSRP/EIS scans for RX beam peak direction

Model response: option 2
Correct response: option 4: To perform TRP measurements taken on the sampling grid
----------------------------------------------------------------

### Other Releases 200 questions

In [24]:
import json

# Relative path to the pre-processed TeleQnA questions outside Releases 17 and 18 (JSON file).
other_rel_questions_path = r"../../Files/other_rel_questions.json"

# Load the "other releases" questions; the file is read as UTF-8.
with open(other_rel_questions_path, "r", encoding="utf-8") as file:
    other_rel_questions = json.load(file)
# Sanity check: number of loaded questions.
print(len(other_rel_questions))

4987


In [27]:
# Keep only the first 200 questions so the test size matches the other runs.
# Note: this rebinds `other_rel_questions` to the shortened list.
other_rel_test_size = 200
other_rel_questions = other_rel_questions[:other_rel_test_size]
print(len(other_rel_questions))

200


In [28]:
# Query the model for every selected "other releases" question; this overwrites
# the `gpt4_responses` variable from the previous section.
gpt4_responses = gpt4_evaluate_questions(other_rel_questions)

Responded 1 of 200 questions...
Responded 2 of 200 questions...
Responded 3 of 200 questions...
Responded 4 of 200 questions...
Responded 5 of 200 questions...
Responded 6 of 200 questions...
Responded 7 of 200 questions...
Responded 8 of 200 questions...
Responded 9 of 200 questions...
Responded 10 of 200 questions...
Responded 11 of 200 questions...
Responded 12 of 200 questions...
Responded 13 of 200 questions...
Responded 14 of 200 questions...
Responded 15 of 200 questions...
Responded 16 of 200 questions...
Responded 17 of 200 questions...
Responded 18 of 200 questions...
Responded 19 of 200 questions...
Responded 20 of 200 questions...
Responded 21 of 200 questions...
Responded 22 of 200 questions...
Responded 23 of 200 questions...
Responded 24 of 200 questions...
Responded 25 of 200 questions...
Responded 26 of 200 questions...
Responded 27 of 200 questions...
Responded 28 of 200 questions...
Responded 29 of 200 questions...
Responded 30 of 200 questions...
Responded 31 of 200

#### Save accuracy responses

In [30]:
# Persist the "other releases" responses so they can be re-scored without calling the model again.
save_responses_to_json(gpt4_responses,"../../Models_responses/Accuracy_larger_test/gpt4_responses_other_rel_200_questions.json")

#### Evaluate responses from GPT-4o-mini

In [32]:
import json

# Load the saved "other releases" responses from the JSON file
with open("../../Models_responses/Accuracy_larger_test/gpt4_responses_other_rel_200_questions.json", "r") as file:
    gpt4_responses = json.load(file)

# Confirm the load finished (only a status message is printed, not the data)
print("Responses loaded")
# Uncomment to dump every loaded response:
# for response in gpt4_responses:
#     print(response)


Responses loaded


In [33]:
# Score the "other releases" responses; the question list supplies the options shown for each miss.
evaluate_accuracy(gpt4_responses, other_rel_questions)


Wrong Answer
Question 3: What is the role of the Orchestrator in the O-RAN architecture?
Options:
option 1: To control and monitor dApps
option 2: To provide resource management for dApps
option 3: To ensure conflict mitigation between dApps
option 4: To determine which applications should be executed and where
option 5: To leverage O-RAN interfaces for dApp deployment

Model response: option 2
Correct response: option 4: To determine which applications should be executed and where
----------------------------------------------------------------------------------------

Wrong Answer
Question 6: What is the primary benefit of offloading ML computations onto edge servers?
Options:
option 1: Improved privacy and data security
option 2: Reduced training times on resource-constrained devices
option 3: Optimal utilization of edge resources
option 4: Enhanced accuracy of ML models

Model response: option 3
Correct response: option 2: Reduced training times on resource-constrained devices
---