# Process the TeleQnA Dataset

In [1]:
import json

# Location of the TeleQnA question bank (JSON content stored in a .txt file).
teleQnA_questions_path = r"../Dataset/TeleQnA/TeleQnA.txt"

# Read the whole file as UTF-8 text and parse it as JSON.
with open(teleQnA_questions_path, mode="r", encoding="utf-8") as dataset_file:
    teleQnA_dataset = json.loads(dataset_file.read())

# Report how many entries were loaded.
print(len(teleQnA_dataset))

10000


In [2]:
# Display the entry stored under the key "question 0" to inspect the fields of one record.
teleQnA_dataset["question 0"]

{'question': 'What is the purpose of the Nmfaf_3daDataManagement_Deconfigure service operation? [3GPP Release 18]',
 'option 1': 'To configure the MFAF to map data or analytics received by the MFAF to out-bound notification endpoints',
 'option 2': 'To configure the MFAF to stop mapping data or analytics received by the MFAF to out-bound notification endpoints',
 'option 3': 'To supply data or analytics from the MFAF to notification endpoints',
 'option 4': 'To fetch data or analytics from the MFAF based on fetch instructions',
 'answer': 'option 2: To configure the MFAF to stop mapping data or analytics received by the MFAF to out-bound notification endpoints',
 'explanation': 'The Nmfaf_3daDataManagement_Deconfigure service operation is used to stop mapping data or analytics received by the MFAF to one or more out-bound notification endpoints.',
 'category': 'Standards specifications'}

## Choose only Release 17 Questions

In [3]:
# Collect every entry whose question text carries the "[3GPP Release 17]" tag.
# Only the values are needed, so the dataset keys are not unpacked.
rel17_questions = [
    entry
    for entry in teleQnA_dataset.values()
    if "[3GPP Release 17]" in entry["question"]
]

print(f"Total questions with '[3GPP Release 17]': {len(rel17_questions)}")

Total questions with '[3GPP Release 17]': 733


In [4]:
# Display the first filtered entry as a spot check of the Release 17 filter.
rel17_questions[0]

{'question': 'How does a supporting UE attach to the same core network operator from which it detached in a shared network? [3GPP Release 17]',
 'option 1': 'It requests the core network node to remember its previous selection.',
 'option 2': 'It uses information stored in the UE when it was detached.',
 'option 3': 'It relies on the SIM/USIM card for information.',
 'option 4': 'It performs a fresh attach procedure.',
 'answer': 'option 2: It uses information stored in the UE when it was detached.',
 'explanation': 'A supporting UE in a shared network attaches to the same core network operator it detached from by using information stored in the UE when it was detached.',
 'category': 'Standards specifications'}

In [5]:
# Write the Release 17 subset to disk as indented JSON; ensure_ascii=False keeps
# non-ASCII characters as-is instead of \u escapes.
rel17_questions_path = r"../Files/rel17_questions.json"
with open(rel17_questions_path, mode="w", encoding="utf-8") as output_file:
    json.dump(rel17_questions, output_file, ensure_ascii=False, indent=4)

## Choose 100 questions

In [23]:
# Tally how many Release 17 questions fall under each category label.
category_counts = {}

for entry in rel17_questions:
    # Entries without a "category" key are grouped under "Unknown".
    label = entry.get("category", "Unknown")
    category_counts[label] = category_counts.get(label, 0) + 1

# List each category with its tally, in the order the categories were first seen.
print("Categories found and counts:")
for label, total in category_counts.items():
    print(f"- {label}: {total}")


Categories found and counts:
- Standards specifications: 641
- Standards overview: 92


In [9]:
# Total number of questions requested for the sampled subset; the selection code
# divides this figure evenly across the categories.
number_questions = 100

In [10]:
# Group the Release 17 questions by category in a single pass (entries without a
# "category" key fall under "Unknown"), preserving their original order per group.
questions_by_category = {}
for question in rel17_questions:
    questions_by_category.setdefault(question.get("category", "Unknown"), []).append(question)

# Derive the category list from the data itself so this cell does not rely on a
# `categories` variable left over from another cell. Sorting makes the selection
# order deterministic across runs.
categories = sorted(questions_by_category, key=str)

# Calculate how many questions to take from each category (equal share, integer division)
questions_per_category = number_questions // len(categories)

rel17_100_questions = []

for category in categories:
    # Slicing takes at most `questions_per_category` items; a category holding
    # fewer questions simply contributes everything it has.
    rel17_100_questions.extend(questions_by_category[category][:questions_per_category])
    

In [11]:
# Check how many questions were actually selected.
len(rel17_100_questions)

100

In [13]:
# Show the size of the selection, then list every chosen question with its category.
selected_total = len(rel17_100_questions)
print(f"\nTotal selected questions: {selected_total}")
for position, entry in enumerate(rel17_100_questions, start=1):
    question_text = entry["question"]
    category_label = entry["category"]
    print(f"{position}. {question_text} (Category: {category_label})")


Total selected questions: 100
1. Which NGAP procedure is used for inter-system load balancing? [3GPP Release 17] (Category: Standards overview)
2. What is covered by enhanced application layer support for V2X services? [3GPP Release 17] (Category: Standards overview)
3. What does the Load-Balancing steering mode do? [3GPP Release 17] (Category: Standards overview)
4. What is the main objective of intent driven management for service or network management? [3GPP Release 17] (Category: Standards overview)
5. What does MINT stand for? [3GPP Release 17] (Category: Standards overview)
6. What is the purpose of the Media Streaming AF Event Exposure work item? [3GPP Release 17] (Category: Standards overview)
7. What is the purpose of load-balancing steering mode enhancements? [3GPP Release 17] (Category: Standards overview)
8. What is a capability added in the V2X Application Enabler (VAE) layer? [3GPP Release 17] (Category: Standards overview)
9. What is the purpose of the Edge Data Network

In [24]:
# (Disabled) Save the selected questions to a new JSON file; uncomment the lines below to write it
# rel17_100_questions_path = r"../Files/rel17_100_questions.json"
# with open(rel17_100_questions_path, "w", encoding="utf-8") as file:
#     json.dump(rel17_100_questions, file, indent=4, ensure_ascii=False)

## Choose 200 questions

In [24]:
# Request 216 rather than 200: only 92 "Standards overview" questions exist, so an
# even split of 200 would come up short.
# NOTE(review): with two categories this gives 216 // 2 = 108 per category, i.e.
# 92 + 108 = 200 selected questions; confirm `categories` holds exactly two entries.
number_questions = 216

In [25]:
# Group the Release 17 questions by category in a single pass (entries without a
# "category" key fall under "Unknown"), preserving their original order per group.
questions_by_category = {}
for question in rel17_questions:
    questions_by_category.setdefault(question.get("category", "Unknown"), []).append(question)

# Derive the category list from the data itself so this cell does not rely on a
# `categories` variable left over from another cell. Sorting makes the selection
# order deterministic across runs.
categories = sorted(questions_by_category, key=str)

# Calculate how many questions to take from each category (equal share, integer division)
questions_per_category = number_questions // len(categories)

rel17_200_questions = []

for category in categories:
    # Slicing takes at most `questions_per_category` items; a category holding
    # fewer questions simply contributes everything it has.
    rel17_200_questions.extend(questions_by_category[category][:questions_per_category])
    

In [26]:
# Check the final size of the larger subset.
len(rel17_200_questions)

200

In [27]:
# (Disabled) Save the selected questions to a new JSON file; uncomment the lines below to write it
# rel17_200_questions_path = r"../Files/rel17_200_questions.json"
# with open(rel17_200_questions_path, "w", encoding="utf-8") as file:
#     json.dump(rel17_200_questions, file, indent=4, ensure_ascii=False)

## Choose only Release 18 Questions

In [20]:
# Collect every entry whose question text carries the "[3GPP Release 18]" tag.
# Only the values are needed, so the dataset keys are not unpacked.
rel18_questions = [
    entry
    for entry in teleQnA_dataset.values()
    if "[3GPP Release 18]" in entry["question"]
]

print(f"Total questions with '[3GPP Release 18]': {len(rel18_questions)}")

Total questions with '[3GPP Release 18]': 780


In [21]:
# Write the Release 18 subset to disk as indented JSON; ensure_ascii=False keeps
# non-ASCII characters as-is instead of \u escapes.
rel18_questions_path = r"../Files/rel18_questions.json"
with open(rel18_questions_path, mode="w", encoding="utf-8") as output_file:
    json.dump(rel18_questions, output_file, ensure_ascii=False, indent=4)

## Separate the TeleQnA questions that are neither Release 17 nor Release 18

In [25]:
# Keep every question tagged with neither "[3GPP Release 17]" nor "[3GPP Release 18]".
# rel17_questions and rel18_questions were built from exactly these tags, so testing
# the question text selects the same entries as a membership check against those
# lists, without comparing each dataset dict against every element of both lists.
excluded_release_tags = ("[3GPP Release 17]", "[3GPP Release 18]")
questions_no_rel_17_18 = [
    value
    for value in teleQnA_dataset.values()
    if not any(tag in value["question"] for tag in excluded_release_tags)
]
len(questions_no_rel_17_18)

8487

In [26]:
# Write the questions outside Release 17 and 18 to disk as indented JSON;
# ensure_ascii=False keeps non-ASCII characters as-is instead of \u escapes.
questions_no_rel_17_18_path = r"../Files/questions_no_rel_17_18.json"
with open(questions_no_rel_17_18_path, mode="w", encoding="utf-8") as output_file:
    json.dump(questions_no_rel_17_18, output_file, ensure_ascii=False, indent=4)