In [4]:
"""
This Python script extracts exam data from an API endpoint and saves the information as JSON files. 
The script loops through a range of exam IDs, fetches data including metadata (e.g., exam name, level, subject), 
questions, and choices, and saves the files in the "output" directory. 
The JSON files are named based on the exam metadata for easy identification.
"""

import requests
import json
import os
import re

# Characters that cannot appear in a Windows filename: the reserved
# punctuation < > : " / \ | ? * plus the ASCII control characters (0-31).
# Compiled once because the function is called for every exam.
_INVALID_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')


def sanitize_filename(filename):
    """Return *filename* with every filesystem-invalid character replaced by '_'.

    Each offending character is replaced one-for-one, so the length of the
    string is preserved. Besides the reserved punctuation, ASCII control
    characters (newlines, tabs, NUL, ...) are replaced as well, since a name
    coming from remote data may contain them and they are not valid in
    filenames on Windows.

    Args:
        filename: The candidate file name (not a full path; path separators
            are themselves replaced).

    Returns:
        The sanitized file name.
    """
    return _INVALID_FILENAME_CHARS.sub('_', filename)

# Directory that receives one JSON file per successfully fetched exam.
output_dir = "output"

# Seconds to wait for the server before giving up on a single exam, so one
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 30


def _build_exam_record(data):
    """Reshape a decoded API payload into the structure that is saved to disk.

    Args:
        data: The decoded JSON body of the API response. The exam fields are
            read from ``data['data']['exam']`` and the questions from
            ``data['data']['formdo']``.

    Returns:
        A dict with two keys: ``"metadata"`` (exam id, name, level, subject,
        question count) and ``"questions"`` (a list of numbered questions,
        each with its numbered choices).

    Raises:
        KeyError, TypeError: If the payload does not contain the expected
            fields.
    """
    exam = data['data']['exam']
    metadata = {
        "exam_id": exam['exam_id'],
        "exam_name": exam['exam_name'],
        "level_name": exam['level_name'],
        "subject_name": exam['subject_name'],
        "question_count": exam['question_count']
    }

    questions_list = []
    # Questions and choices are numbered from 1 in the order the API returns them.
    for i, question_data in enumerate(data['data']['formdo'], 1):
        choices = [
            {
                "choice_number": j,
                "choice_text": choice['detail'],
                # The API marks the correct choice with the string "true".
                "is_correct": choice['answer'] == "true"
            }
            for j, choice in enumerate(question_data['choice'], 1)
        ]
        questions_list.append({
            "question_number": i,
            "question_id": question_data['question_id'],
            "question_text": question_data['question_detail'],
            "choices": choices
        })

    return {
        "metadata": metadata,
        "questions": questions_list
    }


# Loop through the range of exam IDs, reusing one HTTP session so the
# connection to the server is kept alive between requests.
with requests.Session() as session:
    for exam_id in range(12585, 13500):  # Adjust the range as needed
        # API endpoint URL
        api_url = f"https://www.trueplookpanya.com/webservice/api/examination/formdoexamination?exam_id={exam_id}"

        # Send a GET request; a network failure skips this exam rather than
        # aborting the remaining IDs.
        try:
            response = session.get(api_url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f"Failed to retrieve data for exam_id {exam_id}. Request error: {exc}")
            continue

        # Only a 200 response is treated as carrying exam data.
        if response.status_code != 200:
            print(f"Failed to retrieve data for exam_id {exam_id}. Status code: {response.status_code}")
            continue

        # Parse and reshape the payload; a body that is not JSON (ValueError)
        # or lacks the expected fields is reported and skipped.
        try:
            output_data = _build_exam_record(response.json())
        except (ValueError, KeyError, TypeError) as exc:
            print(f"Failed to parse data for exam_id {exam_id}. Unexpected response format: {exc!r}")
            continue

        # Create a filename based on metadata and sanitize it
        metadata = output_data["metadata"]
        filename = f"{metadata['exam_id']}_{metadata['exam_name']}_{metadata['level_name']}_{metadata['subject_name']}.json"
        filename = sanitize_filename(filename)

        # Ensure the output directory exists (created lazily, on first success)
        os.makedirs(output_dir, exist_ok=True)
        file_path = os.path.join(output_dir, filename)

        # Save the result to a JSON file in the "output" folder;
        # ensure_ascii=False keeps non-ASCII text readable in the file.
        with open(file_path, "w", encoding="utf-8") as json_file:
            json.dump(output_data, json_file, ensure_ascii=False, indent=4)

        print(f"Data extraction complete for exam_id {exam_id}. The data has been saved to '{file_path}'.")


Data extraction complete for exam_id 12585. The data has been saved to 'output\12585_สิ่งต่างๆ รอบตัวเด็ก อ.1-อ.3 สีอะไรเอ่ย__ปฐมวัย_สิ่งต่าง ๆ รอบตัวเด็ก.json'.
Data extraction complete for exam_id 12586. The data has been saved to 'output\12586_ภาษาไทย ป.1-ป.3 ข้อใดผิด_ประถมต้น_ภาษาไทย.json'.
Failed to retrieve data for exam_id 12587. Status code: 201
Failed to retrieve data for exam_id 12588. Status code: 201
Data extraction complete for exam_id 12589. The data has been saved to 'output\12589_สิ่งต่างๆ รอบตัวเด็ก  อ.1 คณิตศาสตร์ของเด็กน้อย_อ.1_สิ่งต่าง ๆ รอบตัวเด็ก.json'.
Failed to retrieve data for exam_id 12590. Status code: 201
Failed to retrieve data for exam_id 12591. Status code: 201
Failed to retrieve data for exam_id 12592. Status code: 201
Failed to retrieve data for exam_id 12593. Status code: 201
Failed to retrieve data for exam_id 12594. Status code: 201
Failed to retrieve data for exam_id 12595. Status code: 201
Failed to retrieve data for exam_id 12596. Status code: 20