In [None]:

import os
import json
import time
import requests
from tqdm import tqdm


In [None]:

# API credential is taken from the environment so it never has to be written
# into the notebook; the fallback is only a placeholder, not a working key.
SECRET_KEY = os.environ.get("API_SECRET_KEY", "replace-with-your-api-key")


In [None]:

def fetch_mock_data(count=100):
    """Return a list of `count` synthetic document records.

    Each record is a dict with the keys ``doc_text``, ``doc_title``,
    ``doc_platform``, ``doc_links`` (a nested dict holding ``reference_url``)
    and ``doc_language``. Only the text and the title vary with the record's
    zero-based position; every other field is a fixed placeholder.
    """

    def _build_record(position):
        # A fresh dict (including the nested links dict) is created per call,
        # so records never share mutable state.
        return {
            "doc_text": f"Sample content {position} related to robotics and AI systems.",
            "doc_title": f"Document {position}",
            "doc_platform": "Robotics Tech Framework",
            "doc_links": {
                "reference_url": "https://sampledomain.com/doc"
            },
            "doc_language": "en"
        }

    return [_build_record(position) for position in range(count)]


In [None]:

def _format_documents(subset):
    """Render a batch of records as the newline-terminated text block sent to the model."""
    lines = []
    for position, record in enumerate(subset, start=1):
        lines.append(f"Source {position}:")
        lines.append(f"Content: {record['doc_text']}")
        lines.append(f"Title: {record.get('doc_title', 'Not Available')}")
        lines.append(f"Platform: {record.get('doc_platform', 'Not Available')}")
        lines.append(f"Reference URL: {record.get('doc_links', {}).get('reference_url', 'Not Available')}")
        lines.append(f"Language: {record.get('doc_language', 'Not Available')}")
        lines.append("-" * 50)
    # Every line, including the separator, ends with a newline.
    return "".join(f"{line}\n" for line in lines)


def _post_chat_completion(system_prompt, user_query, timeout):
    """POST one chat-completion request and return the decoded JSON response body."""
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {SECRET_KEY}"
    }
    request_payload = {
        "model": "gpt-4-0613",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_query}
        ],
        "temperature": 1,
        "max_tokens": 2000,
        "top_p": 1,
        "frequency_penalty": 0.5,
        "presence_penalty": 0.5
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=request_headers,
        json=request_payload,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()


def execute_process(total_iterations=2, segment_size=2, request_timeout=120):
    """Generate Q&A pairs from mock documents, one API request per segment.

    Mock records are fetched with ``fetch_mock_data``, split into consecutive
    segments of ``segment_size`` records, and each segment is sent to the chat
    completions endpoint. A segment whose request or response parsing fails is
    reported and skipped; the remaining segments are still processed.

    Args:
        total_iterations: Maximum number of segments to process.
        segment_size: Number of records per segment; must be positive.
        request_timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The list of all Q&A pair objects collected from successful segments.
        The same data is also printed as JSON under the ``qa_pairs`` key.

    Raises:
        ValueError: If ``segment_size`` is not positive.
    """
    if segment_size <= 0:
        raise ValueError("segment_size must be a positive integer")

    records = fetch_mock_data(count=total_iterations * segment_size)
    print(f"Starting process with {total_iterations} iterations of size {segment_size}")

    output_data = []
    token_usage = 0

    # The final "Output format" rule is required: the response is parsed below
    # with json.loads(...)['qa_pairs'], so the model must be told to emit that shape.
    assistant_prompt = """You are an AI that specializing in generating QA pairs from provided content regarding Robotics. 
    Instructions:
    - Develop queries as a typical user might ask, incorporating informal language if relevant.
    - Ensure responses are comprehensive and precise, referencing the context.
    - Provide step-by-step solutions where applicable.
    - Questions must remain context-specific, balancing general and detailed levels.
    - Create 2 basic-level and 2 advanced-level questions per context.
    - Diversify content and avoid repetitions.
    Output format:
    - Respond with only a valid JSON object of the form {"qa_pairs": [{"question": "...", "answer": "..."}]} and no surrounding text.
    """

    planned_segments = min(total_iterations, len(records) // segment_size)

    for idx in tqdm(range(planned_segments), desc="Processing data"):
        subset = records[idx * segment_size:(idx + 1) * segment_size]
        print(f"Processing segment {idx+1}/{total_iterations}")

        documentation = _format_documents(subset)

        user_query = f"""Based on these documents, generate 4 Q&A pairs according to the provided guidelines. 
        Use the source content to ensure accuracy and natural flow.
        Document Content:
        {documentation}"""

        try:
            api_result = _post_chat_completion(assistant_prompt, user_query, request_timeout)

            # Tokens are tallied before parsing so that a response which fails
            # to parse still counts toward the reported usage.
            if 'usage' in api_result:
                prompt_tokens = api_result['usage'].get('prompt_tokens', 0)
                completion_tokens = api_result['usage'].get('completion_tokens', 0)
                token_usage += prompt_tokens + completion_tokens
                print(f"Tokens Used - Prompt: {prompt_tokens}, Completion: {completion_tokens}")

            response_content = api_result['choices'][0]['message']['content']
            generated_pairs = json.loads(response_content)['qa_pairs']
            # Guard against a non-list value: list.extend would otherwise
            # splice in dict keys or individual characters.
            if not isinstance(generated_pairs, list):
                raise ValueError("'qa_pairs' in the model response is not a list")
            output_data.extend(generated_pairs)

            print(f"Generated {len(generated_pairs)} pairs for segment {idx+1}")
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as ex:
            # Best-effort: report the failed segment and move on to the next one.
            print(f"Error processing segment {idx+1}: {str(ex)}")

        # Pause between requests whether or not the last one succeeded, so a
        # failing (e.g. rate-limited) API is not hit again immediately.
        if idx < planned_segments - 1:
            time.sleep(1)

    print(f"Total tokens used: {token_usage}")
    print(json.dumps({"qa_pairs": output_data}, indent=2))
    return output_data

# Entry point: run the Q&A generation only when this code is executed as the
# main program, not when it is imported from another module.
if __name__ == "__main__":
    execute_process()
