In [None]:
import json
import random
import re
import time
import os
from typing import List, Dict, Any, Optional, Annotated, Callable, Tuple
from difflib import SequenceMatcher # Option 1 for basic similarity
import heapq # For finding top N similar items efficiently
import numpy as np # For calculating mean similarity
from rouge_score import rouge_scorer
from openai import OpenAI, APIError
from function_schema import get_function_schema

In [None]:
# --- Friendli AI Client Setup ---
# The API token comes from the environment. When it is missing (or empty) a
# placeholder is substituted so the cell still runs; LLM calls will then
# likely fail, as the warning below states.
token = os.environ.get("FRIENDLI_TOKEN")
if not token:
    print("Error: FRIENDLI_TOKEN environment variable not set.")
    print("Please set the environment variable or replace '<YOUR_FRIENDLI_TOKEN>' in the code.")
    token = "<YOUR_FRIENDLI_TOKEN>"  # Placeholder

if token == "<YOUR_FRIENDLI_TOKEN>":
    print("Warning: Using placeholder Friendli token. LLM calls will likely fail.")

# OpenAI-compatible client pointed at the Friendli serverless endpoint.
client = OpenAI(
    api_key=token,
    base_url="https://api.friendli.ai/serverless/v1",
)
LLM_MODEL_NAME = "deepseek-r1"  # Model name passed to chat-completion requests

In [None]:
def get_weather(
    city: Annotated[str, "The city to get the weather for"],
    unit: Annotated[Optional[str], "The unit to return the temperature in"] = "celcius",
) -> str:
    """Returns the weather for the given city."""
    # NOTE: the docstring, annotations and default above are deliberately left
    # untouched -- this function is passed to get_function_schema, which
    # presumably derives the tool description from them (confirm against the
    # function_schema docs). The default keeps its historical spelling
    # ("celcius") so the generated schema does not change.
    #
    # Mock reading: a fixed 20 degrees Celsius. The requested unit used to be
    # ignored entirely; Fahrenheit is now honoured, and every other value
    # (None, "celsius", "celcius", unknown units) falls back to Celsius so
    # existing callers see the same output as before.
    if isinstance(unit, str) and unit.strip().lower() in ("fahrenheit", "f"):
        return f"Weather for {city} is 68°F"
    return f"Weather for {city} is 20°C"

def get_news(
    topic: Annotated[str, "The topic to get news for"],
    source: Annotated[Optional[str], "The source to get news from"] = None,
) -> str:
    """Returns the news for the given topic."""
    # Mock tool: a falsy source (None or "") is reported as "all sources".
    origin = source or 'all sources'
    return f"News for {topic} from {origin}"

def get_current_location() -> str:
    """Returns the current location of the user."""
    # Mock tool: the reported location is a fixed string.
    location_report = "Current location is Seoul, South Korea"
    return location_report

# Callables offered to the assistant as tools.
tools = [get_weather, get_news, get_current_location]

# One schema per tool, plus an indented JSON rendering that is embedded in
# the LLM prompt.
tool_schemas = list(map(get_function_schema, tools))
tool_schemas_json = json.dumps(tool_schemas, indent=2)


In [None]:
# --- Configuration ---
# JSON file the accepted query objects are loaded from at start-up and
# written back to at the end of every generation turn.
QUERIES_FILENAME = "diverse_queries_with_scores_v4.json" # New filename for this version
# Number of generation turns the main loop runs.
NUM_GENERATION_TURNS = 3
# Number of newly accepted queries each turn tries to reach.
QUERIES_TO_GENERATE_PER_TURN = 10
# Upper bound on how many queries a single LLM request asks for.
REQUEST_BATCH_SIZE_PER_TURN = 15
# Maximum LLM attempts per turn before the turn is abandoned.
MAX_ATTEMPTS_PER_TURN = 5
# A candidate whose maximum similarity to any known query exceeds this value
# is rejected by the diversity filter.
SIMILARITY_THRESHOLD = 0.8
# How many of the most similar known queries are recorded per accepted query.
TOP_N_SIMILAR = 10 # Alpaca stores top 10


In [None]:

# --- Helper Functions ---

def load_queries_with_scores(filename: str) -> List[Dict[str, Any]]:
    """Loads previously generated query objects from a JSON file.

    Args:
        filename: Path of the UTF-8 encoded JSON file to read.

    Returns:
        The parsed list when the file holds a list of dicts that each contain
        a 'q' key. An empty list when the file does not exist, cannot be read
        or decoded, or has any other shape. A message describing the outcome
        is printed in every case.
    """
    if not os.path.exists(filename):
        print(f"No existing query file found ({filename}). Starting fresh.")
        return []

    try:
        with open(filename, 'r', encoding='utf-8') as f:
            data = json.load(f)
    # UnicodeDecodeError is listed explicitly: a file that is not valid UTF-8
    # fails while being read, before the JSON parser can raise
    # JSONDecodeError, and would otherwise crash the caller.
    except (json.JSONDecodeError, UnicodeDecodeError, OSError) as e:
        print(f"Error loading {filename}: {e}. Starting fresh.")
        return []

    has_expected_shape = isinstance(data, list) and all(
        isinstance(item, dict) and 'q' in item for item in data
    )
    if not has_expected_shape:
        print(f"Warning: Invalid format found in {filename}. Starting fresh.")
        return []

    print(f"Loaded {len(data)} query objects from {filename}")
    return data

def save_queries_with_scores(query_objects: List[Dict[str, Any]], filename: str):
    """Saves the list of query objects to a JSON file.

    The data is first written to a sibling temporary file and then moved over
    `filename` with os.replace, so a failure part-way through serialisation
    no longer leaves a truncated file in place of earlier results.

    Args:
        query_objects: JSON-serialisable list of query dictionaries.
        filename: Destination path; written as UTF-8 with 4-space indentation.

    Raises:
        TypeError / ValueError: propagated from json.dump when the data cannot
            be serialised (as before); the existing file is left untouched.
        I/O errors (OSError) are caught and reported with a printed message.
    """
    tmp_filename = f"{filename}.tmp"
    try:
        with open(tmp_filename, 'w', encoding='utf-8') as f:
            # Use indent=4 like Alpaca's output
            json.dump(query_objects, f, indent=4, ensure_ascii=False)
        os.replace(tmp_filename, filename)
        print(f"Saved {len(query_objects)} query objects to {filename}")
    except IOError as e:
        print(f"Error saving queries to {filename}: {e}")
    finally:
        # After a successful replace the temp file is gone; after a failure
        # remove the partial file. Cleanup is best-effort on purpose.
        if os.path.exists(tmp_filename):
            try:
                os.remove(tmp_filename)
            except OSError:
                pass

def is_valid_query_line(q_text: str) -> bool:
    """Decide whether one line of LLM output looks like a real user query.

    Returns False for blank lines and for lines that look like reasoning,
    headings, list items, bare paths, or tool/parameter descriptions;
    True otherwise.
    """
    line = q_text.strip()
    if not line:
        return False

    # Openers typical of LLM reasoning or meta-commentary rather than queries.
    meta_prefixes = (
        "- ", "* ", "Okay,", "First,", "Next,", "Now,", "Let me", "Wait,", "Also,",
        "###", "//", "```", "Queries", "That's", "This should", "Avoid", "Check for",
        "Example", "User Query:", "Generated Query:",
    )
    lowered = line.lower()
    names_a_tool = any(
        tool_name in lowered
        for tool_name in ("get_weather", "get_news", "get_current_location")
    )
    describes_a_tool = any(word in lowered for word in ("parameter", "require", "tool"))

    rejected = (
        line.startswith(meta_prefixes)
        # Headings ("...:") and arrow-style mappings.
        or line.endswith(":")
        or "→" in line
        # Numbered list items such as "1. ...".
        or re.match(r"^\d+\.", line) is not None
        # A single token containing no ASCII letter at all.
        or (len(line.split()) <= 1 and re.search(r'[a-zA-Z]', line) is None)
        # Something path- or URL-like: slash and dot but no spaces.
        or ("/" in line and "." in line and " " not in line)
        # Lines that describe a tool or its parameters.
        or (names_a_tool and describes_a_tool)
    )
    return not rejected

def remove_think_blocks(text: str) -> str:
    """Removes <think>...</think> reasoning blocks from the text.

    Besides well-formed pairs (matched case-insensitively, across newlines),
    two malformed shapes are handled:

    * an orphan closing tag -- everything up to and including the last
      remaining ``</think>`` is dropped, covering output whose opening tag
      is absent;
    * an unterminated opening tag -- everything from ``<think>`` to the end
      is dropped, covering output cut off mid-reasoning.

    Text with only well-formed blocks (or none) gives the same result as before.
    """
    flags = re.DOTALL | re.IGNORECASE
    # 1) Well-formed blocks, non-greedy so separate blocks are removed separately.
    text = re.sub(r"<think>.*?</think>", "", text, flags=flags)
    # 2) Any closing tag still present has no opener: the text before it is reasoning.
    text = re.sub(r"^.*</think>", "", text, flags=flags)
    # 3) Any opening tag still present was never closed: drop the tail.
    text = re.sub(r"<think>.*$", "", text, flags=flags)
    return text


In [None]:

# --- Step 1: Generate Diverse User Queries ('q') ---

def generate_candidate_qs_with_llm(
    tool_schemas_str: str,
    num_to_generate: int,
    existing_qs_list: List[str],
) -> List[str]:
    """Generates candidate 'q' strings using the LLM, removing think blocks.

    Args:
        tool_schemas_str: JSON text describing the available tools; embedded
            verbatim in the user prompt.
        num_to_generate: Number of queries the LLM is asked to produce.
        existing_qs_list: Known queries; up to five are sampled at random and
            shown to the LLM as examples to avoid.

    Returns:
        The response lines that pass is_valid_query_line, stripped of
        surrounding whitespace. An empty list when the request fails or the
        response carries no text.
    """
    system_prompt = f"""Your ONLY task is to generate realistic, diverse user queries or requests ('q') suitable for an AI assistant with access to specific tools. These queries should be answerable using the provided tools. Vary the complexity, phrasing (questions, commands), and the tools potentially required.

**CRITICAL INSTRUCTIONS:**
1.  Output ONLY the raw user queries.
2.  Each query MUST be on a new line.
3.  **ABSOLUTELY DO NOT** include:
    * Explanations, comments, or justifications.
    * Thinking processes, reasoning steps (including anything like `<think>...</think>`).
    * Numbered lists, bullet points, or any formatting other than one query per line.
    * Any text before the first query or after the last query.
"""

    examples_prompt = ""
    if existing_qs_list:
        sample_existing = random.sample(existing_qs_list, min(len(existing_qs_list), 5))
        examples_prompt = "Critically, avoid generating queries too similar to these examples:\n- " + "\n- ".join(sample_existing) + "\n\n"

    user_prompt = f"""Based on the following available tools: {tool_schemas_str}
Generate exactly {num_to_generate} diverse user queries ('q'). Remember to vary the required tools, complexity, and phrasing. {examples_prompt}Output ONLY the queries, one per line:"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
    try:
        print(f"--- Calling LLM ({LLM_MODEL_NAME}) to generate ~{num_to_generate} queries ---")
        completion = client.chat.completions.create(
            model=LLM_MODEL_NAME, messages=messages, temperature=0.8, max_tokens=4096,
        )
        raw_llm_output = completion.choices[0].message.content
        print("--- LLM Response Received ---")
    except APIError as e:
        print(f"LLM API Error: {e}")
        return []
    except Exception as e:
        print(f"An unexpected error occurred during LLM call: {e}")
        return []

    # The message content can be None or empty; passing None on to the regex
    # clean-up would raise TypeError outside the try block above.
    if not raw_llm_output:
        print("LLM returned an empty response.")
        return []

    cleaned_output = remove_think_blocks(raw_llm_output)
    candidate_qs = []
    raw_lines = cleaned_output.split('\n')
    print(f"--- Lines after removing <think> blocks: {len(raw_lines)} ---") # Debugging
    for line in raw_lines:
        clean_line = line.strip()
        if is_valid_query_line(clean_line):
            candidate_qs.append(clean_line)
        elif clean_line:
            # Report non-blank rejects so the filter heuristics can be tuned.
            print(f"Filtered out invalid line: '{clean_line}'")

    print(f"--- Parsed {len(candidate_qs)} potentially valid candidate queries ---")
    return candidate_qs





In [None]:
# Smoke test: request a handful of queries with no "avoid these" examples.
candidate_qs_strings = generate_candidate_qs_with_llm(
    tool_schemas_str=tool_schemas_json,
    num_to_generate=5,
    existing_qs_list=[],
)

print(candidate_qs_strings)  # Debugging



In [None]:
# Library installation: pip install sentence-transformers
from sentence_transformers import SentenceTransformer, util
import torch # per the original note, used internally by sentence-transformers; also called directly below (torch.max)

# Load a Korean pre-trained model (other models can be substituted)
# Examples: 'jhgan/ko-sbert-sts', 'snunlp/KR-SBERT-V40K-klueNLI-augSTS', etc.
# Model list: https://huggingface.co/models?language=ko&library=sentence-transformers
print("Loading Korean Sentence Transformer model...")
# model = SentenceTransformer('jhgan/ko-sbert-sts')
model = SentenceTransformer('snunlp/KR-SBERT-V40K-klueNLI-augSTS')
print("Model loaded.")

# Texts to compare
# NOTE(review): the variables are suffixed "_ko" but the sample sentences are
# English -- confirm whether Korean examples were intended.
references_ko = [
    "The cat was found under the bed",
    "The cat was under the bed"
]
candidate_ko = "The cat likes to eat Churu"

# Compute sentence embeddings
# Original author's note: a GPU is used automatically when available, for speed
print("Encoding sentences...")
ref_embeddings = model.encode(references_ko, convert_to_tensor=True)
cand_embedding = model.encode(candidate_ko, convert_to_tensor=True)
print("Encoding complete.")

# Compute cosine similarity
# between the candidate sentence and each reference sentence
cosine_scores = util.pytorch_cos_sim(cand_embedding, ref_embeddings)

# Print the results
print("\n--- Sentence Embedding Cosine Similarity ---")
for i, score in enumerate(cosine_scores[0]): # there is a single candidate embedding, so index [0] is used
    print(f"Candidate vs Reference {i+1}: {score.item():.4f}")

# Pick the highest similarity across the reference sentences
max_similarity = torch.max(cosine_scores[0]).item()
print(f"\nMaximum Similarity Score: {max_similarity:.4f}")

In [None]:
# -*- coding: utf-8 -*-
import torch
from sentence_transformers import SentenceTransformer, util
from typing import List, Dict, Any
import numpy as np
import heapq # top_n_similar 계산을 위해 유지

# --- Model loading (ideally performed once, at application start-up) ---
# Korean pre-trained Sentence Transformer checkpoint to load.
# Usable alternatives include 'jhgan/ko-sbert-sts'.
# model_name = 'jhgan/ko-sbert-sts'
model_name = 'snunlp/KR-SBERT-V40K-klueNLI-augSTS'
print("Loading Korean Sentence Transformer model...")
try:
    model = SentenceTransformer(model_name)
    print(f"Model '{model_name}' loaded successfully.")
    # Place the model on CUDA when torch reports a GPU; otherwise stay on CPU.
    if not torch.cuda.is_available():
        print("GPU not available, using CPU.")
    else:
        model = model.to(torch.device("cuda"))
        print("Model moved to GPU.")
except Exception as e:
    print(f"Error loading Sentence Transformer model: {e}")
    # None signals the failure to later cells, which test `model` before use.
    model = None

# --- Improved filtering function ---
def filter_and_score_qs_sentence_transformer(
    candidate_qs: List[str],
    existing_query_objects: List[Dict[str, Any]],
    model: SentenceTransformer, # the already-loaded model is passed in
    similarity_threshold: float = 0.8,
    top_n_similar: int = 10
) -> List[Dict[str, Any]]:
    """
    Filters candidate questions based on maximum semantic similarity using Sentence Transformers
    and scores the accepted ones.

    Candidates are processed in order. Each accepted candidate joins the
    comparison pool at once, so later candidates in the same batch are also
    checked against it.

    Args:
        candidate_qs: List of new candidate question strings.
        existing_query_objects: List of dictionaries, each representing an existing query
                                (must contain at least a 'q' key with the query string).
        model: The pre-loaded Sentence Transformer model (None is reported and yields []).
        similarity_threshold: The maximum similarity score allowed for a candidate to be accepted.
        top_n_similar: The number of most similar existing questions to record for accepted candidates.

    Returns:
        List of dictionaries, each representing an accepted new query with the keys
        'q', 'max_similarity_score_against_all', 'avg_similarity_score' and
        'most_similar_instructions' (query text -> rounded score, scores <= 0.01 omitted).
    """
    # Explicit None check: the model's truthiness would depend on its __len__.
    if model is None:
        print("Error: Sentence Transformer model is not loaded. Cannot perform filtering.")
        return []

    if not candidate_qs:
        print("No candidate questions provided.")
        return []

    newly_accepted_query_objects = []
    existing_qs_list = [obj['q'] for obj in existing_query_objects]

    # --- Embedding computation ---
    # Existing questions are encoded only when there are any.
    existing_embeddings = None
    if existing_qs_list:
        print(f"Encoding {len(existing_qs_list)} existing queries...")
        existing_embeddings = model.encode(existing_qs_list, convert_to_tensor=True, show_progress_bar=True)
        print("Existing queries encoded.")

    # Candidate questions are encoded once, up front.
    print(f"Encoding {len(candidate_qs)} candidate queries...")
    candidate_embeddings = model.encode(candidate_qs, convert_to_tensor=True, show_progress_bar=True)
    print("Candidate queries encoded.")

    # Comparison pool (texts + embeddings); starts as the existing questions
    # and grows inside the loop as candidates are accepted.
    all_qs_strings_for_comparison = list(existing_qs_list)
    # Lower-cased texts of the pool, kept as a set so the exact-duplicate test
    # is a hash lookup instead of re-lowercasing every pooled string per candidate.
    seen_lowercase = {q.lower() for q in existing_qs_list}
    # .clone() keeps the pool separate from the original existing_embeddings tensor.
    all_embeddings_for_comparison = existing_embeddings.clone() if existing_embeddings is not None else None

    print(f"\n--- Filtering {len(candidate_qs)} candidates for diversity against {len(all_qs_strings_for_comparison)} existing/accepted queries ---")

    for i, q_new in enumerate(candidate_qs):
        q_new_lower = q_new.lower()

        # 1. Skip exact (case-insensitive) duplicates of anything in the pool.
        if q_new_lower in seen_lowercase:
            # print(f"Skipping exact duplicate: \"{q_new}\"") # uncomment for logging
            continue

        cand_embedding = candidate_embeddings[i] # pre-computed embedding of this candidate

        # 2. Similarity against every pooled question. The defaults apply when
        #    the pool is still empty (nothing to compare against).
        max_similarity = 0.0
        avg_similarity = 0.0
        similarities_list = [] # (score, query) tuples

        if all_embeddings_for_comparison is not None and all_embeddings_for_comparison.shape[0] > 0:
            # One candidate vs. the whole pool; unsqueeze gives the candidate a
            # leading axis so it is passed as a 1-row matrix.
            cosine_scores = util.pytorch_cos_sim(cand_embedding.unsqueeze(0), all_embeddings_for_comparison)[0]

            # Move to CPU / numpy for the statistics below. The pool is
            # non-empty here, so the score array is non-empty too (the former
            # "no scores" fallback branch was unreachable and has been removed).
            cosine_scores_cpu = cosine_scores.cpu().numpy()
            max_similarity = np.max(cosine_scores_cpu)
            avg_similarity = np.mean(cosine_scores_cpu)
            similarities_list = list(zip(cosine_scores_cpu, all_qs_strings_for_comparison))

        # 3. Alpaca-style filter: reject when the closest match is too similar.
        if max_similarity > similarity_threshold:
            print(f"Skipping (MaxSim {max_similarity:.3f} > {similarity_threshold}): \"{q_new}\"")
            continue

        # 4. Accepted: record the scores and add the candidate to the pool.
        print(f"Accepting (MaxSim {max_similarity:.3f} <= {similarity_threshold}): \"{q_new}\"")

        # The N most similar pooled questions, highest score first.
        most_similar_dict = {}
        if similarities_list:
            actual_top_n = min(top_n_similar, len(similarities_list))
            top_n = heapq.nlargest(actual_top_n, similarities_list, key=lambda item: item[0])
            # Rounded to 4 decimals; scores of 0.01 or less are left out.
            most_similar_dict = {q: round(float(score), 4) for score, q in top_n if float(score) > 0.01}

        new_obj = {
            "q": q_new,
            "max_similarity_score_against_all": round(float(max_similarity), 4), # plain float for JSON
            "avg_similarity_score": round(float(avg_similarity), 4), # plain float for JSON
            "most_similar_instructions": most_similar_dict # existing key name kept
        }
        newly_accepted_query_objects.append(new_obj)

        # Grow the pool so the remaining candidates are compared against this one too.
        all_qs_strings_for_comparison.append(q_new)
        seen_lowercase.add(q_new_lower)
        cand_embedding_expanded = cand_embedding.unsqueeze(0) # add a leading axis so it can be concatenated as one row
        if all_embeddings_for_comparison is None:
            all_embeddings_for_comparison = cand_embedding_expanded
        else:
            all_embeddings_for_comparison = torch.cat((all_embeddings_for_comparison, cand_embedding_expanded), dim=0)

    print(f"\n--- Accepted {len(newly_accepted_query_objects)} new diverse query objects this round ---")
    return newly_accepted_query_objects

# --- Example usage ---
if __name__ == "__main__":
    if not model:
        # Nothing to demonstrate without a loaded model.
        print("Skipping example usage because the model could not be loaded.")
    else:
        # Sample queries treated as already accepted.
        existing_queries = [
            {"q": "오늘 날씨 어때?", "other_data": 1},
            {"q": "서울 맛집 추천해줘", "other_data": 2},
            {"q": "파이썬으로 웹사이트 만드는 법 알려줘", "other_data": 3},
        ]

        # Sample incoming candidates.
        candidate_queries = [
            "오늘 서울 날씨 알려줄래?",  # similar to an existing query
            "제주도 가볼만한 곳",
            "파이썬 웹 개발 방법",  # similar to an existing query
            "점심 메뉴 추천",
            "오늘 날씨 어때?",  # exact match of an existing query
        ]

        # Run the diversity filter and scoring.
        accepted_queries = filter_and_score_qs_sentence_transformer(
            candidate_qs=candidate_queries,
            existing_query_objects=existing_queries,
            model=model,
            similarity_threshold=0.8,  # similarity cut-off (tunable)
            top_n_similar=3,
        )

        print("\n--- Accepted Queries ---")
        for i, query_obj in enumerate(accepted_queries):
            print(f"{i+1}. Query: \"{query_obj['q']}\"")
            print(f"   Max Similarity: {query_obj['max_similarity_score_against_all']:.4f}")
            print(f"   Avg Similarity: {query_obj['avg_similarity_score']:.4f}")
            print(f"   Most Similar ({len(query_obj['most_similar_instructions'])}): {query_obj['most_similar_instructions']}")
            print("-" * 20)



  from .autonotebook import tqdm as notebook_tqdm


In [None]:


# --- Main Execution Logic ---
# Runs NUM_GENERATION_TURNS turns. Each turn repeatedly asks the LLM for
# candidate queries, keeps the ones that pass the similarity filter, and
# saves the accumulated list to QUERIES_FILENAME when the turn ends.
if __name__ == "__main__":

    if not model:
        print("Sentence Transformer model is not loaded. Exiting.")
        # NOTE(review): inside a Jupyter kernel exit() may shut the kernel down
        # rather than just stop this cell -- confirm that is intended.
        exit() # cannot run without the model

    # Load the previously accepted query objects
    accepted_query_objects = load_queries_with_scores(QUERIES_FILENAME)
    initial_query_count = len(accepted_query_objects)
    # Overall target: initial count + (number of turns * per-turn target)
    overall_target = initial_query_count + (NUM_GENERATION_TURNS * QUERIES_TO_GENERATE_PER_TURN)

    print(f"Starting Generation Process.")
    print(f"Initial query objects loaded: {initial_query_count}")
    print(f"Targeting {QUERIES_TO_GENERATE_PER_TURN} new queries per turn for {NUM_GENERATION_TURNS} turns.")
    print(f"Overall target: {overall_target} query objects.")
    print(f"Using Similarity Threshold (Sentence Transformer): {SIMILARITY_THRESHOLD}")
    print("-" * 30)

    total_added_this_session = 0

    # Repeat for the configured number of turns
    for turn in range(1, NUM_GENERATION_TURNS + 1):
        print(f"\n=== Turn {turn}/{NUM_GENERATION_TURNS} ===")
        target_for_this_turn = QUERIES_TO_GENERATE_PER_TURN # number of queries to add this turn
        added_in_this_turn = 0 # number actually added this turn
        attempts_this_turn = 0 # number of attempts made this turn

        # Query strings used in the LLM prompt (rebuilt at the start of every turn)
        # Note: building this list may take a while once accepted_query_objects grows very large
        current_qs_list_for_prompting = [obj['q'] for obj in accepted_query_objects]

        # Loop until this turn's target is met or the attempt limit is reached
        while added_in_this_turn < target_for_this_turn and attempts_this_turn < MAX_ATTEMPTS_PER_TURN:
            attempts_this_turn += 1
            print(f"\n--- Turn {turn} | Attempt {attempts_this_turn}/{MAX_ATTEMPTS_PER_TURN} ---")
            print(f"Current total query objects: {len(accepted_query_objects)}")
            print(f"Goal for this turn: {added_in_this_turn}/{target_for_this_turn} new queries")

            # Number of queries still needed in this turn
            num_needed_for_turn = target_for_this_turn - added_in_this_turn
            # Request a few more than needed (some will be dropped by the filter),
            # capped at REQUEST_BATCH_SIZE_PER_TURN
            num_to_generate_this_attempt = min(REQUEST_BATCH_SIZE_PER_TURN, num_needed_for_turn + 5)

            # Generate candidate query strings with the LLM
            candidate_qs_strings = generate_candidate_qs_with_llm(
                tool_schemas_json,
                num_to_generate=num_to_generate_this_attempt,
                # Only the accepted query strings are passed to the LLM
                existing_qs_list=current_qs_list_for_prompting,
            )

            # The LLM returned no valid candidates (or the call failed)
            if not candidate_qs_strings:
                print("LLM did not return any valid candidate queries or an error occurred. Retrying after delay...")
                time.sleep(5) # wait briefly before retrying
                continue

            # *** Changed part: call the Sentence Transformer based filter ***
            # Filter the candidates and compute their scores
            # The filter receives the full list of query objects and the loaded model
            new_query_objects = filter_and_score_qs_sentence_transformer(
                candidate_qs=candidate_qs_strings,
                existing_query_objects=accepted_query_objects, # compared against everything accepted so far
                model=model, # the loaded Sentence Transformer model
                similarity_threshold=SIMILARITY_THRESHOLD,
                top_n_similar=TOP_N_SIMILAR
            )
            # **********************************************************

            # Add the newly accepted query objects
            added_now = 0
            for obj in new_query_objects:
                # Do not exceed this turn's target
                if added_in_this_turn < target_for_this_turn:
                    accepted_query_objects.append(obj)
                    # Also append to the prompting list so the next LLM call in this
                    # turn can sample it as an example to avoid (the filter itself
                    # reads accepted_query_objects, updated on the line above)
                    current_qs_list_for_prompting.append(obj['q'])
                    added_in_this_turn += 1
                    added_now += 1
                else:
                    break # stop once this turn's target is met

            print(f"Accepted {added_now} new diverse query objects in this attempt.")

            # From the second attempt on, warn when no progress was made
            if added_now == 0 and attempts_this_turn > 1:
                print("Warning: No new diverse queries accepted in this attempt.")

            # Check whether this turn's target has been reached
            if added_in_this_turn >= target_for_this_turn:
                print(f"--- Turn {turn} goal reached ({added_in_this_turn} new queries added). ---")
                break # target met: leave this turn's attempt loop

            time.sleep(1) # short delay between attempts

        # At the end of each turn (target met or attempts exhausted) save the updated list
        total_added_this_session += added_in_this_turn
        save_queries_with_scores(accepted_query_objects, QUERIES_FILENAME)
        print(f"--- End of Turn {turn}. Total query objects now: {len(accepted_query_objects)}. Added this turn: {added_in_this_turn}. ---")


    # --- Final summary ---
    print("-" * 30)
    print(f"Generation process completed after {NUM_GENERATION_TURNS} turns.")
    print(f"Total query objects generated or loaded: {len(accepted_query_objects)}")
    print(f"Total new query objects added in this session: {total_added_this_session}")
    print(f"Final results saved to {QUERIES_FILENAME}")

    print("\nFinal list of diverse query objects (showing last added marked with '*'):")
    # Index of the first query added during this session (new ones are appended at the end)
    start_index = max(0, len(accepted_query_objects) - total_added_this_session)
    for i, obj in enumerate(accepted_query_objects):
         marker = "*" if i >= start_index else " " # mark entries added this session
         # Key names follow the objects returned by filter_and_score_qs_sentence_transformer
         similar_dict = obj.get('most_similar_instructions', {}) # most-similar mapping: query text -> score
         # Compact rendering of the similar entries (first 30 characters of the query + score)
         similar_items = [f"'{q[:30]}...':{s:.2f}" for q, s in similar_dict.items()]
         similar_str = ", ".join(similar_items) if similar_items else "{}" # show {} when empty

         # Max / average similarity scores (0 when a loaded object lacks them)
         max_sim_score = obj.get('max_similarity_score_against_all', 0)
         avg_sim_score = obj.get('avg_similarity_score', 0)

         # Final output line
         print(f"{marker} {i+1}. q: \"{obj['q']}\" (MaxS: {max_sim_score:.3f}, AvgS: {avg_sim_score:.3f}, TopSim: {similar_str})")


    # --- Placeholder for Step 2 (Generating Full Blueprints) ---
    print("\n--- Placeholder for Step 2: Generating Full Blueprints ---")
    # The accepted_query_objects list can now be iterated, using each 'q' field
    # to implement the logic that generates a full blueprint.
    # Example:
    # final_blueprints = []
    # for query_obj in accepted_query_objects:
    #     q_final = query_obj['q']
    #     # blueprint_dict = generate_full_blueprint_for_q(q_final, tool_schemas_json, client)
    #     # if blueprint_dict: final_blueprints.append(blueprint_dict)
    # print(f"\n--- Generated {len(final_blueprints)} full Blueprints ---")

