In [None]:
# Google Colab only: uncomment the two lines below to authenticate the current user.
# from google.colab import auth
# auth.authenticate_user()

In [None]:
from src.evaluator.evaluator import (
    run_subjective_evaluation,
    run_free_form_evaluation,
    run_rubric_evaluation,
)
from src.data import get_evaluation_dataset

In [None]:
import json
from pathlib import Path
from datetime import datetime

# Load the dataset that will be evaluated.
input_data = get_evaluation_dataset()

# Name of the model passed to the evaluation functions as `model_name`.
evaluation_model_name = "claude-sonnet-4-5"

# Create a per-run output directory named after the current local timestamp.
# The path is relative, so it is resolved against the current working directory.
timestamp = f"{datetime.now():%Y-%m-%d-%H-%M-%S}"
output_dir = Path("src/data/evaluation_result", timestamp)
output_dir.mkdir(parents=True, exist_ok=True)
print(f"出力ディレクトリ: {output_dir}")

In [None]:
# 1. Subjective Evaluation
# Runs the subjective evaluator on every dataset entry, prints each result,
# and writes all collected results to <output_dir>/subjective.json.
results = []
for i, data in enumerate(input_data, start=1):
    print(f"\n{'='*20} Data {i} {'='*20}\n")

    # Build the conversation transcript: one "role: content" line per prompt,
    # followed by the LLM response as the final assistant turn.
    conversation = "".join(
        f"{p['role']}: {p['content']}\n" for p in data['prompts']
    )
    conversation += f"assistant: {data['llm_response_text']}"

    result = run_subjective_evaluation(conversation=conversation, model_name=evaluation_model_name)
    if not result:
        # A falsy result is left out of the saved list; report it so the gap
        # between the dataset and the saved file is visible in the output.
        print(f"Data {i}: 評価結果が得られなかったためスキップしました。\n")
        continue

    print(json.dumps(result, ensure_ascii=False, indent=2))
    results.append(result)
    print("\n")

# Save the collected results as a JSON file (UTF-8, non-ASCII kept readable).
if results:
    output_path = output_dir / "subjective.json"
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"保存しました: {output_path}")
else:
    # Make it explicit that no file was written instead of finishing silently.
    print("保存対象の結果がありません (subjective.json は作成されません)。")


In [None]:
# 2. Free Form Evaluation
# Runs the free-form evaluator on every dataset entry together with that
# entry's rubrics, prints each result, and writes all collected results to
# <output_dir>/free_form.json.
results = []
for i, data in enumerate(input_data, start=1):
    print(f"\n{'='*20} Data {i} {'='*20}\n")

    # Build the conversation transcript: one "role: content" line per prompt,
    # followed by the LLM response as the final assistant turn.
    conversation = "".join(
        f"{p['role']}: {p['content']}\n" for p in data['prompts']
    )
    conversation += f"assistant: {data['llm_response_text']}"

    # Rubrics attached to this entry; forwarded to the evaluator unchanged.
    rubrics = data['rubrics']

    result = run_free_form_evaluation(conversation=conversation, rubrics=rubrics, model_name=evaluation_model_name)
    if not result:
        # A falsy result is left out of the saved list; report it so the gap
        # between the dataset and the saved file is visible in the output.
        print(f"Data {i}: 評価結果が得られなかったためスキップしました。\n")
        continue

    print(json.dumps(result, ensure_ascii=False, indent=2))
    results.append(result)
    print("\n")

# Save the collected results as a JSON file (UTF-8, non-ASCII kept readable).
if results:
    output_path = output_dir / "free_form.json"
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"保存しました: {output_path}")
else:
    # Make it explicit that no file was written instead of finishing silently.
    print("保存対象の結果がありません (free_form.json は作成されません)。")


In [None]:
# 3. Rubric Evaluation
# Runs the rubric evaluator on every dataset entry, prints each result as
# JSON, and writes all collected results to <output_dir>/rubric.json.
results = []
for i, data in enumerate(input_data, start=1):
    print(f"\n{'='*20} Data {i} {'='*20}\n")

    result = run_rubric_evaluation(data=data, model_name=evaluation_model_name)
    if not result:
        # A falsy result is left out of the saved list; report it so the gap
        # between the dataset and the saved file is visible in the output.
        print(f"Data {i}: 評価結果が得られなかったためスキップしました。\n")
        continue

    # Convert the result object to a plain dict so it can be JSON-serialized.
    # NOTE(review): `model_dump()` suggests a Pydantic model — confirm.
    result_dict = result.model_dump()
    print(json.dumps(result_dict, ensure_ascii=False, indent=2))
    results.append(result_dict)
    print("\n")

# Save the collected results as a JSON file (UTF-8, non-ASCII kept readable).
if results:
    output_path = output_dir / "rubric.json"
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"保存しました: {output_path}")
else:
    # Make it explicit that no file was written instead of finishing silently.
    print("保存対象の結果がありません (rubric.json は作成されません)。")
