In [1]:
import json
from pathlib import Path

In [2]:
# Directory holding the raw run outputs, and the four run files that get
# converted into scoring input further down.
OUTPUT_DIR = Path("/home/matt/dere/gensee_odr/outputs")
target_files = [
    OUTPUT_DIR / file_name
    for file_name in (
        "drb_g25_tav.json",
        "drb_g25_gen.json",
        "drb_g3_tav.json",
        "drb_g3_gen.json",
    )
]

In [3]:
# Load the first run file for interactive inspection. JSON is UTF-8 by
# specification, so decode it explicitly instead of relying on the locale's
# default encoding.
f0 = json.loads(target_files[0].read_text(encoding="utf-8"))

In [4]:
# Score locally generated Deep Research reports against DeepResearch-Bench references.

# Input: a JSON file with either:
#   A) {"prompt1": "your article string", "prompt2": "...", ...}
#   B) [{"prompt": "...", "article": "..."}, ...]
# Optional: query.jsonl to infer per-prompt language (en/zh). If absent, defaults to --language or 'en'.

In [None]:
# Scoring-ready copies of the target files go into a dedicated subdirectory of
# the output directory; create it up front so later writes cannot fail on a
# missing folder.
SCORE_FILE_DIR = OUTPUT_DIR / "scoring_files"
SCORE_FILE_DIR.mkdir(exist_ok=True, parents=True)

# Build the paths from SCORE_FILE_DIR (the directory created above) rather than
# from each target's own parent, so the created directory and the written paths
# cannot drift apart if a target file ever lives outside OUTPUT_DIR.
scoring_files = [SCORE_FILE_DIR / tgt_file.name for tgt_file in target_files]

In [9]:
# Spot-check one generated report: the entry stored under key "69" of the
# first run file.
sample_entry = f0["69"]
print(sample_entry["final_report"])

# A Comparative Analysis of Google's A2A and MCP Protocols

This report provides a detailed explanation of Google's Automotive-to-Accessory (A2A) protocol and the Mobile Carrier Plan (MCP) protocol. It analyzes their differences and connections, elaborates on the innovative aspects of the A2A protocol, and details the specific problems it is designed to address, based on available technical documentation.

## 1. Definitions

To establish a clear foundation for comparison, this section defines the two protocols and the key technical terms necessary for their understanding.

### Google's A2A (Automotive-to-Accessory) Protocol

The A2A protocol is an application-layer protocol developed by Google for the Android Automotive OS ecosystem. Its primary function is to facilitate robust, high-throughput, and low-latency communication between an Android Automotive head unit (the in-car infotainment system) and accessory devices, such as a user's mobile phone. It is designed to operate over moder

In [10]:
# Convert every raw run file into the {prompt: article} mapping (input format
# "A" of the scoring script) and write it to the matching scoring file.
for tgt_file, score_file in zip(target_files, scoring_files):
    # JSON is UTF-8 by specification; do not depend on the locale's default.
    data = json.loads(tgt_file.read_text(encoding="utf-8"))
    entry_dict = {}
    for prompt_id, info in data.items():
        prompt = info["prompt"]
        if prompt in entry_dict:
            # The output is keyed by prompt text, so a repeated prompt replaces
            # the earlier article. Keep that last-wins behavior, but make the
            # otherwise silent data loss visible.
            print(
                f"Warning: duplicate prompt for id {prompt_id} in {tgt_file}; "
                "overwriting the earlier article"
            )
        entry_dict[prompt] = info["final_report"]
    # Make the cell self-sufficient: ensure the destination directory exists.
    score_file.parent.mkdir(parents=True, exist_ok=True)
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # written as UTF-8 explicitly or a non-UTF-8 locale would fail/mis-encode.
    score_file.write_text(
        json.dumps(entry_dict, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )
    print(f"Wrote {len(entry_dict)} entries to {score_file}")

Wrote 3 entries to /home/matt/dere/gensee_odr/outputs/scoring_files/drb_g25_tav.json
Wrote 3 entries to /home/matt/dere/gensee_odr/outputs/scoring_files/drb_g25_gen.json
Wrote 3 entries to /home/matt/dere/gensee_odr/outputs/scoring_files/drb_g3_tav.json
Wrote 3 entries to /home/matt/dere/gensee_odr/outputs/scoring_files/drb_g3_gen.json


In [8]:
# Show where each scoring file lives, one path per line.
for scoring_path in scoring_files:
    print(str(scoring_path))

/home/matt/dere/gensee_odr/outputs/scoring_files/drb_g25_tav.json
/home/matt/dere/gensee_odr/outputs/scoring_files/drb_g25_gen.json
/home/matt/dere/gensee_odr/outputs/scoring_files/drb_g3_tav.json
/home/matt/dere/gensee_odr/outputs/scoring_files/drb_g3_gen.json


# Reformat into JSONL

Each line of the output file is one JSON object of this form:

```
{
  "response": "Your model's generated survey text here...",
  "arxiv_id": "2024.12345",  // Optional: will be extracted from filename if not present
  "query": "Original query prompt",  // Optional
  // ... other metadata fields
}
```


In [7]:
# The "rb_*" run files that get reformatted into JSONL; they sit in the same
# output directory as the other run files.
report_bench_files = [
    OUTPUT_DIR / file_name
    for file_name in (
        "rb_g25_tav.json",
        "rb_g25_gen.json",
        "rb_g3_tav.json",
        "rb_g3_gen.json",
    )
]

In [8]:
# Load the first "rb_*" run file to inspect its structure. JSON is UTF-8 by
# specification, so decode it explicitly instead of relying on the locale's
# default encoding.
dummy = json.loads(report_bench_files[0].read_text(encoding="utf-8"))

In [9]:
# Top-level keys of the first "rb_*" run file loaded into `dummy`.
dummy.keys()

dict_keys(['2011.13534', '2206.05498', '2207.14394'])

In [11]:
# Fields stored under a single top-level key of `dummy`
# ("2011.13534" serves as the sample entry).
dummy["2011.13534"].keys()

dict_keys(['prompt', 'final_report', 'total_cost_usd', 'total_prompt_tokens', 'total_completion_tokens', 'spans', 'raw_events'])

In [13]:
# Reformat each "rb_*" run file into JSONL next to the original (same name,
# ".jsonl" suffix). Every output line is one JSON object with:
#   "response" - the entry's "final_report" text
#   "arxiv_id" - the entry's top-level key in the input file
#   "query"    - the entry's "prompt", or "" when the entry has none
jsonl_files = []
for rbf in report_bench_files:
    # JSON is UTF-8 by specification; do not depend on the locale's default.
    data = json.loads(rbf.read_text(encoding="utf-8"))
    jsonl_path = rbf.with_suffix(".jsonl")
    with jsonl_path.open("w", encoding="utf-8") as f:
        for arxiv_id, item in data.items():
            entry = {
                "response": item["final_report"],
                "arxiv_id": arxiv_id,
                "query": item.get("prompt", ""),
            }
            f.write(json.dumps(entry) + "\n")
    # Report success only once the file has been closed (and thus flushed).
    print(f"Wrote {jsonl_path}")
    jsonl_files.append(jsonl_path)

Wrote /home/matt/dere/gensee_odr/outputs/rb_g25_tav.jsonl
Wrote /home/matt/dere/gensee_odr/outputs/rb_g25_gen.jsonl
Wrote /home/matt/dere/gensee_odr/outputs/rb_g3_tav.jsonl
Wrote /home/matt/dere/gensee_odr/outputs/rb_g3_gen.jsonl
