In [1]:
# 1  Install & import libraries
!pip install -q google-genai tqdm

import json
import os
import re
import shutil
import time
from pathlib import Path
from typing import Dict, List, Tuple

import tqdm
from google import genai
from google.genai import types

In [13]:
# 2  Authenticate Gemini & mount Drive


# === Provide your API key (or export it beforehand) ===
# Only ask for the key when it is missing or empty: an unconditional assignment
# would clobber a key exported beforehand, and prompting keeps the secret out of
# the saved notebook.
if not os.environ.get('GEMINI_API_KEY'):
    from getpass import getpass
    os.environ['GEMINI_API_KEY'] = getpass('Enter GEMINI_API_KEY: ')
client = genai.Client()
print('Gemini client initialised')

# Mount Drive (optional – skipped automatically when google.colab is unavailable)
try:
    from google.colab import drive
except ImportError:
    print('google.colab not available – skipping Drive mount')
else:
    drive.mount('/content/drive', force_remount=True)

✅ Gemini client initialised
Mounted at /content/drive


In [3]:
# ## 3  Copy evidence files into the working directory

# Source folder on the mounted Drive and the local scratch folder used by the run.
drive_root = Path('/content/drive/MyDrive/KGP_evidence')
work_dir = Path('/content/llm_answer_generation')
work_dir.mkdir(parents=True, exist_ok=True)

# Stage every evidence file locally, keeping its original file name and metadata.
EVIDENCE_FILES = (
    'hotpotqa_evidence_qwen.json',
    '2wiki_evidence_qwen.json',
)
for filename in EVIDENCE_FILES:
    source_path = drive_root / filename
    target_path = work_dir / filename
    shutil.copy2(source_path, target_path)

print('📄 Evidence files ready')

📄 Evidence files ready


In [4]:
# ## 4  Utility helpers


def load_json(path: Path) -> List[Dict]:
    """Read and parse the JSON document stored at ``path``.

    The file is decoded explicitly as UTF-8 so that non-ASCII text (which
    ``save_json`` writes unescaped) is read back identically regardless of the
    platform's default encoding.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The parsed JSON content. It is annotated as a list of dict records, but
        the shape is not validated here.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def save_json(obj, path: Path):
    """Serialise ``obj`` as pretty-printed JSON at ``path``.

    Missing parent directories are created first. The file is written as UTF-8
    because ``ensure_ascii=False`` emits non-ASCII characters verbatim, which
    would otherwise depend on (and may fail under) the platform's default
    encoding.

    Args:
        obj: Any JSON-serialisable object.
        path: Destination file; a plain string path is accepted as well.
    """
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(obj, f, indent=4, ensure_ascii=False)


def _clean_answer(text: str) -> str:
    """Collapse whitespace & strip any <think>…</think> blocks if they slipped through."""
    text = re.sub(r"<think[^>]*?>.*?</think>", " ", text, flags=re.S)
    return re.sub(r"\s+", " ", text).strip()


def split_thoughts_and_answer(parts) -> Tuple[str, str]:
    """Separate thought summaries from the final answer.

    Each element of ``parts`` is inspected through ``getattr``: parts without a
    truthy ``text`` attribute are skipped, parts whose ``thought`` attribute is
    truthy are collected as thoughts, and all remaining parts are collected as
    answer text.

    Args:
        parts: Iterable of response parts. Passing a non-iterable such as
            ``None`` raises ``TypeError``, which callers may catch in order to
            fall back to another source for the answer.

    Returns:
        A ``(thoughts, answer)`` pair: thought texts joined with newlines and
        answer texts joined with single spaces, both stripped.
    """
    thoughts, answer = [], []
    for p in parts:
        if not getattr(p, 'text', None):
            continue
        if getattr(p, 'thought', False):
            thoughts.append(p.text)
        else:
            answer.append(p.text)
    return "\n".join(thoughts).strip(), " ".join(answer).strip()

In [11]:
# ## 5  Generation pipeline (thinking ON, thoughts stripped from `response`)

def generate_answers(
    evidence_file: Path,
    save_file: Path,
    *,
    max_output_tokens: int = 128,
    temperature: float = 0.6,
    top_p: float = 0.95,
    thinking_budget: int = -1,   # dynamic
    pause_secs: float = 5.0,
):
    """Answer every question in ``evidence_file`` with Gemini and checkpoint the results.

    For each record a prompt is built from its ``question`` and, when the
    ``evidence`` list is non-empty, the numbered evidence passages. The prompt
    is sent to ``gemini-2.5-flash`` through the module-level ``client`` with
    thought summaries included, and the response parts are split into thoughts
    and answer text.

    Args:
        evidence_file: JSON file of records; each record is indexed with the
            keys ``evidence``, ``question``, ``type`` and ``answer``.
        save_file: Output JSON path. It is rewritten after every record so an
            interrupted run keeps the answers produced so far.
        max_output_tokens: Forwarded to ``GenerateContentConfig``.
        temperature: Forwarded to ``GenerateContentConfig``.
        top_p: Forwarded to ``GenerateContentConfig``.
        thinking_budget: Forwarded to ``ThinkingConfig`` (``-1`` is the
            "dynamic" setting used by this notebook).
        pause_secs: Seconds to sleep after each request (rate limiting).

    Returns:
        A list with one dict per record holding ``type``, ``question``, ``gt``
        (the record's ``answer``), ``thoughts``, ``answer`` (raw answer text)
        and ``response`` (the cleaned answer).

    Raises:
        KeyError: If a record lacks one of the keys listed above.
    """
    data = load_json(evidence_file)
    responses = []

    prompt_template = (
        "Given the question and its associated contexts below, please generate a concise, "
        "precise answer in English. The answer must strictly adhere to the following guidelines:\n"
        "- The answer should be directly relevant to the question.\n"
        "- Provide the answer in a clear, straightforward format.\n"
        "- Limit your answer to no more than 10 words, focusing on the essential information requested.\n"
        "- If the provided contexts do not contain enough information but you know the answer, still provide it; "
        "otherwise respond with \"Information not available\".\n"
        "- Do not include any additional tokens, explanations, or information beyond the direct answer.\n\n"
        "QUESTION: {question}\n"
        "CONTEXT:\n{context}\n"
        "ANSWER:"
    )

    none_template = (
        "Given the following question, create a final answer in English to the question.\n"
        "QUESTION: {question}\n"
        "ANSWER:"
    )

    # The generation settings do not change between records, so build them once.
    gen_config = types.GenerateContentConfig(
        temperature=temperature,
        top_p=top_p,
        max_output_tokens=max_output_tokens,
        thinking_config=types.ThinkingConfig(
            thinking_budget=thinking_budget,
            include_thoughts=True
        ),
    )

    for rec in tqdm.tqdm(data, total=len(data)):
        ctx_list = rec['evidence']
        if ctx_list:
            # Number the passages from 1 so the prompt reads "1: ...", "2: ...".
            ctx_block = "\n".join(f"{i+1}: {c}" for i, c in enumerate(ctx_list))
            user_prompt = prompt_template.format(question=rec['question'], context=ctx_block)
        else:
            user_prompt = none_template.format(question=rec['question'])

        response = client.models.generate_content(
            model='gemini-2.5-flash',
            contents=user_prompt,
            config=gen_config,
        )

        try:
            cand_container = response.candidates
            cand = cand_container[0] if isinstance(cand_container, (list, tuple)) else cand_container
            parts = cand.content.parts
            thought_text, answer_text = split_thoughts_and_answer(parts)
        except (TypeError, IndexError, AttributeError):
            thought_text = ""
            # getattr only applies its default when the attribute is missing; a
            # present-but-None ``text`` must be replaced explicitly, otherwise
            # None reaches the regex clean-up below and aborts the whole run.
            answer_text = getattr(response, "text", None) or "Information not available"

        short_answer = _clean_answer(answer_text)

        responses.append({
            'type':     rec['type'],
            'question': rec['question'],
            'gt':       rec['answer'],
            'thoughts': thought_text,
            'answer':   answer_text,
            'response': short_answer,
        })

        save_json(responses, save_file)  # checkpoint
        time.sleep(pause_secs)           # respect 10 RPM

    print(f"Finished – saved to {save_file}")
    return responses


In [6]:
# 6  Run generation on HotpotQA & 2Wiki

# %%
# HotpotQA: read the staged evidence from the working directory and write the
# answers back to the Drive folder.
hotpot_out = drive_root / 'hotpotqa_answers_gemini.json'
hotpot_settings = dict(
    max_output_tokens=1024,
    temperature=0.6,
    top_p=0.95,
    thinking_budget=-1,  # dynamic
    pause_secs=5.0,
)
_ = generate_answers(
    work_dir / 'hotpotqa_evidence_qwen.json',
    hotpot_out,
    **hotpot_settings,
)


100%|██████████| 100/100 [13:42<00:00,  8.22s/it]

Finished – saved to /content/drive/MyDrive/KGP_evidence/hotpotqa_answers_gemini.json



