In [1]:
import pandas as pd
import os 
import google.generativeai as genai
from tqdm.auto import tqdm
from dotenv import load_dotenv
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv('../Data/data_danang_ok.csv')
documents = df.to_dict(orient='records')

In [3]:
entry_template = """
Name: {name}
Type: {type}
Description: {description}
Time: {time}
Price: {price}
Location: {location}
Area: {area}
Note: {note}
""".strip()

prompt_template = """
You emulate a user of our localguide chatbot assistant application.
Formulate 3 questions this user might ask based on a provided record.
Make the questions specific to this record.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as few words as possible from the record.
RECORD:
{entry}

Provide the output in parsable JSON without using code blocks:
{{"questions": ["question1", "question2", "question3"]}}
""".strip()

In [4]:
load_dotenv()
api_key = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=api_key)
modelgpt = genai.GenerativeModel('gemma-3-12b-it')

In [5]:
def llm(prompt):
    response = modelgpt.generate_content(prompt)
    return response.text

In [6]:
def generate_questions(doc):
    entry = entry_template.format(**doc)
    prompt = prompt_template.format(entry=entry)
    response = llm(prompt)
    return response

In [7]:
results = []
for doc in tqdm(documents):
    doc_id = doc['id'] if 'id' in doc else None
    questions_raw = generate_questions(doc)
    try:
        questions = json.loads(questions_raw)
        for q in questions['questions']:
            results.append({'id': doc_id, 'question': q})
    except Exception as e:
        print(f"Lỗi với doc_id {doc_id}: {e}")
        print(questions_raw)

df_results = pd.DataFrame(results)
df_results.to_csv('../Data/ground-truth-retrieval.csv', index=False)
print("Done! File lưu tại ../data/ground-truth-retrieval.csv")

 10%|████▉                                            | 30/299 [00:50<07:34,  1.69s/it]


ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_requests_per_model"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel"
  quota_dimensions {
    key: "model"
    value: "gemma-3-12b"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 47
}
]

In [None]:
final_results[0]