In [1]:
import pandas as pd
import os 
import google.generativeai as genai
from tqdm.auto import tqdm
from dotenv import load_dotenv
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv('../Data/data_danang_ok.csv')
documents = df.to_dict(orient='records')

In [3]:
entry_template = """
Name: {name}
Type: {type}
Description: {description}
Time: {time}
Price: {price}
Location: {location}
Area: {area}
Note: {note}
""".strip()

prompt_template = """
You emulate a user of our localguide chatbot assistant application.
Formulate 1 questions this user might ask based on a provided record.
Make the questions specific to this record.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as few words as possible from the record.
RECORD:
{entry}

Provide the output in parsable JSON without using code blocks:
{{"questions": ["question1", "question2", "question3"]}}
""".strip()

In [8]:
load_dotenv()
api_key = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=api_key)
modelgpt = genai.GenerativeModel('gemma-3-4b-it')

In [5]:
def llm(prompt):
    response = modelgpt.generate_content(prompt)
    return response.text

In [6]:
def generate_questions(doc):
    entry = entry_template.format(**doc)
    prompt = prompt_template.format(entry=entry)
    response = llm(prompt)
    return response

In [9]:
results = []
for doc in tqdm(documents):
    doc_id = doc['id'] if 'id' in doc else None
    questions_raw = generate_questions(doc)
    try:
        questions = json.loads(questions_raw)
        for q in questions['questions']:
            results.append({'id': doc_id, 'question': q})
    except Exception as e:
        print(f"Lỗi với doc_id {doc_id}: {e}")
        print(questions_raw)

df_results = pd.DataFrame(results)
df_results.to_csv('../Data/ground-truth-retrieval.csv', index=False)
print("Done! File lưu tại ../data/ground-truth-retrieval.csv")

  0%|▏                                                 | 1/299 [00:01<09:15,  1.86s/it]

Lỗi với doc_id eat_000: Expecting value: line 1 column 1 (char 0)
```json
{
  "questions": [
    "What kind of dish is Bun Cha Ca Ba Lu?",
    "How much does a bowl of Bun Cha Ca Ba Lu cost?",
    "When is Bun Cha Ca Ba Lu typically served?"
  ]
}
```


  1%|▎                                                 | 2/299 [00:03<07:52,  1.59s/it]

Lỗi với doc_id eat_001: Expecting value: line 1 column 1 (char 0)
```json
{
  "questions": [
    "What kind of noodles does My Quang Ba Mua serve?",
    "How much does a serving at 95 Nguyen Tri Phuong cost?",
    "When is My Quang Ba Mua typically open – is it an afternoon eatery?"
  ]
}
```


  1%|▌                                                 | 3/299 [00:04<07:25,  1.50s/it]

Lỗi với doc_id eat_002: Expecting value: line 1 column 1 (char 0)
```json
{
  "questions": [
    "What kind of food is Banh Xeo Ba Duong?",
    "How much does a Banh Xeo cost at this location?",
    "Where is Banh Xeo Ba Duong located specifically?"
  ]
}
```


  1%|▋                                                 | 4/299 [00:06<07:23,  1.50s/it]

Lỗi với doc_id eat_003: Expecting value: line 1 column 1 (char 0)
```json
{
  "questions": [
    "What kind of dish does Oc Hut Cay Duong serve?",
    "Approximately how much does a serving at this eatery cost?",
    "Where is Oc Hut Cay Duong located specifically?"
  ]
}
```


  2%|▊                                                 | 5/299 [00:07<07:34,  1.54s/it]

Lỗi với doc_id eat_004: Expecting value: line 1 column 1 (char 0)
```json
{
  "questions": [
    "What kind of dish is Banh Canh Ruong?",
    "When is Banh Canh Ruong typically served?",
    "How much does a bowl of Banh Canh Ruong cost?"
  ]
}
```


  2%|█                                                 | 6/299 [00:09<07:38,  1.56s/it]

Lỗi với doc_id eat_005: Expecting value: line 1 column 1 (char 0)
```json
{
  "questions": [
    "What kind of food does Nem Lui Ba Sen serve?",
    "How much does a serving at 183 Tran Phu cost?",
    "When is Nem Lui Ba Sen typically open – is it evening?"
  ]
}
```


  2%|█▏                                                | 7/299 [00:10<07:40,  1.58s/it]

Lỗi với doc_id eat_006: Expecting value: line 1 column 1 (char 0)
```json
{
  "questions": [
    "What kind of dish is Bun Mam Nem Ba Thuyen?",
    "When is Bun Mam Nem Ba Thuyen typically served?",
    "How much does a serving of Bun Mam Nem Ba Thuyen cost?"
  ]
}
```


  3%|█▎                                                | 8/299 [00:12<07:41,  1.59s/it]

Lỗi với doc_id eat_007: Expecting value: line 1 column 1 (char 0)
```json
{
  "questions": [
    "What kind of dish does Com Ga A Hai offer?",
    "How much does a serving at Com Ga A Hai cost?",
    "Where is Com Ga A Hai located specifically?"
  ]
}
```


  3%|█▎                                                | 8/299 [00:13<08:14,  1.70s/it]


KeyboardInterrupt: 

In [10]:
final_results[0]

NameError: name 'final_results' is not defined