In [67]:
!pip install -U -q google-generativeai

In [68]:
import textwrap
import numpy as np
import pandas as pd

import google.generativeai as genai

# Used to securely store your API key
from google.colab import userdata

from IPython.display import Markdown

In [69]:
# Or use `os.getenv('API_KEY')` to fetch an environment variable.
API_KEY=userdata.get('GEMINI_API_KEY')

genai.configure(api_key=API_KEY)

In [84]:
import pandas as pd

# Replace these with your actual CSV links
candidates = ['syaikhu', 'jeje', 'dedi', 'acep']
csv_links = [
    (x, f"https://raw.githubusercontent.com/rendybjunior/pilkada-rag/refs/heads/main/data/{x}.csv")
    for x in candidates
]

# Create an empty list to store the dataframes
dfs = []

# Iterate through the CSV links and load each into a dataframe
for candidate, link in csv_links:
  try:
    df = pd.read_csv(link)
    df['candidate'] = candidate
    dfs.append(df)
  except Exception as e:
    print(f"Error loading CSV from {link}: {e}")

# Concatenate all dataframes into one
combined_df = pd.concat(dfs, ignore_index=True)
combined_df.dropna(inplace=True)

# Print the combined dataframe (optional)
df = combined_df[['title', 'box_text', 'candidate']]

In [85]:
def find_best_passage(query, dataframe):
  """
  Compute the distances between the query and each document in the dataframe
  using the dot product.
  """
  query_embedding = genai.embed_content(model='models/embedding-001',
                                        content=query,
                                        task_type="retrieval_query")
  dot_products = np.dot(np.stack(dataframe['embeddings']), query_embedding["embedding"])
  idx = np.argmax(dot_products)
  return dataframe.iloc[idx]['title'], dataframe.iloc[idx]['box_text'] # Return text from index with max value

In [86]:
# Get the embeddings of each text and add to an embeddings column in the dataframe
def embed_fn(title, text):
  return genai.embed_content(model='models/embedding-001',
                             content=text,
                             task_type="retrieval_document",
                             title=title)["embedding"]

In [87]:
df['embeddings'] = df.apply(lambda row: embed_fn(row['title'], row['box_text']), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['embeddings'] = df.apply(lambda row: embed_fn(row['title'], row['box_text']), axis=1)


In [88]:
def make_prompt(query, relevant_passage):
  escaped = relevant_passage.replace("'", "").replace('"', "").replace("\n", " ")
  prompt = textwrap.dedent("""You are a helpful and informative bot that answers questions using text from the reference passage included below. \
  Be sure to respond in a complete sentence, being comprehensive, including all relevant background information. \
  However, you are talking to a non-technical audience, so be sure to break down complicated concepts and \
  strike a friendly and converstional tone. \
  If the passage is irrelevant to the answer, you may ignore it.
  QUESTION: '{query}'
  PASSAGE: '{relevant_passage}'

    ANSWER:
  """).format(query=query, relevant_passage=escaped)

  return prompt

In [89]:
def answer_this(query):
  title, box_text = find_best_passage(query, df)
  prompt = make_prompt(query, f'{title} : {box_text}')
  answer = genai.GenerativeModel('gemini-1.5-pro-latest').generate_content(prompt)
  return Markdown(answer.text)

In [90]:
# Catatan: contoh saja, bukan mendukung paslon tertentu
query = "Siapa yang punya visi atau mendukung hal terkait teknologi informasi?"
answer_this(query)

Sayangnya, teks ini tidak menyebutkan siapa saja yang mendukung teknologi informasi secara umum.  Teks ini hanya memberitahu kita bahwa Dedi Mulyadi, calon Gubernur Jabar, mendukung penggunaan teknologi digital, khususnya untuk museum di Jawa Barat.  Beliau ingin museum-museum ini didigitalisasi agar lebih mudah diakses generasi muda dan untuk menyebarkan pengetahuan tentang pengobatan tradisional. 
