In [2]:
import llama_index
from llama_index.vector_stores.elasticsearch import ElasticsearchStore
from llama_index.core import Document, Settings, VectorStoreIndex, QueryBundle, StorageContext, load_index_from_storage

In [2]:
import pandas as pd

# Read the raw reviews, drop the `photo` and `author_name` columns, and rename
# `business_name` / `text` to `restaurant_name` / `review`.
data = pd.read_csv("reviews.csv")
df = (
    data
    .drop(columns=["photo", "author_name"])
    .rename(columns={"business_name": "restaurant_name", "text": "review"})
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1100 entries, 0 to 1099
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   restaurant_name  1100 non-null   object
 1   review           1100 non-null   object
 2   rating           1100 non-null   int64 
 3   rating_category  1100 non-null   object
dtypes: int64(1), object(3)
memory usage: 34.5+ KB


In [3]:
# Build one Document per DataFrame row: the review text becomes the document
# body, and the restaurant name, rating and rating category become metadata.
records = df.to_dict("records")
documents = [
    Document(
        text=row["review"],
        metadata={
            "name": row["restaurant_name"],
            "rating": row["rating"],
            "category": row["rating_category"],
        },
    )
    for row in records
]

In [None]:
import os

# Read the Gemini API key from the environment rather than hardcoding it;
# a missing GEMINI_API_KEY variable raises KeyError here.
API_KEY = os.environ["GEMINI_API_KEY"]

In [3]:
from llama_index.embeddings.gemini import GeminiEmbedding

# Gemini embedding model, authenticated with the key read from the environment.
embed_model = GeminiEmbedding(
    model_name="models/text-embedding-004",
    api_key=API_KEY,
)

In [9]:
# Vector store backed by the Elasticsearch instance at localhost:9200, using
# the "reviewresto" index with `review` as the text field and `review_vector`
# as the vector field.
es_vector_store = ElasticsearchStore(
    es_url="http://localhost:9200/",
    index_name="reviewresto",
    text_field="review",
    vector_field="review_vector",
)

In [5]:
# Register the Gemini embedding model on the global llama_index Settings object.
Settings.embed_model = embed_model

In [None]:
# Build the index from the review documents on top of the Elasticsearch store,
# give it a fixed id, and persist the storage context under ./storage.
storage_context = StorageContext.from_defaults(
    vector_store=es_vector_store,
)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)
index.set_index_id("vector_index")
index.storage_context.persist(persist_dir="./storage")

In [6]:
# Reload the persisted index by id, re-attaching the Elasticsearch vector store.
# NOTE: per the LlamaIndex persistence docs, an external vector store must be
# passed again when rebuilding the StorageContext from `persist_dir`; without
# it the context is not connected to the Elasticsearch index that holds the
# embeddings (only locally persisted simple stores would be restored).
storage_context = StorageContext.from_defaults(
    vector_store=es_vector_store,
    persist_dir="./storage",
)
index = load_index_from_storage(
    storage_context=storage_context,
    index_id="vector_index",
)

In [8]:
from llama_index.llms.gemini import Gemini

# Answer a question over the indexed reviews with Gemini, retrieving the ten
# most similar entries.
llm = Gemini(model="models/gemini-1.5-flash", api_key=API_KEY)
query_engine = index.as_query_engine(llm=llm, similarity_top_k=10)

query = "what is Sardines"
# Embed the query up front with the configured embedding model and hand the
# vector to the engine inside the QueryBundle.
query_embedding = Settings.embed_model.get_query_embedding(query=query)
bundle = QueryBundle(query_str=query, embedding=query_embedding)

response = query_engine.query(bundle)
print(response.response)

Based on the provided reviews, sardines are a type of oily fish served in large portions.  Some find them delicious, while others find them too oily.  They are often served with bread.

