In [1]:
%pip install pandas langchain langchain_core langchain_community langchain_google_genai python-dotenv faiss-cpu

Collecting langchain_google_genai
  Downloading langchain_google_genai-1.0.6-py3-none-any.whl.metadata (3.8 kB)
Collecting langchain_core
  Downloading langchain_core-0.1.52-py3-none-any.whl.metadata (5.9 kB)
Collecting langsmith<0.1.0,>=0.0.77 (from langchain)
  Downloading langsmith-0.0.92-py3-none-any.whl.metadata (9.9 kB)
INFO: pip is looking at multiple versions of langchain-core to determine which version is compatible with other requirements. This could take a while.
Collecting langchain_core
  Downloading langchain_core-0.1.51-py3-none-any.whl.metadata (5.9 kB)
  Downloading langchain_core-0.1.50-py3-none-any.whl.metadata (5.9 kB)
  Downloading langchain_core-0.1.49-py3-none-any.whl.metadata (5.9 kB)
  Downloading langchain_core-0.1.48-py3-none-any.whl.metadata (5.9 kB)
  Downloading langchain_core-0.1.47-py3-none-any.whl.metadata (5.9 kB)
  Downloading langchain_core-0.1.46-py3-none-any.whl.metadata (5.9 kB)
  Downloading langchain_core-0.1.45-py3-none-any.whl.metadata (5.9 kB

In [None]:
# for pulling huggingface models
%pip install transformers bitsandbytes torch

In [32]:
import numpy as np
import pandas as pd
from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,)
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import FAISS
from langchain.schema.runnable import RunnablePassthrough
from langchain_community.document_loaders.dataframe import DataFrameLoader
from langchain.storage import LocalFileStore
from langchain.embeddings import CacheBackedEmbeddings
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline


try:
    from google.colab.userdata import get as getenv
    running_on_colab = True
except ImportError:
    from os import getenv
    import dotenv
    dotenv.load_dotenv(dotenv_path=".env")
    running_on_colab = False

print("Running on colab:", running_on_colab)

Running on colab: False


## Constants

In [45]:
# Dataset files: 
### You can find the data files here: https://drive.google.com/drive/folders/1UFYOc2HsPu5KHfyfwyE-P8p9MIC24ZjC?usp=sharing
PLACES_PATH = "data/places.csv"
REVIEWS_PATH = "data/reviews.csv"

# Embeddings cache store path:
EMBEDDINGS_CACHE_STORE="./cache/"
EMBEDDINGS_BATCH_SIZE=100

# Faiss cache and strategies for both cosine and euclidean distance
FAISS_REVIEWS_PATH_EUCLIDEAN = "faiss_index_euclidean"
FAISS_REVIEWS_PATH_COSINE = "faiss_index_cosine"
FAISS_INDEX_NAME = "index"
FAISS_DISTANCE_STRATEGY_EUCLIDEAN ='EUCLIDEAN_DISTANCE'
FAISS_DISTANCE_STRATEGY_COSINE = "COSINE_DISTANCE"


# Optional: for using LangSmith visualization tool
LANGCHAIN_API_KEY = getenv('LANGCHAIN_API_KEY')
LANGCHAIN_ENDPOINT = getenv('LANGCHAIN_ENDPOINT')

### LLM and Embedding model 
In the code there are two implemented options for each of LLM and Embedding models:  
one options to two pull the models (preferably tiny) using **HuggingFace** and utilize them through LangChain interfaces.
the other option is to use models API such as ollama (served Llama models), Cohere, Anthropic, ...

**Note:** having decided to proceed with the first option (pulling models from hugging-face) assure that you have required space and resources (GPU) available or alternatively execute the code on google-colab.


Format of *LLM_MODEL_NAME* and _EMBEDDING_MODEL_NAME_:  
**<MODEL_PROVIDER>::<MODEL_NAME>**

Available providers:
- **gemini**
- **hf** (huggingface)

*Default model provider: **gemini**

#### LLM model examples:
- gemini::gemini-1.5-flash
- hf::meta-llama/Llama-2-7b-hf

#### Embedding model examples:
- gemini::models/text-embedding-004
- hf::all-MiniLM-L6-v2

In [24]:
LLM_MODEL_NAME = "gemini::gemini-1.5-flash" #"gemini-pro"
EMBEDDING_MODEL_NAME = "gemini::models/text-embedding-004"


# Using gemini models, make sure that you have a Google API key:
GOOGLE_API_KEY = getenv('GOOGLE_API_KEY')

## Load Dataset
The data is collected and by another source code: **places.ipynb** which could be found here:

RestoRecommender:   
https://drive.google.com/drive/folders/1lCt-lWE1CbGOuc0eQYUnWfzsyO_bTgCZ?usp=sharing

And the collected data:
https://drive.google.com/drive/folders/1UFYOc2HsPu5KHfyfwyE-P8p9MIC24ZjC?usp=sharing

#### Example of places.csv:
|place_id|place_name                   |place_types|place_address                                |place_average_ratings|place_ratings_count|place_reviews_count|place_location|place_website                                                                                    |place_phone_number|place_price_level      |place_primary_type|has_delivery|has_dine_in|is_reservable|serves_breakfast|serves_lunch|serves_dinner|serves_beer|serves_wine|places_opening_hours                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
|--------|-----------------------------|-----------|---------------------------------------------|---------------------|-------------------|-------------------|--------------|-------------------------------------------------------------------------------------------------|------------------|-----------------------|------------------|------------|-----------|-------------|----------------|------------|-------------|-----------|-----------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|ChIJORN_mdnbfkcRq7g9fQtODbE|Rasa multi cuisine restaurant|['indian_restaurant', 'pizza_restaurant', 'restaurant', 'food', 'point_of_interest', 'establishment']|Via T. Aspetti, 51, 35132 Padova PD, Italy   |4.9                  |234                |                   |              |                                                                                                 |+39 347 360 4372  |                       |indian_restaurant |True        |True       |True         |                |True        |True         |True       |True       |['Monday: Closed', 'Tuesday: 7:00\u2009–\u200911:00\u202fPM','...']                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |


#### Example of reviews.csv:
|place_id|review                       |review_rating|review_publish_time                          |review_publish_time_str|
|--------|-----------------------------|-------------|---------------------------------------------|-----------------------|
|ChIJORN_mdnbfkcRq7g9fQtODbE|Really gentle and humble people. On top, food’s quality is great. They are not just serving a regular Indian menu, they have items from different parts of India which makes them a unique place to try.|5            |2024-03-17T14:46:18Z                         |2 months ago           |


In [42]:
def get_documents(content_func=lambda row:row['review'],
                  source_func=lambda row:row['place_id'],
                  metadata_fields=[]):

  # Load both data files
  places_df = pd.read_csv(PLACES_PATH)
  reviews_df = pd.read_csv(REVIEWS_PATH)
  reviews_df.drop_duplicates(inplace=True)
  places_df.drop_duplicates(inplace=True)

  # merge them on 'place_id'
  merged_df = pd.merge(places_df, reviews_df, on='place_id', how='inner')

  # add page_content and source columns using their corresponing functions
  merged_df['page_content'] = merged_df.apply(content_func, axis=1)
  merged_df['source'] = merged_df.apply(source_func, axis=1)

  # update metadata_fields with 'page_content', 'source'
  metadata_fields = list(set(metadata_fields + ['page_content', 'source']))

  loader = DataFrameLoader(merged_df[metadata_fields],page_content_column='page_content')
  return loader.load()

In [43]:
# def content_func(row) -> str:
#   content_fields = ['place_name', 'place_types', 'place_address', 'place_average_ratings', 'review']
#   return '\n'.join(f"{key}={row[key]}" for key in content_fields)

def content_func(row) -> str:
  content_fields = ["place_name",
                    "review",
                    "place_address",
                    "place_average_ratings",
                    "place_price_level",
                    "place_primary_type",
                    "has_delivery",
                    "is_reservable",
                    ]
  return '\n'.join(f"{key}={row[key]}" for key in content_fields)

metadata_fields = ["places_opening_hours", "place_website", "place_phone_number",]

documents = get_documents(content_func, metadata_fields=metadata_fields)

In [44]:
## Take a look at a sample document
print(documents[0].page_content)
print(documents[0].metadata)

place_name=Rasa multi cuisine restaurant
review=Really gentle and humble people. On top, food’s quality is great. They are not just serving a regular Indian menu, they have items from different parts of India which makes them a unique place to try.
place_address=Via T. Aspetti, 51, 35132 Padova PD, Italy
place_average_ratings=4.9
place_price_level=nan
place_primary_type=indian_restaurant
has_delivery=True
is_reservable=True
{'place_phone_number': '+39 347 360 4372', 'source': 'ChIJORN_mdnbfkcRq7g9fQtODbE', 'place_website': nan, 'places_opening_hours': "['Monday: Closed', 'Tuesday: 7:00\\u2009–\\u200911:00\\u202fPM', 'Wednesday: 7:00\\u2009–\\u200911:00\\u202fPM', 'Thursday: 12:00\\u2009–\\u20093:00\\u202fPM, 7:00\\u2009–\\u200911:00\\u202fPM', 'Friday: 12:00\\u2009–\\u20093:00\\u202fPM, 7:00\\u2009–\\u200911:00\\u202fPM', 'Saturday: 12:00\\u2009–\\u20093:00\\u202fPM, 7:00\\u2009–\\u200911:00\\u202fPM', 'Sunday: 12:00\\u2009–\\u20093:00\\u202fPM, 7:00\\u2009–\\u200911:00\\u202fPM']"}


## Load Embeddings model

In [33]:
def get_hf_embedding_model(embedding_model_name,
                           cache_embeddings_store=EMBEDDINGS_CACHE_STORE,
                           device='cpu',
                           normalize_embeddings=False,
                           ):
  model_kwargs = {'device': device}
  encode_kwargs = {'normalize_embeddings': normalize_embeddings} # Set `True` for cosine similarity
  embedding_model = HuggingFaceEmbeddings(
      model_name=embedding_model_name,
      model_kwargs=model_kwargs,
      encode_kwargs=encode_kwargs
      )
  store = LocalFileStore(cache_embeddings_store)
  embedding_model = CacheBackedEmbeddings.from_bytes_store(
                    embedding_model, store)
  return embedding_model

In [25]:
embedding_provider, _sep, embedding_model =  EMBEDDING_MODEL_NAME.partition("::")

if _sep == "": # Default
    embedding_provider = "gemini" 
    embedding_model = EMBEDDING_MODEL_NAME

if embedding_provider == "gemini":
    assert GOOGLE_API_KEY, "Please set GOOGLE_API_KEY"
    embedding_model = GoogleGenerativeAIEmbeddings(model=embedding_model, google_api_key=GOOGLE_API_KEY)
elif embedding_provider == "hf":
    embedding_model = HuggingFaceEmbeddings(model_name=embedding_model)
else:
    raise ValueError(f"Unknown embedding provider: {embedding_provider}")

In [31]:
_embedding_example = embedding_model.embed_query("One sample query!")
array = np.array(_embedding_example)
print(f"embedding shape: {array.shape}\nembedding norm: {np.linalg.norm(array, ord=2)}")

embedding shape: (768,)
embedding norm: 0.9999997572765927


## Create FAISS (Vector Database)

In [36]:
def get_vector_database(documents, embedding_model, distance_strategy):

  vector_database = FAISS.from_documents(
      documents, embedding_model,
      distance_strategy= distance_strategy
      )
  return vector_database

In [61]:
import time
from tqdm import trange
def prepare_vector_db(distance_strategy=FAISS_DISTANCE_STRATEGY_COSINE, sleep_secs=10):
    doclen = len(documents)
    for batch in trange(doclen//EMBEDDINGS_BATCH_SIZE + 1):
        docs = documents[batch*EMBEDDINGS_BATCH_SIZE:(batch+1)*EMBEDDINGS_BATCH_SIZE]
        if batch ==0:
            vector_db = get_vector_database(docs, embedding_model, distance_strategy=distance_strategy)
        else:
            vector_db.merge_from(get_vector_database(docs, embedding_model, distance_strategy=distance_strategy))
        time.sleep(sleep_secs) # Sleep for 10 seconds to avoid hitting rate limits
    return vector_db

In [66]:
distance_strategy = FAISS_DISTANCE_STRATEGY_EUCLIDEAN
vector_db_path = FAISS_REVIEWS_PATH_COSINE if distance_strategy == FAISS_DISTANCE_STRATEGY_COSINE else FAISS_REVIEWS_PATH_EUCLIDEAN

try:
    # Try to load vector database
    vector_db = FAISS.load_local(folder_path=vector_db_path,
                             embeddings=embedding_model,
                             index_name=FAISS_INDEX_NAME)
except:
    print("Could not load vector database, providing new one:")
    # Could not load vector database, providing new one
    vector_db = prepare_vector_db()

    # Save vector database
    vector_db.save_local(folder_path=vector_db_path, index_name=FAISS_INDEX_NAME)

    # Load vector database
    vector_db = FAISS.load_local(folder_path=vector_db_path,
                             embeddings=embedding_model,
                             index_name=FAISS_INDEX_NAME)

print("Vector database loaded from:", vector_db_path )

Vector database loaded from: faiss_index_euclidean


In [67]:
docs = vector_db.similarity_search("Give me information about some of the best pizza restaurant in the city?", k = 5)
for doc in docs:
    print(doc, end="\n\n")

page_content='place_name=Pizza Mia\nreview=very good 😌\nplace_address=Via B. M. Kolbe, 1E, 35020 Ponte San Nicolò PD, Italy\nplace_average_ratings=4.3\nplace_price_level=PRICE_LEVEL_INEXPENSIVE\nplace_primary_type=meal_takeaway\nhas_delivery=True\nis_reservable=True' metadata={'place_phone_number': '+39 049 896 0507', 'source': 'ChIJXeYGFkvbfkcRX7-5-2Ew__o', 'place_website': 'https://m.facebook.com/PizzaMiaRoncaglia/', 'places_opening_hours': "['Monday: 6:00\\u2009–\\u20099:30\\u202fPM', 'Tuesday: Closed', 'Wednesday: 6:00\\u2009–\\u20099:30\\u202fPM', 'Thursday: 6:00\\u2009–\\u20099:30\\u202fPM', 'Friday: 6:00\\u2009–\\u20099:30\\u202fPM', 'Saturday: 6:00\\u2009–\\u20099:30\\u202fPM', 'Sunday: 6:00\\u2009–\\u20099:30\\u202fPM']"}

page_content='place_name=Pizzeria Came\nreview=The pizza is very good and the dough of excellent quality\nplace_address=Via G. Garibaldi, 22, 35010 Cadoneghe PD, Italy\nplace_average_ratings=4.0\nplace_price_level=PRICE_LEVEL_INEXPENSIVE\nplace_primary_type=

In [68]:
question = "where is the Enoteca Barcollo located? and what is its phone number?"

docs = vector_db.similarity_search(question, k = 5)

for i in range(5):
  print(docs[i], end="\n\n")

page_content='place_name=Enoteca Barcollo\nreview=Top\nplace_address=Via Alessandro Guidi, 23, 35142 Padova PD, Italy\nplace_average_ratings=4.3\nplace_price_level=PRICE_LEVEL_MODERATE\nplace_primary_type=bar\nhas_delivery=False\nis_reservable=True' metadata={'place_phone_number': '+39 393 082 6866', 'source': 'ChIJwzs5vKLbfkcRp43hqXskPKw', 'place_website': nan, 'places_opening_hours': "['Monday: Closed', 'Tuesday: Closed', 'Wednesday: 6:00\\u2009–\\u200911:00\\u202fPM', 'Thursday: 6:00\\u2009–\\u200911:00\\u202fPM', 'Friday: 6:00\\u2009–\\u200911:00\\u202fPM', 'Saturday: 6:00\\u2009–\\u200911:00\\u202fPM', 'Sunday: 6:00\\u2009–\\u200911:00\\u202fPM']"}

page_content='place_name=Enoteca Barcollo\nreview=Brave!\nplace_address=Via Alessandro Guidi, 23, 35142 Padova PD, Italy\nplace_average_ratings=4.3\nplace_price_level=PRICE_LEVEL_MODERATE\nplace_primary_type=bar\nhas_delivery=False\nis_reservable=True' metadata={'place_phone_number': '+39 393 082 6866', 'source': 'ChIJwzs5vKLbfkcRp43hq

## Load Vector Database

In [22]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.embeddings import CacheBackedEmbeddings
from os import getenv
import dotenv
dotenv.load_dotenv()


# Faiss
FAISS_REVIEWS_PATH_EUCLIDEAN = "faiss_index_euclidean"
FAISS_INDEX_NAME = "index"
FAISS_DISTANCE_STRATEGY='EUCLIDEAN_DISTANCE'
EMBEDDING_MODEL_NAME = "models/text-embedding-004"
EMBEDDINGS_CACHE_STORE="./cache/"

GOOGLE_API_KEY = getenv('GOOGLE_API_KEY')

embedding_model = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL_NAME)
store = LocalFileStore(EMBEDDINGS_CACHE_STORE)
embedding_model = CacheBackedEmbeddings.from_bytes_store(embedding_model, store)

vector_db = FAISS.load_local(folder_path=FAISS_REVIEWS_PATH_EUCLIDEAN,
                             embeddings=embedding_model,
                             index_name=FAISS_INDEX_NAME,
                             allow_dangerous_deserialization=True)

## Load LLM

In [None]:
def get_hf_llm(model_name):
  from transformers import pipeline
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
  import torch

  bnb_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_use_double_quant=True,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_compute_dtype=torch.bfloat16,
  )
  model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, )
  tokenizer = AutoTokenizer.from_pretrained(model_name)

  pipe = pipeline(
      model=model,
      tokenizer=tokenizer,
      return_full_text=True,  # langchain expects the full text
      task='text-generation',
      # we pass model parameters here too
      temperature=0.0001,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
      max_new_tokens=512,  # mex number of tokens to generate in the output
      repetition_penalty=1.1  # without this output begins repeating
  )

  llm = HuggingFacePipeline(pipeline=pipe,)
  return llm

In [70]:
llm_provider, _sep, llm_model_name = LLM_MODEL_NAME.partition('::')

if _sep == "": # Default
    llm_provider = "gemini" 
    llm_model_name = LLM_MODEL_NAME

if llm_provider == "gemini":
    assert GOOGLE_API_KEY, "Please set GOOGLE_API_KEY"
    llm = ChatGoogleGenerativeAI(model=llm_model_name, google_api_key=GOOGLE_API_KEY)
elif llm_provider == "hf":
    llm = get_hf_llm(model_name=llm_model_name)
else:
    raise ValueError(f"Unknown LLM provider: {llm_provider}")

In [71]:
llm.invoke("Hi")

AIMessage(content='Hi! What can I do for you today? \n')

## Create LangChain pipeline

In [72]:
from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,)
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import FAISS
from langchain.schema.runnable import RunnablePassthrough

### Prompt evolution


#### Prompt 1: 

```
Use
the following context to answer questions.
context:
{context}
```

**Results:**
1. Q: "What are the most affordable but high-quality restaurants in City?"   A: "The context you provided doesn't specify a city, so I can't answer the question about the most affordable high-quality restaurants in that city. 

However, I can tell you that based on the information provided, these restaurants seem to offer good value for money:

* **Ristorante Locanda Munerato:**  This Italian restaurant is rated 4.0 stars and is considered inexpensive. The review says it's the "Best bang for your buck."
* **Circolo RIST. Giusti:** This restaurant has a fixed menu for 15€, which is considered good value for money. It's described as a good option for eating cheaply and well in an Italian atmosphere.
* **Le Midi:** This restaurant offers a complete menu for €12.50, which is also considered good value for money. 
* **Da Pippo:** This Italian restaurant offers a full meal with wine, water, and coffee for 12€.
* **Pizzeria Sotto Casa:** This pizzeria offers a good pizza and two drinks for 12 euros, which is considered reasonable.

**To find the best restaurants in a specific city, I recommend using a search engine or a restaurant review website like Yelp or TripAdvisor.** You can then filter your results by price and rating to find the most affordable options. 
"



2. Q: "What is RAG?"   A: "The provided text doesn't contain any information about RAG.  RAG is a term commonly used in the context of artificial intelligence and machine learning, and it likely refers to **Retrieval-Augmented Generation**. 

**Retrieval-Augmented Generation (RAG)** is a technique that combines information retrieval with language models. It allows AI systems to:

1. **Retrieve relevant information** from a knowledge base or external sources.
2. **Use that information** to generate more accurate, informative, and contextually relevant responses. 

For example, a RAG-powered chatbot could access a database of product information and use it to answer customer questions about specific products. 

If you'd like to learn more about RAG, please let me know! "

#### Prompt 2: 
```
Your job is to use Google Map
reviews to answer questions about their experience at a restaurant. Use
the following context to answer questions. provide a brief answer. If you don't know
an answer based on the context, say user to search online: {context}
....
```
**Results:**
1. Q: "What are the most affordable but high-quality restaurants in City?"   A: "The provided reviews mention several affordable restaurants with high quality, but they don't specify the city. To answer your question, I need to know what city you're interested in.  
"


2. Q: "What is RAG?"   A: "The provided Google Map reviews don't mention anything about "RAG".  You'll need to search online for more information.


#### Prompt 3: 
```
Your job is to use Google Map restaurants and bars reviews to help people find best places to go for a meal or a drink.
Use the following information and reviews to answer the questions. if the context is not about restaurants,
then kindly tell the user that you can only provide assistance and answer questions related to restaurants.
If you don't know an answer based on the context, say you don't know. Answer context:
{context}
```
**Results:**
1. Q: "What are the most affordable but high-quality restaurants in City?"   A: "I can only answer questions related to restaurants. Based on the reviews, here are some affordable and highly rated restaurants in Padova:

* **Ristorante Locanda Munerato:**  This Italian restaurant is known for being "best bang for your buck". 
* **Circolo RIST. Giusti:** This restaurant offers a fixed menu for 15€, which is considered good value for money.
* **Da Pippo:** This Italian restaurant offers a full meal with wine, water, and coffee for 12€.
* **Pizzeria Sotto Casa:** This pizzeria offers a good pizza and two drinks for 12 euros. 
* **Bocca della Verità:** This pizza restaurant has good food and is pretty cheap. 

These restaurants are all rated highly and are considered to be affordable options.  
"


2. Q: "What is RAG?"   A: "I'm sorry, but I don't have any information about "RAG" in the context of the provided restaurant reviews.  My knowledge is limited to the information in the text.






### Final prompt:

In [110]:
# review_template_str = """
# Your job is to use Google Map
# reviews to answer questions about their experience at a restaurant. Use
# the following context to answer questions. Be as detailed as possible, but
# don't make up any information that's not from the context. If you don't know
# an answer based on the context, say you don't know.
# context:
# {context}
# """

# review_template_str = """Your job is to use Google Map restaurants and bars reviews to help people find best places to go for a meal or a drink.
# Use the following information and reviews to answer the questions. if the context is not about restaurants,
# then kindly tell the user that you can only provide assistance and answer questions related to restaurants.
# If you don't know an answer based on the context, say you don't know. Answer context:
# {context}
# """


review_template_str = """Your job is to use Google Map
reviews to answer questions about their experience at a restaurant. Use
the following context to answer questions. provide a brief answer. If you don't know
an answer based on the context, say user to search online: {context}"""


In [107]:
system_prompt = SystemMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=["context"], template=review_template_str
    )
)

human_prompt = HumanMessagePromptTemplate(
    prompt=PromptTemplate(input_variables=["question"], template="{question}")
)
messages = [system_prompt, human_prompt]

review_prompt_template = ChatPromptTemplate(
    input_variables=["context", "question"], messages=messages
)

reviews_retriever = vector_db.as_retriever(search_kwargs={'k': 20,})

review_chain = (
    {"context": reviews_retriever, "question": RunnablePassthrough()}
    | review_prompt_template
    | llm
    | StrOutputParser()
)

## Sample usage

In [111]:
question = """Where can I find delicious pizzas?"""
print(review_chain.invoke(question))

Here are some places mentioned in the reviews that serve delicious pizzas:

* **New Pizza:**  One reviewer called it their new favorite pizzeria in the area, praising the deliciousness, strong flavors, and punctuality of deliveries.
* **Officina Della Pizza:**  Reviewers described it as exceptional, with delicious pizzas made with quality ingredients. They also mentioned the courteous and kind staff.
* **Master Pizza:**  This pizzeria was praised for its kind staff, accurate delivery times, and delicious dough that is dry, tasty, and never hard.
* **da Pino Padova:**  This restaurant was highlighted for its exceptional pizza with a perfectly crispy crust and rich, flavorful sauce. The reviewer recommended the pepperoni pizza.
* **Pizzeria Al Quadrifoglio:**  Reviewers raved about the excellent pizza and quick, friendly service. One reviewer even said it sets the standard for all other pizzas they order. 
* **Pizzeria Antica Osteria Dazzo:**  This restaurant was praised for its amazing 

In [113]:
question = """Where can I find delicious pizzas?"""
print(review_chain.invoke(question))

The reviews mention several places with delicious pizzas:

* **New Pizza:**  Reviewers rave about the delicious pizza, strong flavors, and punctuality of deliveries.
* **Officina Della Pizza:**  Reviewers highlight the exceptional pizzas with quality ingredients and the courteous staff.
* **Master Pizza:**  Reviewers praise the delicious dough, kind staff, and hot, steaming pizzas.
* **da Pino Padova:**  Reviewers highlight the perfectly balanced crust, flavorful sauce, and fresh toppings.
* **Pizzeria Al Quadrifoglio:**  Reviewers mention the excellent pizza, quick service, and friendly staff.
* **Pizzeria Antica Osteria Dazzo:**  Reviewers emphasize the amazing pizza with delicious ingredients, friendly service, and a wide selection of flavors.
* **Crazy Pizza:**  Reviewers appreciate the woodburning oven, fresh products, variety of pizzas, and kind staff.
* **Pizzalonga Away Ponte San Nicolò:**  Reviewers highlight the delicious and delicate pizza, fresh ingredients, and efficient s

In [30]:
question = """What are the pros and cons of Napoli Centrale?"""
print(review_chain.invoke(question))

Here are the pros and cons of Ristorante Napoli Centrale based on the Google Map reviews:

**Pros:**

* **Delicious food:** Reviews mention tasty pizza, grilled branzino fish, seafood pasta, and desserts.
* **Good service:**  Reviewers describe the service as nice, great, and excellent. They also mention the staff is friendly and attentive.
* **Cute atmosphere:** One reviewer found the place to be cute. 

**Cons:**

*  The reviews don't mention any specific cons about the restaurant. 



In [116]:
question = """Give the name, address and phone number of some good steak houses for a romantic dinner."""
print(review_chain.invoke(question))

Here are some good steak houses based on the reviews:

* **Vecchio Falconiere:**  
    * Address: Via Umberto I, 31, 35122 Padova PD, Italy
    * Phone: +39 049 656544 
    * Review: "This family run restaurant services food with passion and the owner is so friendly, and cook the steak on table size. There are a big variety of steak, from local to Wangu beef, and different sizes as well."

* **La Fiorentina:** 
    * Address: Via G. Marconi, 123, 35020 Ponte San Nicolò PD, Italy
    * Phone: +39 049 816 4672
    * Review: "I had dinner with a slice of beef cooked to perfection seasoned with parmesan rocket and balsamic vinegar and accompanied by a totally USELESS knife because that cut of meat was so tender."

* **Osteria BocaBona:** 
    * Address: Str. Pelosa, 4, 35136 Padova PD, Italy
    * Phone: +39 049 871 3898
    * Review: "Excellent choice of meats, matured and cooked very well. Excellent beef steak." 

* **Roadhouse Grill:** 
    * Address: Via Pietro Donà, 2, 35100 Padova PD

In [54]:
question = """Give the name, address and phone number of the best steak houses with a 50 euro budget?"""

print(review_chain.invoke(question))

I can't definitively say which restaurant is "best" as that's subjective. However, based on the reviews and price level, here are some options for steak houses in Padova with a 50 euro budget:

* **Roadhouse Grill:**  This steak house is highly recommended by reviewers for its delicious meat.  The price range is a little above average, but worth it for the quality.  
    * **Address:** Via Pietro Donà, 2, 35100 Padova PD, Italy
    * **Phone:** +39 049 780 8353
* **Ristorante Vecchio Falconiere:** This restaurant is known for its large selection of steaks, cooked on a table-sized grill. The owner is friendly and passionate about his food.
    * **Address:** Via Umberto I, 31, 35122 Padova PD, Italy
    * **Phone:** +39 049 656544
* **Osteria BocaBona:** This restaurant offers a wide variety of well-matured and cooked meats, including excellent beef steak. 
    * **Address:** Str. Pelosa, 4, 35136 Padova PD, Italy
    * **Phone:** +39 049 871 3898

Remember, prices can vary depending on

In [112]:
question = """Give the name, address and phone number of the some good sandwich places?"""
print(review_chain.invoke(question))

Here are some good sandwich places based on the Google Maps reviews:

* **Panini e bibite da Sandro e Eugenio**: 
    * Address: Via Cardinale Callegari, 63, 35133 Padova PD, Italy
    * Phone: +39 339 219 7526
* **Paninaro**:
    * Address: Via Messico, 46/53, 35127 Padova PD, Italy
    * Phone: User to search online (phone number not available in the context) 
* **DAL BAFFO**: 
    * Address: Corso Stati Uniti, 18, 35127 Padova PD, Italy
    * Phone: User to search online (phone number not available in the context) 
* **Corner**:
    * Address: Via Cattaro, 20, 35135 Padova PD, Italy
    * Phone: +39 351 669 4354 



In [117]:
question = """What are the most affordable but high-quality restaurants in City?"""
result = review_chain.invoke(question)
print(result)

The provided reviews mention several affordable restaurants with high quality, but they don't specify the city. To answer your question, I need to know what city you're interested in. 



In [119]:
question = """How can I make a roast beef sandwich at home?"""
print(review_chain.invoke(question))

User to search online. 



In [120]:
question = """What is RAG?"""
print(review_chain.invoke(question))

The provided Google Map reviews don't mention anything about "RAG".  You'll need to search online for more information. 



In [122]:
question = """What is Natural Language Processing?"""
print(review_chain.invoke(question))

This question is not answerable from the provided Google Maps reviews. I need more information about Natural Language Processing to answer your question. 



In [121]:
question = """Explain Natural Language Processing."""
print(review_chain.invoke(question))

Natural Language Processing (NLP) is a field of computer science that focuses on enabling computers to understand, interpret, and generate human language. It's like teaching computers to "speak" and "read" like we do. 

Here's a breakdown:

**What NLP Does:**

* **Understanding Language:** NLP systems analyze text and speech to extract meaning, identify patterns, and understand the context. This involves tasks like:
    * **Text Classification:** Categorizing text into different types (e.g., news, reviews, spam)
    * **Sentiment Analysis:** Determining the emotional tone of text (e.g., positive, negative, neutral)
    * **Named Entity Recognition:** Identifying key entities in text (e.g., people, places, organizations)
    * **Part-of-Speech Tagging:** Identifying the grammatical role of words (e.g., noun, verb, adjective)
* **Generating Language:** NLP systems can create natural-sounding text or speech, including:
    * **Machine Translation:** Translating text from one language to a

## Evaluation on Synthetic Questions

In [39]:
from pprint import pprint
import random

In [40]:
def get_question_answer_pairs(documents, generator_llm, num_pairs=30):
  question_answer_pairs = []
  for _ in range(num_pairs):
    document = random.choice(documents)
    page_content = document.page_content
    prompt = f"This is a factual text passage: {page_content}. Write only one question about the restaurant based on the provided text passage. only write the question and nothing else."

    question = generator_llm.invoke(prompt).content
    answer = generator_llm.invoke(f"From the following passage, answer the question: {question}\n{page_content}").content
    question_answer_pairs.append({"question": question, "answer": answer, "document": document})

  return question_answer_pairs

In [41]:
generator_llm = ChatGoogleGenerativeAI(model=LLM_MODEL_NAME)
question_answer_pairs = get_question_answer_pairs(documents, generator_llm, num_pairs=30)
df = pd.DataFrame(question_answer_pairs)

In [42]:
rag_answers = []
for question in df["question"]:
    question = question.split("\n")[0]
    answer = review_chain.invoke(question)
    rag_answers.append(answer)

df["rag_answer"] = rag_answers
df.to_csv('question_answer_pairs.csv', index=False)

In [43]:
df.to_csv('question_answer_pairs.csv', index=False)
df.head()

Unnamed: 0,question,answer,document,rag_answer
0,Is Atelier del pesce Franco e Maria a large re...,"The passage states that the restaurant is ""sma...",page_content='place_name=Atelier del pesce Fra...,"No, Atelier del pesce Franco e Maria is a smal..."
1,Is Fuori Di Zucca a good option for a casual p...,"Based on the provided information, **Fuori Di ...",page_content='place_name=Fuori Di Zucca\nrevie...,"Yes, Fuori Di Zucca seems like a good option f..."
2,Is Osteria Nonna Pina known for its meat dishe...,"Yes, Osteria Nonna Pina is known for its meat ...",page_content='place_name=Osteria Nonna Pina\nr...,"Yes, Osteria Nonna Pina is known for its meat ..."
3,Does Hazzard Pizza e Kebab da Ciccio offer del...,"Yes, Hazzard Pizza e Kebab da Ciccio offers bo...",page_content='place_name=Hazzard Pizza e Kebab...,"Yes, Hazzard Pizza e Kebab da Ciccio offers bo..."
4,Is Brutal a popular restaurant in Padua? \n,"The passage states ""always busy but for a reas...",page_content='place_name=Brutal\nreview=always...,"Yes, Brutal is a popular restaurant in Padua. ..."


In [44]:
df = pd.read_csv('question_answer_pairs.csv')
df.head()

Unnamed: 0,question,answer,document,rag_answer
0,Is Atelier del pesce Franco e Maria a large re...,"The passage states that the restaurant is ""sma...",page_content='place_name=Atelier del pesce Fra...,"No, Atelier del pesce Franco e Maria is a smal..."
1,Is Fuori Di Zucca a good option for a casual p...,"Based on the provided information, **Fuori Di ...",page_content='place_name=Fuori Di Zucca\nrevie...,"Yes, Fuori Di Zucca seems like a good option f..."
2,Is Osteria Nonna Pina known for its meat dishe...,"Yes, Osteria Nonna Pina is known for its meat ...",page_content='place_name=Osteria Nonna Pina\nr...,"Yes, Osteria Nonna Pina is known for its meat ..."
3,Does Hazzard Pizza e Kebab da Ciccio offer del...,"Yes, Hazzard Pizza e Kebab da Ciccio offers bo...",page_content='place_name=Hazzard Pizza e Kebab...,"Yes, Hazzard Pizza e Kebab da Ciccio offers bo..."
4,Is Brutal a popular restaurant in Padua? \n,"The passage states ""always busy but for a reas...",page_content='place_name=Brutal\nreview=always...,"Yes, Brutal is a popular restaurant in Padua. ..."


In [45]:
for i in range(10): 
    print(df["question"].loc[i])

Is Atelier del pesce Franco e Maria a large restaurant? 

Is Fuori Di Zucca a good option for a casual pizza dinner with a beer? 

Is Osteria Nonna Pina known for its meat dishes? 

Does Hazzard Pizza e Kebab da Ciccio offer delivery and reservations? 

Is Brutal a popular restaurant in Padua? 

What is the atmosphere like at Bar Tabacchi Sunny? 

Is Pizzeria alla Palma a good place to get pizza? 

What kind of drinks are available at Busa dei Briganti Pub Padova besides beer? 

Is Il Carro a moderately priced Italian restaurant in Padua, Italy? 

Does Pizzeria Orsetto offer a loyalty program with rewards for frequent customers? 



## Limitatoins of classic LLMs

In [46]:
question = "Does Bar Fortuna Sas in Padova city offer delivery services?"
answer = llm.invoke(question).content
pprint(question)
pprint(answer)

'Does Bar Fortuna Sas in Padova city offer delivery services?'
('I do not have access to real-time information, including business details '
 'like whether a specific restaurant offers delivery services. \n'
 '\n'
 'To find out if Bar Fortuna Sas in Padova offers delivery, I recommend '
 'checking their:\n'
 '\n'
 '* **Website:** Many restaurants have their own website where they list their '
 'services.\n'
 '* **Social media pages:** Check their Facebook or Instagram for updates on '
 'delivery options.\n'
 '* **Online food delivery platforms:** Search for Bar Fortuna Sas on popular '
 'delivery apps like Deliveroo, Uber Eats, or Just Eat.\n'
 '* **Call the restaurant directly:** The most reliable way to find out is to '
 'call them and ask. \n'
 '\n'
 'Good luck finding your next delicious meal! \n')


In [47]:
question = "Does Bar Fortuna Sas in Padova city offer delivery services?"
answer = review_chain.invoke(question)
print(question)
print(answer)

Does Bar Fortuna Sas in Padova city offer delivery services?
No, Bar Fortuna Sas in Padova does not offer delivery services. 



In [48]:
question = "What kind of food is served at Veni Vidi Vino Enoteca?"
answer = llm.invoke(question).content
pprint(question)
pprint(answer)

'What kind of food is served at Veni Vidi Vino Enoteca?'
('I do not have access to real-time information, including restaurant menus. '
 'To find out what kind of food is served at Veni Vidi Vino Enoteca, I '
 'recommend checking their website or calling the restaurant directly. \n')


In [49]:
question = "What kind of food is served at Veni Vidi Vino Enoteca?"
answer = review_chain.invoke(question)
pprint(question)
pprint(answer)

'What kind of food is served at Veni Vidi Vino Enoteca?'
('The reviews mention "tasty food", "meatballs", "platters", "cicchetti", and '
 '"hams and cheeses".  It seems Veni Vidi Vino Enoteca serves Italian-style '
 'food with a focus on wine pairings. \n')
