In [None]:
!pip install -q \
    transformers==4.31.0 \
    accelerate==0.21.0 \
    bitsandbytes==0.41.0 \
    sentence-transformers==2.2.2 \
    xformers==0.0.20 \

!pip install -q \
    langchain==0.1.0 \
    langchain-community==0.0.12 \
    langchainhub==0.1.14 \
    faiss-gpu \
    faiss-cpu

!pip install -q pandas
# !pip install -q colab-xterm
!pip install -qU langchain-anthropic
!pip install -q python-dotenv

In [None]:
import pandas as pd
from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,)
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import FAISS
from langchain.schema.runnable import RunnablePassthrough


from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_community.document_loaders.dataframe import DataFrameLoader
from langchain.storage import LocalFileStore
from langchain.embeddings import CacheBackedEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain_anthropic import ChatAnthropic


from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import huggingface_hub as hf_hub
import torch

# Bind a `getenv(name)` helper used below to read secrets: Colab's secret store
# when the notebook runs in Colab, otherwise the process environment.
try:
    from google.colab.userdata import get as getenv
    print("Running in colab")
except ImportError:
    # Not in Colab: read from environment variables, after loading a .env file.
    # NOTE(review): os.getenv returns None for a missing variable; Colab's
    # userdata.get is believed to raise instead — confirm before relying on a
    # None check for optional secrets.
    from os import getenv
    import dotenv
    dotenv.load_dotenv()

In [None]:
# Select the compute device: GPU when CUDA is usable, CPU otherwise.
# `torch` is imported unconditionally in the imports cell, so guarding this
# lookup with `except ImportError` could never take effect.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

print("Device:", device)

In [None]:
# Log in to the Hugging Face Hub. The assert stops the notebook early with a
# readable message when the token is missing or empty.
HF_TOKEN = getenv('HF_TOKEN')
assert HF_TOKEN, "A valid HuggingFace token is required to be set as <HF_TOKEN>."
hf_hub.login(HF_TOKEN)

In [None]:
# Anthropic API key; read here and passed to ChatAnthropic in get_anthropic_api_llm().
ANTHROPIC_API_KEY = getenv('ANTHROPIC_API_KEY')

## Constants

In [None]:
# Dataset files (CSV); the two tables are joined on their shared 'place_id' column.
PLACES_PATH = "data/places.csv"
REVIEWS_PATH = "data/reviews.csv"

## Models
# LLM_MODEL format: <model_type>::<model_name>
#
# Supported model types: 'hf' and 'anthropic'
#
# Examples:
#   hf::meta-llama/Llama-2-7b-hf
#   anthropic::claude-3-sonnet-20240229
#
LLM_MODEL = "anthropic::claude-3-sonnet-20240229"
# Model used to embed the review documents.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Embeddings: directory backing the on-disk embedding cache.
EMBEDDINGS_CACHE_STORE="./cache/"

# Faiss: folder and index name used when saving/loading, plus the distance metric.
FAISS_REVIEWS_PATH = "faiss_index"
FAISS_INDEX_NAME = "index"
FAISS_DISTANCE_STRATEGY='EUCLIDEAN_DISTANCE'

## Load Dataset

Here we use two CSV files: one with information about places (restaurants, bars, ...) and one with the reviews for each of them.

In [None]:
def get_documents(content_func=lambda row:row['review'],
                  source_func=lambda row:row['place_id'],
                  metadata_fields=None):
  """Load places and reviews, join them, and return one document per review row.

  Args:
    content_func: Called with each merged row; its return value becomes the
      document's page content. Defaults to the row's 'review' column.
    source_func: Called with each merged row; its return value is stored in the
      'source' column. Defaults to the row's 'place_id'.
    metadata_fields: Optional iterable of extra column names to keep alongside
      'page_content' and 'source'. None (the default) keeps only those two.

  Returns:
    The list produced by DataFrameLoader.load() over the selected columns.
  """
  # Load both data files
  places_df = pd.read_csv(PLACES_PATH)
  reviews_df = pd.read_csv(REVIEWS_PATH)

  # Inner join on 'place_id': rows without a match on the other side are dropped.
  merged_df = pd.merge(places_df, reviews_df, on='place_id', how='inner')

  # Add page_content and source columns using their corresponding functions
  merged_df['page_content'] = merged_df.apply(content_func, axis=1)
  merged_df['source'] = merged_df.apply(source_func, axis=1)

  # Always include 'page_content' and 'source'. dict.fromkeys de-duplicates while
  # keeping a stable, caller-controlled column order (a set would reorder the
  # columns unpredictably between runs).
  selected_columns = list(
      dict.fromkeys([*(metadata_fields or ()), 'page_content', 'source']))

  loader = DataFrameLoader(merged_df[selected_columns],
                           page_content_column='page_content')
  return loader.load()

In [None]:
def content_func(row) -> str:
  """Render selected place/review fields of `row` as newline-separated `key=value` lines."""
  rendered = []
  for field in ('place_name', 'place_types', 'place_address',
                'place_average_ratings', 'review'):
    rendered.append(f"{field}={row[field]}")
  return '\n'.join(rendered)

# Build the corpus: each document's text is produced by content_func from the merged row.
documents = get_documents(content_func)

In [None]:
# Display the first document as a quick sanity check.
documents[0]

## Load Embeddings model

In [None]:
def get_hf_embedding_model(embedding_model_name,
                           cache_embeddings_store,
                           device='cpu',
                           normalize_embeddings=False,
                           ):
  """Create a HuggingFace embedding model wrapped in an on-disk cache.

  Args:
    embedding_model_name: Model name passed to HuggingFaceEmbeddings.
    cache_embeddings_store: Directory used by LocalFileStore for cached vectors.
    device: Device passed to the model via `model_kwargs` (e.g. 'cpu', 'cuda').
    normalize_embeddings: Forwarded via `encode_kwargs`; set `True` for cosine
      similarity.

  Returns:
    A CacheBackedEmbeddings instance wrapping the HuggingFace model.
  """
  import re  # local import: only needed to build the cache namespace

  base_embeddings = HuggingFaceEmbeddings(
      model_name=embedding_model_name,
      model_kwargs={'device': device},
      encode_kwargs={'normalize_embeddings': normalize_embeddings},
      )
  store = LocalFileStore(cache_embeddings_store)

  # Namespace cache entries by model so vectors cached for one embedding model
  # are never served for another one sharing the same cache directory.
  # Characters outside a conservative set (e.g. '/') are replaced so the
  # namespace stays a flat, file-name-safe prefix.
  cache_namespace = re.sub(r"[^A-Za-z0-9_.-]", "_", str(embedding_model_name))

  return CacheBackedEmbeddings.from_bytes_store(
      base_embeddings, store, namespace=cache_namespace)



In [None]:
# Instantiate the cached embedding model on the device detected earlier.
# normalize_embeddings is left False here (the helper notes True is for cosine similarity).
embedding_model = get_hf_embedding_model(EMBEDDING_MODEL_NAME,
                                         EMBEDDINGS_CACHE_STORE,
                                         device=device,
                                         normalize_embeddings=False)

## Load FAISS (Vector Database)

In [None]:
def get_vector_database(documents, embedding_model):
  """Embed `documents` with `embedding_model` and index them in a new FAISS store.

  The distance metric is taken from the notebook-level FAISS_DISTANCE_STRATEGY.
  """
  return FAISS.from_documents(
      documents,
      embedding_model,
      distance_strategy=FAISS_DISTANCE_STRATEGY,
  )

In [None]:
# Embed every document and build the FAISS index.
vector_db = get_vector_database(documents, embedding_model)

In [None]:
## Optional: save the db to disk and load it back, so the saved files can be reused later.
vector_db.save_local(folder_path=FAISS_REVIEWS_PATH, index_name=FAISS_INDEX_NAME)
# Pass the distance strategy again on load: otherwise the reloaded store falls
# back to the constructor default instead of the configured metric.
# NOTE(review): newer langchain-community releases are understood to also
# require allow_dangerous_deserialization=True here (the docstore is pickled) —
# confirm when upgrading past the pinned version.
vector_db = FAISS.load_local(folder_path=FAISS_REVIEWS_PATH,
                             embeddings=embedding_model,
                             index_name=FAISS_INDEX_NAME,
                             distance_strategy=FAISS_DISTANCE_STRATEGY)


In [None]:
# Smoke-test retrieval: fetch the 5 documents closest to a sample query.
docs = vector_db.similarity_search("which one is the best pizza restaurant in the city?", k = 5)

In [None]:
# Display the top-ranked match.
docs[0]

## Load LLM

In [None]:
def get_anthropic_api_llm(model_name):
  """Return a ChatAnthropic model for `model_name`, using the notebook's ANTHROPIC_API_KEY."""
  return ChatAnthropic(
      model_name=model_name,
      anthropic_api_key=ANTHROPIC_API_KEY,
  )


In [None]:
def get_hf_llm(model_name):
  """Load a Hugging Face causal LM with 4-bit quantization and wrap it for LangChain.

  Args:
    model_name: Model identifier passed to `from_pretrained` for both the model
      and its tokenizer.

  Returns:
    A HuggingFacePipeline wrapping a 'text-generation' pipeline.
  """
  # 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
  quantization = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_use_double_quant=True,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_compute_dtype=torch.bfloat16,
  )
  causal_lm = AutoModelForCausalLM.from_pretrained(
      model_name, quantization_config=quantization)
  model_tokenizer = AutoTokenizer.from_pretrained(model_name)

  # Generation parameters are handed to the pipeline together with the model.
  generation_settings = dict(
      return_full_text=True,   # langchain expects the full text
      temperature=0.0001,      # 'randomness' of outputs; kept close to zero
      max_new_tokens=512,      # max number of tokens to generate in the output
      repetition_penalty=1.1,  # without this the output begins repeating
  )
  # NOTE(review): `do_sample` is not set here — confirm that `temperature`
  # actually takes effect with the pinned transformers version.
  text_generator = pipeline(
      task='text-generation',
      model=causal_lm,
      tokenizer=model_tokenizer,
      **generation_settings,
  )

  return HuggingFacePipeline(pipeline=text_generator)


In [None]:
# LLM_MODEL is "<model_type>::<model_name>". When the "::" separator is absent,
# str.partition returns the whole string first and empty strings for the rest.
model_type, separator, model_name = LLM_MODEL.partition('::')

# If model type is not set, use anthropic
if not separator:
    # The text that landed in `model_type` is really the model name. Swap in one
    # step so the name is not overwritten before it is copied.
    model_type, model_name = "anthropic", model_type


if model_type == "anthropic":
    llm = get_anthropic_api_llm(model_name)
else:
    # Any other model type is loaded as a local Hugging Face model.
    llm = get_hf_llm(model_name)

llm

In [None]:
# Smoke test: confirm the selected LLM responds before building the chain.
llm.invoke("Hi")

## Create LangChain pipeline

In [None]:
# System prompt: instructs the model to answer only from the retrieved reviews,
# which are injected in place of {context}.
review_template_str = """
Your job is to use Google Map restaurants and bars reviews to help people find best places to go for a meal or a drink.
Use the following information and reviews to answer the questions.
If you don't know an answer based on the context, say you don't know. Answer context:
{context}
"""
# Alternative wording for the last instruction (currently unused):
# If you don't know an answer based on the context, say you don't know, and
# if the context is not about restaurants, then kindly tell them that  you can
# only provide assistance and answer questions related to restaurants.

review_system_prompt = SystemMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=["context"], template=review_template_str
    )
)

# The user's question is passed through verbatim as the human message.
review_human_prompt = HumanMessagePromptTemplate(
    prompt=PromptTemplate(input_variables=["question"], template="{question}")
)
messages = [review_system_prompt, review_human_prompt]

review_prompt_template = ChatPromptTemplate(
    input_variables=["context", "question"], messages=messages
)

# The number of results must go through `search_kwargs`; a bare `k=10` keyword
# is not a retriever field and does not change how many documents are returned.
reviews_retriever = vector_db.as_retriever(search_kwargs={"k": 10})

# Chain: the input question feeds both the retriever (-> context) and the
# prompt's {question}; the formatted prompt goes to the LLM and the reply is
# parsed to a plain string.
review_chain = (
    {"context": reviews_retriever, "question": RunnablePassthrough()}
    | review_prompt_template
    | llm
    | StrOutputParser()
)

In [None]:
# Truncated variant of the chain (no LLM / parser step), left commented out;
# presumably kept for inspecting the formatted prompt — confirm before removing.
# review_chain = (
#     {"context": reviews_retriever, "question": RunnablePassthrough()}
#     | review_prompt_template
# )

# Ask a sample question and print the chain's string answer.
question = """Where can I find delicious pizzas?"""
print(review_chain.invoke(question))

In [None]:
# Second sample question, printed as plain text.
question = """What are the pros and cons of the best pizza restaurant in the city?"""
print(review_chain.invoke(question))

# Leftover notes naming two pieces of the chain (input mapping and prompt template):
# {"context": reviews_retriever, "question": RunnablePassthrough()}

# review_prompt_template

In [None]:
# Same question as the previous cell, but the answer is left as the cell's last
# expression so the notebook displays the raw string instead of printing it.
question = """What are the pros and cons of the best pizza restaurant in the city?"""
review_chain.invoke(question)