In [3]:
import numpy as np
import pandas as pd

# Raw hotel-review table; every later cell derives its data from this frame.
df = pd.read_csv(filepath_or_buffer='10 Hotels.csv')

In [4]:
from langchain_community.document_loaders import DataFrameLoader

# Turn the review DataFrame into LangChain documents, using the
# "Overall_Review" column as each document's text.
# NOTE(review): the remaining columns (e.g. Hotel_Name) are presumably kept as
# per-document metadata — confirm against the DataFrameLoader documentation.
loader = DataFrameLoader(df, page_content_column="Overall_Review")
data = loader.load()

In [11]:
import os
from getpass import getpass

import openai  # required below; without it `openai.api_key` raises NameError on a fresh kernel

# Never hard-code the key in the notebook: reuse the value already exported in
# the environment and only prompt (without echoing) when it is missing.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')
openai.api_key = os.environ['OPENAI_API_KEY']

In [13]:
from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding client created with default settings; it is passed to the Chroma
# vector store when the review documents are indexed.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment — confirm.
embedding = OpenAIEmbeddings()

In [14]:
from langchain.vectorstores import Chroma

# Index the loaded review documents in a Chroma vector store, embedding them
# with `embedding`.
# NOTE(review): no persist directory is passed, so the index is presumably
# in-memory and rebuilt (re-embedding every review) on each run — confirm.
vectordb = Chroma.from_documents(
    documents=data,
    embedding=embedding,
)

In [15]:
def get_model_response(messages,
                       model = 'gpt-3.5-turbo',
                       temperature = 0,
                       max_tokens = 1000):
    """Send a chat conversation to the OpenAI chat-completions endpoint.

    Args:
        messages: List of ``{'role': ..., 'content': ...}`` dicts forwarded
            unchanged to the API.
        model: Name of the chat model to query.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on the number of generated tokens.

    Returns:
        The text content of the first choice in the API response.
    """
    request_args = dict(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    completion = openai.chat.completions.create(**request_args)

    # Only the first choice is ever used.
    first_choice = completion.choices[0]
    return first_choice.message.content

Function to break long queries down into separate portions.

In [16]:
def editPrompt(userQuery):
  """Ask the chat model to split a user query into single-topic sub-queries.

  The prompt embeds the module-level ``hotels_list_str`` so the model can drop
  hotel names that are not in it, and asks for the result as a list of
  strings.

  Args:
      userQuery: The raw question typed by the user.

  Returns:
      The model's reply as raw text, exactly as returned by
      ``get_model_response`` (it is not parsed here).
  """
  fence = '####'
  system_prompt = "You're a helpful assistant. Your task is to break down user queries about hotels."
  user_prompt = f'''
  Below is a user query delimited with {fence}.

  Then, break down the query into multiple queries if the query is asking about multiple matters.
  There is no need to break down queries that are asking about one topic and one hotel only, or if it does not make sense to begin with.

  Remove the names of hotels that are not in {hotels_list_str}.

  Output is a Python list of strings containing the breakdown of the original query.
  If no changes were made, just output the original query as the 0th index.
  An example is:
  ["<query 1>", "<query 2>", ...]

  User Query:
  {fence}
  {userQuery}
  {fence}
  '''

  chat = [
      dict(role='system', content=system_prompt),
      dict(role='user', content=user_prompt),
  ]

  return get_model_response(chat,
                            model = 'gpt-3.5-turbo',
                            temperature = 0,
                            max_tokens = 1000)

Function to convert string output to a Python list.

In [17]:
import json

def convertJSONToList(input_string):
  """Parse the model's textual list reply into a Python list.

  The prompt asks the model for a "Python list of strings", so the reply is
  not guaranteed to be strict JSON: it may use single quotes or be wrapped in
  a markdown code fence. Strict JSON is tried first; a Python literal is
  accepted as a fallback.

  Args:
      input_string: Raw text returned by the model.

  Returns:
      The parsed value. A bare string is wrapped in a one-element list so
      callers that iterate over the result get whole queries, not characters.

  Raises:
      json.JSONDecodeError: If the text is neither valid JSON nor a valid
          Python literal.
  """
  import ast

  text = input_string
  if isinstance(text, str):
    text = text.strip()
    if text.startswith('```'):
      # Drop the surrounding fence and an optional language tag line
      # (e.g. "python" or "json") that precedes the payload.
      text = text.strip('`')
      first_line, _, remainder = text.partition('\n')
      if remainder and not first_line.lstrip().startswith('['):
        text = remainder
      text = text.strip()

  try:
    parsed = json.loads(text)
  except json.JSONDecodeError as json_error:
    if not isinstance(text, str):
      raise
    try:
      # Accept Python-literal syntax such as ['a', 'b'].
      parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError):
      # Keep the original exception type for callers that already handle it.
      raise json_error from None

  if isinstance(parsed, tuple):
    parsed = list(parsed)
  if isinstance(parsed, str):
    return [parsed]
  return parsed

Function to identify which hotel a query refers to.

In [18]:
# Distinct hotel names in order of first appearance. Despite its name this is a
# list, and the prompt functions interpolate it directly into their prompt text.
hotels_list_str = pd.unique(df['Hotel_Name']).tolist()

In [19]:
def getHotel(userQuery):
  """Ask the chat model which known hotel a query is about.

  The prompt lists the module-level ``hotels_list_str`` and tells the model to
  answer with just the hotel name, or ``None`` when no hotel is mentioned.

  Args:
      userQuery: A (sub-)query expected to mention at most one hotel.

  Returns:
      The model's reply as raw text, exactly as returned by
      ``get_model_response``. The "no hotel" case therefore arrives as
      whatever text the model produced, not as the ``None`` object.
  """
  fence = '####'
  system_prompt = "You're a helpful assistant. Your task is to analyse user queries."
  user_prompt = f'''
  Below is a user query delimited with {fence}.
  Please, identify the hotel mentioned in this comment from the list of hotels below.

  Return just the name of the hotel in question in string format.

  If the user query does not mention any hotels, return None.

  List of hotels:
  {hotels_list_str}

  User Query:
  {fence}
  {userQuery}
  {fence}
  '''

  chat = [
      dict(role='system', content=system_prompt),
      dict(role='user', content=user_prompt),
  ]

  return get_model_response(chat,
                            model = 'gpt-3.5-turbo',
                            temperature = 0,
                            max_tokens = 1000)

Prompt template the LLM uses to answer a question from the retrieved reviews.

In [20]:
from langchain.prompts import PromptTemplate

# Prompt for the retrieval QA chain. It has two placeholders, {context} and
# {question}, and instructs the model to admit when the context does not
# contain the answer, to report mixed opinions, and to answer in one sentence.
# NOTE(review): {context} is presumably filled with the retrieved review texts
# by the chain — confirm.
template = """
Use the following pieces of context to answer the question at the end.
If the relevant information is not available in the data, consider other interpretations of the prompt due to semantics.
If the relevant information is still not available in the data, just say that you don't know, don't try
to make up an answer.
If the data has mixed opinions, tell us about the mixed opinions.

Keep the answer as concise as possible. Use 1 sentence to sum all points up.
______________
{context}
Question: {question}
Helpful Answer:"""

# Wrapped as a PromptTemplate so it can be passed to the chain as its prompt.
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [21]:
from langchain.chains import RetrievalQA

from langchain.chat_models import ChatOpenAI
# Chat model that answerQuery passes to the retrieval QA chain; note that the
# query-rewriting helpers use 'gpt-3.5-turbo' through get_model_response instead.
llm = ChatOpenAI(model_name='gpt-4', temperature=0.1)

  warn_deprecated(


Function to answer a query.

In [22]:
def answerQuery(curQuery, curHotel):
  """Answer one query with retrieval QA, restricted to one hotel when known.

  Passing ``metadata`` to the chain only labels the chain itself; it does not
  restrict which documents are retrieved. To keep reviews of other hotels out
  of the context, the hotel is applied as a metadata filter on the retriever.

  Args:
      curQuery: The question to answer.
      curHotel: Hotel name as returned by ``getHotel``. Values that are not in
          ``hotels_list_str`` (e.g. the text "None") disable the filter and
          fall back to searching all reviews.

  Returns:
      The chain's response dict (includes 'result' and, because
      ``return_source_documents`` is set, the source documents).
  """
  # The model may wrap the name in whitespace or quotes; normalise before
  # matching it against the known hotel names.
  hotelName = curHotel.strip().strip('"\'') if isinstance(curHotel, str) else curHotel

  if hotelName in hotels_list_str:
    retriever = vectordb.as_retriever(
        search_kwargs = {'filter': {'Hotel_Name': hotelName}}
    )
  else:
    # Unknown or missing hotel: search across every review, as before.
    retriever = vectordb.as_retriever()

  qa_chain = RetrievalQA.from_chain_type(
      llm,
      retriever = retriever,
      metadata = {'Hotel_Name': curHotel},  # kept as a chain label only
      return_source_documents = True,
      chain_type_kwargs = {"prompt": QA_CHAIN_PROMPT}
  )

  # `.invoke` replaces the deprecated direct call on the chain object.
  response = qa_chain.invoke({"query": curQuery})

  return response

Overall function that takes in any query and processes it before returning the result.

In [23]:
def getAnswer(userQuery):
  """Run the full pipeline: split the query, answer each part, merge answers.

  Args:
      userQuery: The user's original question; it may cover several topics
          and several hotels.

  Returns:
      A three-element list ``[finalAnswer, queryList, responseList]``:
      the per-query answers concatenated (each followed by one space), the
      sub-queries produced by ``editPrompt``, and the raw ``answerQuery``
      responses in the same order.
  """
  queryList = convertJSONToList(editPrompt(userQuery))

  answerParts = []
  responseList = []
  for subQuery in queryList:
    # Resolve the hotel for this sub-query, then answer it.
    reply = answerQuery(subQuery, getHotel(subQuery))
    answerParts.append(reply['result'] + " ")
    responseList.append(reply)

  return ["".join(answerParts), queryList, responseList]

Sample response.

In [28]:
# End-to-end example: a single question covering two topics (staff, toilets)
# and two hotels. getAnswer returns [combined answer, sub-queries, raw responses].
sampleResponse = getAnswer('How are the staff, and are the toilets good in Hotel Arena and The Park Grand London Paddington')
# Bare expression so the notebook displays the full result.
sampleResponse

['The staff at Hotel Arena are reported to be very helpful and friendly. The toilets in Hotel Arena have received mixed reviews, with some guests complaining about the bathrooms being like a cubicle with transparent glass, causing privacy issues, and others mentioning issues with the shower causing water to flow over to the elevated floor. The staff at The Park Grand London Paddington are considered good, lovely, polite, helpful, and efficient. The toilets in The Park Grand London Paddington have received mixed reviews, with some guests finding them to be of a good standard and spacious, while others complained about unpleasant smells, wet floors, and poor cleanliness. ',
 ['How are the staff in Hotel Arena',
  'How are the toilets in Hotel Arena',
  'How are the staff in The Park Grand London Paddington',
  'How are the toilets in The Park Grand London Paddington'],
 [{'query': 'How are the staff in Hotel Arena',
   'result': 'The staff at Hotel Arena are reported to be very helpful a