In [3]:
import chromadb
from langchain.chains import RetrievalQA, ConversationChain, RetrievalQAWithSourcesChain, ConversationalRetrievalChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.memory import ConversationSummaryMemory, ConversationBufferMemory, CombinedMemory, ChatMessageHistory
from langchain.prompts import PromptTemplate
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from typing import Any, Dict, Optional, Tuple
from langchain.chains.question_answering import load_qa_chain


OPENAI_API_KEY=''


# Generate Listings

In [10]:
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY, 
    model_name="gpt-3.5-turbo", 
    temperature=0
)
instruction = """
    You are generating real estate listing data.
    An example of a listing:
    
    Neighborhood: Green Oaks
    Price: $800,000
    Bedrooms: 3
    Bathrooms: 2
    House Size: 2,000 sqft

    Description: Welcome to this eco-friendly oasis nestled in the 
    heart of Green Oaks. This charming 3-bedroom, 2-bathroom home 
    boasts energy-efficient features such as solar panels and a 
    well-insulated structure. Natural light floods the living 
    spaces, highlighting the beautiful hardwood floors and 
    eco-conscious finishes. The open-concept kitchen and dining 
    area lead to a spacious backyard with a vegetable garden, 
    perfect for the eco-conscious family. Embrace sustainable 
    living without compromising on style in this Green Oaks gem.

    Neighborhood Description: Green Oaks is a close-knit, 
    environmentally-conscious community with access to organic 
    grocery stores, community gardens, and bike paths. 
    Take a stroll through the nearby Green Oaks Park or grab a cup 
    of coffee at the cozy Green Bean Cafe. With easy access to 
    public transportation and bike lanes, commuting is a breeze.

        
    Task:  
        
    Generate  23 unique real estate listings, produce descriptions 
    of various properties. 
    Write the answer in a csv format, all strings should be double quoted.
    Attributes should also be double quouted in the first row.


"""

resp = llm.invoke(instruction)
with open("data_generated.csv", "w") as f:
    f.write(resp.content)



# Helper functions

In [5]:
class MementoBufferMemory(ConversationBufferMemory):
    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        input_str, output_str = self._get_input_output(inputs, outputs)
        self.chat_memory.add_ai_message(output_str)


def create_db(file_path, openai_api_key):
    loader = CSVLoader(
        file_path=file_path,
        csv_args={
            "delimiter": ",",
            "quotechar": '"',
            "fieldnames": [
                "Neighborhood", 
                "Price",
                "Bedrooms",
                "Bathrooms",
                "House Size",
                "Description",
                "Neighborhood Description"
            ],
        }
    )
    data = loader.load()
    cts = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    split_docs = cts.split_documents(data)
    emb = OpenAIEmbeddings(
        openai_api_key=openai_api_key
    )
    db = Chroma.from_documents(split_docs, emb)

    return db


def get_llm():
    model_name = 'gpt-3.5-turbo'
    llm = OpenAI(
        openai_api_key=OPENAI_API_KEY,
        model_name=model_name, 
        temperature=0, 
        max_tokens=2000, 
    )
    return llm


def get_memory(questions, answers):

    history = ChatMessageHistory()
    history.add_user_message(
        f"""
        You are AI that will enrich the desrciptions for home 
        listings for a user based on their answers to personal 
        questions.The augmentation should personalize the listing 
        without changing factual information. 
        Ask user {len(questions)} questions
        """
    )
    for i in range(len(questions)):
        history.add_ai_message(questions[i])
        history.add_user_message(answers[i])
    conversational_memory = MementoBufferMemory(
        chat_memory=history,
        memory_key="questions_and_answers"
    )
    return conversational_memory



def get_history(questions, answers):
    history = ChatMessageHistory()
    history.add_user_message(
        f"""
        You are AI that will enrich the desrciptions for home 
        listings for a user based on their answers to personal 
        questions.The augmentation should personalize the listing 
        without changing factual information. 
        Ask user {len(questions)} questions
        """
    )
    for i in range(len(questions)):
        history.add_ai_message(questions[i])
        history.add_user_message(answers[i])

    history_tuples = []
    for i in range(0, int(len(history.messages)), 2):
        try:
            history_tuples.append((history.messages[i].content, history.messages[i+1].content))
        except IndexError:
            history_tuples.append((history.messages[i].content, ""))
    #print(history_tuples)
    return history_tuples


def sim_search(query, db, llm):
    similar_docs = db.similarity_search(query, k=3)
    print(f"**Similar documents: **\n{similar_docs}")
    prompt = PromptTemplate(
        template="{query}\nContext: {context}",
        input_variables=["query", "context"],
    )
    chain = load_qa_chain(llm, prompt = prompt, chain_type="stuff")
    print(
        f"**Recommendation based on similarity search:** \n"\
        f"{chain.run(input_documents=similar_docs, query = query)}")
    return similar_docs


def augment_search(query, db, llm, history):
    chain = ConversationalRetrievalChain.from_llm(
        llm, 
        db.as_retriever()
    )
    response = chain({"question": query, "chat_history": history})
    print(
        f"**Recommendation based on buyer's preferences: **\n"\
        f"{response['answer']}")



In [11]:
file_path = "./data_generated.csv"
db = create_db(file_path, OPENAI_API_KEY)
llm = get_llm()



# Similarity search

In [12]:
query_for_similarity_search = """
        Based on the listings in the context recommend me a home if 
        I want to live near the mountains and I have unlimited budget.
        """
sim_search(query_for_similarity_search, db, llm)

**Similar documents: **
[Document(page_content='Neighborhood: Mountain Ridge\nPrice: $800,000\nBedrooms: 3\nBathrooms: 2\nHouse Size: 2,000 sqft\nDescription: Discover this mountain retreat in Mountain Ridge, a 3-bedroom, 2-bathroom home with panoramic views. The open-concept living area features hardwood floors and a fireplace. The kitchen boasts quartz countertops and stainless steel appliances. Relax on the deck overlooking the mountains. Mountain Ridge offers a peaceful escape with hiking trails and a community pool.\nNeighborhood Description: Mountain Ridge is a serene neighborhood with hiking trails and a community pool, perfect for outdoor enthusiasts. Residents can enjoy the nearby nature reserve and scenic views of the mountains, as well as the local farmers market and community events.', metadata={'row': 16, 'source': './data_generated.csv'}), Document(page_content='Neighborhood: Mountain View\nPrice: $950,000\nBedrooms: 4\nBathrooms: 3\nHouse Size: 2,800 sqft\nDescription: E

[Document(page_content='Neighborhood: Mountain Ridge\nPrice: $800,000\nBedrooms: 3\nBathrooms: 2\nHouse Size: 2,000 sqft\nDescription: Discover this mountain retreat in Mountain Ridge, a 3-bedroom, 2-bathroom home with panoramic views. The open-concept living area features hardwood floors and a fireplace. The kitchen boasts quartz countertops and stainless steel appliances. Relax on the deck overlooking the mountains. Mountain Ridge offers a peaceful escape with hiking trails and a community pool.\nNeighborhood Description: Mountain Ridge is a serene neighborhood with hiking trails and a community pool, perfect for outdoor enthusiasts. Residents can enjoy the nearby nature reserve and scenic views of the mountains, as well as the local farmers market and community events.', metadata={'row': 16, 'source': './data_generated.csv'}),
 Document(page_content='Neighborhood: Mountain View\nPrice: $950,000\nBedrooms: 4\nBathrooms: 3\nHouse Size: 2,800 sqft\nDescription: Enjoy mountain views fro

# Personalized search

In [13]:
personal_questions = [   
            "How big do you want your house to be?" ,
            "What are 3 most important things for you in choosing this property?", 
            "Which amenities would you like?", 
            "Which transportation options are important to you?",
            "How urban do you want your neighborhood to be?",   
        ]
personal_answers = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."
]
history = get_history(personal_questions, personal_answers)

query = """
    Choose one home listing recommended for the user out of the given documents. 
    Try to convince the buyer, why this home would be suitable for them, include all of the previous answers in your explanation.
    Do not change any factual information of the properties.
    """
augment_search(query, db, llm, history)  

**Recommendation based on buyer's preferences: **
Based on the user's preference for a family-friendly atmosphere with parks and playgrounds nearby, the first home listing in Whispering Pines would be recommended. This home offers 5 bedrooms, 4 bathrooms, and a backyard oasis with a covered patio and vegetable garden, perfect for a family looking for outdoor space. Additionally, the neighborhood of Whispering Pines is described as family-friendly with parks and playgrounds, aligning with the user's preferences.
