This is a starter notebook for the project, you'll have to import the libraries you'll need, you can find a list of the ones available in this workspace in the requirements.txt file in this workspace. 

In [None]:
!pip install pandas langchain_chroma langchain_openai langchain_core langchain_community

In [1]:
import os
import pandas as pd
import json
from uuid import uuid4
from langchain.llms import OpenAI
from langchain.document_loaders import CSVLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
import tiktoken
from langchain_openai import ChatOpenAI
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

In [None]:
os.environ["OPENAI_API_KEY"] = "<API_KEY>"
#os.environ["OPENAI_API_BASE"] = <BASE_API_KEY>"  #needed for Udacity project

In [3]:
def generate_listing(n_listing):
    #generate the list of home properties
    model_name = "gpt-3.5-turbo-instruct"

    model = OpenAI(model_name=model_name,
                            temperature=0, 
                            max_tokens=2000)

    template_str = """
    Create a listing of {n_listing} home properties in Texas, US. 

    Create approximately equal split of the no. of bedrooms with the minimum no. of bedrooms as 1 and max no. of bedrooms as 5

    The listing should be in the json format with following fields:

    Neighborhood, Price, No. of bedrooms, No. of bathrooms, House size, House Description, Neighbourhood Description.

    """
    
    prompt_template = PromptTemplate.from_template(template_str)
    prompt = prompt_template.format_prompt(n_listing=n_listing)

    response = model.invoke(prompt)
    
    data = json.loads(response)

    df = pd.DataFrame(data)
    df.to_csv("property_listings.csv",index=False)
    

In [4]:
def get_vector_store():
    #Embedding function
    embedding_fn = OpenAIEmbeddings(model="text-embedding-3-small")
    
    # initialie Chroma DB
    vector_store = Chroma(collection_name="temp",
                          embedding_function=embedding_fn,
                          persist_directory="./chroma_db")
    
    return vector_store

In [5]:
def store_data_in_vector_store(vector_store):
    #load the documents using langchain CSV loader
    docs = CSVLoader("property_listings.csv").load()

    #split the documents
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    docs_split = text_splitter.split_documents(docs)

    #add documents to chroma db with embedding
    uuids = [str(uuid4()) for _ in range(len(docs_split))]
    vector_store.add_documents(documents=docs_split, ids=uuids)


In [6]:
def get_relevant_document(vector_store, query):
    #set the db as retriever and make a query on the same for similarity
    retriever = vector_store.as_retriever()
    docs = retriever.invoke(query)
    
    return docs

In [7]:
def get_user_preferences(interactive=False):
    '''
    if interactive = True, then it will allow users to enter values
    otherwise it will take the hardocded values
    '''
    questions = [   
                "How big do you want your house to be?",
                "What are 3 most important things for you in choosing this property?", 
                "Which amenities would you like?", 
                "Which transportation options are important to you?",
                "How urban do you want your neighborhood to be?",   
            ]
    qa_list = []
    if interactive == True:
        for question in questions:
            print(question)
            answer = input()
            qa_list.append((question, answer))
    else:
        answers = ["A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
             "A quiet neighborhood, good local schools, and convenient shopping options.",
             "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
             "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
             "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."]
        for i in range(5):
            qa_list.append((questions[i], answers[i]))
        
    return qa_list

In [8]:
#prepare user_preference summary based on the questions/answers

# used online documentation at https://python.langchain.com/docs/how_to/chatbots_memory/ for this piece of code

def create_user_preference_summary(user_preferences):
    messages = []

    for q,a in user_preferences:
        messages.append(AIMessage(content=q)),
        messages.append(HumanMessage(content=q))

    model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

    prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessage(
                content="""You are a helpful assistant. 

                Below is the conversation between AI and user where AI is 
                trying to understand the prefrerences of the user for recommending a house. In this conversation, AI
                has asked some questions to the human regarding user's preference and the human has provided answers 
                for the same.

                Summarize the preferences of the human while purchasing a property based on this
                conversation between AI and user. Ensure that all points are covered in the summary. 

                Summarize each set of message separately and then use these summaries to generate overall summary.

                For example:
                AIMessage: How big do you want your house to be?
                HumanMessage: A comfortable three-bedroom house with a spacious kitchen and a cozy living room.

                The above can be summarized to the user wants a comfortable three-bedroom house with a 
                spacious kitchen and a cozy living room.

                In the response, only provide overall summary and call it "User Preferences:"

                """
            ),
            MessagesPlaceholder(variable_name="messages"),
        ]
    )

    chain = prompt | model

    ai_msg = chain.invoke(
        {
            "messages": messages,
        }
    )
    print(ai_msg.content)
    
    return ai_msg.content

In [9]:
def create_custom_prompt(user_preference_summary, top_docs, max_token_count):
    '''
    this function will create the custom prompt for the question
    '''
    #create the tokenizer
    tokenizer = tiktoken.get_encoding('cl100k_base')
    
    prompt_template = """
                        Answer the question based on the context below, and if the question
                        can't be answered based on the context, say "I don't know".
                        
                        Recommend one home onlt and personalize the response to the user's preference as 
                        much as possible.
                        
                        In the response highlight the salient features of the recommended home that are in line 
                        with user's preference

                        Context:

                        {}

                        ---

                        Question: Recommend homes based on the context.
                        Answer:

                       """
    #calculate the current token count
    current_token_count = len(tokenizer.encode(prompt_template))
    
    context = []
    
    current_token_count = current_token_count + len(tokenizer.encode(user_preference_summary))
    
    context.append(user_preference_summary)
    
#     iterate through the dataframe and add rows to the context
#     till the max_token_lenth is reached
    for text in top_docs:
        additional_token_count = len(tokenizer.encode(text.page_content))
        
        current_token_count += additional_token_count
        
        if current_token_count <= max_token_count:
            context.append(text.page_content)
        else:
            break
            
    return prompt_template.format("\n\n###\n\n".join(context))


In [10]:
def generate_recommendation(prompt):
    
    sys_msg = SystemMessage('You are a home recommender system.')
    human_msg = HumanMessage(prompt)

    prompt_final = [sys_msg, human_msg]

    prompt_final
    
    model = ChatOpenAI(model="gpt-3.5-turbo-0125")
    completion = model.invoke(prompt)
    
    return completion.content

In [11]:
#generate property listing and store the same in ChromaDB
def generate_and_store_listings(n_listing):
    generate_listing(n_listing=12)
    vector_store = get_vector_store()
    store_data_in_vector_store(vector_store)

In [12]:
#generate home recommendation
def recommend_home(interactive=False):
    user_preferences = get_user_preferences(interactive)
#     print(user_preferences)
    user_pref_summary = create_user_preference_summary(user_preferences)
    vector_store = get_vector_store()
    docs = get_relevant_document(vector_store, user_pref_summary)
    # docs
    #create the prompt and generate recommendation
    prompt = create_custom_prompt(user_pref_summary, docs, 2500)
#     print(prompt)
    recommendation = generate_recommendation(prompt)
    print("-"*30)
    print(recommendation)

In [13]:
generate_and_store_listings(n_listing=12)

  model = OpenAI(model_name=model_name,


In [14]:
recommend_home()

User Preferences:
The user wants a comfortable three-bedroom house with a spacious kitchen and a cozy living room. The user values having a good school district, a safe neighborhood, and access to outdoor activities. Additionally, the user prefers amenities such as a gym, pool, and walking trails. In terms of transportation, the user values having easy access to public transportation. Lastly, the user prefers a suburban neighborhood with a mix of urban amenities.
------------------------------
Based on your preferences for a comfortable three-bedroom house with a spacious kitchen and cozy living room, I would recommend the home in Austin. This home fits your criteria with three bedrooms, two bathrooms, and a spacious 1,500 sqft size. It has a beautiful single-family design with modern finishes and a spacious backyard, perfect for outdoor activities. The neighborhood in Austin is vibrant with great schools and outdoor amenities, fitting your preference for a good school district, safe n