In [1]:
import os
import re

import chromadb
import pandas as pd
from chromadb.utils import embedding_functions
from langchain import LLMChain
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import DataFrameLoader
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from openai import OpenAI as openAI_client


# Read the OpenAI key from the environment so it never has to be pasted into
# (and accidentally committed with) the notebook. Falls back to an empty
# string, which is what the notebook used before, when the variable is unset.
api_key = os.environ.get("OPENAI_API_KEY", "")

<h2> Chapter 1: Generating Real Estate Listings </h2>

In [2]:
# Completion model settings used to generate the synthetic listings.
# `model` is reused later when the LLM is rebuilt for the RAG chain.
model = 'gpt-3.5-turbo-instruct'
temperature = 1
llm = OpenAI(
    model=model,
    temperature=temperature,
    max_tokens=3700,
    openai_api_key=api_key,
)

In [3]:
# Prompt template for generating the synthetic listings ("reviews").
# - `{number}` is the only placeholder; it is the number of reviews requested.
# - The model is told to finish every review with the word " End ", which the
#   cleaning cell later uses as the separator between reviews.
# NOTE(review): the first example describes a 6 bathroom house although the
# instructions above it cap bathrooms at 3, and the second example has no
# price even though a price is required -- consider aligning the examples.
house_reviews_template_prompt = """
generate reviews for luxury houses in an imaginary city called 'Uda-City'. Come up with names of neighborhoods, details about the neighborhood (busy,
quiet, near malls, transport), sizes of the houses, number of bathrooms, number of bedrooms, year built and a 
general description of available facilities (for example pool, barbeque, yard, view etc). The bathrooms should not be more than 3.
Important is to also set a price for sale for every house example.

Every time a review finishes you need to provide the word " End " before starting the next review

This is an example: 

This stunning 5 bedroom, 6 bathroom mansion in the prestigious Beverly Hills neighborhood is the epitome of luxury living. Built in 2018, this modern masterpiece boasts high-end finishes and top-of-the-line appliances. The spacious backyard features a sparkling pool, perfect for cooling off on hot summer days, and a built-in barbeque for entertaining guests. 
With breathtaking views of the city, this house truly has it all at a price of $750.000. End 
Welcome to the charming neighborhood of Willow Creek, where this cozy 3 bedroom, 2 bathroom home awaits you. End


Generate {number} reviews in total.
"""

In [4]:
# Wrap the raw template so the `{number}` placeholder can be filled in later.
generate_reviews = PromptTemplate.from_template(
    template=house_reviews_template_prompt,
)

In [5]:
# Ask the model for 12 listings in a single completion.
# `llm.invoke(...)` replaces the direct `llm(...)` call, which is deprecated in
# LangChain and emitted the `warn_deprecated` message seen in earlier runs.
# For this completion-style LLM the result is the generated text as a string.
response = llm.invoke(generate_reviews.format(number=12))
print(response)

  warn_deprecated(



Looking for the perfect luxury home in Uda-City? Look no further than the exclusive neighborhood of Golden Heights. This sprawling 6 bedroom, 3 bathroom mansion was built in 2019 and features state-of-the-art amenities and breathtaking views of the city. The spacious backyard offers a pool, outdoor kitchen, and a private yard for your enjoyment. Priced at $1.2 million, this is luxury living at its finest. End

Tired of the hustle and bustle of city life? Look no further than the quiet neighborhood of Serenity Valley. This 4 bedroom, 2 bathroom home provides a peaceful retreat with its tranquil surroundings and stunning mountain views. Built in 2015, this modern home boasts high-end finishes and a spacious backyard with a fire pit. At a price of $800,000, this is the perfect luxury escape. End

Located in the heart of Uda-City, the bustling neighborhood of Downtown Heights offers the ultimate urban living experience. This 2 bedroom, 2 bathroom loft-style condo offers sleek and modern d

I am using the 'End' marker at the end of each review so that the reviews are easier to separate.

In [6]:
# Clean the generated text and store one review per DataFrame row.
#
# `response` is left untouched (it is not rebound to a list), so this cell can
# be re-run without re-querying the model.

# Split on the standalone word "End" only; a plain substring split would also
# cut words such as "Endless" in half.
review_pieces = re.split(r'\bEnd\b', response)

# The piece after the last "End" marker is either empty or a review that was
# cut off before its marker, so it is dropped (same as the previous `[:-1]`).
complete_pieces = review_pieces[:-1]

# Collapse newlines / repeated spaces inside each review and skip blank pieces.
reviews = [' '.join(piece.split()) for piece in complete_pieces]
reviews = [review_text for review_text in reviews if review_text]

house_reviews_df = pd.DataFrame({'review': reviews})

In [7]:
# Display the cleaned reviews (one row per generated listing).
house_reviews_df

Unnamed: 0,review
0,Looking for the perfect luxury home in Uda-Cit...
1,Tired of the hustle and bustle of city life? L...
2,"Located in the heart of Uda-City, the bustling..."
3,Nestled in the prestigious neighborhood of Bel...
4,Escape to the tranquil neighborhood of Green V...
5,Looking for a modern and stylish home in Uda-C...
6,Experience the best of both worlds in the neig...
7,Nestled in the quiet and elegant neighborhood ...
8,Looking for a home with a view? Look no furthe...
9,Welcome to the luxurious neighborhood of Cryst...


In [8]:
# Persist the listings twice: the same CSV content is written under a .csv
# and a .txt file name.
for export_path in ('listings.csv', 'listings.txt'):
    house_reviews_df.to_csv(export_path)

<h2> Chapter 2a: Store listings and query using ChromaDB </h2>

In [9]:
# Reload the exported listings; the first CSV column is the saved row index.
house_reviews_df = pd.read_csv(
    'listings.csv',
    index_col=0,
)
house_reviews_df.head()

Unnamed: 0,review
0,Looking for the perfect luxury home in Uda-Cit...
1,Tired of the hustle and bustle of city life? L...
2,"Located in the heart of Uda-City, the bustling..."
3,Nestled in the prestigious neighborhood of Bel...
4,Escape to the tranquil neighborhood of Green V...


In [10]:
# OpenAI client used further down to embed free-text queries.
openClient = openAI_client(api_key=api_key)
client = chromadb.Client()

# Embedding function Chroma applies to the documents added to the collection.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=api_key,
    model_name="text-embedding-ada-002",
)

# Remove any "reviews" collection left over from a previous run of this cell
# so that `create_collection` below starts from an empty collection.
try:
    client.delete_collection(name="reviews")
except Exception:
    # Best effort: the collection usually just does not exist yet. Catching
    # `Exception` (instead of a bare `except:`) keeps KeyboardInterrupt and
    # SystemExit from being silently swallowed.
    pass
collection = client.create_collection("reviews", embedding_function=openai_ef)

In [11]:
# Store every review in the collection, keyed by its row position as a string.
review_texts = house_reviews_df["review"].to_list()
review_ids = [str(position) for position in range(len(house_reviews_df))]
collection.add(ids=review_ids, documents=review_texts)

In [12]:
def text_embedding(text, embedding_model="text-embedding-ada-002") -> list:
    """Embed `text` with the OpenAI embeddings endpoint.

    Args:
        text: The text to embed (passed as `input` to the embeddings API).
        embedding_model: Name of the embedding model. Defaults to the model
            used for the Chroma collection so query and document vectors are
            comparable.

    Returns:
        The embedding of the first (only) input, i.e. `response.data[0].embedding`.
    """
    response = openClient.embeddings.create(model=embedding_model, input=text)
    return response.data[0].embedding

Let's test it with a small query.

In [13]:
# Sample buyer request used to sanity-check retrieval in both vector stores.
test_query = "I would like to buy a house that has a pool"

In [14]:
# Embed the sample query so it can be passed to Chroma as a query embedding.
vector=text_embedding(test_query)

In [15]:
# Query the collection once for the two closest listings and print them.
# Previously the identical query was re-issued on every loop iteration, and
# indexing by position failed if fewer than two documents came back.
query_result = collection.query(query_embeddings=vector, n_results=2)
for matched_document in query_result['documents'][0]:
    print(matched_document)
    print('')

Escape to the tranquil neighborhood of Green Valley, where this charming 4 bedroom, 2 bathroom home awaits you. Built in 2016, this home features a beautiful backyard with a pool and barbeque area, perfect for outdoor entertaining. The quiet surroundings and lush greenery make this a true oasis. Priced at $950,000, this is luxury living at its best. 

Experience the best of both worlds in the neighborhood of Riverside Retreat. This 5 bedroom, 3 bathroom home offers both stunning river views and easy access to the city. Built in 2017, this home boasts a spacious backyard with a pool and barbeque area, perfect for enjoying the outdoors. Priced at $1.2 million, this is luxury living at its finest. 



<h2> Chapter 2b: Store listings and query using FAISS database </h2>

In [16]:
# Start the FAISS section from the exported file again, using the first CSV
# column as the row index.
listings_path = 'listings.csv'
house_reviews_df = pd.read_csv(listings_path, index_col=0)
house_reviews_df.head()

Unnamed: 0,review
0,Looking for the perfect luxury home in Uda-Cit...
1,Tired of the hustle and bustle of city life? L...
2,"Located in the heart of Uda-City, the bustling..."
3,Nestled in the prestigious neighborhood of Bel...
4,Escape to the tranquil neighborhood of Green V...


In [17]:
# Turn each DataFrame row into a LangChain document whose text comes from the
# "review" column.
loader = DataFrameLoader(house_reviews_df, page_content_column="review")

In [18]:
# Materialise the documents produced by the DataFrame loader.
documents = loader.load()

In [19]:
# Split the loaded documents before indexing them.
# NOTE(review): the cleaning cell removed the newlines from every review, so a
# separator-based splitter presumably leaves each review as a single chunk
# even though chunk_size=100 is far shorter than a review -- confirm that
# splitting is actually wanted here.
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

In [20]:
# Embedding model wrapper handed to FAISS to embed documents and queries.
embeddings = OpenAIEmbeddings(api_key=api_key)

In [21]:
# Embed the split documents and build the FAISS index, then report how many
# vectors the index holds.
db = FAISS.from_documents(
    documents=docs,
    embedding=embeddings,
)
print(db.index.ntotal)

12


In [22]:
# Retrieve the listings closest to the sample query from the FAISS index.
# NOTE(review): this rebinds `docs`, which until now held the split documents
# that were indexed into `db`; re-running the FAISS.from_documents cell after
# this one would index the search results instead -- consider a distinct name.
docs = db.similarity_search(test_query)

In [23]:
# Print the top two matches. Slicing (instead of indexing docs[0] and docs[1])
# avoids an IndexError when the search returns fewer than two documents.
for matched_doc in docs[:2]:
    print(matched_doc.page_content)
    print(' ')

Escape to the tranquil neighborhood of Green Valley, where this charming 4 bedroom, 2 bathroom home awaits you. Built in 2016, this home features a beautiful backyard with a pool and barbeque area, perfect for outdoor entertaining. The quiet surroundings and lush greenery make this a true oasis. Priced at $950,000, this is luxury living at its best.
 
Experience the best of both worlds in the neighborhood of Riverside Retreat. This 5 bedroom, 3 bathroom home offers both stunning river views and easy access to the city. Built in 2017, this home boasts a spacious backyard with a pool and barbeque area, perfect for enjoying the outdoors. Priced at $1.2 million, this is luxury living at its finest.
 


Both vector databases (ChromaDB and FAISS) seem to store the embeddings successfully and return relevant results for a simple test query about a house with a pool. 
The next step is to create prompts and enhance the answers with LLM capabilities.

<h2> Chapter 3: Augmented Response Generation </h2>

I will be using the FAISS database for the response generator

In [24]:
# Create prompt with context and example
real_estate_prompt ='''
context: You are an agent for real estate and you will be provided a set of house facilities and preferences by a user that wants to buy a house (for example a pool, or a nice view).
Based on the users preferences provided you will have to describe a house based on the listings of the database in which house descriptions are stored. 
Here is a generic example and the way I want you to address similar queries:

Example:
[

question and answer by the user:
question = [   
                "Describe shortly the main features of your ideal house"
  ]
answer = [
    "I would like to have several bathrooms. Fireplace is also a must. My budget is around 2 millions. 
    ]

Your answer must contain an Original description from the context and then an augmented justification in order to pursuade the buyer that this is the house they are looking for. Here is an example of an answer you should give:

1. Give the description based on the listings provided. You should not paraphrase anything, just give the original listing as it is:
[Original description: "Welcome to Platinum Heights, an exquisite 4-bed, 5-bath masterpiece built in 2020. With timeless design and top-notch craftsmanship, this home features a cozy fireplace, gourmet kitchen, and serene master suite. Priced at $1.6 million, it offers luxury living at its finest."]

2. Provide a justification to support the description you provided before. In the justification feel free to augment the description, tailoring it to resonate with the buyer’s specific preferences. This involves subtly emphasizing aspects of the property that align with what the buyer is looking for:
[The agents view and comments:
Based on your preference description, I highly recommend considering this listing because it offers 5 bathrooms which is a feature you do want and it is not that common for any house. 
There is also a cozy wood-burning fireplace which I see is a must have feature for you. It is excellent decorated as it is stone-surrounded and elevates the house esteem. Finally it is alligned with your budget since it is priced at 1.6 million which is a fair price for a house of so much luxury and style]

]

Follow the above logic to address the following query of the new user:

question = [   
                "Describe shortly the main features of your ideal house"
  ]
answers = [
    "I would like {}. 
]

'''

In [25]:
# Three sample buyers; each preference text is inserted into the shared
# real-estate prompt template.
user_1_prompt = real_estate_prompt.format(
    "a nice pool. I would also like ideally a yard with barbecue facility. The budget should not exceed the 2 millions."
)
user_2_prompt = real_estate_prompt.format(
    "a house in order to escape the intense city life. Somewhere quiet and full of nature. We need relativelly small house with 2 bedrooms at a budget around 2 million"
)
user_3_prompt = real_estate_prompt.format(
    "a luxury property, with many bedrooms somewhere around the Crystal Heights community"
)

I will use the RetrievalQA chain.

In [26]:
# Disabled alternative, kept for reference: fetch the single closest listing
# manually and pass it to a "stuff" question-answering chain with a custom
# prompt.
# similar_docs = db.similarity_search(user_1_prompt, k=1)
# prompt = PromptTemplate(
#         template="{query}\nContext: {context}",
#         input_variables=["query", "context"],
#     )
# chain = load_qa_chain(llm, prompt = prompt, chain_type="stuff")
# print(chain.run(input_documents=similar_docs, query = user_1_prompt))


# Rebuild the LLM for answering: same model and temperature as the generation
# step, but with max_tokens lowered from 3700 to 3000.
temperature = 1
llm = OpenAI(model=model, temperature=temperature, max_tokens = 3000, openai_api_key=api_key)
# Retrieval-augmented QA chain: documents come from the FAISS index through
# its default retriever and are combined with the "stuff" chain type.
rag = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever())

In [27]:
# Answer buyer 1. `Chain.run` is deprecated (it triggered the `warn_deprecated`
# message in earlier runs); `invoke` takes the question under the "query" key
# and returns a dict whose "result" entry holds the generated answer.
print(rag.invoke({"query": user_1_prompt})["result"])

  warn_deprecated(



Original description: "Experience the best of luxury living in the exclusive neighborhood of Skyline Heights. This stunning 5 bedroom, 3 bathroom mansion was built in 2018 and boasts high-end finishes and top-of-the-line appliances. The spacious backyard features a sparkling pool, perfect for cooling off on hot summer days, and a built-in barbeque for entertaining guests. With breathtaking views of the city, this house truly has it all at a price of $1.1 million."

Based on your preferences, I highly recommend this listing in Skyline Heights. Not only does it have a stunning pool, but the backyard also includes a built-in barbeque, making it the ideal location for outdoor gatherings and events. The house itself was built in 2018 and offers state-of-the-art amenities, including top-of-the-line appliances and high-end finishes. Its price of $1.1 million is well within your budget while still providing the luxurious features you desire. Plus, the beautiful views of the city only add to t

In [30]:
# Answer buyer 2. Uses `invoke` instead of the deprecated `Chain.run`: the
# question goes in under the "query" key and the answer is read from "result".
print(rag.invoke({"query": user_2_prompt})["result"])


[Original description: "Escape the hustle and bustle of city life and find solace in this charming 2-bedroom home situated in the tranquil neighborhood of Serenity Valley. Surrounded by lush greenery, this cozy abode offers the perfect opportunity to reconnect with nature. Priced under $2 million, this is an ideal retreat for those seeking peace and quiet."]

[The agent's view and comments: Based on your preference description, I highly recommend considering this listing. It is located in the serene neighborhood of Serenity Valley, where you can escape the intensity of the city and find a peaceful retreat. Surrounded by beautiful nature, this house offers a tranquil atmosphere that will surely meet your need for peace and quiet. Additionally, it is a relatively small house with 2 bedrooms, perfectly fitting your needs. And the best part, it falls under your budget of $2 million. Don't miss the chance to have your perfect getaway home.]


In [31]:
# Answer buyer 3. Uses `invoke` instead of the deprecated `Chain.run`: the
# question goes in under the "query" key and the answer is read from "result".
print(rag.invoke({"query": user_3_prompt})["result"])



[Original description: "Welcome to the luxurious neighborhood of Crystal Heights, where this magnificent 6 bedroom, 3 bathroom estate awaits you. Built in 2020, this home offers top-of-the-line amenities such as a home theater, wine cellar, and a private elevator. The backyard features a stunning pool and spa, and a built-in barbeque for outdoor gatherings. Priced at $2.7 million, this is the epitome of luxury living in Uda-City."]

[The agents view and comments:
Based on your preference description, I highly recommend considering this listing in Crystal Heights as it offers 6 spacious bedrooms and 3 bathrooms, perfect for a large family or accommodating guests. It also features a home theater and wine cellar, aligned with your desire for a luxury property. The backyard boasts a beautiful pool and spa, as well as a built-in barbeque for entertaining. All of this for a price of $2.7 million, showcasing the epitome of luxury living in Uda-City. Crystal Heights is known for its exclusiv