# Step 1: Setting Up the Python Application.

In [1]:
#Install libraries.
!pip install pandas
!pip install chromadb
!pip install -U --quiet sentence-transformers==2.5.1 transformers==4.36.0 
!pip freeze | grep transformers

Defaulting to user installation because normal site-packages is not writeable
Collecting pandas
  Downloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m61.3 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
Collecting tzdata>=2022.7
  Downloading tzdata-2024.2-py2.py3-none-any.whl (346 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m346.6/346.6 kB[0m [31m37.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tzdata, pandas
Successfully installed pandas-2.2.3 tzdata-2024.2
Defaulting to user installation because normal site-packages is not writeable
[0msentence-transformers==2.5.1
transformers==4.36.0


In [1]:
#Import libraries.
import os
import pandas as pd
import numpy as np
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.chains import LLMChain
from pydantic import BaseModel
from typing import List
from typing import Union
from fastapi.encoders import jsonable_encoder
import chromadb
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer

In [2]:
#Access OpenAI API. Replace the empty string with your own API key.
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"
os.environ["OPENAI_API_KEY"] = "" #Replace the empty string with your own API key.

# Step 2: Generating Real Estate Listings.

In [3]:
#Apply few-shot prompting to generate real estate listings with an LLM.
#This is the template for one example.
example_template = PromptTemplate(input_variables=["listing", "description", "neighborhood"], 
                                template="{listing}\n{description}\n{neighborhood}")

#Example one.
listing1 = """
Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft
"""

description1 = """
Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.
"""

neighborhood1 = """
Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.
"""

#Example two.
listing2 = """
Neighborhood: Newland
Price: $150,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,500 sqft
"""

description2 = """
Description: You'd love this spacious (2,500 sqft) three-bedroom house in the Bohemian neighborhood of Newland. For the low price of $150,000, you can purchase this stylish house with two musical bathrooms. Multiple residents have died violently in the kitchen, including a father who was stabbed to death by his own daughter. If you die next, please replace the washing machine.
"""

neighborhood2 = """
Neighborhood Description: Newland is a Bohemian, vibrant, and incompressible region. The community is full of stupid and violent people. Easy access to drugs, gangs, and toilets. Take a piss in the nearby cemetery or vandalise in the Tesco. Crime rates are high. You should move here so that you can get into trouble.
"""

#Combine the two examples.
examples = [ 
    {
        "listing": listing1,
        "description": description1,
        "neighborhood": neighborhood1
    },
    {
        "listing": listing2,
        "description": description2,
        "neighborhood": neighborhood2
    }    
]

In [4]:
#This is the query that will become the final part of the prompt.
instruction = """
Generate 10 diverse and realistic real estate listings containing facts about the real estate.
"""

In [5]:
#Pydantic model that defines the schema for a single RE listing.
class Listing(BaseModel):
    neighborhood: str
    price: int
    bedrooms: int
    bathrooms: int
    size: int
    description: str
    neigh_description: str

#Pydantic model that defines a list of RE listings (instances of class Listing).
class ListingCollection(BaseModel):
    listings: List[Listing]
        
#The parser parses the LLM's output (10 RE listings) into a structured Python object (instance of class ListingCollection).
parser = PydanticOutputParser(pydantic_object=ListingCollection)

In [6]:
#Combine different parts into a single prompt.
prompt = FewShotPromptTemplate(
    examples=examples, 
    example_prompt=example_template, 
    suffix="Use these examples to generate a response to the problem below: \n\n{input}\n\n{format_instructions}", 
    input_variables=["input"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)
prompt_text = prompt.format(input=instruction)
print("=== Real Estate Listings Prompt ===")
print(prompt_text)

=== Real Estate Listings Prompt ===

Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft


Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.


Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting i

In [7]:
#Pass the prompt to an LLM to get a response.
model_name = "gpt-3.5-turbo"
temperature = 0.3
llm = OpenAI(model_name=model_name, temperature=temperature, max_tokens = 2000)
RElistings_LLM = llm(prompt_text)



In [8]:
#Parse the LLM's responses into the schema defined in Listing and store parsed responses in a list.
result = parser.parse(RElistings_LLM)
result.listings

[Listing(neighborhood='Sunnyvale', price=500000, bedrooms=4, bathrooms=3, size=2200, description='Beautiful 4-bedroom, 3-bathroom home in the family-friendly neighborhood of Sunnyvale. This spacious house features a large backyard perfect for entertaining, as well as a modern kitchen with stainless steel appliances. Close to schools and parks.', neigh_description='Sunnyvale is a family-friendly neighborhood with top-rated schools, parks, and community events. Enjoy the sunny weather and safe streets in this desirable area.'),
 Listing(neighborhood='Downtown', price=750000, bedrooms=2, bathrooms=2, size=1800, description='Luxurious 2-bedroom, 2-bathroom condo in the heart of Downtown. This upscale unit features high-end finishes, a rooftop terrace with city views, and a fitness center. Walking distance to restaurants, shops, and entertainment.', neigh_description='Downtown is a bustling urban neighborhood with a vibrant nightlife, trendy restaurants, and cultural attractions. Live in th

In [9]:
#Store the LLM-generated listings in a dataframe.
df = pd.DataFrame(jsonable_encoder(result.listings))
df.to_csv('RElistings_LLM.csv')
df.head()

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,size,description,neigh_description
0,Sunnyvale,500000,4,3,2200,"Beautiful 4-bedroom, 3-bathroom home in the fa...",Sunnyvale is a family-friendly neighborhood wi...
1,Downtown,750000,2,2,1800,"Luxurious 2-bedroom, 2-bathroom condo in the h...",Downtown is a bustling urban neighborhood with...
2,Lakeview,300000,3,2,1600,"Charming 3-bedroom, 2-bathroom cottage in the ...",Lakeview is a serene neighborhood surrounded b...
3,Midtown,400000,1,1,1000,"Modern 1-bedroom, 1-bathroom loft in the trend...",Midtown is a hip and artistic neighborhood wit...
4,Hillside,600000,5,4,3000,"Spacious 5-bedroom, 4-bathroom home on the hil...",Hillside is an upscale neighborhood with stunn...


# Step 3: Storing Listings in a Vector Database.

In [10]:
#Create a vector database.
client = chromadb.Client(Settings(persist_directory="./database"))
collection = client.create_collection("RElistings_db")

In [11]:
#Select an embedding model and load the dataframe with 10 listings.
MODEL_NAME = 'paraphrase-MiniLM-L6-v2'
df = pd.read_csv("RElistings_LLM.csv")

In [12]:
#This function creates embeddings for textual data.
def generate_embeddings(input_data: Union[str, list[str]]) -> np.ndarray:    
    model = SentenceTransformer(MODEL_NAME)
    embeddings = model.encode(input_data)
    return embeddings

for index, row in df.iterrows():
    listing_id = str(index)  #Turn a listing's ID into a string.
    description = row['description'] #Extract a listing's description.
    embedding = generate_embeddings(description) #Embed the extracted description.
    #Add the listing to the vector database.
    collection.add(
        documents=[description],
        metadatas=[row.to_dict()],
        ids=[listing_id],
        embeddings=[embedding.tolist()]
    )

collection.get(ids=["7", "8", "9"])

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.73k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]



{'ids': ['7', '8', '9'],
 'embeddings': None,
 'metadatas': [{'Unnamed: 0': 7,
   'bathrooms': 1,
   'bedrooms': 2,
   'description': "Cozy 2-bedroom, 1-bathroom bungalow in the scenic neighborhood of Mountain View. This charming home features a sunroom, a landscaped garden, and a detached garage. Close to hiking trails and farmers' markets.",
   'neigh_description': "Mountain View is a nature lover's paradise with stunning mountain views, outdoor recreation opportunities, and a laid-back atmosphere. Enjoy the beauty of the mountains in this cozy bungalow.",
   'neighborhood': 'Mountain View',
   'price': 550000,
   'size': 1200},
  {'Unnamed: 0': 8,
   'bathrooms': 3,
   'bedrooms': 4,
   'description': 'Stunning 4-bedroom, 3-bathroom beachfront home with panoramic ocean views. This luxurious house features a gourmet kitchen, a private beach access, and a rooftop deck for sunset views. Perfect for beach lovers.',
   'neigh_description': 'Beachfront is a coveted neighborhood with exclu

# Step 4: Building the User Preference Interface.

In [13]:
#Simulate the process of collecting user preferences by hard-coding.

questions = [   
                "How big do you want your house to be?",
                "What are 3 most important things for you in choosing this property?",
                "Which amenities would you like?",
                "Which transportation options are important to you?",
                "How urban do you want your neighborhood to be?"
            ]

answers = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."
            ]

#Combine the provided questions and answers into a simulated conversation. List of dictionaries.
simulation = [{"question": q, "answer": a} for q, a in zip(questions, answers)]

#Convert the above list of dictionaries into one string.
preference = "\n".join([f"Q: {item['question']}\nA: {item['answer']}" for item in simulation])
print(preference)

Q: How big do you want your house to be?
A: A comfortable three-bedroom house with a spacious kitchen and a cozy living room.
Q: What are 3 most important things for you in choosing this property?
A: A quiet neighborhood, good local schools, and convenient shopping options.
Q: Which amenities would you like?
A: A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.
Q: Which transportation options are important to you?
A: Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.
Q: How urban do you want your neighborhood to be?
A: A balance between suburban tranquility and access to urban amenities like restaurants and theaters.


# Step 5: Searching Based on Preferences.

In [14]:
def retrieve(preference):
    #Embed the user preferences.
    embedding_of_preference = generate_embeddings(preference)

    #Find the most relevant listing to the embedded user preferences in the database.
    RElistings_retrieved = collection.query(
        query_embeddings=embedding_of_preference.tolist(),
        n_results=1  #This is necessary to identify the top listing and nothing else.
    )

    #Extract the key statistics from the retrieved listing.
    metadata = RElistings_retrieved['metadatas'][0][0]
    metadata_string = f"Neighborhood: {metadata['neighborhood']}\n" \
                      f"Price: ${metadata['price']}\n" \
                      f"Size: {metadata['size']} sqft\n" \
                      f"Number of Bedrooms: {metadata['bedrooms']}\n" \
                      f"Number of Bathrooms: {metadata['bathrooms']}\n"


    #Combine the key statistics with the property and neighborhood descriptions.
    description = RElistings_retrieved['documents'][0][0]
    neigh_description = metadata['neigh_description']
    listing = f"Description:\n{description}\n\nMetadata:\n{metadata_string}\n\nNeighborhood Description:\n{neigh_description}"
    
    return listing

listing = retrieve(preference)
print("=== Most Relevant Listing ===")
print(listing)

=== Most Relevant Listing ===
Description:
Beautiful 4-bedroom, 3-bathroom home in the family-friendly neighborhood of Sunnyvale. This spacious house features a large backyard perfect for entertaining, as well as a modern kitchen with stainless steel appliances. Close to schools and parks.

Metadata:
Neighborhood: Sunnyvale
Price: $500000
Size: 2200 sqft
Number of Bedrooms: 4
Number of Bathrooms: 3


Neighborhood Description:
Sunnyvale is a family-friendly neighborhood with top-rated schools, parks, and community events. Enjoy the sunny weather and safe streets in this desirable area.


# Step 6: Personalizing Listing Descriptions.

In [15]:
#Similar to step 5, find the most relevant listing to the embedded user preferences in the database. 
#Then, combine the retrieved listing with a personalization prompt.
def create_prompt(preference):
    
    listing = retrieve(preference)
    
    prompt_template = """
For the retrieved listing below, combine the description with the metadata and neighborhood description.

---

Context: 

{}

---

Question:

Augment the result by tailoring it to resonate with the buyer’s specific preferences.
Subtly emphasize aspects of the property that align with what the buyer is looking for.
The augmentation should personalize the listing without changing factual information, especially the numbers.

---

Answer:
    """
    return prompt_template.format(listing)

prompt_personal = create_prompt(preference)

print("=== Most Relevant Listing and Instructions ===")
print(prompt_personal)

=== Most Relevant Listing and Instructions ===

For the retrieved listing below, combine the description with the metadata and neighborhood description.

---

Context: 

Description:
Beautiful 4-bedroom, 3-bathroom home in the family-friendly neighborhood of Sunnyvale. This spacious house features a large backyard perfect for entertaining, as well as a modern kitchen with stainless steel appliances. Close to schools and parks.

Metadata:
Neighborhood: Sunnyvale
Price: $500000
Size: 2200 sqft
Number of Bedrooms: 4
Number of Bathrooms: 3


Neighborhood Description:
Sunnyvale is a family-friendly neighborhood with top-rated schools, parks, and community events. Enjoy the sunny weather and safe streets in this desirable area.

---

Question:

Augment the result by tailoring it to resonate with the buyer’s specific preferences.
Subtly emphasize aspects of the property that align with what the buyer is looking for.
The augmentation should personalize the listing without changing factual info

In [16]:
#WARNING. You may need to run this cell a few times for the augmented response to update to match the provided preferences.

#This is an instance of the LLMChain class. It connects LLMs with prompt templates to generate responses in a structured way.
llm_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(prompt_personal))

#Implement the LLM chain to augment the retrieved listing that matches with the preferences specified by a user.
personalized_listing = llm_chain.run(input=preference)

print("=== Most Relevant Listing (Augmented) ===")
print(personalized_listing)

=== Most Relevant Listing (Augmented) ===
Welcome to your dream home in the sought-after neighborhood of Sunnyvale! This beautiful 4-bedroom, 3-bathroom house is perfect for a growing family like yours. With a spacious 2200 sqft layout, a large backyard for entertaining, and a modern kitchen with stainless steel appliances, this home has everything you need. Imagine hosting family gatherings and BBQs in your own private oasis. And with top-rated schools and parks just a stone's throw away, your children will thrive in this family-friendly community. Don't miss out on the opportunity to make this house your forever home for the price of $500,000. Come experience the sunny weather and safe streets that make Sunnyvale the perfect place to call home.


# Step 7: Create new preferences to test the agent.

In [18]:
#WARNING. You may need to run this cell a few times for the augmented response to update to match the provided preferences.

llm_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(prompt_personal))

preference = "I am looking for a luxurious property with beach access."

prompt_personal = create_prompt(preference)
print("=== Most Relevant Listing and Instructions ===")
print(prompt_personal)

personalized_listing = llm_chain.run(input=preference)
print("=== Most Relevant Listing (Augmented) ===")
print(personalized_listing)

=== Most Relevant Listing and Instructions ===

For the retrieved listing below, combine the description with the metadata and neighborhood description.

---

Context: 

Description:
Stunning 4-bedroom, 3-bathroom beachfront home with panoramic ocean views. This luxurious house features a gourmet kitchen, a private beach access, and a rooftop deck for sunset views. Perfect for beach lovers.

Metadata:
Neighborhood: Beachfront
Price: $950000
Size: 2800 sqft
Number of Bedrooms: 4
Number of Bathrooms: 3


Neighborhood Description:
Beachfront is a coveted neighborhood with exclusive beachfront properties, private beach access, and a relaxed coastal vibe. Live the beach lifestyle in this exquisite home.

---

Question:

Augment the result by tailoring it to resonate with the buyer’s specific preferences.
Subtly emphasize aspects of the property that align with what the buyer is looking for.
The augmentation should personalize the listing without changing factual information, especially the 

In [20]:
#WARNING. You may need to run this cell a few times for the augmented response to update to match the provided preferences.

llm_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(prompt_personal))

preference = "I like fine art and want a property in a neighborhood full of galleries and museums."

prompt_personal = create_prompt(preference)
print("=== Most Relevant Listing and Instructions ===")
print(prompt_personal)

personalized_listing = llm_chain.run(input=preference)
print("=== Most Relevant Listing (Augmented) ===")
print(personalized_listing)

=== Most Relevant Listing and Instructions ===

For the retrieved listing below, combine the description with the metadata and neighborhood description.

---

Context: 

Description:
Charming 3-bedroom, 2-bathroom home in the historic Garden District. This renovated house features a chef's kitchen, a landscaped garden, and a covered porch. Close to museums and cultural attractions.

Metadata:
Neighborhood: Garden District
Price: $500000
Size: 1800 sqft
Number of Bedrooms: 3
Number of Bathrooms: 2


Neighborhood Description:
Garden District is a cultural hub with historic homes, lush gardens, and a vibrant arts scene. Immerse yourself in the charm and history of this unique neighborhood in this lovely home.

---

Question:

Augment the result by tailoring it to resonate with the buyer’s specific preferences.
Subtly emphasize aspects of the property that align with what the buyer is looking for.
The augmentation should personalize the listing without changing factual information, especia