# HomeMatch: Real Estate AI Agent

In [None]:
# Load environment variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI credentials used by the
# embedding and LLM cells further down come from — confirm.
from dotenv import load_dotenv
load_dotenv()

True

## Synthetic Data Generation

### Generating Real Estate Listings with an LLM

**LLM PROMPT**
```
Generate at least 10 real estate listings. 
An example of a listing might be under `<listing_model>` tag. 
Output the response in json format.

<listing_model>
Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.
</listing_model>
```

In [None]:
# Read the pre-generated real estate listings (house details plus neighborhood
# descriptions) from the JSON file on disk.
import json

with open('data/listings.json', 'r', encoding='utf-8') as listings_file:
    listings = json.loads(listings_file.read())

# Show how many listings were loaded, followed by the raw records.
print(f"Total: {len(listings)}\nItems: {listings}")

Total: 10
Items: [{'Neighborhood': 'Green Oaks', 'Price': '$800,000', 'Bedrooms': 3, 'Bathrooms': 2, 'House Size': '2,000 sqft', 'Description': 'Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.', 'Neighborhood Description': 'Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, comm

## Semantic Search

### Creating a Vector Database and Storing Listings

In [None]:
# Create the Chroma vector database used to store and retrieve listing embeddings.
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings

# Embedding function handed to the vector store.
embedding = OpenAIEmbeddings()

# The store persists its data under data/vectorstore.
vectorstore = Chroma(
    persist_directory="data/vectorstore",
    embedding_function=embedding,
)


In [None]:
# Render every listing to text, split it into chunks, and store the chunks in
# the vector database.
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Listing keys in the order they are rendered into the text that gets embedded.
# Each key doubles as the label of its line ("Price: $800,000", ...).
LISTING_FIELDS = (
    "Neighborhood",
    "Price",
    "Bedrooms",
    "Bathrooms",
    "House Size",
    "Description",
    "Neighborhood Description",
)

# Collect the chunks of all listings first so they can be added in one batch
# instead of issuing a separate add_texts call per chunk.
all_chunks = []
for listing in listings:
    # A missing key is rendered as an empty value rather than raising.
    full_text = "\n".join(
        f"{field}: {listing.get(field, '')}" for field in LISTING_FIELDS
    )
    all_chunks.extend(text_splitter.split_text(full_text))

# Skip the insert entirely when there is nothing to add (e.g. an empty listings file).
if all_chunks:
    # NOTE(review): no ids are supplied, so re-running this cell presumably
    # appends the same chunks again to the persisted store — confirm, and clear
    # data/vectorstore before a re-run if duplicates show up in search results.
    vectorstore.add_texts(all_chunks)

# Save the vectorstore to disk
vectorstore.persist()

### Semantic Search of Listings Based on Buyer Preferences

In [18]:
# Run a similarity search over the vector database using sample buyer answers.
# Both the questions and the example answers below are hard-coded.
questions = [
    "How big do you want your house to be?",
    "What are 3 most important things for you in choosing this property?",
    "Which amenities would you like?",
    "Which transportation options are important to you?",
    "How urban do you want your neighborhood to be?",
]
answers = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters.",
]

# The answers are joined, one per line, into a single query string.
buyer_preferences = "\n".join(answers)

# Ask the vector store for the three entries closest to that query.
results = vectorstore.similarity_search(buyer_preferences, k=3)

print("Top 3 Listings Matching Buyer Preferences:\n")

for rank, match in enumerate(results[:3], start=1):
    print(f"Listing {rank}:\n{match.page_content}\n{'-'*40}")

Top 3 Listings Matching Buyer Preferences:

Listing 1:
Neighborhood: Meadowbrook
Price: $480,000
Bedrooms: 3
Bathrooms: 2.5
House Size: 2,100 sqft
Description: This family-friendly home in Meadowbrook features a welcoming front porch, updated kitchen, and cozy den. The backyard is fenced with a play area and fire pit. Newly painted interiors and move-in ready.
Neighborhood Description: Meadowbrook is a classic suburban neighborhood with a community pool, excellent schools, and local farmer’s markets. Ideal for growing families seeking comfort and convenience.
----------------------------------------
Listing 2:
Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft
Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors

## Augmented Response Generation

### Logic for Searching and Augmenting Listing Descriptions

In [23]:
# Set up the large language model used to augment listing descriptions based on
# buyer preferences.
from langchain.llms import OpenAI

# The temperature is set to 0.6 to get more creative responses.
llm = OpenAI(temperature=0.6)

def search_listings_by_preferences(buyer_answers):
    """
    Find the stored listings closest to what the buyer asked for.

    Args:
        buyer_answers: Iterable of answer strings; they are joined with
            newlines to form the search query.

    Returns:
        The result of ``vectorstore.similarity_search`` for that query,
        limited to 3 matches.
    """
    query_text = "\n".join(buyer_answers)
    top_matches = vectorstore.similarity_search(query_text, k=3)
    return top_matches

def augment_listing_description(listing_text, buyer_preferences):
    """
    Augment the listing description based on buyer preferences using a large language model.

    Args:
        listing_text: Text of the original listing to personalize.
        buyer_preferences: The buyer's preferences as a single string.

    Returns:
        The LLM's completion for the personalization prompt, i.e. the
        rewritten listing text.
    """
    prompt = (
        "You are a helpful real estate agent. "
        "Given the following property listing and buyer preferences, rewrite the listing description to highlight features that match the buyer's interests. "
        "Do not change or invent any factual information. "
        "Make the listing more appealing to this buyer by emphasizing relevant aspects.\n\n"
        f"Buyer Preferences:\n{buyer_preferences}\n\n"
        f"Original Listing:\n{listing_text}\n\n"
        "Personalized Listing:"
    )
    # Return the completion to the caller. Previously the driver loop below was
    # indented into this function, so it never returned anything and recursed
    # over the characters of the completion string.
    return llm(prompt)


# Retrieve the listings that best match the buyer's answers, then personalize
# each one. The preference string is rebuilt from `answers` so this cell only
# depends on that list.
results = search_listings_by_preferences(answers)
buyer_preferences = "\n".join(answers)

print("Personalized Listings Based on Buyer Preferences:\n")
for i, res in enumerate(results, 1):
    personalized = augment_listing_description(res.page_content, buyer_preferences)
    print(f"Listing {i}:\n{personalized}\n{'-'*40}")