In [18]:
import os
import chromadb
import uuid
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableSequence
from pydantic import BaseModel, Field
from typing import List
from chromadb.utils import embedding_functions


load_dotenv()

NUMBER_OF_LISTINGS = 15


In [2]:
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    api_key=os.environ["OPENAI_API_KEY"],
    base_url=os.environ["OPENAI_API_BASE"]
)

## Generating listings
Generate real estate listings using a Large Language Model. Generate at least 10 listings This can involve creating prompts for the LLM to produce descriptions of various properties. You'll use these listings to populate the database for testing and development of "HomeMatch".
An example of a listing might be:
```Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.
```

In [3]:
# Simple PyDantic model for real estate listings

class Listing(BaseModel):
    '''Real estate listing model'''
    id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique identifier for the listing")
    neighborhood: str = Field(description="The area where the property is located")
    price: int = Field(description="The price of the property")
    bedrooms: int = Field(description="Number of bedrooms in the property")
    bathrooms: int = Field(description="Number of bathrooms in the property")
    house_size: int = Field(description="Size of the property in square feet")
    description: str = Field(description="Description of the property")
    neighborhood_description: str = Field(description="Description of the neighborhood")
    
    def to_text(self):
        """Convert to searchable text format"""
        return f"""Neighborhood: {self.neighborhood}
        Price: ${self.price:,}
        Bedrooms: {self.bedrooms}
        Bathrooms: {self.bathrooms}
        House Size: {self.house_size} sqft
        Description: {self.description}
        Neighborhood Description: {self.neighborhood_description}"""
        
class Listings(BaseModel):
    """List of real estate listings"""
    listings: List[Listing]

In [5]:
listing1 = Listing(
    id=str(uuid.uuid4()),
    neighborhood="Green Oaks",
    price=800000,
    bedrooms=3,
    bathrooms=2,
    house_size=2000,
    description="Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure.",
    neighborhood_description="Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths."
)

listing2 = Listing(
    id=str(uuid.uuid4()),
    neighborhood="Downtown",
    price=1200000,
    bedrooms=4,
    bathrooms=3,
    house_size=3200,
    description="Luxurious downtown penthouse with panoramic city views. This modern 4-bedroom, 3.5-bathroom unit features high-end appliances, marble countertops, and smart home technology.",
    neighborhood_description="Downtown offers vibrant nightlife, top restaurants, and cultural attractions within walking distance."
)

In [6]:
structured_llm = llm.with_structured_output(Listings)
response = structured_llm.invoke("Generate 15 real estate listings. Make sure to include a variety of neighborhoods, price ranges, and property types.")

In [7]:
response.listings

[Listing(id='c42c6ea1-b72d-47f1-9f25-ce2844609644', neighborhood='Downtown', price=750000, bedrooms=2, bathrooms=2, house_size=1200, description='Modern condo with city views and open floor plan.', neighborhood_description='Vibrant urban area with nightlife, dining, and shopping.'),
 Listing(id='8bd51a88-9440-4d50-a274-9f1a75d44bd6', neighborhood='Suburbia', price=450000, bedrooms=3, bathrooms=2, house_size=1800, description='Charming single-family home with a large backyard.', neighborhood_description='Quiet residential area with parks and good schools.'),
 Listing(id='5a9ba167-bbf6-4ccf-b40b-efd27393d31c', neighborhood='Beachside', price=1200000, bedrooms=4, bathrooms=3, house_size=2500, description='Luxury beachfront property with stunning ocean views.', neighborhood_description='Coastal area known for its beaches and water activities.'),
 Listing(id='932b1e89-30ed-4f88-9d56-0960cd0cf189', neighborhood='Historic District', price=600000, bedrooms=3, bathrooms=2, house_size=1600, desc

## Storing Listings in a Vector Database

In [8]:
# chroma_client.delete_collection("listings")

In [9]:
chroma_client = chromadb.Client()

default_ef = embedding_functions.DefaultEmbeddingFunction()

listings_collection = chroma_client.create_collection(
    name="listings",
    embedding_function=default_ef
)

In [10]:
documents = [listing.to_text() for listing in response.listings]
ids = [str(uuid.uuid4()) for _ in response.listings]

listings_collection.add(
    documents=documents,
    ids=ids
)

In [11]:
print(listings_collection.get(include=['embeddings']))

{'ids': ['a0972e56-27c8-44ea-9657-edc714810655', 'c32f34a6-1640-42de-96e1-0b0e9422b094', '1aab7787-e775-4b41-8425-3a3f5ac0e416', '9f5506ae-7716-4d57-b449-38c07efddb68', '27958a8b-b491-4f91-b08f-dd157d2348e9', 'fa668b0e-c63e-4989-840b-479cd6452cce', 'f443030e-5c96-457b-ad1c-18b21853bdf8', 'b8faee54-ef9e-4f6a-ac57-32f34e0ab9f3', 'c986827a-0f8c-4c03-9676-f747371521f5', '84bb5fff-25a6-4cb4-bb49-9c55234b9c0d', '3a91faac-724b-4c4d-be48-9144cdf0e468', '9477abef-4342-4a8f-8958-76d29f2561f2', '55862e3d-5c44-4b2c-a3ab-375d6864e8e1', 'e0f8d259-b907-4d3c-b8c0-00cd3a8968d3', '9607574d-84c5-410a-94d1-672de05ece9c'], 'embeddings': array([[ 0.13982712, -0.06268502,  0.01222483, ..., -0.00989543,
        -0.10547148,  0.02649353],
       [ 0.15418164, -0.04593146,  0.02412212, ...,  0.02151575,
        -0.13380758,  0.01679743],
       [ 0.15488228, -0.01966279,  0.03117318, ..., -0.00581289,
        -0.12166493,  0.05050588],
       ...,
       [ 0.11009039, -0.05094731,  0.0213808 , ..., -0.0153947 ,

In [12]:
results = listings_collection.query(
    query_texts=["I'm looking for a modern home in a vibrant neighborhood with good schools."],
    n_results=3
)
print(results)

{'ids': [['9477abef-4342-4a8f-8958-76d29f2561f2', 'c986827a-0f8c-4c03-9676-f747371521f5', 'c32f34a6-1640-42de-96e1-0b0e9422b094']], 'embeddings': None, 'documents': [['Neighborhood: Greenfield\n        Price: $400,000\n        Bedrooms: 3\n        Bathrooms: 2\n        House Size: 1500 sqft\n        Description: Eco-friendly home with solar panels and green features.\n        Neighborhood Description: Suburban area with community gardens and green spaces.', 'Neighborhood: Old Town\n        Price: $550,000\n        Bedrooms: 3\n        Bathrooms: 2\n        House Size: 1700 sqft\n        Description: Charming cottage with a garden and vintage appeal.\n        Neighborhood Description: Historic area with cobblestone streets and local shops.', 'Neighborhood: Suburbia\n        Price: $450,000\n        Bedrooms: 3\n        Bathrooms: 2\n        House Size: 1800 sqft\n        Description: Charming single-family home with a large backyard.\n        Neighborhood Description: Quiet residential 

## Building User Preference Interface

In [13]:
questions = [   
    "How big do you want your house to be?" 
    "What are 3 most important things for you in choosing this property?", 
    "Which amenities would you like?", 
    "Which transportation options are important to you?",
    "How urban do you want your neighborhood to be?",   
]

answers = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."
]

In [19]:
# Buyer Preference Parsing:
buyer_input = " ".join(answers)

template = """
Extract structured buyer preferences from the following input:
{buyer_input}

Return it as a structured summary for in the following format:
    id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique identifier for the listing")
    neighborhood: str = Field(description="The area where the property is located")
    price: int = Field(description="The price of the property")
    bedrooms: int = Field(description="Number of bedrooms in the property")
    bathrooms: float = Field(description="Number of bathrooms in the property")
    house_size: int = Field(description="Size of the property in square feet")
    description: str = Field(description="Description of the property")
    neighborhood_description: str = Field(description="Description of the neighborhood") 
    
If smth is missing, remove it for the output.
"""
prompt = PromptTemplate.from_template(template)
chain = prompt | llm
structured_preferences = chain.invoke({"buyer_input": buyer_input})

In [None]:
structured_preferences.content

str

## Searching Based on Preferences

In [None]:
# Semantic Search
results = listings_collection.query(
    query_texts=[structured_preferences.content],
    n_results=3
)
print(results)

{'ids': [['c32f34a6-1640-42de-96e1-0b0e9422b094', 'fa668b0e-c63e-4989-840b-479cd6452cce', '9477abef-4342-4a8f-8958-76d29f2561f2']], 'embeddings': None, 'documents': [['Neighborhood: Suburbia\n        Price: $450,000\n        Bedrooms: 3\n        Bathrooms: 2\n        House Size: 1800 sqft\n        Description: Charming single-family home with a large backyard.\n        Neighborhood Description: Quiet residential area with parks and good schools.', 'Neighborhood: Uptown\n        Price: $800,000\n        Bedrooms: 3\n        Bathrooms: 2\n        House Size: 2000 sqft\n        Description: Elegant townhouse with rooftop terrace and city views.\n        Neighborhood Description: Trendy area with boutiques, cafes, and cultural venues.', 'Neighborhood: Greenfield\n        Price: $400,000\n        Bedrooms: 3\n        Bathrooms: 2\n        House Size: 1500 sqft\n        Description: Eco-friendly home with solar panels and green features.\n        Neighborhood Description: Suburban area with 

In [None]:
# Listing Retrieval
matching_listings = listings_collection.get(ids=results['ids'][0], include=['documents'])
matching_listings


{'ids': ['c32f34a6-1640-42de-96e1-0b0e9422b094',
  'fa668b0e-c63e-4989-840b-479cd6452cce',
  '9477abef-4342-4a8f-8958-76d29f2561f2'],
 'embeddings': None,
 'documents': ['Neighborhood: Suburbia\n        Price: $450,000\n        Bedrooms: 3\n        Bathrooms: 2\n        House Size: 1800 sqft\n        Description: Charming single-family home with a large backyard.\n        Neighborhood Description: Quiet residential area with parks and good schools.',
  'Neighborhood: Uptown\n        Price: $800,000\n        Bedrooms: 3\n        Bathrooms: 2\n        House Size: 2000 sqft\n        Description: Elegant townhouse with rooftop terrace and city views.\n        Neighborhood Description: Trendy area with boutiques, cafes, and cultural venues.',
  'Neighborhood: Greenfield\n        Price: $400,000\n        Bedrooms: 3\n        Bathrooms: 2\n        House Size: 1500 sqft\n        Description: Eco-friendly home with solar panels and green features.\n        Neighborhood Description: Suburban are

In [29]:
# 1. Prepare your input text
documents_text = "\n\n".join(matching_listings["documents"])

# 2. Set up the LLM with structured output
structured_llm = llm.with_structured_output(Listings)

# 3. Call the LLM
listings_obj = structured_llm.invoke(
    f"""Parse the following real estate listings and convert them into structured data.

Each listing contains details like neighborhood, price, bedrooms, bathrooms, house size, description, and neighborhood description.

Listings:
{documents_text}
"""
)

In [30]:
listings_obj

Listings(listings=[Listing(id='2588738a-9682-427f-ba1a-24ac9a8ffdc2', neighborhood='Suburbia', price=450000, bedrooms=3, bathrooms=2, house_size=1800, description='Charming single-family home with a large backyard.', neighborhood_description='Quiet residential area with parks and good schools.'), Listing(id='567a1e00-d1bb-407c-a23d-821c2598bcae', neighborhood='Uptown', price=800000, bedrooms=3, bathrooms=2, house_size=2000, description='Elegant townhouse with rooftop terrace and city views.', neighborhood_description='Trendy area with boutiques, cafes, and cultural venues.'), Listing(id='bb9bf25a-38f2-4367-8c96-e97f3ab04f3d', neighborhood='Greenfield', price=400000, bedrooms=3, bathrooms=2, house_size=1500, description='Eco-friendly home with solar panels and green features.', neighborhood_description='Suburban area with community gardens and green spaces.')])

## Personalizing Listing Descriptions

In [None]:
personalized_descriptions = []

for listing in listings_obj.listings:
    prompt = f"""
You are a helpful assistant helping match homes to buyers.

The buyer has expressed these preferences:
{structured_preferences.content}

Here is a listing:
Neighborhood: {listing.neighborhood}
Price: ${listing.price}
Bedrooms: {listing.bedrooms}
Bathrooms: {listing.bathrooms}
House Size: {listing.house_size} sqft
Description: {listing.description}
Neighborhood Description: {listing.neighborhood_description}

Your task is to rewrite or enhance the property description in a way that highlights the features most aligned with the buyer’s preferences. Do not change any factual information.

Respond with just the improved listing description.
"""

personalized_description = llm.invoke(prompt)
personalized_descriptions.append((listing, personalized_description))

In [34]:
for listing, description in personalized_descriptions:
    print(f"🏡 {listing.neighborhood} — ${listing.price}")
    print(f"{description.content}\n")

🏡 Suburbia — $450000
Charming single-family home featuring three bedrooms and two bathrooms, perfect for comfortable living. Enjoy a spacious kitchen and a cozy living room, ideal for family gatherings. The property boasts a large backyard, perfect for gardening enthusiasts, and includes a two-car garage. Located in a quiet residential area with parks and good schools, this home offers easy access to a reliable bus line and proximity to a major highway. Experience the balance between suburban tranquility and urban amenities, with convenient shopping options and bike-friendly roads. The home is equipped with a modern, energy-efficient heating system, ensuring comfort and sustainability.

🏡 Uptown — $800000
Elegant three-bedroom townhouse with a rooftop terrace offering stunning city views. This home features a spacious kitchen and a cozy living room, perfect for comfortable living. Located in a trendy neighborhood, it provides easy access to boutiques, cafes, and cultural venues, while 