Step 1: Setting Up the Python Application

In [19]:
# Installed
#%pip install pandas
#%pip install chromadb

# Created a local env
# python3 -m venv ./venv
# source ./venv/bin/activate
# Registered my local venv
# ipython kernel install --name "homematch-venv-kernel" --user

# Or run
# pip install -r ./requirements.txt

UsageError: Line magic function `%python3` not found (But cell magic `%%python3` exists, did you mean that instead?).


Step 2: Generating Real Estate Listings

Generate real estate listings using a Large Language Model. Generate at least 10 listings. This can involve creating prompts for the LLM to produce descriptions of various properties. An example of a listing might be:

Neighborhood: Green Oaks
Price: 800000 €
Bedrooms: 3
Bathrooms: 2
House Size: 2000 m2
Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.
Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.


In [2]:
import os
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import pandas as pd
import re
from langchain.schema import Document

# The API key must already be present in the environment (for example exported
# in the shell that starts Jupyter). Assigning a literal here would either leak
# the secret in the notebook or, with an empty string, overwrite a correctly
# exported key and make every later OpenAI call fail.
if not os.environ.get("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set. Export it before running this notebook."
    )
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

# 1. Load CSV
df = pd.read_csv('listings.csv')

# 2. Define the prompt template
# The placeholders in braces are filled from one CSV row per call. The model is
# asked for two labelled sections so the reply can be split again afterwards.
template = """
You are a real estate listing generator. Given the following data, generate a professional and engaging house listing and a separate neighborhood description.
On the description be specific about the house amenities, ambience of the house and the neighborhood, accessibility to transports and the type of transports, having in mind the condition and the location type.

Neighborhood: {neighborhood}
Price: {price} €
Bedrooms: {bedrooms}
Bathrooms: {bathrooms}
House Size: {house_size} m2
Location Type: {location_type}
Condition: {condition}

Output format:

Description:
[Generated description]

Neighborhood Description:
[Generated neighborhood description]
"""

prompt = PromptTemplate(
    input_variables=["neighborhood", "price", "bedrooms", "bathrooms", "house_size", "location_type", "condition"],
    template=template,
)

# 3. Create LLMChain
# `llm` is reused by later cells (RAG answer generation and the realtor agent).
llm = ChatOpenAI(temperature=0.7, model_name="gpt-3.5-turbo", max_tokens=500)
chain = LLMChain(llm=llm, prompt=prompt)

# 4. Generate the listing descriptions
# CSV columns that are passed to the prompt and copied into the metadata.
LISTING_FIELDS = [
    "neighborhood", "price", "bedrooms", "bathrooms",
    "house_size", "location_type", "condition",
]


def _split_generated_text(text):
    """Split raw LLM output into ``(description, neighborhood_description)``.

    The text is cut at the first "Neighborhood Description:" label that starts
    on its own line. Only the first line-leading "Description:" label is
    removed from the first part, so the word "Description:" appearing inside
    the generated prose is left untouched. When the neighborhood label is
    missing, the second element is an empty string.
    """
    parts = re.split(
        r'\n\s*Neighborhood Description:\s*',
        text.strip(),
        maxsplit=1,  # keep everything after the first label together
        flags=re.IGNORECASE,
    )
    description = re.sub(
        r'^\s*Description:\s*',
        '',
        parts[0],
        count=1,
        flags=re.IGNORECASE | re.MULTILINE,
    ).strip()
    neighborhood_description = parts[1].strip() if len(parts) > 1 else ""
    return description, neighborhood_description


generated_docs = []
for _, row in df.iterrows():
    # One LLM call per CSV row.
    listing_fields = {field: row[field] for field in LISTING_FIELDS}
    result = chain.run(listing_fields)

    description, neighborhood_description = _split_generated_text(result)

    # Metadata carries the structured CSV values plus the reservation flag.
    metadata = {**listing_fields, "reserved": row['reserved']}

    content = f"{description}\n\n{neighborhood_description}"
    generated_docs.append(Document(page_content=content, metadata=metadata))

  llm = ChatOpenAI(temperature=0.7, model_name="gpt-3.5-turbo", max_tokens = 500)  # Or use "gpt-3.5-turbo"
  chain = LLMChain(llm=llm, prompt=prompt)
  result = chain.run({


Step 3: Storing Listings in a Vector Database

* Vector Database Setup: Initialize and configure ChromaDB or a similar vector database to store real estate listings.
* Generating and Storing Embeddings: Convert the LLM-generated listings into suitable embeddings that capture the semantic content of each listing, and store these embeddings in the vector database.

In [10]:
# Show a short preview instead of dumping every generated document into the
# cell output.
print(f"Generated {len(generated_docs)} listings; first entry:")
print(generated_docs[:1])

from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings

# Location and name of the persisted Chroma collection used by every helper
# in this notebook.
CHROMA_PATH = './chroma_db'
COLLECTION_NAME = 'listings'

embedding_model = OpenAIEmbeddings()

vector_store = Chroma(
    collection_name=COLLECTION_NAME,
    embedding_function=embedding_model,
    persist_directory=CHROMA_PATH
)

# Seed the collection only while it is empty: a fresh checkout then works under
# "Restart Kernel -> Run All", and re-running the cell against an already
# populated store does not insert duplicate listings.
if not vector_store.get(include=["metadatas"])["ids"]:
    vector_store.add_documents(generated_docs)

def get_all():
    """Return everything stored in the listings collection.

    Opens the persisted Chroma collection and hands back the result of its
    unfiltered ``get()`` call as-is.
    """
    store_settings = {
        "collection_name": COLLECTION_NAME,
        "embedding_function": embedding_model,
        "persist_directory": CHROMA_PATH,
    }
    return Chroma(**store_settings).get()

def update_by_id(id):
    """Return the contents of the listings collection.

    NOTE(review): despite its name this function never reads ``id`` and does
    not update anything; it returns the result of an unfiltered ``get()``.
    Confirm the intended behaviour before relying on it.
    """
    listings_db = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=embedding_model,
        collection_name=COLLECTION_NAME,
    )
    everything = listings_db.get()
    return everything

def reserve_by_id(id):
    """Mark the stored listing with the given ID as reserved.

    Args:
        id: Identifier of the record in the listings collection.

    Returns:
        True when a record with that ID exists and its metadata was updated,
        False when no such record is found.
    """
    db = Chroma(
        collection_name=COLLECTION_NAME,
        embedding_function=embedding_model,
        persist_directory=CHROMA_PATH
    )
    results = db.get([id], include=["metadatas"])

    metadatas = results["metadatas"]
    if not metadatas:
        return False

    # Write back the complete metadata with only the flag changed, so the other
    # fields (price, bedrooms, ...) survive whether the backend merges or
    # replaces metadata on update.
    updated_meta = {**(metadatas[0] or {}), "reserved": True}
    db._collection.update(
        ids=[id],
        metadatas=[updated_meta]
    )
    return True

def similarity_search_on_chroma(query, number_of_listings):
    """Run a similarity search on the listings collection.

    Args:
        query: Free-text query to search with.
        number_of_listings: Passed to the store as ``k``.

    Returns:
        Whatever ``Chroma.similarity_search`` returns for the query.
    """
    listings_db = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=embedding_model,
        collection_name=COLLECTION_NAME,
    )
    matches = listings_db.similarity_search(query, k=number_of_listings)
    return matches

def search_listings(query, min_bedrooms=0, min_bathrooms=0, min_size=0, max_size=99999, number_of_listings = 1):
    """Return up to ``number_of_listings`` listings matching query and filters.

    Candidates are ranked by semantic similarity to ``query`` and then checked
    against the numeric constraints using each document's metadata
    (``bedrooms``, ``bathrooms``, ``house_size``; a missing key counts as 0).

    Filtering only the top ``number_of_listings`` hits could return fewer
    results than requested (or none) although further matching listings exist,
    so a larger candidate pool is retrieved first and the result is truncated
    after filtering.

    Args:
        query: Free-text description of what the buyer is looking for.
        min_bedrooms: Minimum number of bedrooms (inclusive).
        min_bathrooms: Minimum number of bathrooms (inclusive).
        min_size: Minimum house size (inclusive).
        max_size: Maximum house size (inclusive).
        number_of_listings: Maximum number of listings to return.

    Returns:
        List of matching documents in similarity order.
    """
    if number_of_listings <= 0:
        return []

    # Never ask the store for more results than it holds. Counting through
    # get_all() loads the whole collection, which is acceptable for the few
    # listings this notebook generates.
    total_listings = len(get_all()["ids"])
    if total_listings == 0:
        return []

    overfetch_factor = 5  # size of the candidate pool relative to the request
    candidate_count = min(total_listings, number_of_listings * overfetch_factor)
    candidates = similarity_search_on_chroma(query, candidate_count)

    filtered = []
    for doc in candidates:
        meta = doc.metadata
        bedrooms = meta.get("bedrooms", 0)
        bathrooms = meta.get("bathrooms", 0)
        size = meta.get("house_size", 0)

        if bedrooms >= min_bedrooms and bathrooms >= min_bathrooms and min_size <= size <= max_size:
            filtered.append(doc)
            if len(filtered) == number_of_listings:
                break

    return filtered

[Document(metadata={'neighborhood': 'Green Oaks', 'price': 800000, 'bedrooms': 3, 'bathrooms': 2, 'house_size': 2000, 'location_type': 'Mountain', 'condition': 'ranch house in perfect condition', 'reserved': False}, page_content='Welcome to this charming ranch house located in the picturesque neighborhood of Green Oaks. This 3 bedroom, 2 bathroom home offers a cozy ambience with plenty of space for your family. The house spans 2000 m2 and is in perfect condition, ready for you to move in and make it your own. The living room features a fireplace, perfect for cozy nights in, and the spacious kitchen is ideal for hosting family gatherings. The bedrooms are bright and airy, providing a peaceful retreat after a long day. The neighborhood is tranquil and surrounded by beautiful mountains, offering a serene atmosphere for you to enjoy.\n\nGreen Oaks is a peaceful neighborhood nestled in the mountains, offering a tranquil escape from the hustle and bustle of city life. The area is known for i

Step 4: Building the User Preference Interface & Step 5: Searching Based on Preferences & Step 6: Personalizing Listing Descriptions

* Collect buyer preferences, such as the number of bedrooms, bathrooms, location, and other specific requirements from a set of questions or telling the buyer to enter their preferences in natural language. You can hard-code the buyer preferences in questions and answers, or collect them interactively however you'd like
* Buyer Preference Parsing: Implement logic to interpret and structure these preferences for querying the vector database.
* Semantic Search Implementation: Use the structured buyer preferences to perform a semantic search on the vector database, retrieving listings that most closely match the user's requirements.
* Listing Retrieval Logic: Fine-tune the retrieval algorithm to ensure that the most relevant listings are selected based on the semantic closeness to the buyer's preferences.
* LLM Augmentation: For each retrieved listing, use the LLM to augment the description, tailoring it to resonate with the buyer's specific preferences. This involves subtly emphasizing aspects of the property that align with what the buyer is looking for.
* Maintaining Factual Integrity: Ensure that the augmentation process enhances the appeal of the listing without altering factual information.

In [24]:
import gradio as gr
from langchain.agents import Tool, initialize_agent
from langchain.agents.agent_types import AgentType
from pydantic import BaseModel, Field
from pprint import pprint

# Empty argument schema for agent tools that take no input.
# NOTE(review): not referenced by any tool in the visible notebook cells;
# confirm it is still needed.
class NoInput(BaseModel):
    pass

# Argument schema for agent tools that take a single ID; it is passed as
# `args_schema` to the ReserveListing tool below.
class ID(BaseModel):
    # Required string field (`...` means no default value).
    id: str = Field(..., description="The ID to look up")

def format_listings_context(docs):
    """Render documents as a plain-text context block for the LLM prompt.

    Each document contributes a "Listing:" line with its ``page_content`` and
    a "Listing Metadata:" line with its ``metadata``, followed by a blank
    line. An empty input yields an empty string.
    """
    return "".join(
        f"Listing: {entry.page_content}\n"
        f"Listing Metadata: {entry.metadata}\n\n"
        for entry in docs
    )

def rag_listings_search(listings, user_input):
    """Ask the LLM for a tailored recommendation based on retrieved listings.

    Args:
        listings: Documents (with ``page_content`` and ``metadata``) to
            present to the model as context.
        user_input: The buyer's request in text form.

    Returns:
        The text generated by the LLM chain.
    """
    listing_context = format_listings_context(listings)

    # The declared variables must match the placeholders used in the template
    # and the keyword arguments given to chain.run() below.
    rag_prompt = PromptTemplate(
    input_variables=["listing_context", "user_input"],
    template="""
        You are a real estate assistant helping a homebuyer.
        
        Here are some listings:
        {listing_context}
        
        User's request:
        {user_input}
        
        Based on these listings and their metadata, write a tailored response summarizing the best options or recommendations, subtly emphasizing aspects of the property that align with what the buyer is looking for.
        """
    )

    chain = LLMChain(llm=llm, prompt=rag_prompt)

    return chain.run(listing_context=listing_context, user_input=user_input)

def list_available_listings(str = ""):
    """Return a text overview of every record in the listings collection.

    The ``str`` argument is accepted but not used; it only absorbs the input
    the agent passes to the tool. Records are not filtered by their
    ``reserved`` flag; the flag is visible in the printed metadata.

    Returns:
        One "- id: ..." entry per stored record, or "No available listings."
        when the collection is empty.
    """
    stored = get_all()
    records = zip(stored['ids'], stored['documents'], stored['metadatas'])
    entries = [
        f"- id: {listing_id}: {text} : metadata{meta}\n\n"
        for listing_id, text, meta in records
    ]
    if not entries:
        return "No available listings."
    return "\n".join(entries)

def reserve_listing(listing_id: str):
    """Reserve a listing and report the outcome as a sentence for the agent.

    Delegates to ``reserve_by_id`` and turns its boolean result into a
    human-readable message.
    """
    if not reserve_by_id(listing_id):
        return f"Listing {listing_id} not found."

    return f"Listing {listing_id} updated."

# Tools the realtor agent is allowed to call.
tools = [
    Tool(
        name="ListListings",
        func=list_available_listings,
        description="List all available property listings.",
    ),
    Tool(
        name="ReserveListing",
        func=reserve_listing,
        description="Reserve a listing by ID. Input should be the listing ID.",
        args_schema=ID,
    ),
]

# Function-calling agent built on the shared `llm`; verbose mode prints the
# tool invocations to the cell output.
agent_executor = initialize_agent(
    tools,
    llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)

def realtor_chat(user_msg, chat_history=None):
    """Send one message to the realtor agent and extend the chat history.

    Args:
        user_msg: Text entered by the user.
        chat_history: List of ``(user_msg, response)`` tuples collected so
            far. A new list is started when omitted.

    Returns:
        The updated history twice: once for the chatbot widget and once for
        the state component.
    """
    # A literal [] default is created once and shared by every call, so
    # messages would leak between unrelated conversations.
    if chat_history is None:
        chat_history = []

    response = agent_executor.run(user_msg)

    chat_history.append((user_msg, response))
    return chat_history, chat_history

# Tab representing the realtor assistant chat bot
def create_chat_tab():
    """Build the chat UI for the realtor assistant.

    Returns:
        The ``gr.Blocks`` container holding the chatbot, the message box and
        the clear button.
    """
    with gr.Blocks() as chat_ui:
        # The heading emoji was mis-decoded (mojibake); restored here.
        gr.Markdown("## 🧑‍💼 Assistant for the Realtor")

        chatbot = gr.Chatbot()
        msg = gr.Textbox(label="Ask the realtor assistant...")
        clear = gr.Button("Clear Chat")

        # Conversation history kept between submits.
        state = gr.State([])

        msg.submit(realtor_chat, [msg, state], [chatbot, state])
        # Reset both the visible chat and the stored history.
        clear.click(lambda: ([], []), None, [chatbot, state])

    return chat_ui

def handle_form(
    min_size_str,
    max_size_str,
    min_bedrooms,
    min_bathrooms,
    important_factors,
    amenities,
    transportation,
    urban_description
):
    """Turn the search form into a query, retrieve listings and summarize them.

    Args:
        min_size_str: Minimum size as a numeric string, or "No limit".
        max_size_str: Maximum size as a numeric string, or "No limit".
        min_bedrooms: Minimum number of bedrooms.
        min_bathrooms: Minimum number of bathrooms.
        important_factors: Free text with the buyer's priorities.
        amenities: Selected amenities; ``None`` is treated as no selection.
        transportation: Selected transport options; ``None`` is treated as no
            selection.
        urban_description: Free text describing the desired neighborhood feel.

    Returns:
        The LLM-written recommendation, or "No listings found." when nothing
        matches.
    """
    min_size = 0 if min_size_str == "No limit" else int(min_size_str)
    max_size = 99999 if max_size_str == "No limit" else int(max_size_str)

    # An untouched multiselect may arrive as None; join() needs an iterable.
    amenities = amenities or []
    transportation = transportation or []

    query = (
        f"Important: {important_factors}. "
        f"Amenities: {', '.join(amenities)}. "
        f"Transportation: {', '.join(transportation)}. "
        f"Urban feel: {urban_description}."
    )

    results = search_listings(
        query=query,
        min_bedrooms=min_bedrooms,
        min_bathrooms=min_bathrooms,
        min_size=min_size,
        max_size=max_size,
        number_of_listings=5
    )

    if not results:
        return "No listings found."

    rag_result = rag_listings_search(results, query)

    return rag_result

# Tab representing the user search
def create_user_searching_tab():
    """Build the buyer-facing search form and wire it to ``handle_form``.

    Returns:
        The ``gr.Blocks`` container with the filter column and the results
        column.
    """
    # Size options for dropdowns: finer steps for small houses, coarser steps
    # for large ones, plus a sentinel for "no constraint".
    size_options = [str(i) for i in range(40, 181, 20)]
    size_options.extend([str(i) for i in range(200, 451, 50)])
    size_options.extend([str(i) for i in range(500, 1501, 100)])
    size_options.append("No limit")

    # Amenities and transport options
    amenity_choices = [
        "Gym", "Swimming Pool", "Parking", "Garden", "Rooftop",
        "Security", "Smart Home Features"
    ]

    transport_choices = [
        "Subway", "Bus", "Bike Paths", "Walking Trails",
        "Electric Charging", "Highway Access"
    ]

    # The emoji in the three UI strings below were mis-decoded (mojibake) and
    # are restored here.
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 🧰 Filters")

                min_size = gr.Dropdown(choices=size_options, label="Minimum size of the house?", value="No limit")
                max_size = gr.Dropdown(choices=size_options, label="Maximum size of the house?", value="No limit")
                min_bedrooms = gr.Slider(0, 100, value=1, step=1, label="Minimum Bedrooms")
                min_bathrooms = gr.Slider(0, 100, value=1, step=1, label="Minimum Bathrooms")

                important_factors = gr.Textbox(label="Top 3 things you care about")
                amenities = gr.Dropdown(choices=amenity_choices, multiselect=True, label="Preferred Amenities")
                transportation = gr.Dropdown(choices=transport_choices, multiselect=True, label="Preferred Transportation")
                urban_description = gr.Textbox(label="How urban should the neighborhood feel?")

                search_btn = gr.Button("🔍 Search Listings")

            with gr.Column(scale=2):
                gr.Markdown("### 📋 Matching Listings")
                results_box = gr.Markdown()

        # The input order must match the parameter order of handle_form.
        search_btn.click(
            fn=handle_form,
            inputs=[
                min_size, max_size, min_bedrooms, min_bathrooms,
                important_factors, amenities, transportation, urban_description
            ],
            outputs=results_box
        )

    return demo

# Assemble both tabs into one application. The tab labels' emoji were
# mis-decoded (mojibake) and are restored here.
with gr.Blocks() as app:
    with gr.Tabs():
        with gr.Tab("🏠 Search Listings"):
            create_user_searching_tab()

        with gr.Tab("💬 Realtor Chatbot"):
            create_chat_tab()

# Run it
app.launch()

  chatbot = gr.Chatbot()


* Running on local URL:  http://127.0.0.1:7871
* To create a public link, set `share=True` in `launch()`.






[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `ListListings` with `all`


[0m[36;1m[1;3m- id: bcce96d1-d287-421f-99b4-41553e6cd027: Welcome to this charming ranch house located in the picturesque neighborhood of Green Oaks. This 3 bedroom, 2 bathroom home offers a cozy ambience with plenty of space for your family. The house spans 2000 m2 and is in perfect condition, ready for you to move in and make it your own. The living room features a fireplace, perfect for cozy nights in, and the spacious kitchen is ideal for hosting family gatherings. The bedrooms are bright and airy, providing a peaceful retreat after a long day. The neighborhood is tranquil and surrounded by beautiful mountains, offering a serene atmosphere for you to enjoy.

Green Oaks is a peaceful neighborhood nestled in the mountains, offering a tranquil escape from the hustle and bustle of city life. The area is known for its natural beauty, with lush greenery and stunning mountain views. Resi