In [38]:
!pip install -r requirements.txt

Defaulting to user installation because normal site-packages is not writeable








In [39]:
# Import Libraries
import os
import re
import pandas as pd
import chromadb
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.chroma import Chroma

In [40]:
os.environ["OPENAI_API_KEY"] = "YOUR API KEY"
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
MODEL_NAME = 'gpt-4'
TEMPERATURE = 0.1

# Initialize LLM
llm = OpenAI(
    model_name=MODEL_NAME,
    temperature=TEMPERATURE,
    max_tokens = 4096,
    openai_api_key=OPENAI_API_KEY
)



In [41]:
# Initialize ChromaDB client and embedding model
client = chromadb.Client()
embedding_model = OpenAIEmbeddings()
collection = client.get_or_create_collection("listings_collections")

In [42]:
def generate_llm_listing(neighborhood, price, bedrooms, bathrooms, house_size, property_type, amenities):
    """
    Generate a real estate listing based on provided details.
    :param neighborhood: Neighborhood name.
    :param price: Price of the property.
    :param bedrooms: Number of bedrooms.
    :param bathrooms: Number of bathrooms.
    :param house_size: Size of the house in sqft.
    :param property_type: Type of property (house, condo, apartment).
    :param amenities: List of amenities.
    :return: Generated property description and neighborhood description.
    """
    prompt = (f"You are an expert real estate agent. Generate a realistic real estate listing for Hoboken, NJ with the following details:\n"
              f"Neighborhood: {neighborhood}\n"
              f"Price: ${price}\n"
              f"Bedrooms: {bedrooms}\n"
              f"Bathrooms: {bathrooms}\n"
              f"House Size: {house_size} sqft\n"
              f"Property Type: {property_type}\n"
              f"Amenities: {', '.join(amenities)}\n\n"
              "Provide a property description and a neighborhood description.")
    return llm(prompt).strip()

In [43]:
def add_to_chromadb(collection, listing_id, description, metadata, embedding_model):
    """
    Add a listing to the ChromaDB collection.
    :param collection: ChromaDB collection instance.
    :param listing_id: Unique ID for the listing.
    :param description: Description of the property.
    :param metadata: Metadata related to the property.
    :param embedding_model: Model used for generating embeddings.
    """
    embedding = embedding_model.embed_documents([description])[0]
    collection.add(
        ids=[listing_id],
        embeddings=[embedding],
        metadatas=[metadata],
        documents=[description]  # Include the document (description) here
    )

In [44]:
def parse_answers(answers):
    preferences = {}

    # Extract the number of bedrooms from the answers (list of integers)
    bedrooms = [int(b) for b in answers[0]]
    preferences["bedrooms"] = bedrooms if bedrooms else "Not specified"

    # Extract the number of bathrooms from the answers (list of doubles)
    bathrooms = [float(b) for b in answers[1]]
    preferences["bathrooms"] = bathrooms if bathrooms else "Not specified"

    # Extract important features for the property
    preferences["important_features"] = answers[2]

    # Extract amenities from the answers
    preferences["amenities"] = answers[3]

    # Extract transportation preferences
    preferences["transportation"] = answers[4]

    # Extract neighborhood preference
    preferences["urban_preference"] = answers[5]

    # Extract price range (single lower and upper bound)
    price_pattern = r"\$(\d+)"
    lower_upper = [int(p) for p in re.findall(price_pattern, answers[6][0], re.IGNORECASE)]
    preferences["price_range"] = lower_upper if lower_upper else "Not specified"

    # Extract property type preference
    preferences["property_type"] = answers[7][0]  # Assuming single select for property type

    return preferences

In [45]:
def search_chromadb(preferences, collection, embedding_model):
    """
    Searches the embeddings of the vector database and included metadata filters.
    :param preferences: The parsed user preferences.
    :param collection: The ChromaDB collection containing the listings and embeddings.
    :param embedding_model: The model used to generate embeddings for queries.
    :return: A list of matching listings.
    """
    # Construct the search query text for semantic search
    search_query = f"""
    I'm looking for a property with {', '.join(map(str, preferences['bedrooms']))} bedrooms and {', '.join(map(str, preferences['bathrooms']))} bathrooms. 
    It should have features such as {', '.join(preferences['important_features'])}.
    It should include amenities like {', '.join(preferences['amenities'])}. 
    I'm particularly interested in a neighborhood that offers {preferences['urban_preference']} and good transportation options, including {', '.join(preferences['transportation'])}.
    My price range is from ${preferences['price_range'][0]} to ${preferences['price_range'][1]}.
    """

    # Generate an embedding for the search query
    query_embedding = embedding_model.embed_query(search_query)

    # Define metadata filters
    metadata_filters = {
        "$and": [
            {"bedrooms": {"$in": user_preferences["bedrooms"]}},
            {"bathrooms": {"$in": user_preferences["bathrooms"]}},
            {"price": {"$gte": user_preferences["price_range"][0]}},
            {"price": {"$lte": user_preferences["price_range"][1]}},
            {"property_type": user_preferences["property_type"]}
        ]
    }

    # Perform the semantic search with metadata filters
    search_results = collection.query(
        query_embeddings=[query_embedding],  # Embedding for matching listings
        where=metadata_filters,              # Metadata filter criteria
        n_results=3, # Return top matching listings
        include = ["documents","distances"]
    )

    return search_results

In [46]:
def augment_result(user_preferences, document):
    """
    Augment the search result by generating an explanation of why the listing is perfect for the user.
    :param user_preferences: Dictionary of user preferences.
    :param document: The nearest search result's document (description of the listing).
    :return: LLM response.
    """
    # Construct user preferences description
    preferences_description = (
        f"User is looking for a property with {user_preferences['bedrooms']} bedrooms, "
        f"{user_preferences['bathrooms']} bathrooms, and a price range from {user_preferences['price_range'][0]} "
        f"to {user_preferences['price_range'][1]}. The preferred property type is {user_preferences['property_type']}."
    )

    # Construct the prompt for the LLM
    prompt = (
        f"Based on the user's preferences: {preferences_description}\n"
        f"Explain why the following listing is perfect for the user without changing facts about the listing:\n"
        f"{document}"
    )

    return llm(prompt).strip()

# If first time running, generate sample listings
Commented out to save on tokens since it has already been run and saved as CSV

In [47]:
# # List of neighborhoods and property details in Hoboken, NJ
# listings_data = [
# {"neighborhood": "Downtown Hoboken", "price": 950000, "bedrooms": 2, "bathrooms": 2, "house_size": 1200, "property_type": "condo", "amenities": ["gym", "pool"]},
# {"neighborhood": "Uptown Hoboken", "price": 1200000, "bedrooms": 3, "bathrooms": 2.5, "house_size": 1500, "property_type": "condo", "amenities": ["parking", "gym"]},
# {"neighborhood": "Midtown Hoboken", "price": 800000, "bedrooms": 2, "bathrooms": 1.5, "house_size": 1100, "property_type": "apartment", "amenities": ["parking", "roof deck"]},
# {"neighborhood": "Southwest Hoboken", "price": 700000, "bedrooms": 1, "bathrooms": 1, "house_size": 900, "property_type": "apartment", "amenities": ["gym", "parking"]},
# {"neighborhood": "Northwest Hoboken", "price": 850000, "bedrooms": 2, "bathrooms": 2, "house_size": 1000, "property_type": "condo", "amenities": ["pool", "gym"]},
# {"neighborhood": "Washington Street", "price": 1300000, "bedrooms": 3, "bathrooms": 2.5, "house_size": 1600, "property_type": "condo", "amenities": ["parking", "gym"]},
# {"neighborhood": "Hoboken Waterfront", "price": 1500000, "bedrooms": 4, "bathrooms": 3, "house_size": 2000, "property_type": "house", "amenities": ["pool", "garage"]},
# {"neighborhood": "Jackson Street", "price": 900000, "bedrooms": 2, "bathrooms": 2, "house_size": 1200, "property_type": "condo", "amenities": ["gym", "parking"]},
# {"neighborhood": "Clinton Street", "price": 1100000, "bedrooms": 3, "bathrooms": 2, "house_size": 1400, "property_type": "condo", "amenities": ["roof deck", "parking"]},
# {"neighborhood": "Garden Street", "price": 750000, "bedrooms": 1, "bathrooms": 1, "house_size": 800, "property_type": "apartment", "amenities": ["gym", "parking"]},
# {"neighborhood": "Adams Street", "price": 1200000, "bedrooms": 3, "bathrooms": 2, "house_size": 1550, "property_type": "house", "amenities": ["garage", "garden"]},
# {"neighborhood": "River Street", "price": 950000, "bedrooms": 2, "bathrooms": 1.5, "house_size": 1150, "property_type": "apartment", "amenities": ["pool", "gym"]},
# {"neighborhood": "Jefferson Street", "price": 850000, "bedrooms": 2, "bathrooms": 2, "house_size": 1000, "property_type": "condo", "amenities": ["gym", "roof deck"]},
# {"neighborhood": "Madison Street", "price": 1300000, "bedrooms": 3, "bathrooms": 2.5, "house_size": 1700, "property_type": "house", "amenities": ["parking", "garden"]},
# {"neighborhood": "Grand Street", "price": 1100000, "bedrooms": 4, "bathrooms": 3, "house_size": 2000, "property_type": "house", "amenities": ["pool", "garage"]},
# {"neighborhood": "Bloomfield Street", "price": 950000, "bedrooms": 2, "bathrooms": 2, "house_size": 1200, "property_type": "condo", "amenities": ["gym", "parking"]},
# {"neighborhood": "Monroe Street", "price": 800000, "bedrooms": 2, "bathrooms": 1.5, "house_size": 1100, "property_type": "apartment", "amenities": ["parking", "gym"]},
# {"neighborhood": "Willow Avenue", "price": 1400000, "bedrooms": 3, "bathrooms": 2.5, "house_size": 1600, "property_type": "house", "amenities": ["garage", "garden"]},
# {"neighborhood": "Park Avenue", "price": 1500000, "bedrooms": 4, "bathrooms": 3.5, "house_size": 2100, "property_type": "house", "amenities": ["pool", "garage"]},
# {"neighborhood": "Washington Street", "price": 1200000, "bedrooms": 3, "bathrooms": 2.5, "house_size": 1500, "property_type": "condo", "amenities": ["parking", "gym"]},
# {"neighborhood": "Hoboken Avenue", "price": 1400000, "bedrooms": 4, "bathrooms": 3, "house_size": 2200, "property_type": "house", "amenities": ["garden", "garage"]},
# {"neighborhood": "Garden Street", "price": 950000, "bedrooms": 2, "bathrooms": 2, "house_size": 1150, "property_type": "condo", "amenities": ["gym", "roof deck"]},
# {"neighborhood": "1st Street", "price": 850000, "bedrooms": 2, "bathrooms": 1.5, "house_size": 1000, "property_type": "apartment", "amenities": ["parking", "gym"]},
# {"neighborhood": "2nd Street", "price": 1200000, "bedrooms": 3, "bathrooms": 2.5, "house_size": 1500, "property_type": "condo", "amenities": ["parking", "gym"]},
# {"neighborhood": "3rd Street", "price": 900000, "bedrooms": 2, "bathrooms": 2, "house_size": 1200, "property_type": "condo", "amenities": ["gym", "parking"]},
# {"neighborhood": "4th Street", "price": 1100000, "bedrooms": 3, "bathrooms": 2, "house_size": 1400, "property_type": "condo", "amenities": ["roof deck", "parking"]},
# {"neighborhood": "5th Street", "price": 750000, "bedrooms": 1, "bathrooms": 1, "house_size": 800, "property_type": "apartment", "amenities": ["gym", "parking"]},
# {"neighborhood": "6th Street", "price": 1400000, "bedrooms": 4, "bathrooms": 3.5, "house_size": 2100, "property_type": "house", "amenities": ["pool", "garage"]},
# {"neighborhood": "7th Street", "price": 850000, "bedrooms": 2, "bathrooms": 2, "house_size": 1000, "property_type": "condo", "amenities": ["gym", "parking"]},
# {"neighborhood": "8th Street", "price": 1300000, "bedrooms": 3, "bathrooms": 2.5, "house_size": 1600, "property_type": "condo", "amenities": ["parking", "gym"]},
# {"neighborhood": "9th Street", "price": 950000, "bedrooms": 2, "bathrooms": 2, "house_size": 1200, "property_type": "condo", "amenities": ["gym", "pool"]},
# ]


# # Generate listings for each property
# listings = []
# for data in listings_data:
#     listing_text = generate_llm_listing(
#     data["neighborhood"],
#     data["price"],
#     data["bedrooms"],
#     data["bathrooms"],
#     data["house_size"],
#     data["property_type"],
#     data["amenities"]
#     )
#     listings.append({
#     "neighborhood": data["neighborhood"],
#     "price": data["price"],
#     "bedrooms": data["bedrooms"],
#     "bathrooms": data["bathrooms"],
#     "house_size": data["house_size"],
#     "property_type": data["property_type"],
#     "amenities": data["amenities"],
#     "description": listing_text
#     })

# # Convert to a DataFrame and save to CSV
# df_listings = pd.DataFrame(listings)
# df_listings.to_csv("listings.csv", index=False)

In [48]:
# df_listings.head()

# If not first time running, load sample listings

In [49]:
# Load real estate listings from CSV file
listings_df = pd.read_csv("listings.csv")

# Add listings to the vector database
for index, row in listings_df.iterrows():
    metadata = {
    "neighborhood": row["neighborhood"],
    "price": row["price"],
    "bedrooms": row["bedrooms"],
    "bathrooms": row["bathrooms"],
    "house_size": row["house_size"],
    "property_type": row["property_type"],
    "amenities": row["amenities"]
    }
    description = row["description"]
    listing_id = f"{index}"
    add_to_chromadb(collection, listing_id, description, metadata, embedding_model)

Insert of existing embedding ID: 0
Add of existing embedding ID: 0
Insert of existing embedding ID: 1
Add of existing embedding ID: 1
Insert of existing embedding ID: 2
Add of existing embedding ID: 2
Insert of existing embedding ID: 3
Add of existing embedding ID: 3
Insert of existing embedding ID: 4
Add of existing embedding ID: 4
Insert of existing embedding ID: 5
Add of existing embedding ID: 5
Insert of existing embedding ID: 6
Add of existing embedding ID: 6
Insert of existing embedding ID: 7
Add of existing embedding ID: 7
Insert of existing embedding ID: 8
Add of existing embedding ID: 8
Insert of existing embedding ID: 9
Add of existing embedding ID: 9
Insert of existing embedding ID: 10
Add of existing embedding ID: 10
Insert of existing embedding ID: 11
Add of existing embedding ID: 11
Insert of existing embedding ID: 12
Add of existing embedding ID: 12
Insert of existing embedding ID: 13
Add of existing embedding ID: 13
Insert of existing embedding ID: 14
Add of existing em

In [50]:
# Show the ids in the collection to ensure everything has been added
ids_only_result = collection.get(include=[])
print(ids_only_result['ids'])

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30']


In [51]:
# Define buyer questions to be collected in a form that has multi select and free form
questions = [
"How many bedrooms do you prefer? (e.g., 1, 2, 3, 4)",
"How many bathrooms do you prefer? (e.g., 1.0, 1.5, 2.0, 2.5)",
"What are the important features you are looking for in a property?",
"Which amenities are important to you?",
"Which transportation options are important to you?",
"What kind of neighborhood do you prefer?",
"What is your price range? (Provide a lower and upper bound, e.g., $400,000 - $800,000)",
"What type of property are you looking for? (house, condo, apartment)"
]

# Create sample answers to the questions and match the users to the correct listings

User 1

In [52]:
answers = [
["2", "3"],
["1.5", "2"],
["A quiet neighborhood", "good local schools", "convenient shopping options"],
["A garage for parking", "heating system"],
["Proximity to a major highway", "Proximity to subway"],
["A balance between suburban tranquility and access to urban amenities like restaurants and theaters"],
["$500000 - $1200000"],
["condo"]
]

In [53]:
user_preferences = parse_answers(answers)
print("Parsed User Preferences:", user_preferences)

# Search the chromadb
matching_listings = search_chromadb(user_preferences, collection, embedding_model)

# Extract the results
ids = matching_listings['ids'][0]
distances = matching_listings['distances'][0]
documents = matching_listings['documents'][0]

# Combine the results into a list of tuples
combined_results = list(zip(ids, distances, documents))

# Sort the combined results by distance (ascending order)
sorted_results = sorted(combined_results, key=lambda x: x[1])

# Print the descriptions sorted by smallest distances
for listing_id, distance, document in sorted_results:
    print(f"Listing ID: {listing_id}")
    print(f"Distance: {distance}")
    print(f"Description: {document}")
    print("="*40)

Parsed User Preferences: {'bedrooms': [2, 3], 'bathrooms': [1.5, 2.0], 'important_features': ['A quiet neighborhood', 'good local schools', 'convenient shopping options'], 'amenities': ['A garage for parking', 'heating system'], 'transportation': ['Proximity to a major highway', 'Proximity to subway'], 'urban_preference': ['A balance between suburban tranquility and access to urban amenities like restaurants and theaters'], 'price_range': [500000, 1200000], 'property_type': 'condo'}
Listing ID: 24
Distance: 0.4174313247203827
Description: Property Description:

Welcome to this stunning 2-bedroom, 2-bathroom condo located on 3rd Street in the heart of Hoboken, NJ. Priced at $900,000, this 1200 sqft gem is a perfect blend of modern luxury and urban charm. 

As you step into this condo, you will be greeted by a spacious open floor plan that seamlessly connects the living room, dining area, and the kitchen. The kitchen is equipped with top-of-the-line stainless steel appliances, granite co

In [54]:
augmented_response = augment_result(user_preferences, sorted_results[0][2])

In [55]:
augmented_response

"This property is perfect for the user as it fits all their specified preferences. The condo has 2 bedrooms and 2 bathrooms, which falls within the user's desired range of 2-3 bedrooms and 1.5-2 bathrooms. The price of the condo is $900,000, which is comfortably within the user's budget of $500,000 to $1,200,000. \n\nThe condo offers a modern, luxurious living experience with its open floor plan, top-of-the-line kitchen appliances, and tastefully designed bathrooms. The master suite comes with a private bathroom and a large closet, providing ample space and privacy. The second bedroom can also serve as a home office or guest room, offering flexibility based on the user's needs. \n\nIn addition to the condo's features, the user will also have access to a state-of-the-art gym and a dedicated parking spot, adding to the convenience and luxury of living in this property. \n\nThe location of the condo in the heart of Hoboken, NJ, is ideal for both families and young professionals. The neigh

User 2

In [56]:
answers = [
["3"],
["2", "3"],
["A quiet neighborhood", "nice appliances", "master suite bedroom"],
["A garage for parking", "roof deck"],
["Proximity to public transport"],
["Somewhere close to public transport but is not loud or busy"],
["$800000 - $1500000"],
["condo"]
]

In [57]:
user_preferences = parse_answers(answers)
print("Parsed User Preferences:", user_preferences)

# Search the chromadb
matching_listings = search_chromadb(user_preferences, collection, embedding_model)

# Extract the results
ids = matching_listings['ids'][0]
distances = matching_listings['distances'][0]
documents = matching_listings['documents'][0]

# Combine the results into a list of tuples
combined_results = list(zip(ids, distances, documents))

# Sort the combined results by distance (ascending order)
sorted_results = sorted(combined_results, key=lambda x: x[1])

# Print the descriptions sorted by smallest distances
for listing_id, distance, document in sorted_results:
    print(f"Listing ID: {listing_id}")
    print(f"Distance: {distance}")
    print(f"Description: {document}")
    print("="*40)

Parsed User Preferences: {'bedrooms': [3], 'bathrooms': [2.0, 3.0], 'important_features': ['A quiet neighborhood', 'nice appliances', 'master suite bedroom'], 'amenities': ['A garage for parking', 'roof deck'], 'transportation': ['Proximity to public transport'], 'urban_preference': ['Somewhere close to public transport but is not loud or busy'], 'price_range': [800000, 1500000], 'property_type': 'condo'}
Listing ID: 8
Distance: 0.4013093411922455
Description: Property Description:

Welcome to this stunning 3-bedroom, 2-bathroom condo located on the desirable Clinton Street in Hoboken, NJ. Priced at $1,100,000, this 1400 sqft property is a perfect blend of modern luxury and comfort. 

As you step inside, you'll be greeted by a spacious open floor plan that seamlessly connects the living room, dining area, and kitchen. The kitchen is a chef's dream, equipped with top-of-the-line appliances, ample counter space, and sleek cabinetry. Each of the three bedrooms is generously sized, with th

In [58]:
augmented_response = augment_result(user_preferences, sorted_results[0][2])

In [59]:
augmented_response

"This property is perfect for the user as it meets all their specified preferences. It has the exact number of bedrooms (3) and bathrooms (2) they are looking for. The price of the condo is $1,100,000, which falls within the user's budget range of $800,000 to $1,500,000. \n\nThe property is a condo, which is the user's preferred type of property. The open floor plan and modern kitchen cater to a luxurious and comfortable lifestyle. The private roof deck and dedicated parking space are added bonuses that enhance the value of the property. \n\nThe location of the condo on Clinton Street in Hoboken, NJ, is also a plus. The vibrant neighborhood with its mix of old-world charm and modern amenities, excellent school system, and easy commute to Manhattan align with the user's lifestyle needs. \n\nIn summary, this property is a perfect match for the user's preferences in terms of property type, size, price, and location."

# Analysis

The AI real estate did a great job!
For user 1, the top listing had the correct amount of bedrooms, bathrooms, and in the correct price range. The embedding search probably found this listing due to the parking space and its proximity to the highway and public transportation. The user also wanted proximity to restaurants and the description of the top property shows in the neighborhood description that this is so. Therefore it did well. The other two properties it listed also met user 1's criteria.

For user 2, the top listing was perfect. It has the correct number of bedrooms and bathrooms and was within the price range. It had a roof deck and a parking spot, both were top priority for user 2. User 2 also wanted nice appliance and a master bedroom suite, which the top property had.

All in all, this system did a great job.