## Setting Up Project Environment

In [1]:
import os
import openai
import random
import csv
import pandas as pd

# Credentials come from the environment so that no secret is stored in the notebook.
# Set OPENAI_API_KEY (and optionally OPENAI_API_BASE) before starting the kernel;
# a missing key raises KeyError here rather than failing later on the first API call.
openai.api_base = os.environ.get("OPENAI_API_BASE", "https://openai.vocareum.com/v1")
openai.api_key = os.environ["OPENAI_API_KEY"]


## Synthetic Data Generation

### Generating Real Estate Listings

In [2]:
def get_completion(prompt, model="gpt-3.5-turbo"):
    """Send a single user prompt to the chat-completion endpoint and return the reply text.

    Args:
        prompt: Text placed in the one user message of the request.
        model: Name of the chat model to call.

    Returns:
        The ``content`` of the first choice's message.
    """
    request = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0,  # degree of randomness of the model's output
    }
    completion = openai.ChatCompletion.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message["content"]

In [15]:
# Prompt asking the model for twenty-six made-up listings, returned as CSV text
# with the columns named in the last sentence of the prompt.
# NOTE: the string contains no placeholders, so the f-prefix has no effect here.
prompt = f"""
Generate a list of twenty-six made-up real estate listings along with their neighborhood, price (in numerical format without currency symbols or commas), bedrooms, bathrooms, size in sqft, detailed description of the property, and detailed description of the neighborhood.
Provide detailed descriptions that include:
- Property features: Unique selling points, amenities, architectural style, views, etc.
- Neighborhood amenities: Nearby parks, schools, shopping centers, restaurants, transportation options, etc.
- Lifestyle: The overall atmosphere and vibe of the neighborhood, including safety, walkability, and community events.
Examples:
- Detailed property description: "This modern farmhouse boasts soaring ceilings, floor-to-ceiling windows, and a gourmet kitchen with high-end appliances. The spacious backyard features a saltwater pool, spa, and outdoor kitchen, perfect for entertaining."
- Detailed neighborhood description: "Located in the heart of a thriving community, this neighborhood offers easy access to top-rated schools, parks, and shopping centers. Residents enjoy a strong sense of community and a variety of local events throughout the year."
Provide the listings in CSV format with the following keys: neighborhood, price, bedrooms, bathrooms, house size, description, neighborhood description. Ensure the descriptions are enclosed in double quotes.
"""
# Send the prompt through get_completion and print the raw text that comes back.
response = get_completion(prompt)
print(response)

neighborhood,price,bedrooms,bathrooms,house size,description,neighborhood description
Willow Creek,500000,4,3,2500,"This charming colonial-style home features a wrap-around porch, hardwood floors, and a cozy fireplace. The backyard includes a deck and lush landscaping.","Willow Creek is a family-friendly neighborhood with top-rated schools, parks, and a variety of dining options nearby."
Sunset Hills,750000,5,4,3500,"This contemporary home offers an open floor plan, high-end finishes, and a chef's kitchen with quartz countertops. The backyard boasts a pool, spa, and outdoor living space.","Sunset Hills is known for its luxury homes and scenic views. Residents enjoy easy access to hiking trails, shopping centers, and fine dining restaurants."
Maple Grove,400000,3,2,2000,"This newly renovated ranch-style home features a spacious living room, updated kitchen, and a master suite with a walk-in closet. The backyard includes a patio and mature trees for privacy.","Maple Grove is a quiet neig

In [16]:
# Write the response to a CSV file.
# The encoding is set explicitly so the file is UTF-8 on every platform; pd.read_csv,
# which reads this file later, decodes as UTF-8 by default.
with open("real_state_listings.csv", "w", encoding="utf-8") as f:
    f.write(response)


## Semantic Search

### Creating a Vector Database and Storing Listings

In [2]:
# initialize and configure ChromaDB
import chromadb
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction

In [3]:
# Names used to configure the vector store
embedding_model_name = "all-MiniLM-L6-v2"
collection_name = "real_estate_listings"

# In-memory Chroma client plus the collection that will hold the listings
chroma_client = chromadb.Client()
embedding_function = SentenceTransformerEmbeddingFunction(model_name=embedding_model_name)
chroma_collection = chroma_client.get_or_create_collection(
    name=collection_name,
    embedding_function=embedding_function,
)


  from tqdm.autonotebook import tqdm, trange


In [4]:
# Read the generated listings back from disk, then show the first rows and the frame's shape
listings_df = pd.read_csv("real_state_listings.csv")
print(listings_df.head(), listings_df.shape, sep="\n")



        neighborhood    price  bedrooms  bathrooms  house size  \
0       Willow Creek   500000         4          3        2500   
1       Sunset Hills   750000         5          4        3500   
2        Maple Grove   400000         3          2        2000   
3         River Oaks  1000000         6          5        4500   
4  Pinecrest Heights   600000         4          3        2800   

                                         description  \
0  This charming colonial-style home features a w...   
1  This contemporary home offers an open floor pl...   
2  This newly renovated ranch-style home features...   
3  This elegant Mediterranean villa offers a gran...   
4  This traditional two-story home boasts a forma...   

                            neighborhood description  
0  Willow Creek is a family-friendly neighborhood...  
1  Sunset Hills is known for its luxury homes and...  
2  Maple Grove is a quiet neighborhood with tree-...  
3  River Oaks is an upscale neighborhood known

### Generating and Storing Embeddings

In [5]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used to encode the listing texts
model = SentenceTransformer('all-MiniLM-L6-v2')


def _embed_as_list(text):
    """Encode one text with ``model`` and return the vector as a plain Python list."""
    return model.encode(text).tolist()


# Add one embedding column for each of the two free-text columns
for text_column, embedding_column in (
    ('description', 'description_embedding'),
    ('neighborhood description', 'neighborhood_description_embedding'),
):
    listings_df[embedding_column] = listings_df[text_column].apply(_embed_as_list)

    

In [6]:
# Store every listing in the Chroma collection. Each entry carries:
#   - document:  the property description followed by the neighborhood description
#   - embedding: the description vector and the neighborhood vector concatenated
#   - metadata:  the listing's remaining columns (the embedding columns are left out,
#                since the vectors are already stored as the entry's embedding)
embedding_columns = ['description_embedding', 'neighborhood_description_embedding']

listing_ids, listing_documents, listing_embeddings, listing_metadatas = [], [], [], []
for index, row in listings_df.iterrows():
    listing_ids.append(f"listing_{index}")
    listing_documents.append(f"{row['description']} {row['neighborhood description']}")
    # Both cells hold plain lists, so `+` concatenates them into one longer vector.
    listing_embeddings.append(row['description_embedding'] + row['neighborhood_description_embedding'])
    listing_metadatas.append(row.drop(labels=embedding_columns).to_dict())

# One batched call instead of one call per row.
chroma_collection.add(
    documents=listing_documents,
    ids=listing_ids,
    embeddings=listing_embeddings,
    metadatas=listing_metadatas,
)


In [7]:
# Number of entries currently stored in the collection (sanity check after loading the listings)
chroma_collection.count()

26

### Buyer Preferences Stated in Natural Language

In [8]:
# Example buyer request in natural language; the following cell embeds it and searches the collection
query = "I'm looking for a luxury waterfront home with at least 6 bedrooms and no fewer than 3 bathrooms, featuring a private deck."

### Semantic Search Implementation & Listing Retrieval 

In [9]:
# Embed the query. The vector is repeated twice because each stored embedding is a
# description vector concatenated with a neighborhood-description vector.
query_vector = model.encode(query).tolist()
query_embedding = query_vector + query_vector
results = chroma_collection.query(query_embeddings=[query_embedding], n_results=1)

# Print the main fields of every returned listing, one "Label: value" line per field
printed_fields = (
    ("Neighborhood", "neighborhood"),
    ("Price", "price"),
    ("Bedrooms", "bedrooms"),
    ("Bathrooms", "bathrooms"),
    ("Size", "house size"),
    ("Description", "description"),
)
for matches in results['metadatas']:
    for metadata in matches:
        for label, key in printed_fields:
            print(f"{label}: {metadata[key]}")
    

Neighborhood: Riverbend Estates
Price: 920000
Bedrooms: 6
Bathrooms: 5
Size: 4200
Description: This waterfront property offers stunning views of the river, a private dock, and a spacious deck for outdoor entertaining. The interior features a stone fireplace, hardwood floors, and a gourmet kitchen with a breakfast nook.


## Augmented Response Generation

### LLM Augmentation

In [10]:
# Define a function that responds to user queries
def chat_completion_from_messages(messages, model="gpt-3.5-turbo", temperature=0):
    """Forward a prepared message list to the chat-completion endpoint.

    Args:
        messages: Chat messages passed through to the API unchanged.
        model: Name of the chat model to call.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The ``content`` of the first choice's message.
    """
    completion = openai.ChatCompletion.create(
        temperature=temperature,
        messages=messages,
        model=model,
    )
    reply = completion.choices[0].message
    return reply["content"]

### Chat Interface Implementation

In [11]:
import panel as pn
# Load the Panel notebook extension with the 'vscode' comms setting.
# NOTE(review): presumably needed to render widgets inside a VS Code notebook — confirm before running elsewhere.
pn.extension(comms='vscode')

In [12]:
def generate_augmented_listings(query, n_results=2):
    """Answer a buyer's question using the listings most similar to it.

    The query is embedded, the closest listings are fetched from the Chroma
    collection, and their details are handed to the chat model together with
    the question.

    Args:
        query: The buyer's question or preferences in natural language.
        n_results: How many listings to retrieve as context (default 2).

    Returns:
        The model's answer as a string, or a short request for input when
        ``query`` is empty or only whitespace.
    """
    if not query or not query.strip():
        # Nothing to search for; skip the embedding and the API call.
        return "Please describe what you are looking for in a property."

    # Stored embeddings are a description vector concatenated with a neighborhood
    # vector, so the query vector is repeated to match that dimensionality.
    query_embedding = model.encode(query).tolist() * 2
    results = chroma_collection.query(query_embeddings=[query_embedding], n_results=n_results)

    # The model is told to answer only from this data, so every field a buyer is
    # likely to ask about is passed along. Fields the original context did not
    # include are read with .get() so collections lacking them still work.
    listings = [
        {
            "Neighborhood": metadata['neighborhood'],
            "Bedrooms": metadata['bedrooms'],
            "Bathrooms": metadata.get('bathrooms'),
            "Size": metadata.get('house size'),
            "Price": metadata['price'],
            "Description": metadata['description'],
            "Neighborhood Description": metadata.get('neighborhood description'),
        }
        for result in results['metadatas']
        for metadata in result
    ]

    # Each fragment ends with a space so the sentences do not run together
    # when the adjacent literals are joined.
    system_prompt = (
        "You are a helpful expert real estate assistant. "
        "Your users are asking questions about information contained in real estate listings. "
        "You will be shown the user's question and the relevant information from the listings. "
        "Answer the user's question using only this information."
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Question: {query}. \n Listings: {listings}"},
    ]

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0,
        max_tokens=250,
    )
    return response.choices[0].message["content"]


# Free-text area where the buyer describes what they want
requirements_widget = pn.widgets.TextAreaInput(
    name='Enter Your Preferences',
    placeholder='Describe your ideal property...',
)

# Markdown pane that the submit callback writes its result into
output = pn.pane.Markdown()

# Button to submit
submit_button = pn.widgets.Button(
    name='Submit',
    button_type='primary',
)

# Define the callback function
def collect_preferences(event):
    """Handle a click on the submit button.

    Reads the text from ``requirements_widget``, obtains an answer from
    ``generate_augmented_listings`` and writes both into the ``output`` pane.

    Args:
        event: The click event passed by the button; it is not inspected.
    """
    requirements = requirements_widget.value

    # Process natural language input
    preferences = generate_augmented_listings(requirements)

    # The Markdown is built without leading indentation: lines indented by four
    # spaces can be rendered as a code block, which would show the ** markers
    # literally instead of bold headings (notably when the interpolated text
    # spans several lines). Blank lines keep headings and text in separate paragraphs.
    output.object = (
        "**Collected Preferences:**\n"
        "\n"
        f"{requirements}\n"
        "\n"
        "**Interpreted Preferences:**\n"
        "\n"
        f"{preferences}\n"
    )

# Register collect_preferences as the button's click handler
submit_button.on_click(collect_preferences)

# Layout: heading, input area, button and result pane stacked in one column
layout_items = ['# Buyer Preferences', requirements_widget, submit_button, output]
layout = pn.Column(*layout_items)

# Use .servable() for inline rendering
layout.servable()

BokehModel(combine_events=True, render_bundle={'docs_json': {'e8e3d4d4-9c04-4c29-9c65-9ad328259711': {'version…

In [17]:
# Save the Panel layout to an HTML file
# NOTE(review): the submit callback runs Python code, so the saved page is presumably
# not interactive without a live kernel — confirm this export is only meant as a snapshot.
layout.save('layout.html')


### Alternative Response Generation

In [15]:


# Rows of the chat log rendered so far; collect_preferences appends to this list
panels = [] # collect display 

# Conversation history, seeded with a system prompt describing PropertyBot's intended behaviour.
# NOTE(review): rag() reads only the content of the last message in this list, so within this
# cell the system prompt is never passed to a model — confirm whether that is intended.
context = [ {'role':'system', 'content':"""
You are PropertyBot, an automated service to help customers find their dream home. 
You first greet the customer, then collect their preferences, and then offer suggestions based on their criteria. 
You wait to collect the entire list of preferences, then summarize them and check for a final time if the customer 
wants to add anything else. Finally, you provide a single property recommendation that matches their preferences. 
Make sure to include at least the price and the neighborhood in the recommendation.

Make sure to clarify all preferences, such as location, price range, number of bedrooms, and amenities. 
You respond in a short, very conversational friendly style. 

Here are some examples of preferences you can collect:
* **Location:** City, neighborhood, proximity to amenities (e.g., schools, parks, public transportation)
* **Price:** Budget range
* **Property type:** House, apartment, condo, etc.
* **Size:** Number of bedrooms, bathrooms, square footage
* **Amenities:** Garage, backyard, pool, etc.
* **Style:** Modern, traditional, rustic, etc.

Let's get started! What are you looking for in a new home?
"""} ] 

# Tracks which preferences have been asked for so far; rag() flips an entry to True
# when it asks about it. Only the free-text description is tracked.
preferences_collected = dict(description=False)

_RAG_EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
_rag_encoder_cache = {}


def _get_rag_encoder():
    """Return the sentence-transformer used by ``rag``, loading it only on first use."""
    if _RAG_EMBEDDING_MODEL_NAME not in _rag_encoder_cache:
        _rag_encoder_cache[_RAG_EMBEDDING_MODEL_NAME] = SentenceTransformer(_RAG_EMBEDDING_MODEL_NAME)
    return _rag_encoder_cache[_RAG_EMBEDDING_MODEL_NAME]


def rag(context, model="gpt-3.5-turbo", temperature=0):
    """Produce the assistant's next reply for the preference-collection chat.

    While any entry of ``preferences_collected`` is still False, the first such
    entry is marked as collected and a question about it is returned. Once every
    entry is True, the latest message in ``context`` is embedded and the closest
    listing in the Chroma collection is returned as a formatted recommendation.

    Args:
        context: Conversation history as a list of dicts with a 'content' key;
            only the last entry is read.
        model: Not used by this function; kept so existing callers keep working.
        temperature: Not used by this function; kept so existing callers keep working.

    Returns:
        A question about the next preference, a formatted property
        recommendation, or an apology when the search returns no listing.
    """
    # Ask about the first preference that has not been requested yet.
    pending_key = next((key for key, done in preferences_collected.items() if not done), None)
    if pending_key is not None:
        preferences_collected[pending_key] = True
        return f"Can you please tell me about your {pending_key.replace('_', ' ')}?"

    # Everything has been asked: search with the user's latest message.
    user_message = context[-1]['content']
    # The encoder is cached because loading it on every chat turn is expensive.
    # The vector is repeated because stored embeddings are two vectors concatenated.
    query_embedding = _get_rag_encoder().encode(user_message).tolist() * 2
    results = chroma_collection.query(query_embeddings=[query_embedding], n_results=1)

    # The result is a dict and therefore truthy even with no hits, so the
    # inner list is checked before indexing into it.
    matches = (results or {}).get('metadatas') or [[]]
    if not matches[0]:
        return "I'm sorry, but I couldn't find any properties that match your preferences."

    listing = matches[0][0]
    return (
        f"Property Recommendation: \n\n**Neighborhood:** {listing['neighborhood']}"
        f"\n**Price:** {listing['price']}"
        f"\n**Details:** {listing['description']}"
        f"\n**Neighborhood Description:** {listing['neighborhood description']}"
    )


def collect_preferences(event):
    """Run one chat turn: record the user's text, get a reply from ``rag``, extend the log.

    Args:
        event: Value supplied through the bound button; it is not inspected.

    Returns:
        A ``pn.Column`` containing every conversation row collected so far.
    """
    prompt = inp.value_input
    inp.value = ''  # clear the text box for the next message

    # The user message must be in the history before rag() is called, since
    # rag() reads the last entry of the context.
    context.append({'role': 'user', 'content': f"{prompt}"})
    response = rag(context)
    context.append({'role': 'assistant', 'content': f"{response}"})

    for speaker, text in (('User:', prompt), ('Assistant:', response)):
        panels.append(pn.Row(speaker, pn.pane.Markdown(text, width=600)))

    # NOTE(review): pn.Column exposes its children as `objects`; confirm that
    # assigning to `object` here has the intended effect.
    interactive_panel.object = [pn.Column(*panels)]

    return pn.Column(*panels)

# Chat controls: the send button, the text box, an extra (initially empty) column,
# and collect_preferences bound to the button
button_conversation = pn.widgets.Button(name="Chat!")
inp = pn.widgets.TextInput(
    value="Hi",
    placeholder='Enter text here…',
)
interactive_panel = pn.Column()
interactive_conversation = pn.bind(collect_preferences, button_conversation)


# Assemble the chat dashboard: text box, button row, bound conversation view, extra column
button_row = pn.Row(button_conversation)
conversation_view = pn.panel(
    interactive_conversation,
    loading_indicator=True,
    width=2400,
    height=400,
)
dashboard = pn.Column(inp, button_row, conversation_view, interactive_panel)

dashboard.servable()

BokehModel(combine_events=True, render_bundle={'docs_json': {'78137082-3029-4ce3-b749-5fdc8adb2c58': {'version…

In [16]:
# Save the chat dashboard to an HTML file
# NOTE(review): the chat callback runs Python code, so the saved page is presumably
# not interactive without a live kernel — confirm this export is only meant as a snapshot.
dashboard.save('dashboard.html')


