This is a starter notebook for the project, you'll have to import the libraries you'll need, you can find a list of the ones available in this workspace in the requirements.txt file in this workspace. 

# Step 1: Setting up the python application

In [1]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma


In [1]:
# Importing the library for OpenAI API
import openai
openai.api_base = "https://openai.vocareum.com/v1"

# Define OpenAI API key 
api_key = ""
openai.api_key = api_key
# os.environ["OPENAI_API_KEY"] = ""
# os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

# Step 2: Generating real estate listings

In [2]:
def generate_real_estate_listings():
    base_prompt = """
    Generate 10 real estate listings with the following format:
    - neighborhood: [Name]
    - price: [Price]
    - bedrooms: [Number of bedrooms]
    - bathrooms: [Number of bathrooms]
    - house_size: [Size in sqft]
    
    Description: [Detailed description of the property, highlighting key features such as design, energy efficiency, and the surrounding environment.]
    
    Neighborhood Description: [Brief description of the neighborhood, including nearby amenities, community vibe, and transportation options.]
    """

    all_listings = ""
    for i in range(4):  # Repeat 4 times to generate 20 listings
        user_prompt = base_prompt + f"\nStart listing batch {i+1} below:"
        
        try:
            response = openai.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are a real estate agent."},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.5,  # Adjust for creativity level
                max_tokens=1500,
            )
            
            # Extract the generated listings and append to the full list
            listings = response.choices[0].message.content
            all_listings += listings + "\n\n"

        except Exception as e:
            return f"An error occurred: {e}"

    return all_listings

# Generate and print the listings
listings = generate_real_estate_listings()
print(listings)

1. 
- Neighborhood: Green Hills
- Price: $750,000
- Bedrooms: 4
- Bathrooms: 3
- House Size: 2,500 sqft

Description: This stunning modern home in Green Hills features an open floor plan, high ceilings, and large windows that flood the space with natural light. The kitchen is equipped with stainless steel appliances and quartz countertops. The master suite boasts a luxurious bathroom with a soaking tub and walk-in shower. Enjoy the spacious backyard perfect for entertaining.

Neighborhood Description: Green Hills is a highly sought-after neighborhood known for its upscale shops, restaurants, and top-rated schools. Residents enjoy easy access to parks, walking trails, and convenient transportation options.

2. 
- Neighborhood: Downtown
- Price: $1,200,000
- Bedrooms: 3
- Bathrooms: 2.5
- House Size: 2,000 sqft

Description: Live in luxury in this sleek downtown condo with panoramic city views. The interior features designer finishes, a gourmet kitchen with high-end appliances, and a pri

In [3]:
import pandas as pd
import re

In [17]:
# Input text

# Split the text into individual property entries
entries = re.split(r'\d+\.\s+', listings.strip())[1:]

# Define a function to extract property details
def extract_details(entry):
    details = {}
    details['Neighborhood'] = re.search(r'- Neighborhood: (.+)', entry).group(1).strip()
    details['Price'] = re.search(r'- Price: (.+)', entry).group(1).strip()
    details['Bedrooms'] = re.search(r'- Bedrooms: (.+)', entry).group(1).strip()
    details['Bathrooms'] = re.search(r'- Bathrooms: (.+)', entry).group(1).strip()
    details['House Size'] = re.search(r'- House Size: (.+)', entry).group(1).strip()
    details['Description'] = re.search(r'Description:\s*(.+?)\n\n', entry, re.DOTALL).group(1).strip()
    details['Neighborhood Description'] = re.search(r'Neighborhood Description:\s*(.+)', entry, re.DOTALL).group(1).strip()
    return details

# Extract details from each entry and create a DataFrame
data = [extract_details(entry) for entry in entries]
df = pd.DataFrame(data)

# Display the DataFrame
df.head()

Unnamed: 0,Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description,Neighborhood Description
0,Green Hills,"$750,000",4,3.0,"2,500 sqft",This stunning modern home in Green Hills featu...,Green Hills is a highly sought-after neighborh...
1,Downtown,"$1,200,000",3,2.5,"2,000 sqft",Live in luxury in this sleek downtown condo wi...,"Downtown is the heart of the city, offering a ..."
2,Belle Meade,"$1,500,000",5,4.5,"4,000 sqft",This exquisite estate in Belle Meade exudes el...,Belle Meade is a prestigious neighborhood know...
3,East Nashville,"$500,000",2,2.0,"1,200 sqft",Embrace the eclectic charm of East Nashville i...,East Nashville is a hip and diverse neighborho...
4,Sylvan Park,"$600,000",3,2.5,"1,800 sqft",This charming cottage in Sylvan Park offers a ...,Sylvan Park is a family-friendly neighborhood ...


In [18]:
# Convert numeric columns to appropriate types
df['Price'] = df['Price'].str.replace('$', '').str.replace(',', '').astype(float)
df['Bedrooms'] = df['Bedrooms'].astype(int)
df['Bathrooms'] = df['Bathrooms'].astype(float)
df['House Size'] = df['House Size'].str.replace(' sqft', '').str.replace(',', '').astype(int)

# Reorder columns if desired
column_order = ['Neighborhood', 'Price', 'Bedrooms', 'Bathrooms', 'House Size', 'Description', 'Neighborhood Description']
df = df[column_order]
df.columns = ['neighborhood','price','bedrooms','bathrooms','house_size','description','neighborhood_description']
df

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,house_size,description,neighborhood_description
0,Green Hills,750000.0,4,3.0,2500,This stunning modern home in Green Hills featu...,Green Hills is a highly sought-after neighborh...
1,Downtown,1200000.0,3,2.5,2000,Live in luxury in this sleek downtown condo wi...,"Downtown is the heart of the city, offering a ..."
2,Belle Meade,1500000.0,5,4.5,4000,This exquisite estate in Belle Meade exudes el...,Belle Meade is a prestigious neighborhood know...
3,East Nashville,500000.0,2,2.0,1200,Embrace the eclectic charm of East Nashville i...,East Nashville is a hip and diverse neighborho...
4,Sylvan Park,600000.0,3,2.5,1800,This charming cottage in Sylvan Park offers a ...,Sylvan Park is a family-friendly neighborhood ...
5,Germantown,850000.0,4,3.0,2300,Discover urban living at its finest in this co...,Germantown is a historic neighborhood with a m...
6,12 South,700000.0,3,2.0,1600,Live in the heart of the trendy 12 South neigh...,12 South is a popular neighborhood known for i...
7,Hillsboro Village,950000.0,4,3.5,2800,Experience upscale living in this custom-built...,Hillsboro Village is a vibrant neighborhood wi...
8,West End,1100000.0,5,4.0,3500,This elegant home in West End features a grand...,West End is a prestigious neighborhood known f...
9,Wedgewood-Houston,550000.0,3,2.5,1700,Modern living awaits in this sleek townhome in...,Wedgewood-Houston is a rapidly growing neighbo...


In [19]:
df.to_csv('home.csv')

# Step3: Storing listings in a vector database

In [2]:
# Define OpenAI API key 
import os
os.environ["OPENAI_API_KEY"] = ""
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

In [3]:
loader = CSVLoader(file_path='./home.csv')
docs = loader.load()

In [4]:
model_name = 'gpt-3.5-turbo'
llm = OpenAI(model_name=model_name, temperature=0, max_tokens=2000)



In [5]:
import pandas as pd
from langchain.schema import Document
df = pd.read_csv('./home.csv')
documents = []
for index, row in df.iterrows():
    documents.append(Document(page_content=row['description'], metadata={'id': str(index)}))

In [6]:
splitter = CharacterTextSplitter(chunk_size= 300, chunk_overlap=100)
split_docs = splitter.split_documents(docs)
print(f"Split {len(documents)} documents into {len(split_docs)} chunks.")

Created a chunk of size 604, which is longer than the specified 300
Created a chunk of size 394, which is longer than the specified 300
Created a chunk of size 354, which is longer than the specified 300
Created a chunk of size 372, which is longer than the specified 300
Created a chunk of size 368, which is longer than the specified 300
Created a chunk of size 333, which is longer than the specified 300


Split 30 documents into 40 chunks.


In [7]:
#check whether data is loaded correctly
if split_docs:
    document = split_docs[10]
    print(document.page_content)
    print(document.metadata)

: 10
neighborhood: Willow Creek
price: 450000.0
bedrooms: 3
bathrooms: 2.5
house_size: 2100
description: This charming home in Willow Creek features a modern open concept design with a spacious living area, gourmet kitchen, and a cozy backyard patio perfect for entertaining. Energy-efficient appliances and smart home features make this property both stylish and eco-friendly.
neighborhood_description: Willow Creek is a family-friendly neighborhood known for its top-rated schools, parks, and walking trails. Residents enjoy easy access to shopping centers, restaurants, and public transportation options.
{'source': './home.csv', 'row': 10}


In [8]:
from langchain_community.document_loaders import CSVLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma


# Load the document, split it into chunks, embed each chunk and load it into the vector store.
raw_documents = CSVLoader('./home.csv').load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)

In [9]:
db = Chroma.from_documents(documents, OpenAIEmbeddings())

In [10]:
print(dir(db))

['_Chroma__ensure_collection', '_Chroma__query_collection', '_LANGCHAIN_DEFAULT_COLLECTION_NAME', '__abstractmethods__', '__annotations__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_asimilarity_search_with_relevance_scores', '_chroma_collection', '_client', '_client_settings', '_collection', '_collection_metadata', '_collection_name', '_cosine_relevance_score_fn', '_embedding_function', '_euclidean_relevance_score_fn', '_get_retriever_tags', '_max_inner_product_relevance_score_fn', '_persist_directory', '_select_relevance_score_fn', '_similarity_search_with_relevance_scores', 'aadd_documents', 'aadd_texts', 'add_documents', 'add_images', 'add_texts', 'adelete', 

In [11]:
# Step 4: Building the User Preference Interface
questions = [
    "How big do you want your house to be?",
    "What are 3 most important things for you in choosing this property?",
    "Which amenities would you like?",
    "Which transportation options are important to you?",
    "How urban do you want your neighborhood to be?",
]

answers = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters.",
]

buyer_preferences = {
    "house_size": answers[0],
    "important_factors": answers[1].split(", "),
    "amenities": answers[2].split(", "),
    "transportation": answers[3].split(", "),
    "neighborhood_urbanity": answers[4],
}

In [12]:
buyer_preferences

{'house_size': 'A comfortable three-bedroom house with a spacious kitchen and a cozy living room.',
 'important_factors': ['A quiet neighborhood',
  'good local schools',
  'and convenient shopping options.'],
 'amenities': ['A backyard for gardening',
  'a two-car garage',
  'and a modern',
  'energy-efficient heating system.'],
 'transportation': ['Easy access to a reliable bus line',
  'proximity to a major highway',
  'and bike-friendly roads.'],
 'neighborhood_urbanity': 'A balance between suburban tranquility and access to urban amenities like restaurants and theaters.'}

# Step 5: Searching Based on Preferences


In [13]:
joined_string = ''.join(
    str(item) if not isinstance(item, list) else ''.join(str(i) for i in item)
    for item in buyer_preferences.values()
)

print(joined_string) 

A comfortable three-bedroom house with a spacious kitchen and a cozy living room.A quiet neighborhoodgood local schoolsand convenient shopping options.A backyard for gardeninga two-car garageand a modernenergy-efficient heating system.Easy access to a reliable bus lineproximity to a major highwayand bike-friendly roads.A balance between suburban tranquility and access to urban amenities like restaurants and theaters.


In [15]:
similar_docs = db.similarity_search(joined_string, k=5)
similar_docs

[Document(metadata={'row': 0, 'source': './home.csv'}, page_content=': 0\nneighborhood: Green Hills\nprice: 750000.0\nbedrooms: 4\nbathrooms: 3.0\nhouse_size: 2500\ndescription: This stunning modern home in Green Hills features an open floor plan, high ceilings, and large windows that flood the space with natural light. The kitchen is equipped with stainless steel appliances and quartz countertops. The master suite boasts a luxurious bathroom with a soaking tub and walk-in shower. Enjoy the spacious backyard perfect for entertaining.\nneighborhood_description: Green Hills is a highly sought-after neighborhood known for its upscale shops, restaurants, and top-rated schools. Residents enjoy easy access to parks, walking trails, and convenient transportation options.'),
 Document(metadata={'row': 20, 'source': './home.csv'}, page_content=': 20\nneighborhood: Green Hills\nprice: 650000.0\nbedrooms: 4\nbathrooms: 3.0\nhouse_size: 2800\ndescription: This stunning modern home in Green Hills f

In [21]:
def parse_document_content(content):
    # Define patterns for each field
    patterns = {
        'description': r'description:\s*(.*?)(?=\n|$)',
        'neighborhood_description': r'neighborhood_description:\s*(.*?)(?=\n|$)',
        'price': r'price:\s*([\d.,]+)',
        'bedrooms': r'bedrooms:\s*(\d+)',
        'bathrooms': r'bathrooms:\s*([\d.]+)',
        'house_size': r'house_size:\s*([\d.]+)'
    }
    
    # Extract data using regex patterns
    extracted_data = {}
    for field, pattern in patterns.items():
        match = re.search(pattern, content, re.DOTALL)
        extracted_data[field] = match.group(1).strip() if match else 'Not Available'
    
    return extracted_data

# Function to format the extracted data
def format_document(document):
    content = document.page_content
    data = parse_document_content(content)
    
    formatted_string = (
        f"Description: {data['description']}\n"
        f"Neighborhood Description: {data['neighborhood_description']}\n"
        f"Price: {data['price']}\n"
        f"Bedrooms: {data['bedrooms']}\n"
        f"Bathrooms: {data['bathrooms']}\n"
        f"House Size (sqft): {data['house_size']}"
    )
    
    return formatted_string

# Apply formatting to each document and print results
for doc in similar_docs:
    print(format_document(doc))
    print()  # Add a newline for better readability

Description: This stunning modern home in Green Hills features an open floor plan, high ceilings, and large windows that flood the space with natural light. The kitchen is equipped with stainless steel appliances and quartz countertops. The master suite boasts a luxurious bathroom with a soaking tub and walk-in shower. Enjoy the spacious backyard perfect for entertaining.
Neighborhood Description: Green Hills is a highly sought-after neighborhood known for its upscale shops, restaurants, and top-rated schools. Residents enjoy easy access to parks, walking trails, and convenient transportation options.
Price: 750000.0
Bedrooms: 4
Bathrooms: 3.0
House Size (sqft): 2500

Description: This stunning modern home in Green Hills features an open floor plan, high ceilings, and large windows that flood the space with natural light. The kitchen boasts top-of-the-line appliances and a spacious island. The master suite offers a luxurious retreat with a spa-like bathroom. The backyard oasis includes

# Step 6: Personalizing Listing Descriptions


In [34]:
import openai
openai.api_key = api_key = ''
def generate_personalized_description(property_details, buyer_preferences):
    prompt = f"""
    You are a skilled real estate agent. Given the following property details and buyer preferences, craft a personalized listing description that emphasizes features matching the buyer's interests. Ensure that factual details are preserved.

    **Property Details:**
    Description: {property_details['description']}
    Neighborhood Description: {property_details['neighborhood_description']}
    Price: {property_details['price']}
    Bedrooms: {property_details['bedrooms']}
    Bathrooms: {property_details['bathrooms']}
    House Size (sqft): {property_details['house_size']}

    **Buyer Preferences:**
    {buyer_preferences}

    **Personalized Listing Description:**
    """
    
    response = openai.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are a real estate agent.For each retrieved listing, tailor it to resonate with the buyers' specific preferences. This invovles subtly emphasizing aspects of the property that align with what the buyer is looking for"},
                    {"role": "user", "content": buyer_preferences}
                ],
                temperature=0.5,  # Adjust for creativity level
                max_tokens=1500,
    )
    return response.choices[0].message.content


In [35]:
property_details = {
    'description': 'This stunning modern home in Green Hills features an open floor plan, high ceilings, and large windows that flood the space with natural light. The kitchen is equipped with stainless steel appliances and quartz countertops. The master suite boasts a luxurious bathroom with a soaking tub and walk-in shower. Enjoy the spacious backyard perfect for entertaining.',
    'neighborhood_description': 'Green Hills is a highly sought-after neighborhood known for its upscale shops, restaurants, and top-rated schools. Residents enjoy easy access to parks, walking trails, and convenient transportation options.',
    'price': '750000.0',
    'bedrooms': '4',
    'bathrooms': '3.0',
    'house_size': '2500'
}

buyer_preferences = "Interested in modern kitchens and spacious backyards. Prefers neighborhoods with top-rated schools and easy access to parks. Budget: up to $800,000."

# Generate the personalized description
personalized_description = generate_personalized_description(property_details, buyer_preferences)

print(personalized_description)

🏡 **Modern Home with Spacious Backyard in Family-Friendly Neighborhood**

Welcome home to this stunning property located in a sought-after neighborhood known for its top-rated schools and proximity to parks. 

As you step inside, you'll be greeted by a beautifully renovated modern kitchen featuring sleek cabinetry, stainless steel appliances, and quartz countertops. The open layout is perfect for entertaining guests or simply enjoying family meals together.

Step outside to your expansive backyard, ideal for hosting summer barbecues, gardening, or simply relaxing in the sunshine. With plenty of space for kids to play and pets to roam, this backyard oasis is a rare find in the area.

This home is situated in a family-friendly community known for its excellent schools and safe streets. Enjoy easy access to nearby parks, perfect for morning jogs, picnics, or weekend outings with the family.

Priced at $800,000, this property offers the perfect blend of modern living, outdoor space, and a 