This is a starter notebook for the project. You'll have to import the libraries you need; a list of the ones available in this workspace is in the requirements.txt file.

In [None]:
# Keep pydantic on the 1.x line (any release below 2.0).
# NOTE(review): presumably required by the langchain version in this workspace — confirm against requirements.txt.
%pip install "pydantic<2.0"


In [None]:
# Keep fastapi below 0.100; only fastapi.encoders.jsonable_encoder is used in this notebook.
# NOTE(review): presumably pinned to stay compatible with pydantic 1.x — confirm.
%pip install "fastapi<0.100"


In [2]:
import os
import pandas as pd
import shutil
from dataclasses import dataclass
from typing import List
from pydantic import BaseModel, Field, NonNegativeInt  # Updated to import directly from pydantic
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores.chroma import Chroma
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from fastapi.encoders import jsonable_encoder


In [11]:
import os

# Read the API key from the environment instead of hard-coding it in the
# notebook, so the secret never ends up in the saved file. When the variable
# is not set this falls back to an empty string, which is what the notebook
# assigned before.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Base URL of the OpenAI-compatible endpoint used by this workspace.
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

from langchain.llms import OpenAI

In [12]:
#Define the Real Estate Listing Schema
# One generated listing. Field order matters: it is the column order of the
# DataFrame built from the parsed listings.
# NOTE(review): documented with comments rather than a class docstring on
# purpose — a docstring may be copied into the schema that is sent to the LLM
# as format instructions; confirm before adding one.
class RealEstateListing(BaseModel):
    # Free-text location fields
    neighborhood: str = Field(
        description="The neighborhood where the property is located",
    )

    # Numeric facts; NonNegativeInt rejects negative values
    price: NonNegativeInt = Field(
        description="The price of the property in USD",
    )
    bedrooms: NonNegativeInt = Field(
        description="The number of bedrooms in the property",
    )
    bathrooms: NonNegativeInt = Field(
        description="The number of bathrooms in the property",
    )
    house_size: NonNegativeInt = Field(
        description="The size of the house in square feet",
    )

    # Long-form text
    description: str = Field(
        description="A description of the property",
    )
    neighborhood_description: str = Field(
        description="A description of the neighborhood.",
    )

# Container model: every generated listing sits under a single "listings" key.
class ListingCollection(BaseModel):
    listings: List[RealEstateListing] = Field(
        description="A list of real estate listings",
    )


In [13]:
#Generate Real Estate Listings Using OpenAI
# Sends one prompt (instruction + example listing + parser format
# instructions) to the LLM, parses the reply into a ListingCollection and
# loads the listings into a DataFrame.
llm = OpenAI(model_name="gpt-3.5-turbo", temperature=0, api_key=OPENAI_API_KEY)

instruction = "Generate a JSON object with at least 10 real estate listings."
# Example listing that fills the {sample} placeholder of the prompt template.
sample_listing = """
Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.
"""

# Define the parser and prompt
# The parser targets ListingCollection, i.e. a single object holding a list
# of RealEstateListing items.
parser = PydanticOutputParser(pydantic_object=ListingCollection)
prompt = PromptTemplate(
    template="{instruction}\n{sample}\n{format_instructions}\n",
    input_variables=["instruction", "sample"],
    # The bound method is passed here without being called.
    # NOTE(review): this relies on PromptTemplate accepting callables as
    # partial variables — confirm for the installed langchain version.
    partial_variables={"format_instructions": parser.get_format_instructions},
)

# Fill in the two remaining input variables to get the final prompt string.
query = prompt.format(
    instruction=instruction,
    sample=sample_listing,
)

# Get response from the LLM
response = llm(query)
# NOTE(review): there is no error handling around parse(); presumably a reply
# that does not match the schema raises here — confirm.
result = parser.parse(response)

# Create a dataframe from the generated listings
# jsonable_encoder converts the parsed listing models into plain data that
# pandas can build a frame from.
df = pd.DataFrame(jsonable_encoder(result.listings))
df.head()




Unnamed: 0,neighborhood,price,bedrooms,bathrooms,house_size,description,neighborhood_description
0,Green Oaks,800000,3,2,2000,Welcome to this eco-friendly oasis nestled in ...,"Green Oaks is a close-knit, environmentally-co..."
1,Downtown Loft District,600000,2,1,1500,Experience urban living at its finest in this ...,The Downtown Loft District is a bustling neigh...
2,Suburban Meadows,700000,4,3,2500,Escape the hustle and bustle of the city in th...,Suburban Meadows is a family-friendly neighbor...
3,Waterfront Estates,1200000,5,4,4000,Live the life of luxury in this stunning 5-bed...,Waterfront Estates is an upscale waterfront co...
4,Mountain View Heights,900000,4,3,3000,Escape to the mountains in this spacious 4-bed...,Mountain View Heights is a picturesque neighbo...


In [15]:
#Save Listings to CSV
# The DataFrame index is written out as a column labelled 'id'.
df.to_csv('real_estate_listings.csv', index_label='id')


In [16]:
#Set up ChromaDB and store vector embeddings for the real estate listings.

CHROMA_PATH = "chroma"
CSV_PATH = "real_estate_listings.csv"

# Single embedding client, reused when the vector store is built below.
embedding_function = OpenAIEmbeddings()

df = pd.read_csv(CSV_PATH)

# One Document per listing: the property description is the text that gets
# embedded, and the row index (as a string) is kept as the listing id.
# Only the 'description' column is read, so iterate over that column directly
# instead of materialising every row with iterrows().
documents = [
    Document(page_content=description, metadata={'id': str(index)})
    for index, description in df['description'].items()
]

# Split the text into suitable chunks
# Sizes are measured in characters because length_function is len.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=100,
    length_function=len,
    add_start_index=True,
)
chunks = text_splitter.split_documents(documents)
print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

# Save chunks to Chroma
# Delete any store directory left over from a previous run before rebuilding.
if os.path.exists(CHROMA_PATH):
    shutil.rmtree(CHROMA_PATH)

db = Chroma.from_documents(chunks, embedding_function, persist_directory=CHROMA_PATH)
db.persist()
print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")


Split 10 documents into 21 chunks.
Saved 21 chunks to chroma.


In [17]:
#Collect buyer preferences and convert them into a search query.
def build_query_text(prefs):
    """Turn a buyer-preference dict into a natural-language search query.

    Args:
        prefs: Mapping with the keys "bedrooms" (number), "location" (str)
            and "budget" (str). Other keys, such as "bathrooms", are ignored
            so that the example preferences produce exactly the query text
            this notebook used before.

    Returns:
        A one-sentence query string. The location and budget phrases are
        lower-cased so they read naturally in the middle of the sentence.

    Raises:
        KeyError: If one of the required keys is missing.
    """
    bedrooms = prefs["bedrooms"]
    location = prefs["location"].lower()
    budget = prefs["budget"].lower()
    return f"Comfortable {bedrooms}-bedroom house in a {location} {budget}."


# Example buyer preferences
preferences = {
    "bedrooms": 3,
    "bathrooms": 2,
    "location": "Suburban area",
    "budget": "Below $900,000",
}
# Derive the query from the preferences instead of repeating them by hand.
query_text = build_query_text(preferences)


In [18]:
#Search for listings that match the buyer preferences
def perform_search(db, query_text, k=3, min_score=0.7):
    """Print the listings chunks that best match a buyer query.

    Args:
        db: Vector store exposing
            ``similarity_search_with_relevance_scores(query, k=...)`` and
            returning ``(document, score)`` pairs.
        query_text: Natural-language description of what the buyer wants.
        k: Maximum number of hits to request from the store.
        min_score: Minimum relevance score the first hit must reach for the
            search to count as a match.

    Returns:
        None. Results (or a "no match" message) are printed.
    """
    results = db.similarity_search_with_relevance_scores(query_text, k=k)

    # Only the first hit is compared with the threshold (this assumes the
    # store returns hits best-first); once it passes, every returned hit is
    # printed, including ones that score below the threshold.
    if not results or results[0][1] < min_score:
        print("Unable to find matching results.")
        return

    for doc, score in results:
        print(f"Listing ID: {doc.metadata['id']}, Relevance Score: {score}")
        print(doc.page_content)
        print("-----")

# Run the search for the example buyer query against the Chroma store.
perform_search(db, query_text)


Listing ID: 2, Relevance Score: 0.788932700015128
Escape the hustle and bustle of the city in this spacious 4-bedroom, 3-bathroom home located in the peaceful Suburban Meadows neighborhood. This well-maintained property features a large backyard, perfect for outdoor gatherings and gardening. The open kitchen and living area provide ample space for
-----
Listing ID: 2, Relevance Score: 0.7680523782604093
for outdoor gatherings and gardening. The open kitchen and living area provide ample space for family gatherings, while the cozy fireplace is perfect for chilly evenings. Enjoy the tranquility of suburban living without sacrificing convenience.
-----
Listing ID: 5, Relevance Score: 0.7481586404734428
Step back in time in this charming 3-bedroom, 2-bathroom home located in the historic Old Town neighborhood. This well-preserved property features original hardwood floors, vintage fixtures, and a cozy front porch. The updated kitchen and bathrooms offer modern convenience while maintaining

In [19]:
#Use the LLM to augment and personalize the property descriptions based on buyer preferences.

# Prompt template with two placeholders:
#   {context}  - filled with the retrieved listing text
#   {question} - filled with the buyer's query text
AUGMENT_PROMPT_TEMPLATE = """
Based on the following context:

{context}

---

Craft a response that not only answers the question "{question}", but also ensures that your explanation is distinct, captivating, and customized to align with the specified preferences. This involves subtly emphasizing aspects of the property that align with what the buyer is looking for.
"""

def predict_response(db, query_text, PROMPT_TEMPLATE, k=3, min_score=0.7):
    """Retrieve matching listing text and print an LLM-personalized answer.

    Args:
        db: Vector store exposing
            ``similarity_search_with_relevance_scores(query, k=...)`` and
            returning ``(document, score)`` pairs.
        query_text: The buyer's query; used both for retrieval and as the
            ``{question}`` value of the prompt.
        PROMPT_TEMPLATE: Template string containing ``{context}`` and
            ``{question}`` placeholders.
        k: Maximum number of hits to request from the store.
        min_score: Minimum relevance score the first hit must reach for the
            search to count as a match.

    Returns:
        None. The personalized response (or a "no match" message) is printed.
    """
    results = db.similarity_search_with_relevance_scores(query_text, k=k)

    # Only the first hit is compared with the threshold (this assumes the
    # store returns hits best-first). Bail out before any LLM call is made.
    if not results or results[0][1] < min_score:
        print("Unable to find matching results.")
        return

    # All retrieved chunks are concatenated into one context block.
    context_text = "\n\n---\n\n".join(doc.page_content for doc, _ in results)
    prompt_template = PromptTemplate(template=PROMPT_TEMPLATE, input_variables=["context", "question"])
    prompt = prompt_template.format(context=context_text, question=query_text)

    # Get the augmented response
    model = OpenAI(model_name="gpt-3.5-turbo", temperature=0, api_key=OPENAI_API_KEY)
    response_text = model(prompt)
    print(f"Personalized Response:\n{response_text}")

# Generate the personalized description for the example buyer query.
predict_response(db, query_text, AUGMENT_PROMPT_TEMPLATE)




Personalized Response:
Welcome to your dream home in the serene Suburban Meadows neighborhood! This comfortable 3-bedroom house is the perfect oasis from the hustle and bustle of city life, all for under $900,000. With a spacious backyard for outdoor gatherings and gardening, you can enjoy the tranquility of suburban living while still being conveniently located. The open kitchen and living area provide ample space for family gatherings, and the cozy fireplace is ideal for chilly evenings. Don't miss out on this well-maintained property that offers the perfect blend of modern convenience and suburban charm.
