In [84]:
import os
import json 
import ast
import chromadb
import pandas as pd

from uuid import uuid4
from openai import OpenAI

from langchain_chroma import Chroma
from langchain.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain_openai import ChatOpenAI
from langchain.document_loaders import JSONLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser

from helpers.generate_listings_helpers import ListingOutput
from helpers.upload_to_vector_db_helpers import metadata_func
from helpers.personalized_listings_helpers import AttributeParser, create_where_filter
from helpers.utils import load_yaml_config, set_config_parameters

# Credentials must come from the environment (shell export, .env loader, or the
# Jupyter server's environment) -- never from a literal saved with the notebook.
# SECURITY: a key was previously hardcoded on this line; treat it as leaked and
# revoke it.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it before starting Jupyter; "
        "do not paste the key into the notebook."
    )

# The endpoint URL is not a secret, so it stays as a default that an
# already-exported OPENAI_API_BASE can override.
os.environ.setdefault("OPENAI_API_BASE", "https://openai.vocareum.com/v1")

In [85]:
# Raw OpenAI SDK client, configured explicitly from the two environment
# variables set in the first cell.
openai_settings = {
    "api_key": os.environ["OPENAI_API_KEY"],
    "base_url": os.environ["OPENAI_API_BASE"],
}
client = OpenAI(**openai_settings)

# LangChain chat model used by the chains further down; temperature 0 keeps the
# sampling as deterministic as the API allows. No key/endpoint is passed here,
# so it presumably falls back to the same environment variables (confirm).
llm = ChatOpenAI(temperature=0, max_tokens=4000)

In [86]:
# Read the YAML configuration that drives the rest of the notebook; `config` is
# indexed like a dict by the cells below (e.g. config['PROPERTY_TYPES']).
yaml_file_path = "./config.yaml"
config = load_yaml_config(yaml_file_path)
# Project helper; its effect is not visible from this notebook -- presumably it
# validates or applies the loaded settings (TODO confirm in helpers.utils).
set_config_parameters(config)

#### Generate real estate listings

Generate 10 real estate listings using a Large Language Model

In [87]:
# Output parser built around the project's ListingOutput model; its
# get_format_instructions() text is injected into the listing-generation prompt.
parser = PydanticOutputParser(pydantic_object=ListingOutput)

In [88]:
# Load the listing-generation prompt template from disk.
#
# `input_variables` is deliberately not passed: langchain-core's
# PromptTemplate.from_file ignores that argument (and emits a deprecation
# warning when it is given), deriving the variables from the {placeholders} in
# the template text instead. A hand-maintained list here can therefore only
# drift out of sync with the template without ever being checked.
#
# `format_instructions` is pre-filled so the parser's output schema is always
# part of the rendered prompt.
generate_listings_prompt = PromptTemplate.from_file(
    "./prompts/generate_listings_prompt.txt",
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

  generate_listings_prompt = PromptTemplate.from_file("./prompts/generate_listings_prompt.txt", input_variables=[


In [89]:
# The numeric ranges share one naming scheme: template variable
# `<field>_<min|max>` is filled from config key `<FIELD>_<MIN|MAX>`.
range_values = {
    f"{field}_{bound}": config[f"{field}_{bound}".upper()]
    for field in ("price", "bedrooms", "bathrooms", "house_size")
    for bound in ("min", "max")
}

# Render the system prompt: list-valued settings are joined into text, the
# remaining values are passed through as configured.
system_prompt = generate_listings_prompt.format(
    property_types=config["PROPERTY_TYPES"],
    neighborhoods=config["NEIGHBORHOODS"],
    neighborhood_descriptions=config["NEIGHBORHOOD_DESCRIPTIONS"],
    expensive=",".join(config["EXPENSIVE_NEIGHBORHOODS"]),
    features=", ".join(config["FEATURES"]),
    listing_examples="\n------------------------- \n".join(config["LISTING_EXAMPLES"]),
    format_instructions=parser.get_format_instructions(),
    **range_values,
)

In [90]:
# Conversation sent to the model: the rendered template is the system message,
# the short instruction is the user turn.
listing_messages = [
    {"role": "system", "content": system_prompt},
    {
        "role": "user",
        "content": "Generate 10 listings following these guidelines. Make sure the property_description is very unique across examples",
    },
]

# Single chat-completion request through the raw OpenAI client.
listings_output = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=listing_messages,
    max_tokens=4000,
    temperature=0,
)

In [91]:
# Extract the text of the model's reply and drop newlines so that line breaks
# in the reply cannot interfere with parsing.
out = listings_output.choices[0].message.content
out = out.replace("\n", "")

# Parse as JSON first: ast.literal_eval rejects the JSON literals
# true/false/null, so a perfectly valid JSON reply could fail with it. Fall
# back to literal_eval for replies written as a Python literal (e.g. with
# single-quoted keys), which is what the notebook accepted before.
try:
    out_format = json.loads(out)
except json.JSONDecodeError:
    out_format = ast.literal_eval(out)

# Combine the property and neighborhood descriptions into a new field; this is
# the text that gets embedded. A space separates the two descriptions so the
# last sentence of one does not run into the first sentence of the other.
for item in out_format:
    item["property_and_neighborhood_desc"] = (
        item["property_description"] + " " + item["neighborhood_description"]
    )

# Save to JSON, creating the output directory when it does not exist yet.
os.makedirs("./output", exist_ok=True)
with open("./output/sample_listings.json", "w") as f:
    json.dump({"properties": out_format}, f)

#### Store vectors in a Chroma Vector DB

Set up a Chroma vector DB instance, then store each listing's combined property and neighborhood description as an embedded vector, with the listing's metadata attached.

In [93]:
# Turn every entry under "properties" in the saved listings file into a
# LangChain Document: the combined description field becomes the document
# content and the project's metadata_func supplies the metadata.
loader = JSONLoader(
    file_path="./output/sample_listings.json",
    jq_schema=".properties[]",
    content_key="property_and_neighborhood_desc",
    metadata_func=metadata_func,
)
documents = loader.load()

In [94]:
# Upload to Vector DB
COLLECTION_NAME = "home_match"

chroma_client = chromadb.Client()

# get_or_create_collection returns the existing collection or creates it,
# without a bare `except:` that would also hide unrelated failures
# (connection problems, typos, KeyboardInterrupt).
collection = chroma_client.get_or_create_collection(name=COLLECTION_NAME)

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# LangChain wrapper around the same client/collection; it embeds documents with
# `embeddings` when they are added or searched.
vector_store_from_client = Chroma(
    client=chroma_client,
    collection_name=COLLECTION_NAME,
    embedding_function=embeddings,
)

# One fresh random id per document. NOTE: because the ids are random, running
# this cell again against a collection that already holds the listings adds
# them a second time; call chroma_client.delete_collection(COLLECTION_NAME)
# first to start from an empty collection.
uuids = [str(uuid4()) for _ in documents]

# Kept as the last expression so the notebook displays the stored ids.
vector_store_from_client.add_documents(documents=documents, ids=uuids)

['57435bb1-59f1-4e3d-a54e-c8e145f00cce',
 'b461cb8b-0553-4426-8ba7-80d456b9b9aa',
 '87af360d-0bd9-473f-b455-23dbfb9c1389',
 '856f23ab-eac7-4a8f-8efc-3fcbbf8b5d0d',
 '2bea86cd-2b67-4dcd-ae3f-df70f68b13b1',
 '6cf375f4-f7fb-4075-b42a-0d5a56b8283b',
 '90478e80-dca0-417a-86c3-cfaa3c8492fc',
 '75f52d2a-fccf-4c8b-87d3-f8c0e2816402',
 '20657f8a-425c-481f-8f9c-00f46f5099d9',
 '8eecf272-94c8-45f8-a39c-416d8f1cc05d',
 'ff0d9787-d0dd-4f0b-bc4e-01e119f396f4']

#### Retrieve personalized listings

Retrieve personalized listings based on the hard-coded questions and answers in the config.

Parse the output into a structured format (Pandas DataFrame) to be sent to downstream applications

In [95]:
# Set up a structured LLM to provide the structured output needed for the Vector DB search.
# AttributeParser is a project-defined schema (helpers.personalized_listings_helpers);
# the object returned by this model is later passed to create_where_filter.
structured_llm = llm.with_structured_output(AttributeParser)

In [97]:
# Get personalization system prompt
#
# `input_variables` is deliberately not passed: langchain-core's
# PromptTemplate.from_file ignores that argument (and emits a deprecation
# warning when it is given); the variables come from the {placeholders} in the
# template file itself.
personalization_prompt_template = PromptTemplate.from_file(
    "./prompts/personalization_system_prompt.txt"
)

# Render the template with the configured questions and answers. The template
# object keeps its own name, so `personalization_system_prompt` only ever
# refers to the final prompt string that is sent to the structured LLM.
personalization_system_prompt = personalization_prompt_template.format(
    attribute_questions=config["ATTRIBUTE_QUESTIONS"],
    attribute_answers=config["ATTRIBUTE_ANSWERS"],
)

  personalization_system_prompt = PromptTemplate.from_file(


In [98]:
# Free-text query for the similarity search: every configured answer joined
# into a single string.
all_answers = ": ".join(config["ATTRIBUTE_ANSWERS"])

# Have the structured LLM extract the search attributes from the rendered
# personalization prompt.
structured_input = structured_llm.invoke(personalization_system_prompt)

# Convert those attributes into a where filter for Chroma. The search cell
# JSON-decodes this value, so the helper presumably returns a JSON string
# (confirm). Note that the name shadows Python's built-in `filter`.
filter = create_where_filter(structured_input)

In [14]:
# Decode the JSON-encoded metadata filter, then fetch the two listings whose
# embedded descriptions are closest to the user's combined answers.
where_clause = json.loads(filter)
results = vector_store_from_client.similarity_search(
    all_answers, k=2, filter=where_clause
)

In [99]:
results

[Document(metadata={'bathrooms': 2, 'bedrooms': 4, 'house_size': 2200, 'neighborhood': 'Noe Valley', 'neighborhood_description': "Noe Valley is a charming and family-friendly neighborhood in San Francisco, known for its tree-lined streets, Victorian cottages, and vibrant community. If you're looking to buy a home, Noe Valley offers a peaceful and idyllic setting, with excellent schools, parks, and amenities. While housing prices can be competitive, the neighborhood's strong sense of community, proximity to popular attractions, and beautiful surroundings make it a highly desirable choice for those seeking a family-friendly and welcoming atmosphere.", 'price': 1700000, 'property_description': 'This Noe Valley gem offers a blend of modern elegance and classic charm. The open living area features a gas fireplace and French doors leading to a sunny deck. The gourmet kitchen is equipped with stainless steel appliances, quartz countertops, and a large island. The master suite boasts a walk-in

In [100]:
# Parse personalized output into a DataFrame with one row per retrieved listing.
column_names = [
    "neighborhood",
    "price",
    "bedrooms",
    "bathrooms",
    "size",
    "property_description",
    "neighborhood_description",
]

# Each output column is read from the metadata key of the same name, except
# "size", which is stored under "house_size" in the document metadata.
metadata_key_by_column = {name: name for name in column_names}
metadata_key_by_column["size"] = "house_size"

# Build all rows first and construct the frame once. Filling an empty frame
# cell by cell with .loc is slower and leaves every column as object dtype;
# this way numeric columns get numeric dtypes. `columns=` keeps the column
# order and still yields the expected (empty) frame when there are no results.
records = [
    {column: res.metadata[key] for column, key in metadata_key_by_column.items()}
    for res in results
]
recommendations_df = pd.DataFrame(records, columns=column_names)

In [101]:
recommendations_df

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,size,property_description,neighborhood_description
0,Noe Valley,1700000,4,2,2200,"This Noe Valley gem offers a blend of modern elegance and classic charm. The open living area features a gas fireplace and French doors leading to a sunny deck. The gourmet kitchen is equipped with stainless steel appliances, quartz countertops, and a large island. The master suite boasts a walk-in closet and a spa-like bathroom with a jetted tub. Enjoy the landscaped backyard with a fire pit and a hot tub, perfect for relaxing or entertaining. Additional features include a home office, a wine cellar, and a two-car garage.","Noe Valley is a charming and family-friendly neighborhood in San Francisco, known for its tree-lined streets, Victorian cottages, and vibrant community. If you're looking to buy a home, Noe Valley offers a peaceful and idyllic setting, with excellent schools, parks, and amenities. While housing prices can be competitive, the neighborhood's strong sense of community, proximity to popular attractions, and beautiful surroundings make it a highly desirable choice for those seeking a family-friendly and welcoming atmosphere."
1,Potrero Hill,1500000,3,2,2000,"Perched on Potrero Hill, this modern home offers breathtaking views of the city skyline. The open-concept living area features floor-to-ceiling windows and a sleek fireplace. The chef's kitchen is equipped with high-end appliances and a large island. The master suite boasts a private terrace, a walk-in closet, and a spa-like bathroom with a rain shower. Enjoy the outdoor oasis with a landscaped garden, a fire pit, and a hot tub. Other features include a home gym, a wine cellar, and a two-car garage.","Potrero Hill is a vibrant and rapidly developing neighborhood in San Francisco, known for its stunning views of the city skyline and the bay. If you're looking to buy a home, Potrero Hill offers a mix of historic buildings, modern condos, and trendy lofts. While housing prices can be on the higher side, the neighborhood's proximity to popular attractions, vibrant atmosphere, and stunning views make it a desirable place to live. With its growing popularity, Potrero Hill offers a unique opportunity to be part of a thriving and dynamic community."


#### Augment property description

Summarize the provided user preferences from the questions.  Augment the property description to highlight the attributes specified by the user.

In [102]:
# Create a summary of the user preferences
# Only entries 2 and 3 of the configured questions/answers are summarized --
# presumably the free-text preference questions (confirm against config.yaml).
questions = config["ATTRIBUTE_QUESTIONS"][2:4]
answers = config["ATTRIBUTE_ANSWERS"][2:4]

# Chain: chat prompt -> LLM -> plain string.
output_parser = StrOutputParser()
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an AI agent that can summarize user preferences from a list of questions.",
        ),
        ("user", "{input}"),
    ]
)
chain = prompt | llm | output_parser

# The whole request is passed as the single `input` variable of the prompt.
summary_request = f"Given the following {answers}:\n\n to these questions: \n\n {questions} summarize the user preferences given in the answers"
summary_user_preferences = chain.invoke(input=summary_request)

In [103]:
summary_user_preferences

'User preferences can be summarized as follows:\n- The user values a quiet neighborhood, good local schools, and convenient shopping options as the top priorities in choosing a property.\n- The user desires a backyard for gardening, a home office, and a large remodeled kitchen for entertaining as preferred amenities.'

In [105]:
# Augment the descriptions
#
# Everything that does not depend on the individual listing is built once,
# before the loop: the chat prompt, the chain, and the augmentation template
# (previously the template file was re-read for every listing).
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an AI agent that can take existing descriptions of real estate properties and highlight property attributes relevant to users based on supplied preferences",
        ),
        ("user", "{input}"),
    ]
)
chain = prompt | llm

# `input_variables` is deliberately not passed: langchain-core's
# PromptTemplate.from_file ignores that argument (and emits a deprecation
# warning when it is given); the variables come from the template file itself.
augmentation_template = PromptTemplate.from_file("./prompts/augmentation_prompt.txt")

# One LLM call per recommended listing. Results are collected in row order and
# assigned as a whole column afterwards, so the write-back does not depend on
# the frame's index labels being 0..n-1.
augmented_descriptions = []
for desc in recommendations_df["property_description"]:
    augmentation_prompt = augmentation_template.format(
        recommendation=desc,
        summary_user_preferences=summary_user_preferences,
    )
    augmented_description = chain.invoke(input=augmentation_prompt)
    augmented_descriptions.append(augmented_description.content)

recommendations_df["augmented_description"] = augmented_descriptions

  augmentation_prompt = PromptTemplate.from_file(
  augmentation_prompt = PromptTemplate.from_file(


In [106]:
# Show long text columns in full instead of truncating them. This is a
# session-wide pandas option, so it also affects every later DataFrame display.
pd.set_option('display.max_colwidth', None)
recommendations_df

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,size,property_description,neighborhood_description,augmented_description
0,Noe Valley,1700000,4,2,2200,"This Noe Valley gem offers a blend of modern elegance and classic charm. The open living area features a gas fireplace and French doors leading to a sunny deck. The gourmet kitchen is equipped with stainless steel appliances, quartz countertops, and a large island. The master suite boasts a walk-in closet and a spa-like bathroom with a jetted tub. Enjoy the landscaped backyard with a fire pit and a hot tub, perfect for relaxing or entertaining. Additional features include a home office, a wine cellar, and a two-car garage.","Noe Valley is a charming and family-friendly neighborhood in San Francisco, known for its tree-lined streets, Victorian cottages, and vibrant community. If you're looking to buy a home, Noe Valley offers a peaceful and idyllic setting, with excellent schools, parks, and amenities. While housing prices can be competitive, the neighborhood's strong sense of community, proximity to popular attractions, and beautiful surroundings make it a highly desirable choice for those seeking a family-friendly and welcoming atmosphere.","This Noe Valley gem features a landscaped backyard with a fire pit and a hot tub, perfect for relaxing or entertaining. The gourmet kitchen, equipped with stainless steel appliances, quartz countertops, and a large island, is ideal for hosting gatherings. Additionally, the home includes a home office, perfect for remote work, and a wine cellar for enthusiasts. The property also offers a two-car garage for convenience."
1,Potrero Hill,1500000,3,2,2000,"Perched on Potrero Hill, this modern home offers breathtaking views of the city skyline. The open-concept living area features floor-to-ceiling windows and a sleek fireplace. The chef's kitchen is equipped with high-end appliances and a large island. The master suite boasts a private terrace, a walk-in closet, and a spa-like bathroom with a rain shower. Enjoy the outdoor oasis with a landscaped garden, a fire pit, and a hot tub. Other features include a home gym, a wine cellar, and a two-car garage.","Potrero Hill is a vibrant and rapidly developing neighborhood in San Francisco, known for its stunning views of the city skyline and the bay. If you're looking to buy a home, Potrero Hill offers a mix of historic buildings, modern condos, and trendy lofts. While housing prices can be on the higher side, the neighborhood's proximity to popular attractions, vibrant atmosphere, and stunning views make it a desirable place to live. With its growing popularity, Potrero Hill offers a unique opportunity to be part of a thriving and dynamic community.","Nestled atop Potrero Hill, this modern sanctuary offers stunning views of the city skyline. Step into the open-concept living area with expansive floor-to-ceiling windows that flood the space with natural light. The chef's kitchen, complete with high-end appliances and a spacious island, is perfect for entertaining guests. Retreat to the master suite featuring a private terrace and a luxurious spa-like bathroom with a rain shower. Outside, discover an outdoor oasis with a beautifully landscaped garden, ideal for gardening enthusiasts. Additional highlights include a home gym, a wine cellar, and a two-car garage for added convenience."


In [107]:
# Persist the final recommendations. to_csv is called with its defaults, so the
# DataFrame index is written as the first, unnamed column of the file.
recommendations_df.to_csv("./output/final_recommendations.csv")