In [None]:
import os

os.environ["OPENAI_API_KEY"] = ""
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

from langchain.chat_models import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage, SystemMessage

Syntetic Data Generation    

In [12]:
# Create a and initialize database with real estate listings
import random
from langchain_chroma import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_core.documents import Document
from uuid import uuid4

locations = [
    "San Francisco",
    "San Jose", 
    "Sunnyvale", 
    "Mountain View",
    "Palo Alto", 
    "Santa Clara", 
    "Cupertino", 
    "Milpitas"
    "Fremont", 
]

llm = ChatOpenAI(model="gpt-3.5-turbo")

embeddings = OpenAIEmbeddings()


vectorstore = Chroma(
    collection_name="example_collection",
    persist_directory="data",
    embedding_function=embeddings
)

parser = StrOutputParser()
print("Generating descriptions for the homes")
documents = []


for location in locations:
    bedrooms = random.randint(2, 5)
    bathrooms = random.randint(1, 3)
    house_size = random.randint(1000, 3000)
    price = random.randint(500000, 2000000)
    messages = [
        SystemMessage(f"""Generate a short description for the follwoing home. Please include detailed description of the home and surrounding area. The d
                      description should include information on the locality, transportation options, and nearby amenities. Please include 
                      details about the schools and parks in the area as well."""),
        HumanMessage(f"{bedrooms} bedroom, {bathrooms} bathroom house in {location} with {house_size} square feet for ${price}.")
    ]
    response = llm.invoke(messages)
    parser = StrOutputParser()
    doc = Document(page_content=parser.invoke(response))
    documents.append(doc)
    

uuids = [str(uuid4()) for _ in range(len(documents))]
vectorstore.add_documents(documents=documents, ids=uuids)

Generating descriptions for the homes


['5096aa82-45ff-4c09-a661-21678f8a67e8',
 '0e6bd6a3-6da3-4103-9bdd-346c15ab8cc6',
 'd30f3cee-0dc7-4769-b446-cb7dda7605c9',
 '1d334419-212b-420d-81cc-87d9edf225f5',
 '8188411c-993e-4fbd-a338-423d62a2c57f',
 '12a5a780-1922-4e2a-9132-14e67a371fd5',
 '4ca24883-93e3-442b-88e5-1f31c8ef3a79',
 '320bf8f8-a684-453c-9d24-ed911cfb536f']

In [13]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
def create_prompt():
    """
    Returns a prompt instructed to produce a rephrased question based on the user's
    last question, but referencing previous messages (chat history).
    """
    system_instruction = """Given a chat history of the user retrive the most similar home listing."""

    prompt = ChatPromptTemplate.from_messages([
        ("system", system_instruction),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}")])
    return prompt

In [6]:
from langchain.chains import create_history_aware_retriever

history_aware_retriever = create_history_aware_retriever(
    llm,
    vectorstore.as_retriever(),
    create_prompt()
)

In [14]:
chat_history = [
    ('ai', 'How big do you want your house to be?'),
    ('human', 'A comfortable three-bedroom house with a spacious kitchen and a cozy living room.'),
    ('ai', 'What are 3 most important things for you in choosing this property?'),
    ('human', 'A quiet neighborhood, good local schools, and convenient shopping options.'),
    ('ai', 'Which amenities would you like?'),
    ('human', 'A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.'),
    ('ai', 'Which transportation options are important to you?'),
    ('human', 'Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.'),
    ('ai', 'How urban do you want your neighborhood to be?'),
    ('human', 'A balance between suburban tranquility and access to urban amenities like restaurants and theaters.')
]
docs = history_aware_retriever.invoke({'input': 'Which home to recommend?', 'chat_history': chat_history})

for i, doc in enumerate(docs):
    print(f'Chunk {i+1}:')
    print(doc.page_content)
    print()

Chunk 1:
This charming 4-bedroom, 2-bathroom house in San Jose offers 1389 square feet of living space for $1,048,681. Located in a peaceful residential neighborhood, this home features a spacious layout with plenty of natural light, a cozy living room, a modern kitchen with updated appliances, and a lovely backyard perfect for outdoor entertaining.

The surrounding area is family-friendly and offers a variety of amenities. The neighborhood is known for its excellent schools, including top-rated public and private options. There are also several parks nearby, providing ample opportunities for outdoor recreation and picnics.

For commuters, the location is convenient with easy access to major highways and public transportation options, making it a breeze to get around the city and beyond. Additionally, there are shopping centers, restaurants, and entertainment venues within a short drive, ensuring residents have everything they need right at their fingertips.

Chunk 2:
Welcome to this s