This is a starter notebook for the project. You'll have to import the libraries you need; the ones available in this workspace are listed in the requirements.txt file.

# Step 1: Setting Up the Python Application

In [None]:
#!pip install -r requirements.txt
#!pip install openai
#!pip install langchain
#!pip install langchain_core
#!pip install langchain_openai
#!pip install lancedb
#!pip install chromadb
!pip install pandas

In [None]:
import os
import pandas as pd
import chromadb
from langchain import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from io import StringIO

In [None]:
# Respect an API key that is already exported in the environment; the
# placeholder is only installed when no key is set, so a real key is never
# clobbered by re-running this cell.
os.environ.setdefault("OPENAI_API_KEY", "YOUR API KEY")

# Chat model used for both listing generation and personalization.
MODEL_NAME = "gpt-3.5-turbo"
# Upper bound on the number of tokens in each model reply.
MAX_TOKENS = 3000

llm = ChatOpenAI(model_name=MODEL_NAME, temperature=0.7, max_tokens=MAX_TOKENS)

# Step 2: Generating Real Estate Listings

In [None]:
# File the generated listings are written to (comma-separated, with header).
LISTINGS_FILE = "listings.csv"
# Number of listings requested from the model.
NUM_LISTINGS = 20

# Prompt template; the field names mentioned in the guidelines must match the
# field list so the model is not asked to keep non-existent columns consistent.
gen_listing_template = """You are an expert real estate agent in New York City.
Generate {num_listings} real estate listings in CSV format. 
Each listing should represent a realistic property and include the following fields:
name: A short, distinctive name for the property (e.g., Grey house).
neighborhood: A neighborhood, city, or region where the property is located.
price: The asking price for the property in USD.
bedrooms: The number of bedrooms.
bathrooms: The number of bathrooms.
house_size: The size of the property in square feet.
description: A detailed, appealing paragraph describing the property, including unique features, amenities, nearby attractions, and potential selling points.
neighborhood_description: A detailed, appealing paragraph describing the neighborhood, including community access, facilities, etc.
Additional Guidelines:
Realistic Listings: Use real-world trends and plausible details for the properties. Avoid overly generic descriptions.
Variety: Include a mix of property types (e.g., apartments, single-family homes, townhouses, luxury condos) and diverse locations.
Creativity and Specificity: Write vivid and engaging descriptions that highlight unique aspects of each property (e.g., architectural style, appliances, views, proximity to landmarks).
Accuracy: Ensure consistency between fields like neighborhood, price, and description. For example, a high-end listing should have features that justify its price.

Be creative in your listings. The CSV format is a must, separator is ";", one line per listing, labels in first line.
"""


def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    lines = text.strip().splitlines()
    if lines and lines[0].startswith("```"):
        lines = lines[1:]
    if lines and lines[-1].startswith("```"):
        lines = lines[:-1]
    return "\n".join(lines)


# create prompt
prompt = PromptTemplate.from_template(gen_listing_template).format(num_listings=NUM_LISTINGS)

# generate listings; chat models sometimes wrap tabular output in ``` fences,
# which would otherwise end up as bogus header/data rows in the CSV parse
real_estate_listings = _strip_code_fence(llm.invoke(prompt).content)

print(real_estate_listings)

# make a data frame; skipinitialspace tolerates "; " used as the separator
df = pd.read_csv(StringIO(real_estate_listings), sep=';', skipinitialspace=True)
# save the data frame to a CSV file (default comma separator)
df.to_csv(LISTINGS_FILE, index=False)

# Step 3: Storing Listings in a Vector Database

In [None]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Directory in which ChromaDB persists the real estate listings
CHROMA_PATH = "chroma-db"

# Read the listings CSV into LangChain documents
loader = CSVLoader(file_path='./listings.csv')
documents = loader.load()

# Break the documents into chunks of at most 500 characters (10 overlapping),
# recording each chunk's start offset in its metadata
splitter_options = dict(
    chunk_size=500,
    chunk_overlap=10,
    length_function=len,
    add_start_index=True,
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
chunks = text_splitter.split_documents(documents)
print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

embeddings = OpenAIEmbeddings()

# Embed the chunks and store them in the "listings" collection on disk
store_options = dict(persist_directory=CHROMA_PATH, collection_name="listings")
db = Chroma.from_documents(chunks, embeddings, **store_options)
db.persist()
print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")

# Step 4: Building the User Preference Interface

In [None]:
 user_preferences = " ".join([
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."
    ])

# Step 5: Searching Based on Preferences

In [None]:
 # perform similarity search
# Retrieve the three stored chunks most similar to the user's preferences.
matched_listings = db.similarity_search(user_preferences, k=3)
matching_listings = []

for doc in matched_listings:
    print(f"{doc.page_content}, \n\nMetadata: {doc.metadata}")
    print("---")

if matched_listings:
    df = pd.read_csv('./listings.csv')
    # The listings were split into chunks before being stored, so several hits
    # can carry the same 'row' metadata; keep each listing only once.
    seen_rows = set()
    for match in matched_listings:
        row_index = match.metadata['row']
        if row_index in seen_rows:
            continue
        seen_rows.add(row_index)
        # Look up the complete listing in the CSV rather than using the chunk.
        matching_listing = df.iloc[row_index]
        print(f"\nMatched Listing:\n\n{matching_listing}")
        matching_listings.append({"text": matching_listing, "row": row_index})

if not matching_listings:
    print("No matching listings found.")



# Step 6: Personalizing Listing Descriptions

In [None]:
def _format_listing_fields(listing):
    """Render a listing as one ``field: value`` line per field, untruncated."""
    # Mappings and pandas Series both expose items(); formatting the pairs
    # ourselves avoids the Series repr, which shortens long string values.
    if hasattr(listing, "items"):
        return "\n        ".join(
            f"{field}: {value}" for field, value in listing.items()
        )
    return str(listing)


def enhance_listing(user_preferences, original_listings, llm_client=None):
    """Rewrite each listing's description to reflect the user's preferences.

    Args:
        user_preferences: Free-text description of what the buyer wants.
        original_listings: Iterable of dicts whose ``"text"`` entry holds the
            listing data (a pandas Series, a mapping, or any printable object).
        llm_client: Optional object with an ``invoke(prompt)`` method whose
            result has a ``content`` attribute. Defaults to the notebook-level
            ``llm``.

    Returns:
        A list with one personalized listing string per input listing, in the
        same order; empty when ``original_listings`` is empty.
    """
    # Fall back to the notebook-level chat model when no client is injected.
    client = llm if llm_client is None else llm_client

    enhanced_listings = []

    for matching_listing in original_listings:
        listing_text = _format_listing_fields(matching_listing["text"])
        prompt = f"""
        Generate an updated real estate listing by aligning the general description with the user's preferences while preserving factual details.

        Instructions:
        - Use the provided data from the original listing (below) to fill in the fields.
        - Do not change factual details such as the name, neighborhood, neighborhood_description, price, number of bedrooms, bathrooms, or house size.
        - Update the description to reflect the user's stated {user_preferences}, but still combine it with the original description

        Fill in the fields using the exact data from the original listing
        {listing_text}
        Output format must be: 
        name:
        neighborhood:
        price: 
        bedrooms: 
        bathrooms: 
        house size: 
        =====
        description:
        """

        response = client.invoke(prompt)
        enhanced_listings.append(response.content.strip())

    return enhanced_listings

# Personalize every listing that matched the user's preferences
enhanced_listings = enhance_listing(user_preferences, matching_listings)

# Show each personalized listing followed by a separator
print("\nPersonalized listing:\n")
for personalized in enhanced_listings:
    print(personalized, "\n---\n", sep="\n")
