This is a starter notebook for the project. You'll have to import the libraries you need; a list of the ones available in this workspace is in the `requirements.txt` file.

In [1]:
import os

from getpass import getpass

# Never overwrite credentials that are already exported in the environment,
# and never keep a key in the notebook source: prompt for it only when missing.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Default to the Vocareum proxy endpoint unless another base URL is already configured.
os.environ.setdefault("OPENAI_API_BASE", "https://openai.vocareum.com/v1")

from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate

In [2]:
# Instruction sent to the chat model. It asks for 25 listings, names the facts
# every listing must contain, and requests '---' as the separator between listings.
prompt = """
Generate 25 diverse and realistic real estate listings in the United States.
Generated listings should be catchy and descriptive, so they encourage reviewers to buy such property.
Also they should contain facts, such as:
- price
- location
- available surface
- number of bedrooms and bathrooms
- property type
- amenities
- description of the neighborhood.

Separate listings by '---'.     
"""

# Chat model shared by the generation and question-answering cells.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0.5, max_tokens=2500)

In [3]:
# One chat-model call with the listing-generation prompt; `result` is reused
# by the cell that writes the listings to disk.
result = llm.invoke(prompt)

# Show the generated text so it can be inspected before it is saved.
generated_text = result.content
print(generated_text)

In [6]:
# Save the generated listings to a text file; the loader cell reads this file back.
with open('real_est.txt', mode='w') as out_file:
    out_file.write(result.content)

In [7]:
import chromadb
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# In-memory Chroma client and a named collection.
# get_or_create_collection keeps this cell re-runnable: create_collection is
# expected to fail once the collection already exists in the running kernel.
chroma_client = chromadb.Client()
collection = chroma_client.get_or_create_collection(name="real_estate_listings")

# Load the generated listings file as LangChain documents.
loader = TextLoader("./real_est.txt")
files = loader.load()

# Split on the '---' separator line requested in the generation prompt.
# Pieces longer than chunk_size are kept whole; the splitter only logs a
# "Created a chunk of size ..." message for them.
text_splitter = CharacterTextSplitter(separator='\n---\n', chunk_size=400, chunk_overlap=0)
docs = text_splitter.split_documents(files)
len(docs)



Created a chunk of size 412, which is longer than the specified 400
Created a chunk of size 401, which is longer than the specified 400
Created a chunk of size 430, which is longer than the specified 400
Created a chunk of size 417, which is longer than the specified 400
Created a chunk of size 406, which is longer than the specified 400
Created a chunk of size 414, which is longer than the specified 400
Created a chunk of size 436, which is longer than the specified 400
Created a chunk of size 406, which is longer than the specified 400


25

In [9]:

# Embedding model used both for indexing the listings and for embedding queries.
embedding_model = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"], model="text-embedding-3-large")

# Stable, position-based ids so that re-running this cell does not append a
# second copy of every listing to the persisted store in ./chroma_db.
# NOTE(review): assumes the installed LangChain Chroma wrapper upserts by id - confirm.
listing_ids = [f"listing-{position}" for position in range(len(docs))]
db = Chroma.from_documents(docs, embedding_model, ids=listing_ids, persist_directory='./chroma_db')




In [10]:
# Free-text buyer preferences used as the semantic search query.
query = '''
I would like to live in one of state in the middle of US and I can spend up to $1.5 Million. 
I want to have an estate in gated community with a lot of amenities for children (e.g. school, playground).
Beside of that house should have at least 4 bedrooms, spacious kitchen and garden.
'''

# Keep the search results under their own name: `docs` holds the listing chunks
# that the indexing cell embeds, and overwriting it here would make a re-run of
# that cell index only these few search results.
matching_listings = db.similarity_search(query)

for listing_doc in matching_listings:
    print(listing_doc.page_content)



12. Equestrian Estate in Lexington, KY
Price: $3,000,000
Location: Lexington, KY
Surface: 10,000 sqft
Bedrooms: 7
Bathrooms: 5
Property type: Equestrian estate
Amenities: Horse stables, riding arena, guest house
Description: Live the equestrian lifestyle in this sprawling estate in Lexington with acres of land for horses and outdoor activities. Explore nearby horse farms and bourbon distilleries.
7. Urban Oasis in Brooklyn, NY
Price: $1,000,000
Location: Brooklyn, NY
Surface: 1,800 sqft
Bedrooms: 3
Bathrooms: 2
Property type: Townhouse
Amenities: Private garden, rooftop deck, chef's kitchen
Description: Escape the hustle and bustle of the city in this serene urban oasis in Brooklyn. Enjoy the vibrant arts scene, trendy cafes, and diverse culture of the neighborhood.
5. Waterfront Estate in Newport, RI
Price: $4,000,000
Location: Newport, RI
Surface: 6,500 sqft
Bedrooms: 5
Bathrooms: 6
Property type: Waterfront estate
Amenities: Private dock, pool, guest house
Description: Experience wa

In [29]:
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Question-answering chain over the listings store. The "stuff" chain type is
# used here; "map_reduce" is an alternative for more complex queries.
qa_chain = RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=db.as_retriever())

In [31]:
# Natural-language question answered by the retrieval chain.
# (A previous assignment to `query` on the line above was overwritten before
# use, so only the query that is actually sent is kept.)
query = "Show me top 2 lowest priced listing for bedroom >4 "
response = qa_chain.run(query)
print(response)



The two lowest priced listings with more than 4 bedrooms are:

1. Ski-In/Ski-Out Chalet in Park City, UT
Price: $2,500,000
Location: Park City, UT
Surface: 4,000 sqft
Bedrooms: 6
Bathrooms: 5
Property type: Ski chalet
Amenities: Hot tub, game room, mountain views

2. Mountain Retreat in Aspen, CO
Price: $2,500,000
Location: Aspen, CO
Surface: 5,000 sqft
Bedrooms: 6
Bathrooms: 4
Property type: Mountain home
Amenities: Ski-in/ski-out access, hot tub, fireplace


## Creating a Streamlit App for the Real Estate Listings

In [33]:
# Streamlit must be installed in the kernel environment before running the next cell, e.g.: %pip install streamlit

In [8]:
import streamlit as st
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
import os

from langchain.chat_models import ChatOpenAI

# Keep credentials that are already exported; the literals below are only
# fallbacks ("api_key" is a placeholder, not a working key).
os.environ.setdefault("OPENAI_API_KEY", "api_key")
os.environ.setdefault("OPENAI_API_BASE", "https://openai.vocareum.com/v1")


# Initialize the chat model and embeddings.
# gpt-3.5-turbo-0125 is a chat model, so it is wrapped with ChatOpenAI (as in the
# generation cell of this notebook) rather than the completion-style OpenAI class.
llm = ChatOpenAI(openai_api_key=os.environ["OPENAI_API_KEY"], model="gpt-3.5-turbo-0125", temperature=0.5, max_tokens=2500)
embedding_model = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"], model="text-embedding-3-large")

# Function to create listings
def create_listings(city, country):
    """Generate one real estate listing for the given place.

    Args:
        city: City name inserted into the prompt.
        country: Country name inserted into the prompt.

    Returns:
        The generated listing as text.
    """
    prompt = f"Create a real estate listing for {city}, {country}."
    response = llm.invoke(prompt)
    # Chat models return a message object whose text is in `.content`;
    # completion-style models return the text directly.
    return getattr(response, "content", response)

# Function to save listings to Chroma DB
def save_to_chroma(listings):
    """Embed listing texts and add them to the Chroma store in ./chroma_db.

    Args:
        listings: Iterable of listing texts (plain strings).

    Returns:
        A Chroma vector store handle backed by ./chroma_db.
    """
    texts = [listing for listing in listings if listing]
    if not texts:
        # Nothing to embed: just open the store without adding anything.
        return Chroma(persist_directory='./chroma_db', embedding_function=embedding_model)
    # The listings are raw strings, so use from_texts; from_documents expects
    # Document objects and cannot take plain dicts.
    db = Chroma.from_texts(texts, embedding_model, persist_directory='./chroma_db')
    return db

# Streamlit UI
st.title("Real Estate Listing Creator and Query Tool")

# Section to create listings
st.header("Create Listings")
city = st.text_input("City")
country = st.text_input("Country")
if st.button("Create Listings"):
    if city.strip() and country.strip():
        # Store the listing in session state: every button click reruns the
        # script, and on that rerun this button reads False again.
        st.session_state["generated_listing"] = create_listings(city, country)
    else:
        st.warning("Please enter both a city and a country.")

# Render the last generated listing on every rerun, so the save button below is
# reachable. Nesting it inside the "Create Listings" branch would make it
# unclickable, because that branch is skipped on the rerun caused by the click.
listing = st.session_state.get("generated_listing")
if listing:
    st.write("Generated Listing:")
    st.write(listing)

    if st.button("Save to Chroma DB"):
        db = save_to_chroma([listing])
        st.success("Listing saved to Chroma DB!")

# Section to query Chroma DB
st.header("Query Listings")
query = st.text_input("Enter your query")
if st.button("Search Listings"):
    if not query.strip():
        # Avoid a model call for an empty question.
        st.warning("Please enter a query.")
    else:
        db = Chroma(persist_directory='./chroma_db', embedding_function=embedding_model)
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",  # You can use "map_reduce" for more complex queries
            retriever=db.as_retriever()
        )
        response = qa_chain.run(query)
        st.write("Query Result:")
        st.write(response)


2024-08-16 17:42:00.872 
  command:

    streamlit run /opt/conda/lib/python3.10/site-packages/ipykernel_launcher.py [ARGUMENTS]
2024-08-16 17:42:00.873 Session state does not function when running a script without `streamlit run`
