In [66]:
# Execute config.ipynb inside this kernel so the names it defines become available here.
%run config.ipynb
# set_environment is not defined anywhere in this notebook — presumably it comes from
# config.ipynb (TODO confirm).
# NOTE(review): a later cell reads os.environ['DOCDB_CONNECTION_STR'], so this call
# likely exports that variable — verify against config.ipynb.
set_environment()

# Import Statements

In [77]:
import os

import bs4
from pymongo import MongoClient

from langchain_aws import ChatBedrock
from langchain_aws.embeddings.bedrock import BedrockEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import DocumentDBVectorSearch
from langchain_community.vectorstores.documentdb import DocumentDBSimilarityType

from langchain_core.documents import Document
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_history_aware_retriever

USER_AGENT environment variable not set, consider setting it to identify your requests.


# LLM and Embedding Models

In [68]:
# Chat model that generates the answers.
# Alternative model id kept for reference: "meta.llama3-70b-instruct-v1:0"
# NOTE(review): temperature=1 makes answers vary between runs — confirm this is
# intended for a retrieval-grounded Q&A flow.
llm = ChatBedrock(
    model_id="anthropic.claude-3-sonnet-20240229-v1:0",
    model_kwargs={"temperature": 1},
)

# Embedding model used when building and querying the vector store.
embedModel = BedrockEmbeddings(model_id="amazon.titan-embed-text-v2:0")

In [69]:
# --- DocumentDB connection and vector index ---------------------------------
# Database / collection that store the embedded chunks, and the name of the
# vector index created over that collection.
dbName = 'RealEstateRag'
dbCollection = 'ApartmentsDescSearch'
indexName = 'AptDescEmbedIndex'

# Vector index settings, named so they are easy to find and tune.
# NOTE(review): EMBEDDING_DIMENSIONS must equal the vector length produced by
# the embedding model; 1024 is assumed to match it — confirm.
EMBEDDING_DIMENSIONS = 1024
HNSW_M = 16
HNSW_EF_CONSTRUCTION = 64

# Fail with an actionable message instead of a bare KeyError (or an obscure
# driver error on an empty value) when the connection string is unavailable.
dbConnectionStr = os.environ.get('DOCDB_CONNECTION_STR')
if not dbConnectionStr:
    raise KeyError(
        "DOCDB_CONNECTION_STR is not set; configure the environment before running this cell."
    )

client: MongoClient = MongoClient(dbConnectionStr)
Ragdatabase = client[dbName]
Ragcollection = Ragdatabase[dbCollection]

# Create the HNSW vector index on the "vectorContent" field.
# "createIndexes" must stay the first key: it is the command name.
# The trailing ';' suppresses the command reply in the cell output.
Ragdatabase.command({
    "createIndexes": dbCollection,
    "indexes": [{
        "key": {"vectorContent": "vector"},
        "vectorOptions": {
            "type": "hnsw",
            "dimensions": EMBEDDING_DIMENSIONS,
            "similarity": "cosine",
            "m": HNSW_M,
            "efConstruction": HNSW_EF_CONSTRUCTION,
        },
        "name": indexName,
    }],
});

  client: MongoClient = MongoClient(dbConnectionStr)


In [70]:
# Collection (in the same database as the vector collection) whose `description`
# field is read and indexed further down.
rentalDataCollection = 'RentalApartments'
RentalCollection = Ragdatabase[rentalDataCollection]

# 1. Load, chunk and index the Description field of RentalApartments Collection

In [71]:
# Splitter shared by the cells that chunk text: pieces of up to 1000 characters
# with a 200-character overlap between consecutive pieces.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Build one list of chunked Documents from the `description` field of every
# rental record. Each chunk keeps the originating record's _id as its "source".
dbVectorDocs = []
skipped = 0
for doc in RentalCollection.find({}, {'description': 1}):
    # The projection returns only _id when a record has no description, so
    # indexing doc['description'] directly would raise KeyError. Skip records
    # whose description is missing, non-text or blank, and report the count.
    description = doc.get('description')
    if not isinstance(description, str) or not description.strip():
        skipped += 1
        continue
    langDoc = [Document(page_content=description, metadata={"source": doc['_id']})]
    splits = text_splitter.split_documents(langDoc)
    dbVectorDocs.extend(splits)

if skipped:
    print(f"Skipped {skipped} record(s) without a usable description")
print(len(dbVectorDocs))

5000


In [72]:
# Spot-check a single chunk (arbitrary index 489) to inspect its text and metadata.
# Raises IndexError if fewer than 490 chunks were produced.
dbVectorDocs[489]

Document(metadata={'source': ObjectId('66ccf5ac4b623a226706f63b')}, page_content='Welcome to this 1-bedroom, 1-bathroom studio located in the heart of Rochelle Park. With 707 square feet of living space, this unit offers stunning views from the 12th floor. Enjoy the convenience of top-of-the-line appliances, hardwood floors, and a private balcony.\nPet-friendly: No. Parking: Available.')

In [73]:
# Build the vector store over Ragcollection.
#
# from_documents() embeds and inserts every chunk on each call, so re-running
# this cell against an already-populated collection would store duplicate
# vectors (and repeat the embedding work). Populate the collection only when it
# is empty; otherwise attach to what is already there.
# To force a rebuild, clear the collection's documents first.
# NOTE(review): a collection left partially filled by an interrupted run is
# treated as populated — clear it manually in that case.
if Ragcollection.find_one({}, {'_id': 1}) is None:
    vectorstore = DocumentDBVectorSearch.from_documents(
        documents=dbVectorDocs,
        embedding=embedModel,
        collection=Ragcollection,
        index_name=indexName,
    )
else:
    vectorstore = DocumentDBVectorSearch(
        collection=Ragcollection,
        embedding=embedModel,
        index_name=indexName,
    )

# Query the Description field using a prompt

In [74]:
# Wrap the vector store as a retriever; no arguments are passed, so the
# library's default search settings apply.
retriever = vectorstore.as_retriever()

In [75]:
# System instructions for the model; "{context}" is the placeholder that the
# documents chain fills with the retrieved chunks.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know."
    "\n\n{context}"
)

# Two-message chat template: the system instructions, then the user's question.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

# Inner chain: hands a collection of documents to the LLM through the prompt.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# Outer chain: fetches documents with the retriever and forwards them to the
# inner chain.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Run one example question end to end.
question = "Give me a list of 3 bedrooms apartment in Newark?"
response = rag_chain.invoke({"input": question})

In [76]:
# Display the "answer" entry of the chain's response.
response["answer"]

'Based on the information provided, here is a list of the 3 bedroom apartments available in Newark:\n\n1. 3 bedroom, 2 bathroom, 2037 sq ft apartment\nRent: $1966.30 per month\nAvailable from: 9/26/2024\nContact: Edith Leannon at 1-217-491-4065 x59958\n\n2. 3 bedroom, 1 bathroom, 1049 sq ft apartment  \nRent: $4471.39 per month\nAvailable from: 3/20/2025\nContact: Ervin Reichel at (848) 721-3722 x655\n\n3. 3 bedroom, 1 bathroom, 2862 sq ft apartment\nRent: $1794.62 per month  \nAvailable from: 8/19/2025\nContact: Tyrone Bernhard at 672.382.5350\n\n4. 3 bedroom, 2 bathroom, 976 sq ft apartment\nRent: $2793.60 per month\nAvailable from: 8/25/2025\nContact: Ms. Wendy Steuber at (707) 953-9101 x202'

# Insert Website Content into the Vector DB

In [92]:
# Restrict HTML parsing to elements carrying one of these CSS classes.
bs4_strainer = bs4.SoupStrainer(class_=("single-content", "single-title", "single-excerpt"))

# Pages to load and chunk.
web_paths = [
    "https://www.zillow.com/research/july-2024-rent-report-34276/",
    "https://www.zillow.com/research/renters-consider-buying-33973/",
]

# Load each page, split it with the shared text_splitter (defined in the
# chunking cell earlier in the notebook) and collect all chunks in webdocs.
webdocs = []
for web_path in web_paths:
    loader = WebBaseLoader(
        web_path=web_path,
        bs_kwargs={"parse_only": bs4_strainer},
    )
    webdoc = loader.load()
    all_splits = text_splitter.split_documents(webdoc)
    if not all_splits:
        # Nothing matched the strainer, or the page came back empty. Say so
        # instead of silently contributing nothing to the index.
        print(f"Warning: no content extracted from {web_path}")
    webdocs.extend(all_splits)

In [93]:
# Number of chunks collected from all web pages.
print(len(webdocs))

16


In [94]:
# Spot-check one web chunk (arbitrary index 5): prints its page_content and metadata.
print(webdocs[5])

page_content='Single-Family Rents

The typical U.S. asking rent for single-family homes is $2,294 as of July, up 0.4% month over month. 
Single-family rents are now up 4.7% from last year.
Since the beginning of the pandemic, single-family rents have increased by 40.1%.
Single-family rents fell, on a monthly basis, in two major metro areas: Milwaukee (-0.7%), Austin (-0.02%). 
Single-family rents are up from year-ago levels in 49 of the 50 largest metro areas. Annual single-family rent increases are highest in Cleveland (8.6%), Cincinnati (7.8%), Indianapolis (7.5%), Columbus (7.2%) and Louisville (7.2%).

Multifamily Rents' metadata={'source': 'https://www.zillow.com/research/july-2024-rent-report-34276/'}
