In [1]:
# Importing requests, BeautifulSoup for scraping property site
import requests
from bs4 import BeautifulSoup
import bs4
import getpass
import os

# Importing necessary langchain modules
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_pinecone import PineconeVectorStore
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Importing langsmith Client for communication
from langsmith import Client

from uuid import uuid4

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [5]:
# Getting main listing page
url = "https://kleinpennyrentals.com/list"
propertyListingPage = requests.get(url)
propertyListingSoup = BeautifulSoup(propertyListingPage.content, "html.parser")

In [6]:
propertyLinks = []

# Getting all properties in the main listing page
properties = propertyListingSoup.find_all("td", {"class": "views-field-title"})

baseURL = "https://kleinpennyrentals.com"
# Looping through to get each property link
for property in properties:
  link = property.find_all("a")[0]["href"]
  propertyLinks.append(baseURL + link)

In [2]:
# Setting up OpenAPI key, PineCone API, LangChain API
os.environ["OPENAI_API_KEY"] = getpass.getpass()
os.environ['PINECONE_API_KEY'] = getpass.getpass()
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass() 

# Choosing llm model --> using 4o-mini 
llm = ChatOpenAI(model="gpt-4o-mini")

 ········
 ········
 ········


In [7]:
# Load, chunk and index the contents of the property sites
loader = WebBaseLoader(
    web_paths=propertyLinks
)

In [4]:
# Setting up pinecone index --> not necessary now
from pinecone import Pinecone, ServerlessSpec

pinecone_index = Pinecone(api_key=os.environ['PINECONE_API_KEY'])

index_name = "klein-penny-index"

# pinecone_index.create_index(
#     name=index_name,
#     dimension=1536, # Replace with your model dimensions
#     metric="cosine", # Replace with your model metric
#     spec=ServerlessSpec(
#         cloud="aws",
#         region="us-east-1"
#     ) 
# )

In [8]:
docs = loader.load()

In [9]:
# Splitting the text and creating the vector store with PineCone
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2500, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
# vectorstore = PineconeVectorStore(index_name=index_name, embedding=OpenAIEmbeddings())
vectorstore = PineconeVectorStore.from_documents(
        splits,
        index_name=index_name,
        embedding=OpenAIEmbeddings()
    )

In [10]:
unique_id = uuid4().hex[0:8]
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = f"Tracing Walkthrough - {unique_id}"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"

In [11]:
client = Client()

In [12]:
# Retrieve and generate using the relevant snippets of the blog.
# retriever = vectorstore.as_retriever()
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5, "fetch_k": 5, "lambda_mult": 0.5},
)
# prompt = hub.pull("rlm/rag-prompt")

In [19]:
# Using custom prompt to get property info
system_prompt = (
    "You are given a list of documents (each associated with a property) and their associated descriptions, titles, and other information as context. "
    "Use the following pieces of retrieved context to give properties that match the user's question."
    "If you don't know the answer, say that you "
    "don't know. If you use any information from the context, list its metadata in your answer." 
    ""
    "\n\n"
    "Context: {context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# Creating chain so we can grab source from context
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [41]:
response = rag_chain.invoke({"input": "Help me find an apartment with a cast-iron and porcelain Kohler tub"})

In [42]:
print(response["answer"])
# response["context"][0]
print("\n\nYou can check out more related properties here: ")
for found in response["context"]:
    print(found.metadata["title"] + ": " + found.metadata["source"])

I found a couple of apartments that feature a cast-iron and porcelain Kohler tub:

1. **20 N Congress, Apt 5**
   - **Bedrooms:** 4
   - **Bathrooms:** 4
   - **Price:** $700 per bedroom per month
   - **Available:** May 2026
   - **Description:** Each bedroom has its own private bath featuring ceramic tile floors and tub surround, including a Kohler cast iron porcelain tub.
   - **Amenities:** 5 person hot tub, central air conditioning, hardwood floors, large closets, laundry room, dishwasher, and more.
   - **Security Deposit:** $700 per person
   - **Metadata:** 20 N Congress St Apt 5

2. **20 N Congress, Apt 4**
   - **Bedrooms:** 4
   - **Bathrooms:** 4
   - **Price:** $700 per bedroom per month
   - **Available:** May 2026
   - **Description:** Each bedroom has a private bathroom with ceramic tile floor and tub surround, and a Kohler cast iron porcelain tub.
   - **Amenities:** 5 person hot tub, central air conditioning, hardwood floors, laundry room, dishwasher, and more.
   - *

In [52]:
# Start of testing for model
import deepeval
from deepeval import assert_test
from deepeval.test_case import LLMTestCase
from deepeval.metrics import AnswerRelevancyMetric

inputs = [
    "Help me find a 3 bedroom apartment with a new bathroom and high ceilings.",
    "Help me find an apartment that has a washer, dryer, front porch, is 600-700 a month, and has air conditioning.",
    "Are there any apartments with 8 bedrooms and hard wood floors?"
]

expected_outputs = [
    "12 N Congress St Apt 1, 12 N Congress St Apt 4, 12 N Congress St Apt 5",
    "20 N Congress St Apt 1",
    "36 W State St"
    
    
]

# Running through all tests to evaluate accuracy
tests = []
for testIndex in range(len(inputs)):
    # Getting response from model
    response = rag_chain.invoke({"input": inputs[testIndex]})
    currentContext = [str(i) for i in response["context"]]
    # print(response["answer"])
    test_case = LLMTestCase(
        input=inputs[0],
        expected_output=expected_outputs[testIndex],
        actual_output=" ".join([i.metadata["title"] for i in response["context"]]),
        retrieval_context=currentContext
    )

    tests.append(test_case)


In [53]:
# Answer Relevancy -- fails on 3
for test in tests:
    assert_test(test, [AnswerRelevancyMetric()])

Event loop is already running. Applying nest_asyncio patch to allow async execution...


Evaluating 1 test case(s) in parallel: |█████████████████████████████|100% (1/1) [Time Taken: 00:06,  6.92s/test case]


Event loop is already running. Applying nest_asyncio patch to allow async execution...


Evaluating 1 test case(s) in parallel: |█████████████████████████████|100% (1/1) [Time Taken: 00:14, 14.00s/test case]


Event loop is already running. Applying nest_asyncio patch to allow async execution...


Evaluating 1 test case(s) in parallel: |█████████████████████████████|100% (1/1) [Time Taken: 00:10, 10.49s/test case]


AssertionError: Metrics: Answer Relevancy (score: 0.0, threshold: 0.5, strict: False, error: None) failed.

In [61]:
from deepeval.metrics import FaithfulnessMetric
from deepeval.metrics import ContextualRecallMetric
from deepeval.metrics import ContextualPrecisionMetric
from deepeval.metrics import ContextualRelevancyMetric

In [None]:
# Faithfulness -- all pass
for test in tests:
    assert_test(test, [FaithfulnessMetric()])

In [57]:
# Contextual Recall -- fails on 3rd
for test in tests:
    assert_test(test, [ContextualRecallMetric()])

Event loop is already running. Applying nest_asyncio patch to allow async execution...


Evaluating 1 test case(s) in parallel: |█████████████████████████████|100% (1/1) [Time Taken: 00:07,  7.26s/test case]


Event loop is already running. Applying nest_asyncio patch to allow async execution...


Evaluating 1 test case(s) in parallel: |█████████████████████████████|100% (1/1) [Time Taken: 00:05,  5.09s/test case]


Event loop is already running. Applying nest_asyncio patch to allow async execution...


Evaluating 1 test case(s) in parallel: |█████████████████████████████|100% (1/1) [Time Taken: 00:02,  2.90s/test case]


AssertionError: Metrics: Contextual Recall (score: 0.0, threshold: 0.5, strict: False, error: None) failed.

In [60]:
# Contextual Precisison -- fail
for test in tests:
    assert_test(test, [ContextualPrecisionMetric()])

Event loop is already running. Applying nest_asyncio patch to allow async execution...


Evaluating 1 test case(s) in parallel: |█████████████████████████████|100% (1/1) [Time Taken: 00:05,  5.83s/test case]


Event loop is already running. Applying nest_asyncio patch to allow async execution...


Evaluating 1 test case(s) in parallel: |█████████████████████████████|100% (1/1) [Time Taken: 00:07,  7.04s/test case]


AssertionError: Metrics: Contextual Precision (score: 0.3333333333333333, threshold: 0.4, strict: False, error: None) failed.

In [62]:
# ContextualRelevancyMetric -- fail on 2
for test in tests:
    assert_test(test, [ContextualRelevancyMetric()])

Event loop is already running. Applying nest_asyncio patch to allow async execution...


Evaluating 1 test case(s) in parallel: |█████████████████████████████|100% (1/1) [Time Taken: 00:31, 31.04s/test case]


Event loop is already running. Applying nest_asyncio patch to allow async execution...


Evaluating 1 test case(s) in parallel: |█████████████████████████████|100% (1/1) [Time Taken: 00:38, 38.88s/test case]


AssertionError: Metrics: Contextual Relevancy (score: 0.18604651162790697, threshold: 0.5, strict: False, error: None) failed.