In [1]:
# Importing flask for setting up basic server
from flask import Flask, request
from flask_cors import CORS

# Create the Flask application object and enable CORS on it via flask_cors.
# NOTE(review): no routes are registered on `app` in the visible cells.
app = Flask(__name__)
CORS(app)

# Importing requests, BeautifulSoup for scraping property site
import requests
from bs4 import BeautifulSoup
import bs4
import getpass
import os

# Importing necessary langchain modules
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_pinecone import PineconeVectorStore
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Importing langsmith Client for communication
from langsmith import Client

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Getting main listing page
url = "https://kleinpennyrentals.com/list"

# A timeout keeps the cell from hanging forever on an unresponsive server, and
# raise_for_status() stops an HTTP error page from being parsed as if it were
# the listing (which would silently produce an empty property list).
propertyListingPage = requests.get(url, timeout=30)
propertyListingPage.raise_for_status()

# Parse the raw response bytes with the built-in HTML parser
propertyListingSoup = BeautifulSoup(propertyListingPage.content, "html.parser")

In [3]:
# Prefix joined onto each href; the hrefs are assumed to be site-relative
baseURL = "https://kleinpennyrentals.com"

# Getting all properties in the main listing page
properties = propertyListingSoup.find_all("td", {"class": "views-field-title"})

# Looping through to get each property link.
# The loop variable is deliberately not called `property`, which would shadow
# the Python builtin for the rest of the notebook session.
propertyLinks = []
for propertyCell in properties:
    # First anchor in the cell that actually carries an href
    anchor = propertyCell.find("a", href=True)
    if anchor is None:
        # Skip title cells without a usable link instead of crashing
        continue
    propertyLinks.append(baseURL + anchor["href"])

In [4]:
# Setting up OpenAI key, PineCone API, LangChain API.
# Each prompt is labelled so it is clear which key is being asked for, and a
# key that is already present in the environment is kept rather than
# re-prompted, so the notebook can be re-run without retyping secrets.
for keyName in ("OPENAI_API_KEY", "PINECONE_API_KEY", "LANGCHAIN_API_KEY"):
    if not os.environ.get(keyName):
        os.environ[keyName] = getpass.getpass(f"{keyName}: ")

# Choosing llm model --> using 4o-mini
llm = ChatOpenAI(model="gpt-4o-mini")

 ········
 ········
 ········


In [5]:
# Build a loader over every scraped property URL; nothing is fetched here,
# the pages are only downloaded when loader.load() is called later on
loader = WebBaseLoader(web_paths=propertyLinks)

In [10]:
# Setting up pinecone index
from pinecone import Pinecone, ServerlessSpec

# Creates new pinecone index for openai embeddings 
def createNewIndex(indexName, dimension=1536, metric="cosine"):
    """Create a serverless Pinecone index through the module-level client.

    Args:
        indexName: Name of the index to create.
        dimension: Vector size of the index. Defaults to 1536, the value this
            notebook uses for its OpenAI embeddings; it must match the
            embedding model that will write to the index.
        metric: Similarity metric of the index. Defaults to "cosine".

    The index is created on AWS in us-east-1. The call goes through the global
    `pinecone_index` client, which must exist before this function is called.
    """
    pinecone_index.create_index(
        # Use the argument, not the global `index_name`, so the function
        # creates the index the caller actually asked for
        name=indexName,
        dimension=dimension,
        metric=metric,
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

# Open a Pinecone client with the API key stored in the environment
pinecone_index = Pinecone(api_key=os.environ['PINECONE_API_KEY'])

index_name = "klein-penny-index"

# Collect the name of every index the client reports
allIndexes = []
for existingIndex in pinecone_index.list_indexes():
    allIndexes.append(existingIndex["name"])

if index_name in allIndexes:
    # The index already exists, so just attach a vector store to it
    vectorstore = PineconeVectorStore(index_name=index_name, embedding=OpenAIEmbeddings())
else:
    # First run: create the index before anything is written to it
    createNewIndex(index_name)

    # Download the property pages
    docs = loader.load()

    # Chunk the pages, then embed the chunks and store them in the new index
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2500, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    vectorstore = PineconeVectorStore.from_documents(
        splits,
        index_name=index_name,
        embedding=OpenAIEmbeddings()
    )


In [11]:
# LangSmith client, constructed with default arguments.
# NOTE(review): presumably authenticates with the LANGCHAIN_API_KEY set
# earlier in the notebook — confirm. `client` is not referenced again in the
# visible cells.
client = Client()

In [12]:
# Retrieve and generate using the relevant snippets of the sites.
# MMR first fetches `fetch_k` candidates by similarity and then keeps the `k`
# that best balance relevance and diversity (`lambda_mult`). fetch_k has to be
# larger than k for that selection to have anything to choose from; with
# fetch_k == k it can only reorder the plain top-k results.
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5, "fetch_k": 20, "lambda_mult": 0.5},
)

In [13]:
# Using custom prompt to get property info.
# The adjacent string literals are concatenated into one system message, so
# every sentence needs its own trailing space. "{context}" is a template
# placeholder, not literal text.
system_prompt = (
    "You are given a list of documents (each associated with a property) and their associated descriptions, titles, and other information as context. "
    "Use the following pieces of retrieved context to give properties that match the user's question. "
    "If you don't know the answer, say that you "
    "don't know. If you use any information from the context, list its metadata in your answer."
    "\n\n"
    "Context: {context}"
)

# Chat template: the system message carries the instructions plus the
# retrieved context, the human message carries the user's question
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

# Step that passes the retrieved documents to the LLM through the prompt
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# Full RAG chain; built this way so the source documents can be read back
# from the response's context
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [14]:
# Run one example query through the RAG chain
response = rag_chain.invoke(
    {"input": "Find me an apartment for 3 people with hard wood floors."}
)

In [15]:
# Show the generated answer followed by links to the pages it was drawn from
print(response["answer"])
print("\n\nYou can check out more related properties here: ")

# More than one retrieved chunk can come from the same page, so each source
# URL is listed only once
seenSources = set()
for found in response["context"]:
    source = found.metadata.get("source", "")
    if source in seenSources:
        continue
    seenSources.add(source)
    # Fall back to a placeholder rather than raising KeyError when a document
    # carries no title in its metadata
    print(found.metadata.get("title", "Untitled") + ": " + source)

Here are two apartments that match your criteria of being suitable for 3 people and having hardwood floors:

1. **20 N Congress St Apt 4**
   - **Max Occupancy:** 3
   - **Price:** $700 per bedroom per month
   - **Lease:** May to May
   - **Amenities:** Hardwood floors, 5 person hot tub, central air conditioning, ceramic tiled bath, dishwasher, large closet, lots of natural light, washer and dryer.
   - **Available:** May 2026
   - **Security Deposit:** $700 per person
   - **Note:** No pets allowed.

   *(Source: 20 N Congress, Apr 4)*

2. **12 N Congress St Apt 6**
   - **Max Occupancy:** 3
   - **Price:** $566 per bedroom per month
   - **Lease:** May to May
   - **Amenities:** Hardwood floors, air conditioning, ceramic tiled bath, deck, dishwasher, laundry room, microwave.
   - **Available:** May 2026
   - **Security Deposit:** $566 per person
   - **Note:** No pets allowed.

   *(Source: 12 N Congress, Apt 6)*

Both options are available for 3 people and include hardwood floors.


In [None]:
# Start of testing for model
import deepeval
from deepeval import assert_test
from deepeval.test_case import LLMTestCase
from deepeval.metrics import AnswerRelevancyMetric

# Questions sent to the RAG chain, one per test case
inputs = [
    "Help me find a 3 bedroom apartment with a new bathroom and high ceilings.",
    "Help me find an apartment that has a washer, dryer, front porch, is 600-700 a month, and has air conditioning.",
    "Are there any apartments with 8 bedrooms and hard wood floors?"
]

# Property titles expected for the question at the same position in `inputs`
expected_outputs = [
    "12 N Congress St Apt 1, 12 N Congress St Apt 4, 12 N Congress St Apt 5",
    "20 N Congress St Apt 1",
    "36 W State St"
]

# The two lists are paired by position, so they must stay the same length
assert len(inputs) == len(expected_outputs), "each input needs an expected output"

# Running through all tests to evaluate accuracy
tests = []
for testInput, expectedOutput in zip(inputs, expected_outputs):
    # Getting response from model (kept in its own name so the `response`
    # from the earlier example query is not overwritten)
    testResponse = rag_chain.invoke({"input": testInput})
    retrievedDocs = testResponse["context"]

    test_case = LLMTestCase(
        # Each case carries its own question, not always the first one
        input=testInput,
        expected_output=expectedOutput,
        # The titles of the retrieved documents are what gets scored
        actual_output=" ".join(doc.metadata["title"] for doc in retrievedDocs),
        retrieval_context=[str(doc) for doc in retrievedDocs],
    )

    tests.append(test_case)


In [None]:
# Answer Relevancy -- fails on 3
# assert_test raises AssertionError for a failing case, which would abort the
# loop at the first failure; catching it lets every test case be scored.
for caseNumber, test in enumerate(tests, start=1):
    try:
        assert_test(test, [AnswerRelevancyMetric()])
    except AssertionError as failure:
        print(f"Test {caseNumber} failed AnswerRelevancyMetric: {failure}")
    else:
        print(f"Test {caseNumber} passed AnswerRelevancyMetric")

In [None]:
from deepeval.metrics import FaithfulnessMetric
from deepeval.metrics import ContextualRecallMetric
from deepeval.metrics import ContextualPrecisionMetric
from deepeval.metrics import ContextualRelevancyMetric

In [None]:
# Faithfulness -- all pass
# assert_test raises AssertionError for a failing case, which would abort the
# loop at the first failure; catching it lets every test case be scored.
for caseNumber, test in enumerate(tests, start=1):
    try:
        assert_test(test, [FaithfulnessMetric()])
    except AssertionError as failure:
        print(f"Test {caseNumber} failed FaithfulnessMetric: {failure}")
    else:
        print(f"Test {caseNumber} passed FaithfulnessMetric")

In [None]:
# Contextual Recall -- fails on 3rd
# assert_test raises AssertionError for a failing case, which would abort the
# loop at the first failure; catching it lets every test case be scored.
for caseNumber, test in enumerate(tests, start=1):
    try:
        assert_test(test, [ContextualRecallMetric()])
    except AssertionError as failure:
        print(f"Test {caseNumber} failed ContextualRecallMetric: {failure}")
    else:
        print(f"Test {caseNumber} passed ContextualRecallMetric")

In [None]:
# Contextual Precision -- fail
# assert_test raises AssertionError for a failing case, which would abort the
# loop at the first failure; catching it lets every test case be scored.
for caseNumber, test in enumerate(tests, start=1):
    try:
        assert_test(test, [ContextualPrecisionMetric()])
    except AssertionError as failure:
        print(f"Test {caseNumber} failed ContextualPrecisionMetric: {failure}")
    else:
        print(f"Test {caseNumber} passed ContextualPrecisionMetric")

In [None]:
# ContextualRelevancyMetric -- fail on 2
# assert_test raises AssertionError for a failing case, which would abort the
# loop at the first failure; catching it lets every test case be scored.
for caseNumber, test in enumerate(tests, start=1):
    try:
        assert_test(test, [ContextualRelevancyMetric()])
    except AssertionError as failure:
        print(f"Test {caseNumber} failed ContextualRelevancyMetric: {failure}")
    else:
        print(f"Test {caseNumber} passed ContextualRelevancyMetric")