This is a starter notebook for the project. You'll have to import the libraries you'll need; a list of the ones available in this workspace is in the requirements.txt file.

In [2]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.schema import Document, AIMessage, HumanMessage, SystemMessage
from langchain.memory import ConversationSummaryMemory, ConversationBufferMemory, CombinedMemory, ChatMessageHistory
from langchain.chains import ConversationChain
from langchain.llms import OpenAI

import os

# OpenAI credentials and endpoint, placed in the process environment
# (presumably where the OpenAI-backed classes below pick them up — the
# embeddings object is later constructed without explicit credentials).
# `setdefault` keeps any value that is already exported, so a real key set
# outside the notebook is no longer overwritten by the placeholder.
# Do not commit a real key in place of "XXXXXX".
os.environ.setdefault("OPENAI_API_KEY", "XXXXXX")
os.environ.setdefault("OPENAI_API_BASE", "https://openai.vocareum.com/v1")


# Synthetic Data Generation

In [24]:
# Model for the synthetic-data step: temperature 0.7, max_tokens 2000
model_name = 'gpt-3.5-turbo'
llm = OpenAI(
    model_name=model_name,
    temperature=0.7,
    max_tokens=2000,
)

# Instruction text sent to the model; {num_rows} is its only placeholder
csv_generation_template = """
Generate {num_rows} rows of CSV data with strings enclosed in quotes based on the following structure and restrictions:

Columns and restrictions:
- ID (integer, starting from 1001 incrementing by 1)
- URL (string, URL of the House, format:"https://prea.com/home/ID")
- Neighborhood (string, Neighborhood name of the House)
- Price (integer, Price of the house between 100000 and 1000000)
- Bedrooms (integer, Number of Bedrooms between 1 and 5)
- Bathrooms (integer, Number of Bathrooms between 1 and 5)
- House Size (integer, Size of the house between 1000 and 10000)
- Description (string, a brief summary of the House, 100-150 characters)
- Neighbourhood Description (string, a brief summary of the Neighbourhood, 100-300 characters)

The information should be about various houses and the neighborhood. Provide both positive and negative descriptions of the neighborhood.
Include a header row. Only return the CSV data, nothing else
"""

# Wrap the text in a PromptTemplate so the row count can be supplied at run time
prompt_template = PromptTemplate(
    template=csv_generation_template,
    input_variables=["num_rows"],
)

# Number of listing rows to request from the model
num_rows = 10

# Bind the prompt to the model, run it once, and show the raw CSV text.
# NOTE(review): the generated text is only printed here, not written to disk —
# confirm how the ./listings.csv file loaded later in the notebook is produced.
chain = LLMChain(prompt=prompt_template, llm=llm)
csv_data = chain.run(num_rows=num_rows)
print(csv_data)

"ID","URL","Neighborhood","Price","Bedrooms","Bathrooms","House Size","Description","Neighbourhood Description"
1001,"https://prea.com/home/1001","Sunset Valley",450000,3,2,2500,"Beautiful family home with stunning views of the valley","Sunset Valley is a peaceful neighborhood with friendly neighbors and great schools."
1002,"https://prea.com/home/1002","Pinecrest Heights",780000,4,3,3500,"Luxurious modern mansion with all the amenities","Pinecrest Heights is a prestigious neighborhood known for its upscale homes and exclusive country club."
1003,"https://prea.com/home/1003","Willow Creek",320000,2,1,1500,"Cozy cottage perfect for first-time buyers","Willow Creek is a quaint neighborhood with tree-lined streets and a charming downtown area."
1004,"https://prea.com/home/1004","Riverside Meadows",550000,3,3,2800,"Spacious waterfront property with a private dock","Riverside Meadows offers a serene setting for nature lovers but can be prone to flooding during heavy rains."
1005,"https://pr

# Semantic Search using Vector DB

In [3]:
# Embedding function handed to the vector store
embeddings = OpenAIEmbeddings()

# Model for the question-answering step: temperature 0, max_tokens 2000
model_name = 'gpt-3.5-turbo'
llm = OpenAI(model_name=model_name, max_tokens=2000, temperature=0)

# Load the listings from ./listings.csv (comma-delimited, double-quoted fields)
csv_reader_options = {'delimiter': ',', 'quotechar': '"'}
loader = CSVLoader(file_path='./listings.csv', csv_args=csv_reader_options)
documents = loader.load()

# Build the Chroma collection "csv_docs" from the loaded documents
vectorstore = Chroma.from_documents(
    collection_name="csv_docs",
    embedding=embeddings,
    documents=documents,
)

# Query 1: print only the generated answer, using the retriever's default settings
query = "3 Bedroom home with easy public access"
rag = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)
print(rag.run(query))

print("==========================")

# Query 2: limit retrieval to k=2 documents and return them with the answer,
# so the matched listings are available for further augmentation
query = "Provide details of a home with serene waterfront"
top_two_retriever = vectorstore.as_retriever(search_kwargs={"k": 2})
rag = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=top_two_retriever,
    return_source_documents=True,
)
result = rag({"query": query})

# Show the answer first, then the text of every document it was based on
print("Answer:", result['result'])
print("\nSource Documents:")
for doc in result['source_documents']:
    print(doc.page_content)





ID: 111
URL: https://prea.com/home/111
Neighborhood: Green Oaks
Price: 800000
Bedrooms: 3
Bathrooms: 2
House Size: 2000
Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.
Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze

# Augmented Response Generation

In [12]:
# Model for the augmented-response step: temperature 0.0, max_tokens 1000
model_name = "gpt-3.5-turbo"
temperature = 0.0
llm = OpenAI(
    model_name=model_name,
    temperature=temperature,
    max_tokens=1000,
)



# Function to get user preferences
def get_user_preferences():
    """Greet the buyer and interactively collect their preferences.

    Prints two greeting lines, then asks five questions via ``input``.

    Returns:
        tuple: ``(budget, bedrooms, bathrooms, size, information)`` — the five
        answers as the raw strings returned by ``input``, in the order asked.
    """
    greetings = (
        "AI: Hello, I'm your AI Personalized Real Estate Agent",
        "AI: Please enter your preferences and I will provide the listings that best matches your requirements",
    )
    for line in greetings:
        print(line)

    # Question order defines the order of the returned tuple
    questions = (
        "AI: Your Budget: ",
        "AI: Number of Bedrooms: ",
        "AI: Number of Bathrooms: ",
        "AI: Preferred house size in Sqft: ",
        "AI: How do you like your dream house and neigbourhood to be: ",
    )
    return tuple(input(question) for question in questions)


    
# Prompt text for the augmented response. {history} and {input} are the two
# PromptTemplate variables declared below. The text is assembled line by line
# so the trailing spaces on some lines stay visible.
recommender_template = (
    "\n"
    "The following is a conversation between a Human and an AI Personalized Real Estate Agent.\n"
    "The AI takes Buyer Preferences and augments the description of real estate listings below. \n"
    "The augmentation should personalize the listings without changing details of the listing. \n"
    "\n"
    "Current conversation:\n"
    "{history}\n"
    "\n"
    "Human: {input}\n"
    "AI: \n"
)

prompt = PromptTemplate(
    template=recommender_template,
    input_variables=["history", "input"],
)

# Buffer memory holding the transcript, with "Human"/"AI" as speaker prefixes
memory = ConversationBufferMemory(ai_prefix="AI", human_prefix="Human")

# Conversation chain tying together the model, the memory and the prompt;
# verbose=True is passed so the formatted prompt is echoed when the chain runs
conversation = ConversationChain(
    prompt=prompt,
    memory=memory,
    llm=llm,
    verbose=True,
)
    

# Seed the transcript with the agent's opening line
memory.chat_memory.add_ai_message("Hello, I'm your AI Personalized Real Estate Agent, provide your preferences")

# Collect the buyer's preferences and record them as the human turn.
# The free-text answer is used for both the house and the neighbourhood description.
budget, bedrooms, bathrooms, size, information = get_user_preferences()
user_input  = f"Price: {budget}\nBedrooms:{bedrooms}\nBathrooms: {bathrooms}\nSize: {size}\nDescription: {information}\nNeighbourhood Description: {information}"

memory.chat_memory.add_user_message(user_input)

# Vector search on the buyer preferences to get the two closest listings.
# The store is queried directly: only the matched documents are needed here,
# so there is no reason to run a full question-answering chain (an extra LLM
# call) whose generated answer would be thrown away.
matched_listings = vectorstore.similarity_search(user_input, k=2)

# Separate listings with a blank line; plain concatenation would run the last
# field of one listing straight into the "ID:" field of the next.
listings = "\n\n".join(listing.page_content for listing in matched_listings)

# Record the retrieved listings as the AI turn so they appear in {history}
memory.chat_memory.add_ai_message(listings)

# Main conversation prediction based on the listings from the vector search
response = conversation.predict(input="Please personalize the listings")
print("AI:", response)

AI: Hello, I'm your AI Personalized Real Estate Agent
AI: Please enter your preferences and I will provide the listings that best matches your requirements
AI: Your Budget: 200000
AI: Number of Bedrooms: 2
AI: Number of Bathrooms: 2
AI: Preferred house size in Sqft: 2000
AI: How do you like your dream house and neigbourhood to be: good schools


[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
The following is a conversation between a Human and an AI Personalized Real Estate Agent.
The AI takes Buyer Preferences and augments the description of real estate listings below. 
The augmentation should personalize the listings without changing details of the listing. 

Current conversation:
AI: AI: Hello, I'm your AI Personalized Real Estate Agent, provide your preferences
Human: Price: 200000
Bedrooms:2
Bathrooms: 2
Size: 2000
Description: good schools
Neighbourhood Description: good schools
AI: ID: 1009
URL: https://prea.com/home/1009
Neighborhood: Hi