This is a starter notebook for the project, you'll have to import the libraries you'll need, you can find a list of the ones available in this workspace in the requirements.txt file in this workspace. 

In [3]:
!pip install pandas

Defaulting to user installation because normal site-packages is not writeable


In [4]:
import os

os.environ["OPENAI_API_KEY"] = "<YOUR API KEY>"
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"



In [5]:
# from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders.csv_loader import CSVLoader
import pandas as pd

In [6]:
model_name = 'gpt-3.5-turbo'
llm = ChatOpenAI(model=model_name, temperature=0,max_tokens=2000)

### Synthetic Data Generation
### (No need to run this section again as synthetic data has already been generated and saved to csv file here: data/property_listings.csv)

In [7]:
prompt_template = """Generate {num_listings}  Property listings in csv format. Each listing must contain these fields in the csv:
 Neighborhood, Price, Size in Sqft, Number of Bedrooms, Number of Bathrooms, Property Age, Description, Neighborhood Description. Generate multiple listings for each location with varying Property sizes.
Only csv format is acceptable. Use semicolon as a separator.
"""

synthetic_data_prompt = PromptTemplate.from_template(prompt_template)

In [8]:
listings = llm.predict(synthetic_data_prompt.format(num_listings=25))

In [10]:
# Save Synthetic data as csv file
with open('data/property_listings.csv', 'w') as f:
    f.write(listings)


### Loading Generated Synthetic data 



In [12]:
# validating the csv in schema in pandas (Note we have to install pandas to run this cell)
df = pd.read_csv('data/property_listings.csv', sep=';')
df

Unnamed: 0,Neighborhood,Price,Size in Sqft,Number of Bedrooms,Number of Bathrooms,Property Age,Description,Neighborhood Description
0,Downtown,500000,1000,2,1,5,Cozy condo in the heart of the city,Vibrant neighborhood with plenty of restaurant...
1,Downtown,750000,1500,3,2,10,Spacious loft with city views,Close to public transportation and nightlife
2,Downtown,1000000,2000,4,3,15,Luxurious penthouse with rooftop terrace,Walking distance to parks and museums
3,Downtown,600000,1200,2,2,8,Modern apartment with balcony,Convenient location near grocery stores and cafes
4,Downtown,900000,1800,3,2,12,Renovated townhouse with private garden,Historic neighborhood with tree-lined streets
5,Midtown,450000,900,2,1,7,Charming bungalow with original details,Family-friendly area with good schools
6,Midtown,650000,1300,3,2,12,Contemporary townhome with open floor plan,Close to shopping centers and public parks
7,Midtown,800000,1600,4,3,20,Colonial-style house with large backyard,Quiet neighborhood with tree-lined streets
8,Midtown,550000,1100,2,2,10,Cozy cottage with updated kitchen,Walking distance to restaurants and cafes
9,Midtown,750000,1500,3,2,15,Modern condo with rooftop deck,Close to public transportation and entertainme...


In [13]:
# Loading Generated Synthetic data
loader = CSVLoader(file_path='data/property_listings.csv')
docs = loader.load()

In [14]:
docs

[Document(page_content='Neighborhood;Price;Size in Sqft;Number of Bedrooms;Number of Bathrooms;Property Age;Description;Neighborhood Description: Downtown;500000;1000;2;1;5;Cozy condo in the heart of the city;Vibrant neighborhood with plenty of restaurants and shops', metadata={'source': 'data/property_listings.csv', 'row': 0}),
 Document(page_content='Neighborhood;Price;Size in Sqft;Number of Bedrooms;Number of Bathrooms;Property Age;Description;Neighborhood Description: Downtown;750000;1500;3;2;10;Spacious loft with city views;Close to public transportation and nightlife', metadata={'source': 'data/property_listings.csv', 'row': 1}),
 Document(page_content='Neighborhood;Price;Size in Sqft;Number of Bedrooms;Number of Bathrooms;Property Age;Description;Neighborhood Description: Downtown;1000000;2000;4;3;15;Luxurious penthouse with rooftop terrace;Walking distance to parks and museums', metadata={'source': 'data/property_listings.csv', 'row': 2}),
 Document(page_content='Neighborhood;P

### Semantic search on Vector DB

In [15]:

splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
split_docs = splitter.split_documents(docs)

embeddings = OpenAIEmbeddings()

db = Chroma.from_documents(split_docs, embeddings)

In [21]:
queries = [
    """Show me some properties in Midtown.""",
    """Find me top properties in West End with atleast 3 bedrooms.""",
    """Recommend properties in Downtown not older than 5 years""",
      
]


top_n_results = 3



In [22]:
def get_search_results(queries):
    answers = []
    for query in queries:
        answer = db.similarity_search(query,k=top_n_results)
        answers.append(answer)
        
    return answers
    

In [23]:
answers = get_search_results(queries)

### Augmented Response Generation

In [32]:
def get_personalized_recommendations(answer):
    
    augmented_query = f"""Give me a personalized property listing based on my preferences: {answer}
    Use creative language to describe the property elaborately using all the provided information (price, size, etc.) according my above preferences but Do not include any information beyond the provided information.
    """
    augmented_response = llm.predict(augmented_query)
    
    return augmented_response
    
    

In [33]:
for i,answer in enumerate(answers):
    
    augmented_response = get_personalized_recommendations(answer)
    print(f'Query {i+1}:\n {queries[i]}\n')
    print(f'Response {i+1}:\n {augmented_response}\n')
    print('_________________________________________')

Query 1:
 Show me some properties in Midtown.

Response 1:
 Welcome to your dream home in Midtown! This modern condo with a rooftop deck is priced at $750,000 and offers 1500 sqft of living space with 3 bedrooms and 2 bathrooms. The property is 15 years old and boasts a contemporary design that is sure to impress. 

Situated in a vibrant neighborhood, this condo is conveniently located close to public transportation and entertainment venues, making it perfect for those who enjoy city living. 

But wait, there's more! If you're looking for something a bit different, consider the contemporary townhome priced at $650,000. With 1300 sqft, 3 bedrooms, and 2 bathrooms, this property offers an open floor plan that is ideal for modern living. 

Located in the heart of Midtown, this townhome is just a stone's throw away from shopping centers and public parks, providing the perfect balance of convenience and relaxation. 

And if you're in the market for a larger home, look no further than the co