This is a starter notebook for the project. You'll have to import the libraries you need; the ones available in this workspace are listed in the requirements.txt file.

In [4]:
import os


# Keep an HTTP_PROXY that is already set; otherwise fall back to the default proxy.
proxy = os.environ.get("HTTP_PROXY", "http://sia-lb.telekom.de:8080")
no_proxy = "localhost"

# Export every setting under both its upper-case and lower-case variable name.
for _name, _value in (
    ("HTTP_PROXY", proxy),
    ("HTTPS_PROXY", proxy),
    ("NO_PROXY", no_proxy),
):
    os.environ[_name] = _value
    os.environ[_name.lower()] = _value


In [12]:
# Install the packages listed in requirements.txt (%pip targets the running kernel's environment).
%pip install -r requirements.txt

Collecting langchain-community (from -r requirements.txt (line 12))
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community->-r requirements.txt (line 12))
  Using cached dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community->-r requirements.txt (line 12))
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community->-r requirements.txt (line 12))
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community->-r requirements.txt (line 12))
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Downloading langchain_community-0.3.21-py3-none-any.whl (2.5 MB)
   ---------------------------------------- 0.0/2.5 MB ? eta -:--:--
   ---


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Step 1: Setting Up the Python Application

In [7]:
# Load environment variables (e.g. the OpenAI API key or proxy settings).

from dotenv import load_dotenv

# Last expression of the cell, so the return value of load_dotenv() is displayed.
load_dotenv()

True

In [30]:
# Project-wide settings shared by the cells below.
MODEL_NAME = 'gpt-3.5-turbo'  # chat model name passed to ChatOpenAI
LANCEDB_URI = "lancedb"  # location handed to lancedb.connect()
NEIGHBORHOOD_TABLE = "neighborhoods"  # LanceDB table with the generated neighborhoods
HOUSE_TABLE = "house"  # LanceDB table with the generated house listings

Step 2: Generating Real Estate Listings

In [61]:
from dataclasses import dataclass, asdict
import lancedb

@dataclass
class Neighborhood:
    """A neighborhood, identified by its name, with a free-text description."""

    name: str  # neighborhood name, e.g. "Kreuzberg"
    description: str  # prose description; the house-generation cell passes "" here

@dataclass
class House:
    """A single real estate listing together with the neighborhood it belongs to."""

    price: int
    bedrooms: int
    bathrooms: int
    size_sqft: int
    description: str
    neighborhood: Neighborhood

    def to_dict(self) -> dict:
        """Return the listing as a flat dict.

        The nested ``neighborhood`` entry is replaced by the two top-level keys
        ``neighborhood_name`` and ``neighborhood_description``.
        """
        record = asdict(self)
        # Drop the nested entry; its contents are re-added as flat keys below.
        record.pop("neighborhood")
        record.update(
            neighborhood_name=self.neighborhood.name,
            neighborhood_description=self.neighborhood.description,
        )
        return record


In [25]:

import os
from langchain_community.chat_models import ChatOpenAI

# Chat model client shared by the generation chains in the cells below.
# os.environ[...] raises KeyError when OPENAI_API_KEY is not set.
openAI = ChatOpenAI(
    model_name=MODEL_NAME,  
    api_key=os.environ["OPENAI_API_KEY"],
    base_url="https://openai.vocareum.com/v1"  # custom API endpoint used for all requests
)

In [26]:
# imports

import json
from langchain_community.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain


# variables
num = 20  # number of neighborhoods requested from the model
# One worked example (name + description) that is embedded in the prompt.
example_name = "Green Oaks"
example_description = "Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze."

# prompt template
# Single braces mark input variables; doubled braces ({{ }}) are the escaped form of literal braces.
prompt = PromptTemplate(
    input_variables=["num", "example_name", "example_description"],
    template="""
Generate a list of {num} real-world neighborhoods in Germany in the following JSON format:

[
  {{ "name": "Name of the neighborhood", "description": "Short description" }},
  ...
]

Here is an example:
[
  {{ "name": "{example_name}", "description": "{example_description}" }}
]

Please return **only** the JSON output, with no comments or explanations.
"""
)


# Build the chain from the shared chat model and the neighborhood prompt.
chain = LLMChain(llm=openAI, prompt=prompt)

# Gather the template inputs in one mapping and hand them to the chain.
prompt_inputs = {
    "num": num,
    "example_name": example_name,
    "example_description": example_description,
}
response = chain.run(**prompt_inputs)

# The reply is expected to be a JSON list of objects with "name" and "description".
neighborhood_data = json.loads(response)
neighborhoods = [Neighborhood(**fields) for fields in neighborhood_data]

# Last expression of the cell: display the parsed neighborhoods.
neighborhoods

[Neighborhood(name='Kreuzberg', description='Kreuzberg is known for its vibrant arts scene, eclectic mix of residents, and diverse culinary offerings. Explore the street art, enjoy a meal at a trendy restaurant, or relax in one of the many parks scattered throughout the neighborhood.'),
 Neighborhood(name='Prenzlauer Berg', description='Prenzlauer Berg is a trendy and family-friendly neighborhood with charming cobblestone streets, hip cafes, and local boutiques. Take a stroll through Mauerpark on a Sunday afternoon or enjoy a picnic in one of the many green spaces.'),
 Neighborhood(name='Neukölln', description='Neukölln is a multicultural neighborhood with a thriving food and nightlife scene. Sample international cuisine, browse vintage shops, or catch a live music performance at one of the many bars and clubs in the area.'),
 Neighborhood(name='Mitte', description='Mitte is the historical and cultural heart of Berlin, with world-class museums, historic landmarks, and upscale shopping 

In [42]:
# save neighborhoods to db
import lancedb
# Persist the generated neighborhoods in LanceDB.
# mode="overwrite" replaces a table left over from an earlier run, so this cell
# can be re-executed (e.g. Restart & Run All) without failing on an existing table.
db = lancedb.connect(LANCEDB_URI)
table = db.create_table(NEIGHBORHOOD_TABLE, data=neighborhood_data, mode="overwrite")

# Show the first rows as a quick sanity check of what was stored.
table.to_pandas().head(10)

Unnamed: 0,name,description
0,Kreuzberg,"Kreuzberg is known for its vibrant arts scene,..."
1,Prenzlauer Berg,Prenzlauer Berg is a trendy and family-friendl...
2,Neukölln,Neukölln is a multicultural neighborhood with ...
3,Mitte,Mitte is the historical and cultural heart of ...
4,Schanzenviertel,Schanzenviertel is a lively and alternative ne...
5,Eimsbüttel,Eimsbüttel is a residential neighborhood in Ha...
6,Altona,Altona is a diverse and multicultural neighbor...
7,Schwabing,Schwabing is a bohemian and upscale neighborho...
8,Glockenbachviertel,Glockenbachviertel is a trendy and LGBTQ-frien...
9,Südstadt,Südstadt is a historic neighborhood in Cologne...


In [45]:
import lancedb
# Re-open the stored neighborhoods and read their names back out of the table.
db = lancedb.connect(LANCEDB_URI)
neighborhood_table = db.open_table(NEIGHBORHOOD_TABLE)
neighborhood_frame = neighborhood_table.to_pandas()
neighborhood_names = neighborhood_frame["name"].tolist()

# Human-readable, comma-separated form of the same names; displayed as the cell result.
comma_separated = ", ".join(neighborhood_names)
comma_separated

'Kreuzberg, Prenzlauer Berg, Neukölln, Mitte, Schanzenviertel, Eimsbüttel, Altona, Schwabing, Glockenbachviertel, Südstadt, Ehrenfeld, Linden, Berg am Laim, Sachsenhausen, Bockenheim, St. Georg, Ostend, Ludwigsvorstadt, Nordend, Altstadt'

In [74]:
# imports

import json
from langchain_community.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import lancedb


# variables
# NOTE: `num` and `example_description` rebind the names used by the neighborhood-generation cell.
num = 10  # number of listings requested per run
# Field values of the single example listing embedded in the prompt.
example_neighborhood = "Kreuzberg"
example_price = 800_000
example_bedrooms = 3
example_bathrooms = 2
example_size_sqft = 2_000
# NOTE(review): this text talks about "Green Oaks" although example_neighborhood is "Kreuzberg" — confirm this mismatch is intended.
example_description = "Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem."

# prompt template
# input_variables lists exactly the placeholders used in the template below.
# Single braces mark input variables; doubled braces ({{ }}) are literal braces.
prompt = PromptTemplate(
    input_variables=[
        "num",
        "neighborhoods",
        "example_neighborhood",
        "example_price",
        "example_bedrooms",
        "example_bathrooms",
        "example_size_sqft",
        "example_description",
    ],
    # The example object is valid JSON: numeric fields are unquoted (matching the
    # "int" schema above it) and every string is properly closed. The reply is fed
    # straight into json.loads, hence the explicit "only JSON" instruction.
    template="""
Generate a list of {num} realistic real estate listings in Germany in the following JSON format:

[
  {{
    "neighborhood": "Neighborhood name",
    "price": int,
    "bedrooms": int,
    "bathrooms": int,
    "size_sqft": int,
    "description": "Detailed multi-sentence description of the house."
  }},
  ...
]

Each listing must be located in one of the following neighborhoods:
{neighborhoods}

Here is an example:
[
  {{ "neighborhood": "{example_neighborhood}", "price": {example_price}, "bedrooms": {example_bedrooms}, "bathrooms": {example_bathrooms}, "size_sqft": {example_size_sqft}, "description": "{example_description}" }}
]

Please return **only** the JSON output, with no comments or explanations.
"""
)


# execute
chain = LLMChain(llm=openAI, prompt=prompt)

# The allowed neighborhoods are passed as a comma-separated string; handing over
# the list itself would put its Python repr (brackets and quotes) into the prompt.
response = chain.run(
    num=num,
    neighborhoods=", ".join(neighborhood_names),
    example_neighborhood=example_neighborhood,
    example_price=example_price,
    example_bedrooms=example_bedrooms,
    example_bathrooms=example_bathrooms,
    example_size_sqft=example_size_sqft,
    example_description=example_description
)

# The reply is expected to be a JSON list of listing objects.
house_data = json.loads(response)

# Convert every parsed listing into a House in a single pass. An empty
# house_data simply yields an empty list, so `houses` is always defined.
# The reply only carries the neighborhood name, so the description stays empty.
houses = [
    House(
        price=item["price"],
        bedrooms=item["bedrooms"],
        bathrooms=item["bathrooms"],
        size_sqft=item["size_sqft"],
        description=item["description"],
        neighborhood=Neighborhood(name=item["neighborhood"], description=""),
    )
    for item in house_data
]


# Flatten every listing into a plain dict so it can be stored as a table row.
house_dicts = [listing.to_dict() for listing in houses]

db = lancedb.connect(LANCEDB_URI)
# Create the table when it is missing; otherwise append the new rows to it.
if HOUSE_TABLE not in db.table_names():
    house_table = db.create_table(HOUSE_TABLE, data=house_dicts, mode="create")
else:
    house_table = db.open_table(HOUSE_TABLE)
    house_table.add(house_dicts)

# Show the first rows of the stored table.
house_table.to_pandas().head(10)

Unnamed: 0,price,bedrooms,bathrooms,size_sqft,description,neighborhood_name,neighborhood_description
0,1200000,4,2,1800,"Located in the vibrant neighborhood of Mitte, ...",Mitte,
1,650000,2,1,1200,Located in the trendy neighborhood of Prenzlau...,Prenzlauer Berg,
2,750000,4,2,1800,Situated in the popular neighborhood of Prenzl...,Prenzlauer Berg,
3,900000,3,3,2200,Located in the heart of Berlin's bustling Mitt...,Mitte,
4,600000,2,1,1400,Live the urban lifestyle in this stylish 2-bed...,Neukölln,
5,850000,3,2,2000,"This charming 3-bedroom, 2-bathroom home is lo...",Eimsbüttel,
6,700000,4,2,1600,Nestled in the historic neighborhood of Altona...,Altona,
7,950000,3,2,1800,Experience luxury living in the upscale neighb...,Schwabing,
8,650000,3,2,1500,Located in the bustling neighborhood of Linden...,Linden,
9,800000,2,1,1300,Embrace the historic charm of Altstadt with th...,Altstadt,


Step 3: Storing Listings in a Vector Database
The data was already written to the database in the previous step.

In [76]:
import lancedb

# Open the house table and load it into a DataFrame for inspection.
db = lancedb.connect(LANCEDB_URI)
house_table = db.open_table(HOUSE_TABLE)
house_df = house_table.to_pandas()

# Header lines: table name, row count, caption for the rows that follow.
print(
    f"Table: {HOUSE_TABLE}",
    f"Amount: {len(house_df)}",
    "10 Sets (latest):",
    sep="\n",
)

# Last ten rows of the table.
print(house_df.tail(10))





Table: house
Amount: 80
10 Sets (latest):
      price  bedrooms  bathrooms  size_sqft  \
70   950000         4          2       1800   
71   600000         2          1       1200   
72  1200000         3          2       2000   
73   850000         3          2       1600   
74   700000         2          1       1400   
75  1100000         4          3       2200   
76   800000         2          2       1500   
77   750000         3          2       1700   
78   680000         2          1       1300   
79   720000         3          2       1600   

                                          description   neighborhood_name  \
70  Situated in the trendy neighborhood of Prenzla...     Prenzlauer Berg   
71  Located in the vibrant neighborhood of Neuköll...            Neukölln   
72  In the heart of Berlin's city center, this lux...               Mitte   
73  This charming 3-bedroom, 2-bathroom home in Ei...          Eimsbüttel   
74  Nestled in the historic district of Altona, th...  

In [79]:
import lancedb

# Open the neighborhood table and load it into a DataFrame for inspection.
db = lancedb.connect(LANCEDB_URI)
neighborhood_table = db.open_table(NEIGHBORHOOD_TABLE)
neighborhood_df = neighborhood_table.to_pandas()

# Header lines: table name, row count, caption for the rows that follow.
print(
    f"Table: {NEIGHBORHOOD_TABLE}",
    f"Amount: {len(neighborhood_df)}",
    "10 Sets (latest):",
    sep="\n",
)

# Last ten rows of the table.
print(neighborhood_df.tail(10))



Table: neighborhoods
Amount: 20
10 Sets (latest):
               name                                        description
10        Ehrenfeld  Ehrenfeld is a creative and diverse neighborho...
11           Linden  Linden is a vibrant and alternative neighborho...
12     Berg am Laim  Berg am Laim is a residential neighborhood in ...
13    Sachsenhausen  Sachsenhausen is a historic neighborhood in Fr...
14       Bockenheim  Bockenheim is a student-friendly neighborhood ...
15        St. Georg  St. Georg is a diverse and multicultural neigh...
16           Ostend  Ostend is an up-and-coming neighborhood in Fra...
17  Ludwigsvorstadt  Ludwigsvorstadt is a bustling neighborhood in ...
18          Nordend  Nordend is a diverse and residential neighborh...
19         Altstadt  Altstadt is the historic old town of Düsseldor...


Step 4: Building the User Preference Interface

In [None]:
# Preference questions put to the buyer.
questions = [   
    "How big do you want your house to be?", 
    "What are 3 most important things for you in choosing this property?", 
    "Which amenities would you like?", 
    "Which transportation options are important to you?",
    "How urban do you want your neighborhood to be?",   
]

# Hard-coded sample answers, one per question and in the same order.
answers = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."
]



In [None]:
from langchain.prompts import PromptTemplate

# A template placeholder can only name a variable; it cannot run a loop. The
# question/answer pairs are therefore rendered into one text block here and
# bound to the template as a pre-filled (partial) variable.
# NOTE: the block is built once from the current `questions`/`answers`; re-run
# this cell after changing either list.
user_responses = "\n".join(
    f"Q: {question}\nA: {answer}" for question, answer in zip(questions, answers)
)

# No inputs are left for the caller to supply: the only placeholder,
# {user_responses}, is already filled through partial_variables.
house_prompt = PromptTemplate(
    input_variables=[],
    partial_variables={"user_responses": user_responses},
    template="""
You are a helpful real estate assistant. Based on the following answers from a user, suggest a detailed description of an ideal house that matches their preferences.

User responses:
{user_responses}

Return the Information about the relevant house.
"""
)

# use questions 1 and question 2