This is a starter notebook for the project, you'll have to import the libraries you'll need, you can find a list of the ones available in this workspace in the requirements.txt file in this workspace. 

# REQUIREMENTS

- Set up proxy settings (if required)
- Install all required dependencies using the `requirements.txt` file


In [None]:
import os

# Sets a proxy for the internet communication
proxy = os.environ.get("HTTP_PROXY")
no_proxy = "localhost"

os.environ["HTTP_PROXY"] = proxy
os.environ["HTTPS_PROXY"] = proxy
os.environ["NO_PROXY"] = no_proxy


os.environ["http_proxy"] = proxy
os.environ["https_proxy"] = proxy
os.environ["no_proxy"] = no_proxy


In [None]:
# installs the requirements
%pip install -r requirements.txt

Collecting langchain-community (from -r requirements.txt (line 12))
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community->-r requirements.txt (line 12))
  Using cached dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community->-r requirements.txt (line 12))
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community->-r requirements.txt (line 12))
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community->-r requirements.txt (line 12))
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Downloading langchain_community-0.3.21-py3-none-any.whl (2.5 MB)
   ---------------------------------------- 0.0/2.5 MB ? eta -:--:--
   ---


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Step 1: Setting Up the Python Application

In [None]:
# load environment variables, like OpenAPI Key or Proxy
# for an example look at .env-dist

from dotenv import load_dotenv

load_dotenv()

True

In [3]:
# Define Some Projet Variables
MODEL_NAME = 'gpt-3.5-turbo'
LANCEDB_URI = "lancedb"
NEIGHBORHOOD_TABLE = "neighborhoods"
HOUSE_TABLE = "house"
LISTENING_TABLE = "listenings"
SENTENCE_TRANSFORMER_MODEL = "all-MiniLM-L6-v2"

# STEP 2: GENERATING REAL ESTATE LISTINGS

- Create listings based on a predefined set of neighborhoods
- Each listing is assigned to an existing neighborhood
- Saves tokens by avoiding redundant neighborhood generation
- Store the results in a LanceDB database


In [None]:
# defines the classes for the lancedb
from dataclasses import dataclass, asdict
import lancedb

@dataclass
class Neighborhood:
    name: str
    description: str

@dataclass
class House:
    price: int
    bedrooms: int
    bathrooms: int
    size_sqft: int
    description: str
    neighborhood: Neighborhood

    def to_dict(self) -> dict:
        data = asdict(self)
        # neighborhood auflösen in einzelne Felder
        data["neighborhood_name"] = self.neighborhood.name
        data["neighborhood_description"] = self.neighborhood.description
        # verschachteltes Feld entfernen (falls nötig)
        del data["neighborhood"]
        return data


In [None]:

# Defines with langchain the OpenAI client
import os
from langchain_community.chat_models import ChatOpenAI

openAI = ChatOpenAI(
    model_name=MODEL_NAME,  
    api_key=os.environ["OPENAI_API_KEY"],
    base_url="https://openai.vocareum.com/v1"
)

In [None]:
## Creates 20 Neighboorhoods via ChatOpenAI
# just executed once

# imports

import json
from langchain_community.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain


# variables
num = 20
example_name = "Green Oaks"
example_description = "Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze."

# promt template
prompt = PromptTemplate(
    input_variables=["num", "example_name", "example_description"],
    template="""
Generate a list of {num} real-world neighborhoods in Germany in the following JSON format:

[
  {{ "name": "Name of the neighborhood", "description": "Short description" }},
  ...
]

Here is an example:
[
  {{ "name": "{example_name}", "description": "{example_description}" }}
]

Please return **only** the JSON output, with no comments or explanations.
"""
)


# execute
chain = LLMChain(llm=openAI, prompt=prompt)


response = chain.run(
    num=num,
    example_name=example_name,
    example_description=example_description
)

neighborhood_data = json.loads(response)
neighborhoods = [Neighborhood(**item) for item in neighborhood_data]


neighborhoods

[Neighborhood(name='Kreuzberg', description='Kreuzberg is known for its vibrant arts scene, eclectic mix of residents, and diverse culinary offerings. Explore the street art, enjoy a meal at a trendy restaurant, or relax in one of the many parks scattered throughout the neighborhood.'),
 Neighborhood(name='Prenzlauer Berg', description='Prenzlauer Berg is a trendy and family-friendly neighborhood with charming cobblestone streets, hip cafes, and local boutiques. Take a stroll through Mauerpark on a Sunday afternoon or enjoy a picnic in one of the many green spaces.'),
 Neighborhood(name='Neukölln', description='Neukölln is a multicultural neighborhood with a thriving food and nightlife scene. Sample international cuisine, browse vintage shops, or catch a live music performance at one of the many bars and clubs in the area.'),
 Neighborhood(name='Mitte', description='Mitte is the historical and cultural heart of Berlin, with world-class museums, historic landmarks, and upscale shopping 

In [42]:
# save neighborhoods to db
import lancedb
db = lancedb.connect(LANCEDB_URI) 
table = db.create_table(NEIGHBORHOOD_TABLE, data=neighborhood_data, mode="create")
table.to_pandas().head(10)

Unnamed: 0,name,description
0,Kreuzberg,"Kreuzberg is known for its vibrant arts scene,..."
1,Prenzlauer Berg,Prenzlauer Berg is a trendy and family-friendl...
2,Neukölln,Neukölln is a multicultural neighborhood with ...
3,Mitte,Mitte is the historical and cultural heart of ...
4,Schanzenviertel,Schanzenviertel is a lively and alternative ne...
5,Eimsbüttel,Eimsbüttel is a residential neighborhood in Ha...
6,Altona,Altona is a diverse and multicultural neighbor...
7,Schwabing,Schwabing is a bohemian and upscale neighborho...
8,Glockenbachviertel,Glockenbachviertel is a trendy and LGBTQ-frien...
9,Südstadt,Südstadt is a historic neighborhood in Cologne...


In [45]:
import lancedb
db = lancedb.connect(LANCEDB_URI) 
neighborhood_table = db.open_table(NEIGHBORHOOD_TABLE)
neighborhood_names = neighborhood_table.to_pandas()["name"].tolist()
comma_separated = ", ".join(neighborhood_names)
comma_separated

'Kreuzberg, Prenzlauer Berg, Neukölln, Mitte, Schanzenviertel, Eimsbüttel, Altona, Schwabing, Glockenbachviertel, Südstadt, Ehrenfeld, Linden, Berg am Laim, Sachsenhausen, Bockenheim, St. Georg, Ostend, Ludwigsvorstadt, Nordend, Altstadt'

In [None]:
## Creates for each run 10 houses in an alrerdy defined neighborhood
# executed multiple time 


# imports

import json
from langchain_community.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import lancedb


# variables
num = 10
example_neighborhood = "Kreuzberg"
example_price = 800_000
example_bedrooms = 3
example_bathrooms = 2
example_size_sqft = 2_000
example_description = "Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem."

# promt template
prompt = PromptTemplate(
    input_variables=["num", "example_name", "example_description"],
    template="""
Generate a list of {num} realistic real estate listings in Germany in the following JSON format:

[
  {{
    "neighborhood": "Neighborhood name",
    "price": int,
    "bedrooms": int,
    "bathrooms": int,
    "size_sqft": int,
    "description": "Detailed multi-sentence description of the house."
  }},
  ...
]

Each listing must be located in one of the following neighborhoods:
{neighborhoods}

Example:
Here is an example:
[
  {{ "neighborhood": "{example_neighborhood}", "price": "{example_price}", "bedrooms": "{example_bedrooms}", "bathrooms": "{example_bathrooms}", "size_sqft": "{example_size_sqft}, "description": "{example_description}" }}
]
"""
)


# execute
chain = LLMChain(llm=openAI, prompt=prompt)


response = chain.run(
    num=num,
    neighborhoods=neighborhood_names,
    example_neighborhood=example_neighborhood,
    example_price=example_price,
    example_bedrooms=example_bedrooms,
    example_bathrooms=example_bathrooms,
    example_size_sqft=example_size_sqft,
    example_description=example_description
)

house_data = json.loads(response)

for item in house_data:

  houses = [
      House(
          price=item["price"],
          bedrooms=item["bedrooms"],
          bathrooms=item["bathrooms"],
          size_sqft=item["size_sqft"],
          description=item["description"],
          neighborhood=Neighborhood(name=item["neighborhood"], description="")
      )
      for item in house_data
  ]


house_dicts = [house.to_dict() for house in houses]

db = lancedb.connect(LANCEDB_URI) 
if HOUSE_TABLE in db.table_names():
  house_table = db.open_table(HOUSE_TABLE)
  house_table.add(house_dicts)
else:
  house_table = db.create_table(HOUSE_TABLE, data=house_dicts, mode="create")

house_table.to_pandas().head(10)

Unnamed: 0,price,bedrooms,bathrooms,size_sqft,description,neighborhood_name,neighborhood_description
0,1200000,4,2,1800,"Located in the vibrant neighborhood of Mitte, ...",Mitte,
1,650000,2,1,1200,Located in the trendy neighborhood of Prenzlau...,Prenzlauer Berg,
2,750000,4,2,1800,Situated in the popular neighborhood of Prenzl...,Prenzlauer Berg,
3,900000,3,3,2200,Located in the heart of Berlin's bustling Mitt...,Mitte,
4,600000,2,1,1400,Live the urban lifestyle in this stylish 2-bed...,Neukölln,
5,850000,3,2,2000,"This charming 3-bedroom, 2-bathroom home is lo...",Eimsbüttel,
6,700000,4,2,1600,Nestled in the historic neighborhood of Altona...,Altona,
7,950000,3,2,1800,Experience luxury living in the upscale neighb...,Schwabing,
8,650000,3,2,1500,Located in the bustling neighborhood of Linden...,Linden,
9,800000,2,1,1300,Embrace the historic charm of Altstadt with th...,Altstadt,


# STEP 3: STORING LISTINGS IN A VECTOR DATABASE

- Store the data generated in the previous step into the database
- **Vector Database Setup**: Initialize and configure ChromaDB (or a similar vector database) for storing real estate listings
- **Generating and Storing Embeddings**: 
  - Convert the LLM-generated listings into embeddings that capture their semantic content
  - Store these embeddings in the vector database


In [None]:
# Prints all houses

import lancedb

db = lancedb.connect(LANCEDB_URI)

house_table = db.open_table(HOUSE_TABLE)
house_df = house_table.to_pandas()

print(f"Table: {HOUSE_TABLE}")
print(f"Amount: {len(house_df)}")
print("10 Sets (latest):")

print(house_df.tail(10)) 





Table: house
Amount: 80
10 Sets (latest):
      price  bedrooms  bathrooms  size_sqft  \
70   950000         4          2       1800   
71   600000         2          1       1200   
72  1200000         3          2       2000   
73   850000         3          2       1600   
74   700000         2          1       1400   
75  1100000         4          3       2200   
76   800000         2          2       1500   
77   750000         3          2       1700   
78   680000         2          1       1300   
79   720000         3          2       1600   

                                          description   neighborhood_name  \
70  Situated in the trendy neighborhood of Prenzla...     Prenzlauer Berg   
71  Located in the vibrant neighborhood of Neuköll...            Neukölln   
72  In the heart of Berlin's city center, this lux...               Mitte   
73  This charming 3-bedroom, 2-bathroom home in Ei...          Eimsbüttel   
74  Nestled in the historic district of Altona, th...  

In [None]:
# Prints all neighborhoods

import lancedb

db = lancedb.connect(LANCEDB_URI)

neighborhood_table = db.open_table(NEIGHBORHOOD_TABLE)
neighborhood_df = neighborhood_table.to_pandas()

print(f"Table: {NEIGHBORHOOD_TABLE}")
print(f"Amount: {len(neighborhood_df)}")
print("10 Sets (latest):")

print(neighborhood_df.tail(10)) 



Table: neighborhoods
Amount: 20
10 Sets (latest):
               name                                        description
10        Ehrenfeld  Ehrenfeld is a creative and diverse neighborho...
11           Linden  Linden is a vibrant and alternative neighborho...
12     Berg am Laim  Berg am Laim is a residential neighborhood in ...
13    Sachsenhausen  Sachsenhausen is a historic neighborhood in Fr...
14       Bockenheim  Bockenheim is a student-friendly neighborhood ...
15        St. Georg  St. Georg is a diverse and multicultural neigh...
16           Ostend  Ostend is an up-and-coming neighborhood in Fra...
17  Ludwigsvorstadt  Ludwigsvorstadt is a bustling neighborhood in ...
18          Nordend  Nordend is a diverse and residential neighborh...
19         Altstadt  Altstadt is the historic old town of Düsseldor...


In [31]:
def row_to_text(row) -> str:
    return f"""
    Price: {row['price']} USD
    Bedrooms: {row['bedrooms']}
    Bathrooms: {row['bathrooms']}
    Size: {row['size_sqft']} sqft
    Neighborhood: {row['neighborhood_name']}
    Neighborhood Description: {row['neighborhood_description']}
    Description: {row['description']}
    """.strip()

In [None]:
from sentence_transformers import SentenceTransformer
import pandas as pd
import numpy as np
import pyarrow as pa
import lancedb

# db connection
db = lancedb.connect(LANCEDB_URI)
house_df = house_table.to_pandas()
neighborhood_df = neighborhood_table.to_pandas()

# join tables and merge them
house_df = house_df.drop(columns=["neighborhood_description"], errors="ignore")

merged_df = pd.merge(
    house_df,
    neighborhood_df.rename(columns={
        "name": "neighborhood_name",
        "description": "neighborhood_description"
    }),
    on="neighborhood_name",
    how="left"
)

# creates and saves embedding for each listening
def row_to_text(row):
    return f"""
    Price: {row['price']} USD
    Bedrooms: {row['bedrooms']}
    Bathrooms: {row['bathrooms']}
    Size: {row['size_sqft']} sqft
    Neighborhood: {row['neighborhood_name']}
    Neighborhood Description: {row['neighborhood_description']}
    Description: {row['description']}
    """.strip()
model = SentenceTransformer(SENTENCE_TRANSFORMER_MODEL)

if "embedding" not in merged_df.columns:
    merged_df["text"] = merged_df.apply(row_to_text, axis=1)
    merged_df["embedding"] = merged_df["text"].apply(lambda x: model.encode(x).astype(np.float32).tolist())

print("embedding dtype example:", np.array(merged_df["embedding"].iloc[0]).dtype)
print("embedding length:", len(merged_df["embedding"].iloc[0]))

embedding_matrix = np.array(merged_df["embedding"].tolist(), dtype=np.float32)
embedding_array = pa.FixedSizeListArray.from_arrays(
    pa.array(embedding_matrix.flatten(), type=pa.float32()),
    list_size=embedding_matrix.shape[1]
)

# stores the result in a new tabled called listenings
columns = {
    col: merged_df[col].tolist()
    for col in merged_df.columns
    if col != "embedding"
}
arrow_table = pa.table(columns)

arrow_table = arrow_table.append_column("embedding", embedding_array)

if LISTENING_TABLE in db.table_names():
    db.drop_table(LISTENING_TABLE)

db.create_table(LISTENING_TABLE, data=arrow_table)


embedding dtype example: float64
embedding length: 384
✅ Tabelle 'listings' mit korrektem Embedding gespeichert.


In [None]:
# print the new calculated results

import lancedb

db = lancedb.connect(LANCEDB_URI)

listening_table = db.open_table(LISTENING_TABLE)
listening_df = listening_table.to_pandas()

print(f"Table: {LISTENING_TABLE}")
print(f"Amount: {len(listening_df)}")
print("10 Sets (latest):")

listening_df.tail(10)

Table: listenings
Amount: 80
10 Sets (latest):


Unnamed: 0,price,bedrooms,bathrooms,size_sqft,description,neighborhood_name,neighborhood_description,text,embedding
70,950000,4,2,1800,Situated in the trendy neighborhood of Prenzla...,Prenzlauer Berg,Prenzlauer Berg is a trendy and family-friendl...,Price: 950000 USD\n Bedrooms: 4\n Bathro...,"[0.06086663, 0.021933625, -0.028627103, 0.0605..."
71,600000,2,1,1200,Located in the vibrant neighborhood of Neuköll...,Neukölln,Neukölln is a multicultural neighborhood with ...,Price: 600000 USD\n Bedrooms: 2\n Bathro...,"[0.09165213, -0.008691593, 0.057652894, 0.0547..."
72,1200000,3,2,2000,"In the heart of Berlin's city center, this lux...",Mitte,Mitte is the historical and cultural heart of ...,Price: 1200000 USD\n Bedrooms: 3\n Bathr...,"[0.07124193, -0.048662532, 0.03721692, 0.00960..."
73,850000,3,2,1600,"This charming 3-bedroom, 2-bathroom home in Ei...",Eimsbüttel,Eimsbüttel is a residential neighborhood in Ha...,Price: 850000 USD\n Bedrooms: 3\n Bathro...,"[0.13830097, 0.086828366, -0.0065933717, 0.023..."
74,700000,2,1,1400,"Nestled in the historic district of Altona, th...",Altona,Altona is a diverse and multicultural neighbor...,Price: 700000 USD\n Bedrooms: 2\n Bathro...,"[0.11051974, 0.058648743, 0.015355411, -0.0321..."
75,1100000,4,3,2200,"This spacious 4-bedroom, 3-bathroom villa in S...",Schwabing,Schwabing is a bohemian and upscale neighborho...,Price: 1100000 USD\n Bedrooms: 4\n Bathr...,"[0.10761478, -0.037483577, -0.04301043, -0.023..."
76,800000,2,2,1500,Located in the trendy neighborhood of Glockenb...,Glockenbachviertel,Glockenbachviertel is a trendy and LGBTQ-frien...,Price: 800000 USD\n Bedrooms: 2\n Bathro...,"[0.123856194, -0.022134893, 0.011702026, 0.023..."
77,750000,3,2,1700,"This 3-bedroom, 2-bathroom townhouse in Südsta...",Südstadt,Südstadt is a historic neighborhood in Cologne...,Price: 750000 USD\n Bedrooms: 3\n Bathro...,"[0.15478007, -0.0134061435, -0.020682761, 0.02..."
78,680000,2,1,1300,Situated in the vibrant neighborhood of Ehrenf...,Ehrenfeld,Ehrenfeld is a creative and diverse neighborho...,Price: 680000 USD\n Bedrooms: 2\n Bathro...,"[0.08298603, 0.014732312, 0.015303621, -0.0385..."
79,720000,3,2,1600,"This 3-bedroom, 2-bathroom house in Linden off...",Linden,Linden is a vibrant and alternative neighborho...,Price: 720000 USD\n Bedrooms: 3\n Bathro...,"[0.13862757, -0.038436092, 0.112714335, 0.0222..."


# STEP 4: BUILDING THE USER PREFERENCE INTERFACE

- Collect buyer preferences such as:
  - Number of bedrooms
  - Number of bathrooms
  - Desired location
  - Other specific requirements
- Preferences are hard-coded using predefined questions and answers
- Natural language input is not used in this version


In [74]:
questions = [   
    "How big do you want your house to be?", 
    "What are 3 most important things for you in choosing this property?", 
    "Which amenities would you like?", 
    "Which transportation options are important to you?",
    "How urban do you want your neighborhood to be?",   
]

answers = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."
]



In [None]:
# converts the arrays to an string so OpenAI can work with them

def get_dialog(questions, answers):

    return "\n\n".join(
        f"Question: {q}\nAnswer: {a}" for q, a in zip(questions, answers)
    )


# STEP 5: SEARCHING BASED ON PREFERENCES

- **Semantic Search Implementation**:  
  Use the structured buyer preferences to perform a semantic search on the vector database, retrieving listings that closely match the user's requirements

- **Listing Retrieval Logic**:  
  Fine-tune the retrieval algorithm to prioritize listings based on semantic similarity to the buyer’s preferences


In [None]:
from sentence_transformers import SentenceTransformer
import lancedb
import numpy as np


def get_relevant_listenings(dialog: str):
    embedding_model = SentenceTransformer(SENTENCE_TRANSFORMER_MODEL)
    # transforms the dialog to a vector
    query_vector = embedding_model.encode(dialog, dtype=np.float32)

    db = lancedb.connect(LANCEDB_URI)
    listening_table = db.open_table(LISTENING_TABLE)

    # searchs in the db
    results_df = listening_table.search(query_vector, vector_column_name="embedding").limit(3).to_pandas()

    return results_df


In [None]:
# calls the function and print the results
dialog = get_dialog(questions=questions, answers=answers)
get_relevant_listenings(get_dialog(questions=questions, answers=answers))

Unnamed: 0,price,bedrooms,bathrooms,size_sqft,description,neighborhood_name,neighborhood_description,text,embedding,_distance
0,690000,3,2,1500,Located in the historic neighborhood of Linden...,Linden,Linden is a vibrant and alternative neighborho...,Price: 690000 USD\n Bedrooms: 3\n Bathro...,"[0.17243427, -0.029261168, 0.09592985, 0.01654...",1.055865
1,650000,3,2,1500,Located in the bustling neighborhood of Linden...,Linden,Linden is a vibrant and alternative neighborho...,Price: 650000 USD\n Bedrooms: 3\n Bathro...,"[0.16432923, -0.04172577, 0.10801396, 0.039910...",1.065147
2,500000,3,2,1500,"This spacious 3-bedroom, 2-bathroom loft in th...",Neukölln,Neukölln is a multicultural neighborhood with ...,Price: 500000 USD\n Bedrooms: 3\n Bathro...,"[0.11304807, -0.00034318265, 0.038355477, 0.04...",1.10832


# STEP 6: PERSONALIZING LISTING DESCRIPTIONS

- **LLM Augmentation**:  
  Use a language model to enhance each retrieved listing by tailoring the description to the buyer’s specific preferences  
  - Emphasize features that align with the buyer's interests  
  - Improve clarity and appeal while keeping the tone natural

- **Maintaining Factual Integrity**:  
  Ensure that no factual information is changed during the augmentation process  
  - Only stylistic and emphasis adjustments are made


In [None]:
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from sentence_transformers import SentenceTransformer
import os
import json
import re

def personlizing_listening_descriptions(questions, answers):

    # determine which listenigsn are relevant for the customer
    listenings_df = get_relevant_listenings(get_dialog(questions=questions, answers=answers))

    # logik if no listening is relevant for the customer
    if listenings_df.empty:
        print("Unable to find matching results.")
        raise Exception("We dont listen any object with that aspects")

    # Promt template
    # defines the tasks for the AI -> creat a personlized description and do not alternate facts
    # defines the output -> all relevant data so i can check that the modell dont alternate facts
    # adds a personalized_description with the wanted text
    # inputs the customer needs
    # and inputs the relevant listenings for the customer


    agent_promt_template = ChatPromptTemplate.from_template(
        """For each retrieved listing, augment the description by tailoring it to resonate with the buyer’s specific preferences. Emphasize aspects of the property that align with the buyer's answers without changing any factual information.

Return the result as a valid JSON array.  
Each listing must contain all original keys plus an additional key `"personalized_description"`.

Example format:

[
  {{
    "price": ...,
    "bedrooms": ...,
    "bathrooms": ...,
    "size": "...",
    "neighborhood": "...",
    "neighborhood_description": "...",
    "description": "...",
    "personalized_description": "..."
  }},
  ...
]

    Buyers need’s:
    {answers}

    Information about prefered houses:
    {listings}"""
    )

    # converts the array to a string
    answers_text = "\n---\n".join([
        elem for elem in answers
    ])
    listings_text = "\n---\n".join([
        row["text"] for _, row in listenings_df.head(3).iterrows()
    ])



    # creates the promt
    prompt = agent_promt_template.format(answers=answers_text, listings=listings_text)



    # asks the openai
    openAI = ChatOpenAI(
        model_name=MODEL_NAME,
        api_key=os.environ["OPENAI_API_KEY"],
        base_url="https://openai.vocareum.com/v1"
    )
    response_text = openAI.predict(prompt)  
    # returns the json
    try:
        return json.loads(response_text)
    except:
        return response_text


In [None]:
# print the personlized description for the listenings
personlized_text = personlizing_listening_descriptions(questions, answers)
for counter, elem in enumerate(personlized_text):
    print(f"Listening {counter}: {elem.get('personalized_description')}")

Listening 0: This charming 3-bedroom, 2-bathroom house in Linden is perfect for you. The spacious kitchen with custom cabinets and high-end appliances is ideal for your cooking needs, and the cozy fireplace in the living room provides a warm atmosphere. The backyard garden is perfect for your gardening hobby, and the detached studio can be your home office or guest suite. Enjoy the vibrant and alternative neighborhood of Linden with its trendy bars and quirky shops.
Listening 1: This modern 3-bedroom, 2-bathroom townhouse in Linden is just what you're looking for. The open-concept living area with hardwood floors and a gourmet kitchen is perfect for your taste. The private courtyard offers a cozy outdoor space for relaxation. Explore the vibrant local scene of Linden with trendy cafes, art galleries, and music venues just a short walk away.
Listening 2: You'll love this stylish 3-bedroom, 2-bathroom loft in Neukölln. The industrial chic design with exposed brick walls and high ceilings

# STEP 7: TESTING AND FINALIZING THE "HOMEMATCH" APPLICATION

- **Functionality Testing**:  
  Test the full "HomeMatch" application to ensure it meets all requirements outlined in the rubric  
  - Run the project code end-to-end  
  - Enter different buyer preferences and verify that the application responds correctly and generates relevant listings

- **Code Organization**:  
  - Compile the application either in a Jupyter Notebook or as a standalone Python program  
  - Ensure the code is clean, well-commented, and logically structured

- **Example Outputs**:  
  - Include sample outputs that demonstrate how user preferences are interpreted  
  - Show how personalized listing descriptions are generated  
  - Outputs can be included as comments or saved directly in the Jupyter Notebook with outputs preserved


In [None]:
# fact checking so the model do not alternate facts
# it looks all legit
personlized_text = personlizing_listening_descriptions(questions, answers)
personlized_text

[{'price': 690000,
  'bedrooms': 3,
  'bathrooms': 2,
  'size': '1500 sqft',
  'neighborhood': 'Linden',
  'neighborhood_description': "Linden is a vibrant and alternative neighborhood in Hanover, with a mix of historic buildings, trendy bars, and quirky shops. Explore the streets lined with street art, enjoy a drink at a local pub, or attend a live music event in one of the neighborhood's venues.",
  'description': 'Located in the historic neighborhood of Linden, this 3-bedroom, 2-bathroom house offers a mix of traditional charm and modern amenities. The renovated kitchen features custom cabinets and high-end appliances, while the living room boasts a cozy fireplace and original woodwork. The backyard oasis includes a deck, garden, and detached studio that can be used as a home office or guest suite. With easy access to shops, restaurants, and public transportation, this house is ideal for those seeking a peaceful retreat in the heart of the city.',
  'personalized_description': 'This

In [None]:
# example output
# a shorter interview with other facts
# other listenings gets printed
questions_2 = [   
    "How big do you want your house to be?", 
    "What are 3 most important things for you in choosing this property?", 
    "How urban do you want your neighborhood to be?",   
]

answers_2 = [
    "Something small and efficient, ideally with 2 bedrooms and 1 bathroom.",
    "It should be located in Neukölln, be affordable, and have good access to public transportation.",
    "I prefer an urban lifestyle – Neukölln is perfect with its cafés, nightlife, and multicultural vibe."
]



personlized_text = personlizing_listening_descriptions(questions_2, answers_2)
personlized_text

[{'price': 500000,
  'bedrooms': 1,
  'bathrooms': 1,
  'size': '800 sqft',
  'neighborhood': 'Neukölln',
  'neighborhood_description': 'Neukölln is a multicultural neighborhood with a thriving food and nightlife scene. Sample international cuisine, browse vintage shops, or catch a live music performance at one of the many bars and clubs in the area.',
  'description': 'This cozy 1-bedroom, 1-bathroom apartment in Neukölln is perfect for a young professional or couple looking for a modern space in a vibrant neighborhood. The open kitchen and living area feature sleek finishes and access to a private balcony. The bedroom offers ample closet space and a well-appointed bathroom. Enjoy the lively atmosphere of Neukölln with its eclectic mix of cafes, bars, and galleries.',
  'personalized_description': 'This cozy 1-bedroom, 1-bathroom apartment in Neukölln is perfect for a young professional or couple looking for a modern space in a vibrant neighborhood. With its efficient layout and moder

# Usage
please enter the questions and answers

In [None]:

# Plase enter here your questions and answers
questions_user = [   
    # TODO
]

answers_user = [
    # TODO
]



personlized_text = personlizing_listening_descriptions(questions_user, answers_user)
personlized_text