In [9]:
import pandas as pd

# Datensätze einlesen
apartments = pd.read_csv("original_apartment_data_analytics_hs24_with_lat_lon.csv")
minergie = pd.read_csv("ogd91_minergiegebaeude_pro_gemeinde.csv")

# Zusammenführen anhand BFS-Nummer
merged_data = apartments.merge(minergie, left_on='bfs_number', right_on='BfsNumber', how='left')

# Minergie-Feature erstellen
merged_data['minergie_total'] = (
    merged_data['Minergie'] +
    merged_data['Minergie_Eco'] +
    merged_data['Minergie_A'] +
    merged_data['Minergie_A_Eco'] +
    merged_data['Minergie_P'] +
    merged_data['Minergie_P_Eco']
)

# Prüfen auf fehlende Werte und ggf. füllen
merged_data['minergie_total'] = merged_data['minergie_total'].fillna(merged_data['minergie_total'].median())


In [10]:
# Feature-Auswahl
features = merged_data[['area', 'rooms', 'lat', 'lon', 'minergie_total']]
target = merged_data['price']


In [11]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Trainings- und Testdatensatz erstellen
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

model = RandomForestRegressor(
    n_estimators=10,    # sehr wenige Bäume
    max_depth=8,        # geringe Tiefe
    min_samples_split=20,  # weniger Splits
    random_state=42,
    n_jobs=-1           # Parallelisierung
)

model.fit(X_train, y_train)

# Evaluierung
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print(f"RMSE: {rmse:.2f}")
print(f"R²: {r2:.2f}")


RMSE: 915.92
R²: 0.50


In [12]:
import joblib
joblib.dump(model, 'model_minergie.pkl')


['model_minergie.pkl']

In [None]:
import pandas as pd
import joblib
import gradio as gr

# Modell laden
model = joblib.load('model_minergie.pkl')

# Apartments-Daten laden (für Koordinaten)
apartments = pd.read_csv("original_apartment_data_analytics_hs24_with_lat_lon.csv")

# Minergie-Daten laden und aufbereiten
minergie = pd.read_csv("ogd91_minergiegebaeude_pro_gemeinde.csv")
minergie['minergie_total'] = (
    minergie['Minergie'] +
    minergie['Minergie_Eco'] +
    minergie['Minergie_A'] +
    minergie['Minergie_A_Eco'] +
    minergie['Minergie_P'] +
    minergie['Minergie_P_Eco']
)

# Gemeindeliste vorbereiten (Name -> BFS)
gemeinden_df = apartments[['bfs_number', 'town']].drop_duplicates().sort_values('town')
gemeinde_options = dict(zip(gemeinden_df['town'], gemeinden_df['bfs_number']))

# Vorhersagefunktion
def predict_price(area, rooms, gemeinde_name):
    bfs_number = gemeinde_options[gemeinde_name]

    # Minergie-Wert abrufen
    minergie_value = minergie.loc[minergie['BfsNumber'] == bfs_number, 'minergie_total'].values
    minergie_total = minergie_value[0] if len(minergie_value) > 0 else minergie['minergie_total'].mean()

    # Durchschnittliche Koordinaten der Gemeinde berechnen
    coords = apartments.loc[apartments['bfs_number'] == bfs_number, ['lat', 'lon']].mean()

    # Input für das Modell vorbereiten
    input_df = pd.DataFrame([{
        'area': area,
        'rooms': rooms,
        'latitude': coords['lat'],
        'longitude': coords['lon'],
        'minergie_total': minergie_total
    }])

    prediction = model.predict(input_df)[0]
    return f"🟢 Geschätzter Wohnungspreis: CHF {prediction:.2f}"

# Gradio Interface
iface = gr.Interface(
    fn=predict_price,
    inputs=[
        gr.Number(label="Wohnfläche (m²)"),
        gr.Number(label="Zimmeranzahl"),
        gr.Dropdown(choices=list(gemeinde_options.keys()), label="Gemeinde")
    ],
    outputs="text",
    title="📍 Wohnungspreis-Vorhersage mit Minergie-Feature",
    description="Gib Wohnfläche, Zimmeranzahl und Gemeinde ein. Das Modell berechnet den geschätzten Preis basierend auf Gemeindedaten und Minergie-Gebäuden."
)

iface.launch()


* Running on local URL:  http://127.0.0.1:7866

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "/home/codespace/.python/current/lib/python3.12/site-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/codespace/.python/current/lib/python3.12/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/codespace/.python/current/lib/python3.12/site-packages/gradio/blocks.py", line 2108, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/codespace/.python/current/lib/python3.12/site-packages/gradio/blocks.py", line 1655, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/codespace/.local/lib/python3.12/site-packages/anyio/to_thread.py", line 