In [17]:
import pandas as pd

# 🔹 **Dateipfade**
apartments_path = "/workspaces/ai-applications-fs25/week2/test copy/original_apartment_data_analytics_hs24_with_lat_lon.csv"
minergie_path = "/workspaces/ai-applications-fs25/week2/test copy/plz_häufigkeit.csv"
bfs_data_path = "/workspaces/ai-applications-fs25/week2/test copy/bfs_municipality_and_tax_data.csv"
precomputed_data_path = "/workspaces/ai-applications-fs25/week2/test copy/precomputed_data.csv"

# 🔹 **Daten laden**
apartments = pd.read_csv(apartments_path)
minergie = pd.read_csv(minergie_path)
bfs_data = pd.read_csv(bfs_data_path)

# 🔹 **PLZ als Integer setzen**
apartments['postalcode'] = apartments['postalcode'].astype(int)
minergie['PLZ'] = minergie['PLZ'].astype(int)
bfs_data['bfs_number'] = bfs_data['bfs_number'].astype(int)

# 🔹 **Durchschnittliche Koordinaten pro PLZ berechnen**
coords_avg = apartments.groupby('postalcode')[['lat', 'lon']].mean().reset_index()

# 🔹 **Ortsnamen, Wohnfläche & Zimmeranzahl pro PLZ hinzufügen**
towns_avg = apartments.groupby('postalcode')['town'].first().reset_index()
area_avg = apartments.groupby('postalcode')['area'].mean().reset_index()
rooms_avg = apartments.groupby('postalcode')['rooms'].mean().reset_index()

# 🔹 **Preis pro Quadratmeter berechnen**
apartments['price_per_sqm'] = apartments['price'] / apartments['area']
price_per_sqm_avg = apartments.groupby('postalcode')['price_per_sqm'].mean().reset_index()

# 🔹 **Durchschnittliche Miete berechnen**
avg_rent = apartments.groupby('postalcode')['price'].mean().reset_index()
avg_rent.rename(columns={'price': 'avg_rent'}, inplace=True)

# 🔹 **Minergie-Daten mit BFS-Daten verbinden**
precomputed = coords_avg.merge(minergie, left_on='postalcode', right_on='PLZ', how='left')
precomputed = precomputed.merge(bfs_data, left_on='postalcode', right_on='bfs_number', how='left')
precomputed = precomputed.merge(towns_avg, on='postalcode', how='left')
precomputed = precomputed.merge(area_avg, on='postalcode', how='left')
precomputed = precomputed.merge(rooms_avg, on='postalcode', how='left')
precomputed = precomputed.merge(price_per_sqm_avg, on='postalcode', how='left')
precomputed = precomputed.merge(avg_rent, on='postalcode', how='left')

# 🔹 **Relevante Spalten behalten**
precomputed = precomputed[['postalcode', 'town', 'lat', 'lon', 'Häufigkeit', 'tax_income', 'pop_dens', 'emp', 
                           'area', 'rooms', 'price_per_sqm', 'avg_rent']]
precomputed.rename(columns={'Häufigkeit': 'minergie_anteil'}, inplace=True)

# 🔹 **Preis hinzufügen (Durchschnitt pro PLZ aus Apartments-Daten)**
price_avg = apartments.groupby('postalcode')['price'].mean().reset_index()
precomputed = precomputed.merge(price_avg, on='postalcode', how='left')

# 🔹 **Fehlende Werte füllen**
precomputed.fillna(0, inplace=True)

# 🔹 **Speichern der neuen Datei**
precomputed.to_csv(precomputed_data_path, index=False)

print("✅ `precomputed_data.csv` wurde aktualisiert und enthält jetzt `price_per_sqm` und `avg_rent`!")
print(precomputed.head())


✅ `precomputed_data.csv` wurde aktualisiert und enthält jetzt `price_per_sqm` und `avg_rent`!
   postalcode     town        lat       lon  minergie_anteil  tax_income  \
0        8001   Zürich  47.371268  8.545946                8           0   
1        8002   Zürich  47.362111  8.532543               46           0   
2        8003   Zürich  47.369856  8.518263               28           0   
3        8004   Zürich  47.376763  8.522542               60           0   
4        8005   Zürich  47.387668  8.521253               67           0   

   pop_dens  emp        area     rooms  price_per_sqm     avg_rent  \
0       0.0  0.0   84.285714  3.142857      41.069317  2919.000000   
1       0.0  0.0  115.416667  3.958333      44.844142  4789.583333   
2       0.0  0.0   59.105263  2.447368      56.221159  3138.210526   
3       0.0  0.0   56.782609  2.434783      65.465329  3249.608696   
4       0.0  0.0   97.375000  2.937500      46.676081  3785.125000   

         price  
0  2919.000

  precomputed.fillna(0, inplace=True)


In [24]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# 🔹 **Dateipfade**
precomputed_data_path = "/workspaces/ai-applications-fs25/week2/test copy/precomputed_data.csv"
model_output_path = "/workspaces/ai-applications-fs25/week2/test copy/price_prediction_model.pkl"

# 🔹 **Daten laden**
print("📥 Lade `precomputed_data.csv`...")
precomputed = pd.read_csv(precomputed_data_path)

# 🔹 **Features & Zielvariable definieren**
features = ['lat', 'lon', 'area', 'rooms', 'minergie_anteil', 'tax_income', 
            'pop_dens', 'emp', 'price_per_sqm', 'avg_rent']

target = 'price'

# 🔹 **Überprüfung der Spalten**
missing_columns = [col for col in features + [target] if col not in precomputed.columns]
if missing_columns:
    raise ValueError(f"🚨 Fehlende Spalten in den Daten: {missing_columns}")

# 🔹 **Daten splitten (Train / Test)**
X_train, X_test, y_train, y_test = train_test_split(
    precomputed[features], precomputed[target], test_size=0.2, random_state=42
)

# 🔹 **Modell initialisieren & trainieren**
print("⚡ Training startet...")
from sklearn.ensemble import GradientBoostingRegressor
model = GradientBoostingRegressor(n_estimators=200, learning_rate=0.05, max_depth=5, random_state=42)

model.fit(X_train, y_train)
print("✅ Training abgeschlossen!")

# 🔹 **Modell evaluieren**
y_pred = model.predict(X_test)
import numpy as np
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
from sklearn.metrics import r2_score
r2 = r2_score(y_test, y_pred)

print(f"📊 Modell-Performance:\nRMSE: {rmse:.2f}\nR²: {r2:.2f}")

# 🔹 **Modell speichern**
joblib.dump(model, model_output_path)
print(f"💾 Modell gespeichert unter: {model_output_path}")


📥 Lade `precomputed_data.csv`...
⚡ Training startet...
✅ Training abgeschlossen!
📊 Modell-Performance:
RMSE: 80.53
R²: 0.99
💾 Modell gespeichert unter: /workspaces/ai-applications-fs25/week2/test copy/price_prediction_model.pkl


In [None]:
import gradio as gr
import pandas as pd
import joblib

# 🔹 **Dateipfade**
model_path = "/workspaces/ai-applications-fs25/week2/test copy/price_prediction_model.pkl"
precomputed_data_path = "/workspaces/ai-applications-fs25/week2/test copy/precomputed_data.csv"

# 🔹 **Modell & Daten laden**
print("📥 Lade Modell...")
model = joblib.load(model_path)

print("📥 Lade vorverarbeitete Daten...")
precomputed = pd.read_csv(precomputed_data_path)

# 🔹 **Sicherstellen, dass `town` eindeutige Werte hat**
precomputed = precomputed.groupby('town', as_index=False).mean()

# 🔹 **Daten als Dictionary für schnellen Zugriff**
precomputed_dict = precomputed.set_index('town').to_dict(orient='index')

# 🔹 **Dropdown-Liste mit Gemeindenamen**
gemeinde_options = sorted(precomputed['town'].unique())

# 🔹 **Beschreibung der App**
beschreibung = """
🏡 **Wohnungspreis-Vorhersage mit Minergie & Steuerdaten**

🔹 Wähle **Wohnfläche (m²)**, **Zimmeranzahl** und **Gemeinde**  
🔹 Die App berechnet den erwarteten **Wohnungspreis** auf Basis von:
  - Wohnfläche
  - Anzahl der Zimmer
  - Standortdaten
  - Steuer- & Minergie-Informationen

⚡ **Schnelle und genaue Vorhersage in Echtzeit!**
"""

# 🔹 **Vorhersagefunktion**
def predict_price(area, rooms, town):
    if town not in precomputed_dict:
        return "🚨 Fehler: Keine Daten für diese Gemeinde verfügbar."
    
    data = precomputed_dict[town]

    # 🔹 **Modell-Eingaben vorbereiten**
    input_data = pd.DataFrame([{
        'area': area,
        'rooms': rooms,
        'lat': data['lat'],
        'lon': data['lon'],
        'minergie_anteil': data['minergie_anteil'],
        'tax_income': data['tax_income'],
        'price_per_sqm': data['price_per_sqm'],
        'avg_rent': data['avg_rent']
    }])

    # 🔹 **Modell-Vorhersage**
    predicted_price = model.predict(input_data)[0]

    return f"🏡 Erwarteter Wohnungspreis in {town}: **CHF {predicted_price:.2f}**"

# 🔹 **Gradio Interface**
iface = gr.Interface(
    fn=predict_price,
    inputs=[
        gr.Number(label="Wohnfläche (m²)", value=50),
        gr.Number(label="Zimmeranzahl", value=2),
        gr.Dropdown(choices=gemeinde_options, label="Gemeinde")
    ],
    outputs="text",
    title="🏡 Wohnungspreis-Vorhersage",
    description=beschreibung,
    theme="huggingface"
)

iface.launch()


📥 Lade Modell...
📥 Lade vorverarbeitete Daten...



Sorry, we can't find the page you are looking for.


* Running on local URL:  http://127.0.0.1:7867

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "/home/codespace/.python/current/lib/python3.12/site-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/codespace/.python/current/lib/python3.12/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/codespace/.python/current/lib/python3.12/site-packages/gradio/blocks.py", line 2108, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/codespace/.python/current/lib/python3.12/site-packages/gradio/blocks.py", line 1655, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/codespace/.local/lib/python3.12/site-packages/anyio/to_thread.py", line 