# Fetch training data

In [12]:
import requests
import csv

url = "http://hasls6.bfh-webdev.ch:3000/get_apartments"

# A timeout keeps an unreachable server from hanging the notebook forever.
response = requests.get(url, timeout=30)
if response.status_code != 200:
    raise Exception(f"Failed to fetch data from API: {response.text}")

# Decode the JSON body once; every response.json() call re-parses the payload.
apartments = response.json()
if not apartments:
    # Fail with a clear message instead of an IndexError on apartments[0].
    raise ValueError("API returned no apartments")

print(f"Count entries: {len(apartments)}")
print(f"Columns: {apartments[0].keys()}")

# Save as CSV. The csv module requires newline="" on the file object,
# otherwise extra blank rows are written on platforms that translate "\n".
with open("apartments.csv", "w", newline="", encoding="utf-8") as f:
    # The first record's keys define the CSV header.
    writer = csv.DictWriter(f, fieldnames=list(apartments[0].keys()))
    writer.writeheader()
    writer.writerows(apartments)

Count entries: 2870
Columns: dict_keys(['id', 'plz', 'price', 'rooms', 'space'])


# Train model

In [13]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the apartments written by the fetch step.
df = pd.read_csv("apartments.csv")

# Features and target, followed by a reproducible 65/35 train/test split.
X = df[["rooms", "space", "plz"]]
y = df["price"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.35, random_state=42
)

# The postal code is one-hot encoded so it is treated as a category rather
# than a magnitude; rooms and space are forwarded unchanged.
plz_encoder = OneHotEncoder(handle_unknown="ignore")
preprocessor = ColumnTransformer(
    transformers=[
        ("onehot", plz_encoder, ["plz"]),
        ("passthrough", "passthrough", ["rooms", "space"]),
    ]
)

# Preprocessing and regression are chained so they are fitted and applied together.
model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("regressor", LinearRegression()),
    ]
)

# Fit on the training split only.
model.fit(X_train, y_train)

# Predictions for the held-out split; they are scored in the evaluation step.
y_pred = model.predict(X_test)

# Evaluate model

In [14]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# Score the held-out predictions: MSE, its square root (RMSE), MAE and R².
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is in the same unit as the target

# One print call, one metric per line, each rounded to two decimals.
print(
    f"MSE: {mse:.2f}",
    f"RMSE: {rmse:.2f}",
    f"MAE: {mae:.2f}",
    f"R² Score: {r2:.2f}",
    sep="\n",
)

MSE: 748327.04
RMSE: 865.06
MAE: 517.55
R² Score: 0.78


## Manual prediction

In [15]:
# Sanity check: predict the price for a single hand-written listing.
sample_listing = pd.DataFrame([{"rooms": 3.5, "space": 78, "plz": 8057}])
print(model.predict(sample_listing))

[3167.39694649]


# Deploy model

## Deploy model to MLflow tracking server

In [16]:
import mlflow
import mlflow.sklearn
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from mlflow.models.signature import infer_signature

# Set the tracking URI to point to your MLflow server
mlflow.set_tracking_uri("http://135.181.85.42:5000")  # Replace with your actual server address

# Optional: give your experiment a name (creates if doesn't exist)
mlflow.set_experiment("apartment-price-prediction")

# Start an MLflow run; everything logged inside the block belongs to this run.
with mlflow.start_run():

    # Re-score the hold-out split so the logged metrics describe the logged model.
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Log metrics
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("mae", mae)
    mlflow.log_metric("r2", r2)

    # Infer model input/output signature from the hold-out features and predictions.
    signature = infer_signature(X_test, y_pred)

    # Keep the dataset alongside the run.
    mlflow.log_artifact("./apartments.csv")

    # Log the model and register it as a new version of "candidate-model".
    # Only a few rows are stored as the input example: it is serialised into
    # the model artifact, so passing the whole test split just bloats it.
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="predict_model",
        signature=signature,
        input_example=X_test.head(5),
        registered_model_name="candidate-model",
    )

Registered model 'candidate-model' already exists. Creating a new version of this model...
2025/05/19 10:05:11 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: candidate-model, version 2
Created version '2' of model 'candidate-model'.


🏃 View run sincere-pug-2 at: http://135.181.85.42:5000/#/experiments/2/runs/a4d3a22069304ed784126c856b367faf
🧪 View experiment at: http://135.181.85.42:5000/#/experiments/2


## Deploy model to a local MLflow tracking server

In [None]:
import mlflow
import mlflow.sklearn
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from mlflow.models.signature import infer_signature

# Set the tracking URI to point to your MLflow server
mlflow.set_tracking_uri("http://127.0.0.1:8080")  # Replace with your actual server address

# Optional: give your experiment a name (creates if doesn't exist)
mlflow.set_experiment("Predict Apartments")

# Start an MLflow run; everything logged inside the block belongs to this run.
with mlflow.start_run():

    # Re-score the hold-out split so the logged metrics describe the logged model.
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Log metrics
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("mae", mae)
    mlflow.log_metric("r2", r2)

    # Infer model input/output signature from the hold-out features and predictions.
    signature = infer_signature(X_test, y_pred)

    # Log the model. Only a few rows are stored as the input example: it is
    # serialised into the model artifact, so passing the whole test split
    # just bloats it.
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="predict_model",
        signature=signature,
        input_example=X_test.head(5),
        registered_model_name="tracking-quickstart",  # Optional
    )