In [18]:
# Imports and configuration
import pandas as pd
import numpy as np
from nocarz.config import PROCESSED_DATA_DIR, MODELS_DIR
from nocarz.src.advanced_model import AdvancedModel

# Target columns whose per-row "error" is averaged and reported as MAE.
NUMERICAL_COLUMNS = [
    'accommodates',
    'bathrooms',
    'bedrooms',
    'beds',
    'price',
]
# Target columns whose per-row "match" is averaged and reported as accuracy.
CATEGORICAL_COLUMNS = [
    'property_type',
    'room_type',
    'bathrooms_text',
]

In [19]:
# Load the train and test CSV files from the processed-data directory
# (train.csv is read first, then test.csv).
train, test = (
    pd.read_csv(PROCESSED_DATA_DIR / csv_name)
    for csv_name in ("train.csv", "test.csv")
)

# Report the (rows, columns) shape of each frame.
print("Train shape:", train.shape)
print("Test shape:", test.shape)


Train shape: (2205, 13)
Test shape: (552, 13)


In [20]:
# Instantiate an AdvancedModel with default arguments, fit it on the training
# frame, and save it to MODELS_DIR / "advanced_model.pkl".
model = AdvancedModel()
model.fit(train)
model.save(MODELS_DIR / "advanced_model.pkl")


In [21]:
# Rebind `model` to a new AdvancedModel and load MODELS_DIR / "advanced_model.pkl"
# into it, so the evaluation below runs against the saved artifact.
# NOTE(review): the return value of load() is discarded, so this relies on
# load() restoring state in place — confirm against AdvancedModel.
# NOTE(review): the ".pkl" suffix suggests a pickle file; presumably it should
# only ever be loaded from a trusted location — verify.
model = AdvancedModel()
model.load(MODELS_DIR / "advanced_model.pkl")


In [24]:
def _evaluate_row(row):
    """Evaluate the model's prediction for one row of the test frame.

    The row is converted to a dict of true values first, then passed to
    ``model.predict``; the prediction and the true values are handed to
    ``model.evaluate_predictions`` and its result is returned unchanged.
    """
    true_values = row.to_dict()
    predictions = model.predict(row)
    return model.evaluate_predictions(predictions, true_values)


# One evaluation per test row, in the frame's row order.
# NOTE(review): `row` still carries the ground-truth columns when it is passed
# to predict(), and the recorded output shows MAE 0.00 for accommodates,
# bathrooms, bedrooms and beds — confirm that predict() does not read targets.
results = [_evaluate_row(row) for _, row in test.iterrows()]


In [25]:
# One list of per-row scores for every configured target column.
flat_results = {name: [] for name in [*NUMERICAL_COLUMNS, *CATEGORICAL_COLUMNS]}

for r in results:
    for col, entry in r.items():
        # Entries typed "numerical" contribute their "error"; every other
        # entry contributes its "match".
        score_key = "error" if entry["type"] == "numerical" else "match"
        flat_results[col].append(entry[score_key])

# Mean per-row error for each numerical column.
# presumably "error" is already an absolute error, which is what makes this
# mean an MAE — TODO confirm in AdvancedModel.evaluate_predictions.
print("\nMAE - regresja (AdvancedModel vs test):")
for col in NUMERICAL_COLUMNS:
    mae = np.asarray(flat_results[col]).mean()
    print(f"{col}: {mae:.2f}")

# Mean per-row match for each categorical column, printed as a percentage.
print("\nAccuracy - klasyfikacja (AdvancedModel vs test):")
for col in CATEGORICAL_COLUMNS:
    acc = np.asarray(flat_results[col]).mean()
    print(f"{col}: {acc:.2%}")


MAE - regresja (AdvancedModel vs test):
accommodates: 0.00
bathrooms: 0.00
bedrooms: 0.00
beds: 0.00
price: 0.71

Accuracy - klasyfikacja (AdvancedModel vs test):
property_type: 65.04%
room_type: 82.61%
bathrooms_text: 73.37%
