In [65]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV

from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score,
    mean_absolute_percentage_error,
    make_scorer
)


In [None]:
# Load the raw dataset; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer="katowice.csv")

In [67]:
# Split the frame into the regression target ("price") and the feature matrix
# (every remaining column).
y = data["price"]
X = data.drop(columns="price")

In [69]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [None]:
# Accumulator for one metrics row per evaluated model. Re-running this cell
# clears every row collected so far.
results = list()

In [None]:

def evaluate_model(model, X_train, y_train, X_test, y_test):
    """Fit ``model``, score it on the test split, then print and record the metrics.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict(X)``.
    X_train, y_train : features and target the model is fitted on.
    X_test, y_test : held-out features and target the metrics are computed on.

    Returns
    -------
    dict
        Keys ``R2``, ``MAPE``, ``MAE_norm`` and ``RMSE_norm``. The last two are
        the MAE and RMSE divided by the mean of ``y_test``.

    Side effects
    ------------
    Prints a short report and appends one row to the notebook-level ``results``
    list. In that row the *normalised* errors are stored under the keys
    ``"MAE"`` and ``"RMSE"``.
    """
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)

    # Dividing by the mean test target makes the absolute errors unit-free.
    scale = np.mean(y_test)
    scores = {
        "R2": r2_score(y_test, predicted),
        # Reported as a decimal fraction (0.14 means 14%), not as a percentage.
        "MAPE": mean_absolute_percentage_error(y_test, predicted),
        "MAE_norm": mean_absolute_error(y_test, predicted) / scale,
        "RMSE_norm": np.sqrt(mean_squared_error(y_test, predicted)) / scale,
    }

    model_name = model.__class__.__name__
    report = (
        ("R²", scores["R2"]),
        ("MAPE", scores["MAPE"]),
        ("Znormalizowany MAE", scores["MAE_norm"]),
        ("Znormalizowany RMSE", scores["RMSE_norm"]),
    )
    print("📊 Wyniki modelu:", model_name)
    for label, value in report:
        print(f"{label}: {value:.4f}")

    # One row per call; the comparison table is later built from this list.
    results.append({
        "Model": model_name,
        "MAE": scores["MAE_norm"],
        "RMSE": scores["RMSE_norm"],
        "R2": scores["R2"],
        "MAPE": scores["MAPE"],
    })

    return scores

In [None]:

def evaluate_model(model, X_train, y_train, X_test, y_test):
    """Fit ``model``, score it on the test split, then report and record the metrics.

    This cell redefines ``evaluate_model``, so on a top-to-bottom run it is the
    definition every model cell actually calls. It therefore has to keep the
    reporting side effects: the per-model printed report, and the row appended
    to the notebook-level ``results`` list from which the final comparison
    table (``results_df``) is built. Without the append, ``results`` stays
    empty and sorting that table by ``"R2"`` fails.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict(X)``.
    X_train, y_train : features and target the model is fitted on.
    X_test, y_test : held-out features and target the metrics are computed on.

    Returns
    -------
    dict
        Keys ``R2``, ``MAPE``, ``MAE_norm`` and ``RMSE_norm``. The last two are
        the MAE and RMSE divided by the mean of ``y_test``.
    """
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    # Reported as a decimal fraction (0.14 means 14%), not as a percentage.
    mape = mean_absolute_percentage_error(y_test, y_pred)

    # Normalise the absolute errors by the mean test target so they are unit-free.
    mean_price = np.mean(y_test)
    mae_norm = mae / mean_price
    rmse_norm = rmse / mean_price

    model_name = model.__class__.__name__

    print("📊 Wyniki modelu:", model_name)
    print(f"R²: {r2:.4f}")
    print(f"MAPE: {mape:.4f}")
    print(f"Znormalizowany MAE: {mae_norm:.4f}")
    print(f"Znormalizowany RMSE: {rmse_norm:.4f}")

    # Record one row per call. Note that "MAE" and "RMSE" hold the normalised
    # values, matching the column names of the comparison table.
    results.append({
        "Model": model_name,
        "MAE": mae_norm,
        "RMSE": rmse_norm,
        "R2": r2,
        "MAPE": mape
    })

    return {
        "R2": r2,
        "MAPE": mape,
        "MAE_norm": mae_norm,
        "RMSE_norm": rmse_norm
    }

In [71]:
# Baseline: linear regression on the raw (unscaled) features.
# Named and stored like the other model cells (model_<x> / results_<x>), so the
# returned metrics are kept instead of only being echoed as cell output.
model_lr = LinearRegression()
model = model_lr  # original name stays bound for any later cell that uses `model`
results_lr = evaluate_model(model_lr, X_train, y_train, X_test, y_test)

📊 Wyniki modelu: LinearRegression
R²: 0.7781
MAPE: 0.1443
Znormalizowany MAE: 0.1333
Znormalizowany RMSE: 0.1724


{'R2': 0.7780982263764588,
 'MAPE': 0.14431770359719792,
 'MAE_norm': 0.13325237344959057,
 'RMSE_norm': 0.17242851651181815}

In [72]:
# Random forest with library-default hyperparameters; only the seed is fixed.
model_rf = RandomForestRegressor(
    random_state=42,
)
results_rf = evaluate_model(
    model=model_rf,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

📊 Wyniki modelu: RandomForestRegressor
R²: 0.8161
MAPE: 0.1174
Znormalizowany MAE: 0.1089
Znormalizowany RMSE: 0.1570


In [73]:
# Gradient boosting: 200 trees of depth 4, learning rate 0.1, fixed seed.
model_gb = GradientBoostingRegressor(
    random_state=42,
    max_depth=4,
    learning_rate=0.1,
    n_estimators=200,
)
results_gb = evaluate_model(
    model=model_gb,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

📊 Wyniki modelu: GradientBoostingRegressor
R²: 0.8552
MAPE: 0.1058
Znormalizowany MAE: 0.1006
Znormalizowany RMSE: 0.1393


In [74]:
# XGBoost: 200 trees of depth 6, learning rate 0.1, fixed seed.
model_xgb = XGBRegressor(
    random_state=42,
    max_depth=6,
    learning_rate=0.1,
    n_estimators=200,
)
results_xgb = evaluate_model(
    model=model_xgb,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

📊 Wyniki modelu: XGBRegressor
R²: 0.8495
MAPE: 0.1074
Znormalizowany MAE: 0.0989
Znormalizowany RMSE: 0.1420


In [75]:
# k-nearest neighbours with library-default settings.
# NOTE(review): this cell is fed the unscaled X_train / X_test, whereas the SVR
# and MLP cells use the standardised copies. KNN is distance-based, so this
# may be worth revisiting; confirm against the feature ranges.
model_knn = KNeighborsRegressor()
results_knn = evaluate_model(
    model=model_knn,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

📊 Wyniki modelu: KNeighborsRegressor
R²: 0.7320
MAPE: 0.1598
Znormalizowany MAE: 0.1445
Znormalizowany RMSE: 0.1895


In [76]:
# Standardise the features. The scaler is fitted on the training split only and
# then applied to both splits, so no test-set statistics enter the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [77]:
# RBF-kernel support vector regression on the standardised features.
# NOTE(review): the saved output for this cell shows R² of about -0.03. The
# features are scaled but y_train is passed as-is; presumably the fixed
# C / epsilon do not suit the target's magnitude. Confirm before relying on
# this result.
svr = SVR(
    kernel='rbf',
    gamma='scale',
    C=100,
    epsilon=0.1,
)
results_svr = evaluate_model(
    model=svr,
    X_train=X_train_scaled,
    y_train=y_train,
    X_test=X_test_scaled,
    y_test=y_test,
)

📊 Wyniki modelu: SVR
R²: -0.0269
MAPE: 0.3129
Znormalizowany MAE: 0.2871
Znormalizowany RMSE: 0.3709


In [78]:
# Multi-layer perceptron with three hidden layers (256, 128, 64 units),
# trained on the standardised features with a fixed seed.
model_mlp = MLPRegressor(
    random_state=42,
    max_iter=2000,
    solver='adam',
    learning_rate_init=0.001,
    alpha=0.001,
    activation='relu',
    hidden_layer_sizes=(256, 128, 64),
)
results_mlp = evaluate_model(
    model=model_mlp,
    X_train=X_train_scaled,
    y_train=y_train,
    X_test=X_test_scaled,
    y_test=y_test,
)


📊 Wyniki modelu: MLPRegressor
R²: 0.7723
MAPE: 0.1347
Znormalizowany MAE: 0.1221
Znormalizowany RMSE: 0.1747




In [79]:
# Build the comparison table from the collected rows, best R² first, with a
# fresh 0..n-1 index.
results_df = (
    pd.DataFrame(results)
    .sort_values("R2", ascending=False)
    .reset_index(drop=True)
)

display(results_df)

Unnamed: 0,Model,MAE,RMSE,R2,MAPE
0,GradientBoostingRegressor,0.100567,0.139274,0.855228,0.105818
1,XGBRegressor,0.098881,0.142,0.849506,0.10735
2,RandomForestRegressor,0.108888,0.156982,0.816075,0.117392
3,LinearRegression,0.133252,0.172429,0.778098,0.144318
4,MLPRegressor,0.122066,0.17466,0.772318,0.134676
5,KNeighborsRegressor,0.144534,0.189478,0.732045,0.159824
6,SVR,0.287101,0.370937,-0.026933,0.312903
