Pipeline for price prediction for Wrocław

In [None]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from xgboost import XGBRegressor
import pickle

# Load the processed listings and keep only the rows whose city is "wroclaw".
data = pd.read_csv("./../data/processed/1_analysis_rent.csv")
data = data.loc[data["city"] == "wroclaw"]

# Columns removed from the feature matrix (in addition to the target itself).
columns_to_drop = [
    "Unnamed: 0",
    "id",
    "city",
    "latitude",
    "longitude",
    "Period",
    "poiCount",
    "schoolDistance",
    "clinicDistance",
    "postOfficeDistance",
    "kindergartenDistance",
    "restaurantDistance",
    "collegeDistance",
    "pharmacyDistance",
    "ownership",
    "buildingAge",
    "condition",
]
target = "price"

# y is the target column; x is everything that is neither target nor dropped.
y = data[target]
x = data.drop(columns=[target, *columns_to_drop])

# Feature groups, referenced by column name when the preprocessor is built.
# Columns routed through StandardScaler:
numerical = ["squareMeters", "rooms", "floor", "floorCount", "centreDistance"]

# Columns routed through OneHotEncoder:
categorical = [
    "buildingMaterial",
    "hasParkingSpace", "hasBalcony", "hasElevator",
    "hasSecurity", "hasStorageRoom",
]

# Inspect the feature matrix before fitting.
# Only the columns named in `numerical` and `categorical` are passed to the
# ColumnTransformer; with its default remainder="drop" every other column of
# `x` is discarded, so those columns are reported explicitly here instead of
# being dumped value by value.
model_features = numerical + categorical
print(x.columns)
print("Columns in x not used by the model:", list(x.columns.difference(model_features)))

# Levels of each categorical feature - these are the values the one-hot
# encoder will know about, and the spellings inference inputs must use.
for column in categorical:
    print(column, x[column].unique())

# Summary statistics for the numeric features; printing every distinct value
# of a continuous column only floods the cell output.
print(x[numerical].describe())

# Preprocessing: standardise the numerical columns and one-hot encode the
# categorical ones. Categories unseen at fit time are ignored (encoded as all
# zeros) rather than raising.
numeric_step = ("num", StandardScaler(), numerical)
categorical_step = ("cat", OneHotEncoder(handle_unknown="ignore"), categorical)
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

# Pipeline: the preprocessor followed by an XGBRegressor.
xgb_params = {
    "n_estimators": 100,
    "max_depth": 7,
    "learning_rate": 0.05,
    "random_state": 99,
    "subsample": 0.6,
}
pipeline = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("model", XGBRegressor(**xgb_params)),
    ]
)

# Fit on the complete feature matrix and target.
pipeline.fit(x, y)

# Persist the fitted pipeline (preprocessing + model) as a single pickle.
with open("./../ML_models/XGB_pipeline.pkl", "wb") as model_file:
    pickle.dump(pipeline, model_file)



Test ML model

In [None]:
import pickle
import pandas as pd

# Load the fitted pipeline written by the training cell.
# NOTE: pickle.load can execute arbitrary code contained in the file - only
# load pickles produced by this project, never files from an untrusted source.
with open("./../ML_models/XGB_pipeline.pkl", "rb") as f:
    model = pickle.load(f)

# A single example listing. The record carries more keys than the pipeline
# selects; the preprocessor picks the columns it needs by name.
# All yes/no flags are written in lowercase so they are spelled consistently.
# The encoder is built with handle_unknown="ignore", so a spelling it has not
# seen (e.g. "Yes" vs "yes") would be encoded as all zeros without any error.
# Presumably the training data uses lowercase - the check below confirms it.
input_data = pd.DataFrame([{
    "squareMeters": 42.0,
    "rooms": 4,
    "floor": 4,
    "floorCount": 5,
    "buildYear": 2000,
    "centreDistance": 4.3,
    "type": "apartment",
    "buildingMaterial": "brick",
    "pharmacyDistance": 0.1,
    "hasBalcony": "yes",
    "hasSecurity": "yes",
    "ownership": "condominium",
    "restaurantDistance": 0.1,
    "postOfficeDistance": 1.0,
    "schoolDistance": 0.3,
    "clinicDistance": 3,
    "kindergartenDistance": 1.0,
    "condition": "premium",
    "hasParkingSpace": "yes",
    "hasStorageRoom": "no",
    "buildingAge": 25,
    "hasElevator": "yes",
    "collegeDistance": 1.5,
}])

# Report categorical inputs the fitted one-hot encoder does not know, since
# they would otherwise be dropped silently and skew the prediction.
fitted_preprocessor = model.named_steps["preprocessor"]
for step_name, encoder, columns in fitted_preprocessor.transformers_:
    if step_name != "cat":
        continue
    for column, known_levels in zip(columns, encoder.categories_):
        value = input_data[column].iloc[0]
        if value not in set(known_levels):
            print(
                f"Warning: {column}={value!r} was not seen during training "
                f"(known: {list(known_levels)}) and will be ignored by the encoder."
            )

predicted_price = model.predict(input_data)[0]
print(f"Predicted price: {predicted_price:,.2f} PLN")
