In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

data = pd.read_csv("Melbourne_housing_FULL.csv")

categorical_columns = ['Suburb', 'Address', 'Type', 'Method', 'SellerG', 'Date', 'CouncilArea', 'Regionname']

for column in categorical_columns:
    most_frequent = data[column].mode()[0]
    data[column].fillna(most_frequent, inplace=True)

price_imputer = SimpleImputer(strategy='mean')
data['Price'] = price_imputer.fit_transform(data[['Price']])


encoder = OneHotEncoder(drop='first', sparse=False)
data_encoded = encoder.fit_transform(data[categorical_columns])

encoded_columns = encoder.get_feature_names_out(categorical_columns)
data_encoded_df = pd.DataFrame(data_encoded, columns=encoded_columns)

data = data.drop(columns=categorical_columns)
data = pd.concat([data, data_encoded_df], axis=1)

X = data.drop(columns=['Price'])
y = data['Price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = HistGradientBoostingRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("Model Değerlendirmesi:")
print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")



Model Değerlendirmesi:
MAE: 257315.7359212513
MSE: 175782214831.24188
RMSE: 419263.8964080283
R2: 0.5591926495399457
