In [None]:
# Install the required libraries into the kernel's environment.
# NOTE(review): versions are not pinned; consider pinning them for reproducibility.
%pip install pandas scikit-learn
%pip install matplotlib
%pip install numpy

In [None]:
# Import the libraries and load the train and test sets
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Load the training and test splits from the CSV files in the working directory.
train, test = (
    pd.read_csv(csv_path) for csv_path in ("evler-train.csv", "evler-test.csv")
)

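Veri setlerindeki sütunlar sırasıyla şunlardır: `evNo, fiyat, brutM2, netM2, oda, salon, yas, kat, katMaks, dogalGaz, banyo, amerikan, balkon, asansor, otopark, esya, siteMi, aidat, guneyMi`. Tahmin edilecek hedef değişken `fiyat` sütunudur; `evNo` sütunu ise modellere girdi olarak verilmez.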

In [None]:
# Import the four regression models that will be compared
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

In [None]:
# Linear regression model: every column except the target ("fiyat") and the
# "evNo" column is used as a feature.
lr_model = LinearRegression().fit(
    train.drop(["fiyat", "evNo"], axis=1),
    train["fiyat"],
)
lr_predictions = lr_model.predict(test.drop(["fiyat", "evNo"], axis=1))

In [None]:
# DecisionTreeRegressor model.
# A fixed random_state makes the fitted tree (and therefore every metric and
# plot derived from it) reproducible across Restart & Run All.
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(train.drop(columns=["fiyat", "evNo"]), train["fiyat"])
dt_predictions = dt_model.predict(test.drop(columns=["fiyat", "evNo"]))

In [None]:
# RandomForestRegressor model.
# The forest is built from random bootstrap samples, so a fixed random_state is
# needed for the predictions and metrics to be reproducible between runs.
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(train.drop(columns=["fiyat", "evNo"]), train["fiyat"])
rf_predictions = rf_model.predict(test.drop(columns=["fiyat", "evNo"]))

In [None]:
# SVR model with default hyper-parameters.
# NOTE(review): features and target are passed to SVR unscaled; SVMs are
# generally scale-sensitive, so consider standardising them — confirm with the
# metrics table before relying on this model.
svr_model = SVR().fit(
    train.drop(["fiyat", "evNo"], axis=1),
    train["fiyat"],
)
svr_predictions = svr_model.predict(test.drop(["fiyat", "evNo"], axis=1))

In [None]:
# Evaluate the models with four different metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, median_absolute_error
def evaluate_model(true_values, predictions):
    """Score predictions against the true values with four regression metrics.

    Args:
        true_values: Ground-truth target values.
        predictions: Model predictions, aligned with ``true_values``.

    Returns:
        A dict with the keys "MAE", "MSE", "R2" and "MedAE", holding the mean
        absolute error, mean squared error, R^2 score and median absolute
        error respectively.
    """
    scorers = (
        ("MAE", mean_absolute_error),
        ("MSE", mean_squared_error),
        ("R2", r2_score),
        ("MedAE", median_absolute_error),
    )
    return {label: scorer(true_values, predictions) for label, scorer in scorers}
# Score every model's predictions against the true test prices.
lr_results = evaluate_model(test["fiyat"], lr_predictions)
dt_results = evaluate_model(test["fiyat"], dt_predictions)
rf_results = evaluate_model(test["fiyat"], rf_predictions)
svr_results = evaluate_model(test["fiyat"], svr_predictions)

# Show the results as a table: one row per model, one column per metric.
num_cols = ["MAE", "MSE", "R2", "MedAE"]
results_df = pd.DataFrame(
    [
        {"Model": model_name, **{metric: scores[metric] for metric in num_cols}}
        for model_name, scores in (
            ("Linear Regression", lr_results),
            ("Decision Tree", dt_results),
            ("Random Forest", rf_results),
            ("SVR", svr_results),
        )
    ],
    columns=["Model"] + num_cols,
)

# Format the numeric columns for a more readable display. A per-column
# Series.map is used instead of DataFrame.applymap, which is deprecated in
# recent pandas releases; Series.map behaves the same on old and new versions.
display_df = results_df.copy()
display_df[num_cols] = display_df[num_cols].apply(
    lambda column: column.map(lambda x: f"{x:,.2f}")
)

print(display_df)

In [None]:
# Set numpy's summarisation threshold for printed arrays.
# NOTE: 1000 is numpy's documented default, so this line only resets the
# option; raise the value if long arrays must be printed in full.
np.set_printoptions(threshold=1000)

# Bar chart comparing each model's predictions with the actual values.
plt.figure(figsize=(16, 8))

# Actual values and one x position per test sample.
actual = test["fiyat"].values
x_pos = np.arange(len(actual))

# Width of a single bar.
bar_width = 0.15

# (legend label, values, colour) for every bar drawn per test sample.
bar_series = [
    ('Gerçek Değer', actual, 'black'),
    ('Linear Regression', lr_predictions, 'blue'),
    ('Decision Tree', dt_predictions, 'green'),
    ('Random Forest', rf_predictions, 'orange'),
    ('SVR', svr_predictions, 'red'),
]

# Offsets are symmetric around zero (-2, -1, 0, +1, +2 bar widths) so that each
# group of bars is centred on its x tick instead of being shifted to the right.
centre_slot = (len(bar_series) - 1) / 2
for slot, (label, values, color) in enumerate(bar_series):
    plt.bar(
        x_pos + (slot - centre_slot) * bar_width,
        values,
        bar_width,
        label=label,
        color=color,
        alpha=0.7,
    )

plt.xlabel('Test Örnekleri')
plt.ylabel('Fiyat')
plt.title('Model Tahminlerinin Gerçek Değerlerle Karşılaştırılması')
# Label the samples 1..N rather than with their zero-based positions.
plt.xticks(x_pos, range(1, len(actual) + 1))
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# Compare the results and pick the best model: the one with the lowest MAE
# (on ties, the model listed first wins).
results = dict(
    zip(
        ("Linear Regression", "Decision Tree", "Random Forest", "SVR"),
        (lr_results, dt_results, rf_results, svr_results),
    )
)
best_model, _ = min(results.items(), key=lambda entry: entry[1]["MAE"])
print("En İyi Model:", best_model, "with MAE:", results[best_model]["MAE"])

In [None]:
# Print the best model's intercept, standard error and coefficients.

# Formatting helpers.
def fmt_float(x):
    """Format a number with thousands separators and 2 decimal places."""
    return f"{x:,.2f}"


def fmt_coef(x):
    """Format a number with thousands separators and 4 decimal places."""
    return f"{x:,.4f}"


def fmt_array(arr, fmt=fmt_coef):
    """Format a scalar or array-like as a comma-separated string.

    Each element is converted to ``float`` and rendered with ``fmt``.
    """
    rendered = [fmt(float(value)) for value in np.atleast_1d(arr)]
    return ", ".join(rendered)

# Training features and target, computed once and shared by every branch.
train_features = train.drop(columns=["fiyat", "evNo"])
train_target = train["fiyat"]

# Map the display name stored in `best_model` to its fitted estimator.
fitted_models = {
    "Linear Regression": lr_model,
    "Decision Tree": dt_model,
    "Random Forest": rf_model,
    "SVR": svr_model,
}
selected_model = fitted_models.get(best_model)

if selected_model is not None:
    print("En İyi Model:", best_model)

    # Spread of the training residuals (prediction - actual). It is computed
    # for every model, including SVR, so all four reports are consistent.
    residuals = selected_model.predict(train_features) - train_target
    std_err = float(np.std(residuals))

    if best_model == "Linear Regression":
        intercept = float(lr_model.intercept_)
        coefs = lr_model.coef_
        # Feature names in training-column order, so they line up with coef_.
        feature_names = list(train_features.columns)
        print("Intercept:", fmt_float(intercept))
        print("Coefficients:")
        # Print each coefficient with a 1-based sequence number and its feature name.
        for idx, (name, c) in enumerate(zip(feature_names, coefs), start=1):
            print(f"  {idx}. {name}: {fmt_coef(c)}")

    elif best_model == "SVR":
        # intercept_ and support_vectors_ are read defensively via hasattr.
        if hasattr(svr_model, "intercept_"):
            intercepts = np.atleast_1d(svr_model.intercept_)
            print("Intercept(s):", fmt_array(intercepts, fmt=fmt_float))
        else:
            print("Intercept: N/A for SVR")
        if hasattr(svr_model, "support_vectors_"):
            sv = svr_model.support_vectors_
            print("Support vectors count:", sv.shape[0])
            # Show at most the first 5 support vectors.
            n_show = min(5, sv.shape[0])
            print(f"First {n_show} support vectors:")
            for i in range(n_show):
                print(" ", fmt_array(sv[i], fmt=fmt_coef))
        else:
            print("Support Vectors: N/A")

    else:
        # The tree-based models have no intercept or coefficient attributes to report.
        print(f"Intercept: N/A for {best_model}")
        print(f"Coefficients: N/A for {best_model}")

    print("Standard Error (train residuals):", fmt_float(std_err))

In [None]:
# Plot the linear-regression coefficients as a bar chart. Only the
# coefficients are drawn, and only when linear regression is the best model.
if best_model != "Linear Regression":
    print("Coefficient plot is only available for Linear Regression model.")
else:
    coef_vals = lr_model.coef_
    feature_names = list(train.drop(columns=["fiyat", "evNo"]).columns)
    # Numbered tick labels such as '1. brutM2', '2. netM2', ...
    numbered_labels = [
        f"{position}. {column}"
        for position, column in enumerate(feature_names, start=1)
    ]

    plt.figure(figsize=(10, 5))
    plt.bar(range(len(coef_vals)), coef_vals)
    plt.title("Linear Regression Coefficients")
    plt.xlabel("Feature")
    plt.ylabel("Coefficient Value")
    # Rotate the feature-name tick labels so they stay readable.
    plt.xticks(
        range(len(feature_names)),
        numbered_labels,
        rotation=45,
        ha="right",
    )
    plt.tight_layout()
    plt.show()