In [2]:
# =============================================================================
# HÜCRE 1 (Regresyon): GEREKLİ KÜTÜPHANELERİ YÜKLEME
# =============================================================================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

plt.style.use('seaborn-v0_8-whitegrid')
print("✅ All necessary libraries for regression have been successfully loaded.")


✅ All necessary libraries for regression have been successfully loaded.


In [4]:
# =============================================================================
# HÜCRE 2: VERİYİ YÜKLEME VE İLK HAZIRLIK
# =============================================================================
# --- PARAMETRELER (Lütfen burayı kendi dosyanıza göre düzenleyin) ---
FILE_PATH = 'dataFinal.csv'  # <-- DOSYA YOLUNUZU BURAYA YAZIN
TARGET_COLUMN = 'price'            # <-- HEDEF (TAHMİN EDİLECEK) SÜTUNUN ADI

try:
    df = pd.read_csv(FILE_PATH, delimiter=',')
    print(f"✅ '{FILE_PATH}' dosyası başarıyla yüklendi.")
    
    # --- İLK KONTROLLER ---
    print("\n--- Veri Setine İlk Bakış ---")
    display(df.head())
    print("\n--- Veri Seti Bilgileri (Sütunlar, Veri Tipleri) ---")
    df.info()
    
    # --- Özellik (X) ve Hedef (y) Ayırma ---
    X = df.drop(TARGET_COLUMN, axis=1)
    y = df[TARGET_COLUMN]
    print(f"\n✅ Özellikler (X) ve hedef (y) '{TARGET_COLUMN}' sütununa göre ayrıldı.")
    
except FileNotFoundError:
    print(f"❌ HATA: '{FILE_PATH}' dosyası bulunamadı. Lütfen dosya yolunu kontrol edin.")
    df = None
except KeyError:
    print(f"❌ HATA: '{TARGET_COLUMN}' adında bir sütun bulunamadı. Lütfen 'TARGET_COLUMN' değişkenini kontrol edin.")
    X, y = None, None

✅ 'dataFinal.csv' dosyası başarıyla yüklendi.

--- Veri Setine İlk Bakış ---


Unnamed: 0,f1,f2,f3,f4,f5,f6,f7,f8,class
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,5
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,4
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,4
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,4
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,4



--- Veri Seti Bilgileri (Sütunlar, Veri Tipleri) ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   f1      20640 non-null  float64
 1   f2      20640 non-null  float64
 2   f3      20640 non-null  float64
 3   f4      20640 non-null  float64
 4   f5      20640 non-null  float64
 5   f6      20640 non-null  float64
 6   f7      20640 non-null  float64
 7   f8      20640 non-null  float64
 8   class   20640 non-null  int64  
dtypes: float64(8), int64(1)
memory usage: 1.4 MB
❌ HATA: 'price' adında bir sütun bulunamadı. Lütfen 'TARGET_COLUMN' değişkenini kontrol edin.


In [5]:
# =============================================================================
# HÜCRE 3: KEŞİFSEL VERİ ANALİZİ (EDA)
# =============================================================================
if 'df' in locals() and df is not None:
    print("--- 1. Statistical Summary of Numerical Features ---")
    display(df.describe().T)

    print("\n--- 2. Target Variable Distribution ---")
    plt.figure(figsize=(10, 6))
    sns.histplot(df[TARGET_COLUMN], kde=True, bins=30)
    plt.title(f'Distribution of {TARGET_COLUMN}', fontsize=16)
    plt.show()

    print("\n--- 3. Correlation Matrix ---")
    plt.figure(figsize=(12, 8))
    correlation_matrix = df.corr(numeric_only=True)
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f")
    plt.title('Correlation Matrix Between Features', fontsize=16)
    plt.show()
    
    target_correlations = correlation_matrix[TARGET_COLUMN].abs().sort_values(ascending=False)
    print(f"\nFeatures most correlated with '{TARGET_COLUMN}':")
    display(target_correlations.drop(TARGET_COLUMN))
else:
    print("❌ Please run Cell 2 first to load the dataset.")


--- 1. Statistical Summary of Numerical Features ---


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
f1,20640.0,3.870671,1.899822,0.4999,2.5634,3.5348,4.74325,15.0001
f2,20640.0,28.639486,12.585558,1.0,18.0,29.0,37.0,52.0
f3,20640.0,5.429,2.474173,0.846154,4.440716,5.229129,6.052381,141.909091
f4,20640.0,1.096675,0.473911,0.333333,1.006079,1.04878,1.099526,34.066667
f5,20640.0,1425.476744,1132.462122,3.0,787.0,1166.0,1725.0,35682.0
f6,20640.0,3.070655,10.38605,0.692308,2.429741,2.818116,3.282261,1243.333333
f7,20640.0,35.631861,2.135952,32.54,33.93,34.26,37.71,41.95
f8,20640.0,-119.569704,2.003532,-124.35,-121.8,-118.49,-118.01,-114.31
class,20640.0,2.515068,1.146291,1.0,2.0,2.0,3.0,5.0



--- 2. Target Variable Distribution ---


KeyError: 'price'

<Figure size 1000x600 with 0 Axes>

In [6]:
# =============================================================================
# HÜCRE 4: VERİ BÖLME VE ÖLÇEKLENDİRME
# =============================================================================
if 'X' in locals() and X is not None:
    # --- 1. Veriyi Eğitim ve Test Setlerine Ayırma ---
    # Not: Regresyonda 'stratify' kullanılmaz.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    print("--- 1. Data Successfully Split ---")
    print(f"Training Set Shape (X_train): {X_train.shape}")
    print(f"Test Set Shape     (X_test):  {X_test.shape}")

    # --- 2. Veriyi Ölçeklendirme (Standardizasyon) ---
    scaler = StandardScaler()
    numerical_features = X_train.select_dtypes(include=np.number).columns
    
    X_train_scaled = X_train.copy()
    X_test_scaled = X_test.copy()

    X_train_scaled[numerical_features] = scaler.fit_transform(X_train[numerical_features])
    X_test_scaled[numerical_features] = scaler.transform(X_test[numerical_features])
    
    print("\n--- 2. Data Successfully Scaled ---")
else:
    print("❌ Please run Cell 2 first to define X and y.")


❌ Please run Cell 2 first to define X and y.


In [None]:
# =============================================================================
# HÜCRE 5: MODEL EĞİTİMİ VE ANALİZİ - LINEAR REGRESSION
# =============================================================================
model_name = "Linear Regression"
model = LinearRegression()

# --- Genel Model İşleme Kodu ---
print(f"--- Training the {model_name} model ---")
model.fit(X_train_scaled, y_train)
print("✅ Model trained successfully.")

y_test_pred = model.predict(X_test_scaled)

print("\n--- Model Performance (Test Set) ---")
mae = mean_absolute_error(y_test, y_test_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
r2 = r2_score(y_test, y_test_pred)
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-squared (R²): {r2:.4f}")

# --- Gerçek vs. Tahmin Grafiği ---
plt.figure(figsize=(8, 8))
plt.scatter(y_test, y_test_pred, alpha=0.5)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
plt.xlabel("Actual Values")
plt.ylabel("Predicted Values")
plt.title(f"{model_name}: Actual vs. Predicted", fontsize=16)
plt.show()

# --- Özellik Katsayıları Görselleştirme ---
print("\n--- Feature Coefficients ---")
coefficients = pd.DataFrame({
    'Feature': X_train.columns,
    'Coefficient': model.coef_
}).sort_values(by='Coefficient', key=abs, ascending=False)

plt.figure(figsize=(10, 6))
sns.barplot(x='Coefficient', y='Feature', data=coefficients, palette='coolwarm')
plt.title(f'{model_name} - Feature Coefficients', fontsize=14)
plt.show()


In [None]:
# =============================================================================
# HÜCRE 6: MODEL EĞİTİMİ VE ANALİZİ - RANDOM FOREST REGRESSOR
# =============================================================================
model_name = "Random Forest Regressor"
model = RandomForestRegressor(n_estimators=100, random_state=42)

# --- Genel Model İşleme Kodu ---
print(f"--- Training the {model_name} model ---")
model.fit(X_train_scaled, y_train)
print("✅ Model trained successfully.")

y_test_pred = model.predict(X_test_scaled)

print("\n--- Model Performance (Test Set) ---")
mae = mean_absolute_error(y_test, y_test_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
r2 = r2_score(y_test, y_test_pred)
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-squared (R²): {r2:.4f}")

# --- Gerçek vs. Tahmin Grafiği ---
plt.figure(figsize=(8, 8))
plt.scatter(y_test, y_test_pred, alpha=0.5)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
plt.xlabel("Actual Values")
plt.ylabel("Predicted Values")
plt.title(f"{model_name}: Actual vs. Predicted", fontsize=16)
plt.show()

# --- Özellik Önemi Grafiği ---
print("\n--- Feature Importance ---")
importances = pd.DataFrame({
    'Feature': X_train.columns,
    'Importance': model.feature_importances_
}).sort_values(by='Importance', ascending=False)

plt.figure(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=importances, palette='viridis')
plt.title(f'{model_name} - Feature Importances', fontsize=14)
plt.show()


In [None]:
# =============================================================================
# HÜCRE 7: MODEL EĞİTİMİ VE ANALİZİ - SUPPORT VECTOR REGRESSOR (SVR)
# =============================================================================
model_name = "Support Vector Regressor (SVR)"
model = SVR(kernel='rbf', C=1.0, epsilon=0.1)

# --- Genel Model İşleme Kodu ---
print(f"--- Training the {model_name} model ---")
model.fit(X_train_scaled, y_train)
print("✅ Model trained successfully.")

y_test_pred = model.predict(X_test_scaled)

print("\n--- Model Performance (Test Set) ---")
mae = mean_absolute_error(y_test, y_test_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
r2 = r2_score(y_test, y_test_pred)
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-squared (R²): {r2:.4f}")

# --- Gerçek vs. Tahmin Grafiği ---
plt.figure(figsize=(8, 8))
plt.scatter(y_test, y_test_pred, alpha=0.5)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
plt.xlabel("Actual Values")
plt.ylabel("Predicted Values")
plt.title(f"{model_name}: Actual vs. Predicted", fontsize=16)
plt.show()

# --- Özellik Önemi Notu ---
print(f"\n'{model_name}' with RBF kernel does not support direct feature importance visualization.")
