In [83]:
!pip install lightgbm xgboost catboost hyperopt ipywidgets deap

import pandas as pd
import numpy as np
import random
import time
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from deap import base, creator, tools, algorithms
from IPython.display import display, clear_output
import ipywidgets as widgets
import warnings
warnings.filterwarnings("ignore")

np.random.seed(42)
random.seed(42)

from google.colab import drive
drive.mount('/content/drive')

file_path = '/content/drive/MyDrive/Energy_consumption.csv'
df = pd.read_csv(file_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [84]:
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df['Hour'] = df['Timestamp'].dt.hour
df['DayofWeek'] = df['Timestamp'].dt.dayofweek
df['Month'] = df['Timestamp'].dt.month
df['Day'] = df['Timestamp'].dt.day
df['IsWeekend'] = df['DayofWeek'].apply(lambda x: 1 if x >= 5 else 0)
df.drop(['Timestamp', 'DayOfWeek'], axis=1, inplace=True)

def parse_on_off(value):
    if isinstance(value, str):
        val = value.lower().strip()
        return 1 if val == 'on' else 0
    return value

for col in ['HVACUsage', 'LightingUsage']:
    df[col] = df[col].apply(parse_on_off)

def parse_yes_no(value):
    if isinstance(value, str):
        val = value.lower().strip()
        return 1 if val == 'yes' else 0
    return value

df['Holiday'] = df['Holiday'].apply(parse_yes_no)

In [85]:
X = df.drop('EnergyConsumption', axis=1)
y = df['EnergyConsumption']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [86]:
best_model = None
y_pred = None

slider = widgets.IntSlider(
    min=10,
    max=len(y_test),
    step=10,
    value=50,
    description='Örnek Sayısı:',
    continuous_update=False,
    layout=widgets.Layout(width='70%')
)

output = widgets.Output()

def update_metrics(change):
    with output:
        clear_output(wait=True)
        if best_model is None:
            print("Önce bir model eğitmelisiniz!")
            return
        n = change['new']
        y_true_partial = y_test.iloc[:n]
        y_pred_partial = y_pred[:n]
        rmse = np.sqrt(mean_squared_error(y_true_partial, y_pred_partial))
        r2 = r2_score(y_true_partial, y_pred_partial)
        mae = mean_absolute_error(y_true_partial, y_pred_partial)
        print(f"İlk {n} test örneği için metrikler:")
        print(f"  RMSE: {rmse:.4f}")
        print(f"  R²: {r2:.4f}")
        print(f"  MAE: {mae:.4f}")

slider.observe(update_metrics, names='value')

In [87]:
def print_results(model_name, best_params, start, end):
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    clear_output(wait=True)
    display(all_buttons, slider, output)

    print(f"\n {model_name} Sonuçları")
    print(f"  - En iyi parametreler: {best_params}")
    print(f"  - RMSE: {rmse:.4f}")
    print(f"  - R²: {r2:.4f}")
    print(f"  - MAE: {mae:.4f}")
    print(f"Süre: {(end - start):.2f} saniye")

In [88]:
def train_lightgbm_default():
    global best_model, y_pred
    clear_output(wait=True)
    display(all_buttons, slider, output)
    print("LightGBM Varsayılan çalışıyor...")
    start = time.time()
    best_model = LGBMRegressor(random_state=42)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)
    end = time.time()
    print_results("LightGBM Varsayılan", {}, start, end)

def train_lightgbm_random():
    global best_model, y_pred
    clear_output(wait=True)
    display(all_buttons, slider, output)
    print("LightGBM RandomizedSearch çalışıyor...")
    model = LGBMRegressor(random_state=42)
    param_dist = {
        'num_leaves': [31, 40, 50],
        'max_depth': [-1, 10, 15],
        'learning_rate': [0.01, 0.05, 0.1],
        'n_estimators': [100, 200, 300]
    }
    start = time.time()
    search = RandomizedSearchCV(model, param_distributions=param_dist, n_iter=5, cv=2, scoring='r2', n_jobs=-1)
    search.fit(X_train, y_train)
    end = time.time()
    best_model = search.best_estimator_
    y_pred = best_model.predict(X_test)
    print_results("LightGBM Randomized", search.best_params_, start, end)

In [89]:
def train_lightgbm_hyperopt_verbose():
    global best_model, y_pred
    print("LightGBM Hyperopt optimizasyonu başlıyor. Lütfen bekleyin...")

    space = {
        'num_leaves': hp.choice('num_leaves', [31, 40, 50]),
        'max_depth': hp.choice('max_depth', [-1, 10, 15]),
        'learning_rate': hp.uniform('learning_rate', 0.01, 0.1),
        'n_estimators': hp.choice('n_estimators', [100, 200, 300])
    }

    trials = Trials()
    global_trial_counter = {'count': 0}

    def objective(params):
        model = LGBMRegressor(**params, random_state=42)
        model.fit(X_train, y_train)
        y_pred_val = model.predict(X_test)
        loss = mean_squared_error(y_test, y_pred_val)
        global_trial_counter['count'] += 1

        clear_output(wait=True)
        print(f"Deneme {global_trial_counter['count']}:")
        print(f"  Parametreler: {params}")
        print(f"  Kayıp (MSE): {loss:.4f}")

        return {'loss': loss, 'status': STATUS_OK}

    start = time.time()
    best_params = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=20, trials=trials, rstate=np.random.default_rng(42))
    end = time.time()

    best_model = LGBMRegressor(
        num_leaves=[31, 40, 50][best_params['num_leaves']],
        max_depth=[-1, 10, 15][best_params['max_depth']],
        learning_rate=best_params['learning_rate'],
        n_estimators=[100, 200, 300][best_params['n_estimators']],
        random_state=42
    )
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    clear_output(wait=True)
    display(all_buttons, slider, output)

    print(f"LightGBM Hyperopt tamamlandı! Süre: {(end - start):.2f} saniye")
    print(f"En iyi parametreler: {best_params}")

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    print(f"  RMSE: {rmse:.4f}")
    print(f"  R²: {r2:.4f}")
    print(f"  MAE: {mae:.4f}")

In [90]:
def train_xgb_default():
    global best_model, y_pred
    clear_output(wait=True)
    display(all_buttons, slider, output)
    print("XGBoost Varsayılan çalışıyor...")
    start = time.time()
    best_model = XGBRegressor(objective='reg:squarederror', random_state=42)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)
    end = time.time()
    print_results("XGBoost Varsayılan", {}, start, end)

def train_xgb_random():
    global best_model, y_pred
    clear_output(wait=True)
    display(all_buttons, slider, output)
    print("XGBoost RandomizedSearch çalışıyor...")
    model = XGBRegressor(objective='reg:squarederror', random_state=42)
    param_dist = {
        'n_estimators': [100, 200, 300],
        'learning_rate': [0.01, 0.05, 0.1],
        'max_depth': [3, 5, 7],
        'subsample': [0.8, 1.0]
    }
    start = time.time()
    search = RandomizedSearchCV(model, param_distributions=param_dist, n_iter=5, cv=2, scoring='r2', n_jobs=-1)
    search.fit(X_train, y_train)
    end = time.time()
    best_model = search.best_estimator_
    y_pred = best_model.predict(X_test)
    print_results("XGBoost Randomized", search.best_params_, start, end)

In [91]:
def train_xgb_hyperopt_verbose():
    global best_model, y_pred
    print("XGBoost Hyperopt optimizasyonu başlıyor. Lütfen bekleyin...")

    space = {
        'n_estimators': hp.choice('n_estimators', [100, 200, 300]),
        'learning_rate': hp.uniform('learning_rate', 0.01, 0.1),
        'max_depth': hp.choice('max_depth', [3, 5, 7]),
        'subsample': hp.choice('subsample', [0.8, 1.0])
    }

    trials = Trials()
    global_trial_counter = {'count': 0}

    def objective(params):
        model = XGBRegressor(objective='reg:squarederror', random_state=42, **params)
        model.fit(X_train, y_train)
        y_pred_val = model.predict(X_test)
        loss = mean_squared_error(y_test, y_pred_val)
        global_trial_counter['count'] += 1

        clear_output(wait=True)
        print(f"Deneme {global_trial_counter['count']}:")
        print(f"  Parametreler: {params}")
        print(f"  Kayıp (MSE): {loss:.4f}")

        return {'loss': loss, 'status': STATUS_OK}

    start = time.time()
    best_params = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=20, trials=trials, rstate=np.random.default_rng(42))
    end = time.time()

    best_model = XGBRegressor(
        objective='reg:squarederror',
        n_estimators=[100, 200, 300][best_params['n_estimators']],
        learning_rate=best_params['learning_rate'],
        max_depth=[3, 5, 7][best_params['max_depth']],
        subsample=[0.8, 1.0][best_params['subsample']],
        random_state=42
    )
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    clear_output(wait=True)
    display(all_buttons, slider, output)

    print(f"XGBoost Hyperopt tamamlandı! Süre: {(end - start):.2f} saniye")
    print(f"En iyi parametreler: {best_params}")

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    print(f"  RMSE: {rmse:.4f}")
    print(f"  R²: {r2:.4f}")
    print(f"  MAE: {mae:.4f}")

In [92]:
def train_cat_default():
    global best_model, y_pred
    clear_output(wait=True)
    display(all_buttons, slider, output)
    print("CatBoost Varsayılan çalışıyor...")
    start = time.time()
    best_model = CatBoostRegressor(verbose=0, random_state=42)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)
    end = time.time()
    print_results("CatBoost Varsayılan", {}, start, end)

def train_cat_random():
    global best_model, y_pred
    clear_output(wait=True)
    display(all_buttons, slider, output)
    print("CatBoost RandomizedSearch çalışıyor...")
    model = CatBoostRegressor(verbose=0, random_state=42)
    param_dist = {
        'iterations': [100, 200, 300],
        'depth': [4, 6, 8],
        'learning_rate': [0.01, 0.05, 0.1]
    }
    start = time.time()
    search = RandomizedSearchCV(model, param_distributions=param_dist, n_iter=5, cv=2, scoring='r2', n_jobs=-1)
    search.fit(X_train, y_train)
    end = time.time()
    best_model = search.best_estimator_
    y_pred = best_model.predict(X_test)
    print_results("CatBoost Randomized", search.best_params_, start, end)

In [93]:
def train_cat_hyperopt_verbose():
    global best_model, y_pred
    print("CatBoost Hyperopt optimizasyonu başlıyor. Lütfen bekleyin...")

    space = {
        'iterations': hp.choice('iterations', [100, 200, 300]),
        'depth': hp.choice('depth', [4, 6, 8]),
        'learning_rate': hp.uniform('learning_rate', 0.01, 0.1)
    }

    trials = Trials()
    global_trial_counter = {'count': 0}

    def objective(params):
        model = CatBoostRegressor(verbose=0, random_state=42, **params)
        model.fit(X_train, y_train)
        y_pred_val = model.predict(X_test)
        loss = mean_squared_error(y_test, y_pred_val)
        global_trial_counter['count'] += 1

        clear_output(wait=True)
        print(f"Deneme {global_trial_counter['count']}:")
        print(f"  Parametreler: {params}")
        print(f"  Kayıp (MSE): {loss:.4f}")

        return {'loss': loss, 'status': STATUS_OK}

    start = time.time()
    best_params = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=20, trials=trials, rstate=np.random.default_rng(42))
    end = time.time()

    best_model = CatBoostRegressor(
        iterations=[100, 200, 300][best_params['iterations']],
        depth=[4, 6, 8][best_params['depth']],
        learning_rate=best_params['learning_rate'],
        verbose=0,
        random_state=42
    )
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)

    clear_output(wait=True)
    display(all_buttons, slider, output)

    print(f"CatBoost Hyperopt tamamlandı! Süre: {(end - start):.2f} saniye")
    print(f"En iyi parametreler: {best_params}")

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    print(f"  RMSE: {rmse:.4f}")
    print(f"  R²: {r2:.4f}")
    print(f"  MAE: {mae:.4f}")

In [94]:
def print_results(model_name, best_params, start, end):
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    clear_output(wait=True)
    display(all_buttons, slider, output)

    print(f"\n {model_name} Sonuçları")
    print(f"  - En iyi parametreler: {best_params}")
    print(f"  - RMSE: {rmse:.4f}")
    print(f"  - R²: {r2:.4f}")
    print(f"  - MAE: {mae:.4f}")
    print(f"Süre: {(end - start):.2f} saniye")

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Gerçek vs Tahmin Scatter
    axes[0, 0].scatter(y_test, y_pred, alpha=0.6, color='teal')
    axes[0, 0].plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], 'r--')
    axes[0, 0].set_title("Gerçek vs Tahmin")
    axes[0, 0].set_xlabel("Gerçek")
    axes[0, 0].set_ylabel("Tahmin")

    # Residual Plot
    residuals = y_test - y_pred
    sns.histplot(residuals, kde=True, ax=axes[0, 1], color='purple')
    axes[0, 1].set_title("Hata Dağılımı (Residuals)")

    # Zaman Serisi (İlk 100 Örnek)
    axes[1, 0].plot(y_test.values[:100], label="Gerçek", color='blue')
    axes[1, 0].plot(y_pred[:100], label="Tahmin", color='orange')
    axes[1, 0].set_title("Gerçek vs Tahmin (İlk 100 Örnek)")
    axes[1, 0].legend()

    # Feature Importance
    try:
        importances = best_model.feature_importances_
        sorted_idx = np.argsort(importances)[::-1]
        features_sorted = np.array(X.columns)[sorted_idx]
        axes[1, 1].barh(features_sorted, importances[sorted_idx], color='green')
        axes[1, 1].set_title("Özellik Önem Düzeyi")
    except:
        axes[1, 1].text(0.5, 0.5, "Feature importance yok", ha='center')

    plt.tight_layout()
    plt.show()

In [95]:
all_buttons = widgets.VBox([
    widgets.Label("LightGBM Modelleri"),
    widgets.Button(description="LightGBM Varsayılan", button_style='info', layout=widgets.Layout(width='50%')),
    widgets.Button(description="LightGBM RandomizedSearch", button_style='info', layout=widgets.Layout(width='50%')),
    widgets.Button(description="LightGBM Hyperopt", button_style='info', layout=widgets.Layout(width='50%')),

    widgets.Label("XGBoost Modelleri"),
    widgets.Button(description="XGBoost Varsayılan", button_style='warning', layout=widgets.Layout(width='50%')),
    widgets.Button(description="XGBoost RandomizedSearch", button_style='warning', layout=widgets.Layout(width='50%')),
    widgets.Button(description="XGBoost Hyperopt", button_style='warning', layout=widgets.Layout(width='50%')),

    widgets.Label("CatBoost Modelleri"),
    widgets.Button(description="CatBoost Varsayılan", button_style='success', layout=widgets.Layout(width='50%')),
    widgets.Button(description="CatBoost RandomizedSearch", button_style='success', layout=widgets.Layout(width='50%')),
    widgets.Button(description="CatBoost Hyperopt", button_style='success', layout=widgets.Layout(width='50%')),
])

button_map = {
    "LightGBM Varsayılan": train_lightgbm_default,
    "LightGBM RandomizedSearch": train_lightgbm_random,
    "LightGBM Hyperopt": train_lightgbm_hyperopt_verbose,
    "XGBoost Varsayılan": train_xgb_default,
    "XGBoost RandomizedSearch": train_xgb_random,
    "XGBoost Hyperopt": train_xgb_hyperopt_verbose,
    "CatBoost Varsayılan": train_cat_default,
    "CatBoost RandomizedSearch": train_cat_random,
    "CatBoost Hyperopt": train_cat_hyperopt_verbose,
}

for btn in all_buttons.children:
    if isinstance(btn, widgets.Button):
        desc = btn.description
        btn.on_click(lambda b, f=button_map[desc]: f())

display(all_buttons, slider, output)

VBox(children=(Label(value='LightGBM Modelleri'), Button(button_style='info', description='LightGBM Varsayılan…

IntSlider(value=150, continuous_update=False, description='Örnek Sayısı:', layout=Layout(width='70%'), max=250…

Output()

XGBoost Hyperopt tamamlandı! Süre: 5.22 saniye
En iyi parametreler: {'learning_rate': np.float64(0.017357224775694024), 'max_depth': np.int64(0), 'n_estimators': np.int64(2), 'subsample': np.int64(1)}
  RMSE: 5.3944
  R²: 0.5656
  MAE: 4.3351
