In [None]:
# ========================================
# 0) GEREKLİ KÜTÜPHANELERIN KURULUMU
# ========================================

!pip install pandas numpy matplotlib seaborn scikit-learn xgboost lightgbm catboost shap optuna --quiet
!pip install timm --quiet  # Gerekirse advanced image models
# ... (ihtiyaç duyduğun diğer kütüphaneler)
# HuggingFace kütüphaneleri (opsiyonel)
# !pip install transformers peft --quiet

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import shap
import optuna

from sklearn.model_selection import GroupKFold, train_test_split
from sklearn.metrics import log_loss
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

# Ağaç tabanlı modeller
import xgboost as xgb
import lightgbm as lgb
import catboost as cb

# PyTorch, eğer custom CNN veya benzeri yapacaksan
import torch

# Kolab GPU kontrolü
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Cihaz:", device)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25h

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



Cihaz: cuda


In [None]:
# ========================================
# 1) VERI YOLLARI ve OKUMA
# ========================================
from google.colab import drive
drive.mount('/content/drive')

# Dosya yollarını kendine göre ayarla
train_file_path = "/content/drive/My Drive/Yarisma/Train.csv"
test_file_path = "/content/drive/My Drive/Yarisma/Test.csv"
composite_file_path = "/content/drive/My Drive/Yarisma/composite_images.npz"

train_data = pd.read_csv(train_file_path)
test_data = pd.read_csv(test_file_path)
composite_data = np.load(composite_file_path)

print("Train shape:", train_data.shape)
print("Test shape:", test_data.shape)
display(train_data.head())


Mounted at /content/drive
Train shape: (492020, 3)
Test shape: (163520, 2)


Unnamed: 0,event_id,precipitation,label
0,id_spictby0jfsb_X_0,0.0,0
1,id_spictby0jfsb_X_1,0.095438,0
2,id_spictby0jfsb_X_2,1.94956,0
3,id_spictby0jfsb_X_3,3.23216,0
4,id_spictby0jfsb_X_4,0.0,0


In [None]:
# ========================================
# 2) HIZLI EDA
# ========================================
print("Label dağılımı:\n", train_data['label'].value_counts())
print("\nEksik değer var mı (Train)?\n", train_data.isnull().sum())
print("\nEksik değer var mı (Test)?\n", test_data.isnull().sum())

# Şiddetli imbalanced: label=1 çok az


Label dağılımı:
 label
0    491702
1       318
Name: count, dtype: int64

Eksik değer var mı (Train)?
 event_id         0
precipitation    0
label            0
dtype: int64

Eksik değer var mı (Test)?
 event_id         0
precipitation    0
dtype: int64


In [None]:
# ========================================
# 3) GÖRÜNTÜ ÖZELLİKLERINI EKLEME
#    (NDVI, NDWI, Slope mean/std)
# ========================================
import torch

composite_images = {}
for file_name in composite_data.files:
    # Devasa veri ise GPU'da sorun olabilir; yine de deniyoruz.
    composite_images[file_name] = torch.tensor(composite_data[file_name], dtype=torch.float32, device=device)

def process_image(event_id):
    """
    event_id: Örn. id_abcdef123_X_0 vs.
    """
    base_id = "_".join(event_id.split("_")[:-2])

    if base_id in composite_images:
        image = composite_images[base_id]
        # image: (H, W, 6) [B2, B3, B4, B8, B11, slope]
        # Normalize
        B2 = image[:,:,0] / 65535.0
        B3 = image[:,:,1] / 65535.0
        B4 = image[:,:,2] / 65535.0
        B8 = image[:,:,3] / 65535.0
        B11= image[:,:,4] / 65535.0
        slope = image[:,:,5] # slope genelde 0..xxx, normalizasyon yok

        NDVI = (B8 - B4) / (B8 + B4 + 1e-10)
        NDWI = (B3 - B8) / (B3 + B8 + 1e-10)

        return {
            'NDVI_mean': NDVI.mean().item(),
            'NDVI_std':  NDVI.std().item(),
            'NDWI_mean': NDWI.mean().item(),
            'NDWI_std':  NDWI.std().item(),
            'Slope_mean': slope.mean().item(),
            'Slope_std':  slope.std().item()
        }
    else:
        # Görüntü yoksa 0 doldur
        return {
            'NDVI_mean': 0.0,
            'NDVI_std':  0.0,
            'NDWI_mean': 0.0,
            'NDWI_std':  0.0,
            'Slope_mean':0.0,
            'Slope_std': 0.0
        }

# Train
unique_train_events = train_data['event_id'].unique()
train_image_feats = []
for eid in unique_train_events:
    feats = process_image(eid)
    feats['event_id'] = eid
    train_image_feats.append(feats)
train_image_feats_df = pd.DataFrame(train_image_feats)

train_data = pd.merge(train_data, train_image_feats_df, on='event_id', how='left')

# Test
unique_test_events = test_data['event_id'].unique()
test_image_feats = []
for eid in unique_test_events:
    feats = process_image(eid)
    feats['event_id'] = eid
    test_image_feats.append(feats)
test_image_feats_df = pd.DataFrame(test_image_feats)

test_data = pd.merge(test_data, test_image_feats_df, on='event_id', how='left')
print("Train shape:", train_data.shape, "Test shape:", test_data.shape)


Train shape: (492020, 9) Test shape: (163520, 8)


In [None]:
# ========================================
# 4) GELISMIS ZAMAN SERISI FEATURE ENGINEERING
#    (rolling mean, cumsum, trend, consecutive rain, vb.)
# ========================================

window_sizes = [3, 7, 14, 30]

# -- 1) TRAIN SET FEATURE ENGINEERING --

# Gün bilgisi
train_data['day'] = train_data['event_id'].apply(lambda x: int(x.split('_')[-1]))
train_data = train_data.sort_values(['event_id', 'day'])

# Rolling mean, rolling sum, trend
for w in window_sizes:
    train_data[f'ma_{w}'] = train_data.groupby('event_id')['precipitation'] \
                                      .transform(lambda x: x.rolling(w).mean())
    train_data[f'cum_{w}'] = train_data.groupby('event_id')['precipitation'] \
                                       .transform(lambda x: x.rolling(w).sum())
    train_data[f'trend_{w}'] = train_data.groupby('event_id')[f'ma_{w}'] \
                                         .transform(lambda x: x.diff())

# NaN değerleri 0 ile doldur (ilk rolling hesaplamalarda oluşan NaN'ler vs.)
train_data.fillna(0, inplace=True)

# Heavy rain (basit eşik)
train_data['heavy_rain'] = (train_data['precipitation'] > 20).astype(int)

# consecutive_rain fonksiyonunu tanımla
def calc_consecutive_rain(series):
    count = 0
    result = []
    for v in series:
        if v == 1:
            count += 1
        else:
            count = 0
        result.append(count)
    return pd.Series(result, index=series.index)

# Ardışık yağış sayısı
train_data['consecutive_rain'] = train_data.groupby('event_id')['heavy_rain'] \
                                           .transform(calc_consecutive_rain)

# 7 günlük rolling std
train_data['std_precip'] = train_data.groupby('event_id')['precipitation'] \
                                     .transform(lambda x: x.rolling(7).std())
train_data['std_precip'].fillna(0, inplace=True)

# Log transform
train_data['log_precip'] = np.log1p(train_data['precipitation'])


# -- 2) TEST SET FEATURE ENGINEERING --

# Gün bilgisi
test_data['day'] = test_data['event_id'].apply(lambda x: int(x.split('_')[-1]))
test_data = test_data.sort_values(['event_id', 'day'])

# Rolling mean, rolling sum, trend
for w in window_sizes:
    test_data[f'ma_{w}'] = test_data.groupby('event_id')['precipitation'] \
                                    .transform(lambda x: x.rolling(w).mean())
    test_data[f'cum_{w}'] = test_data.groupby('event_id')['precipitation'] \
                                     .transform(lambda x: x.rolling(w).sum())
    test_data[f'trend_{w}'] = test_data.groupby('event_id')[f'ma_{w}'] \
                                       .transform(lambda x: x.diff())

# NaN değerleri 0 ile doldur
test_data.fillna(0, inplace=True)

# Heavy rain (basit eşik)
test_data['heavy_rain'] = (test_data['precipitation'] > 20).astype(int)

# Ardışık yağış sayısı
test_data['consecutive_rain'] = test_data.groupby('event_id')['heavy_rain'] \
                                         .transform(calc_consecutive_rain)

# 7 günlük rolling std
test_data['std_precip'] = test_data.groupby('event_id')['precipitation'] \
                                   .transform(lambda x: x.rolling(7).std())
test_data['std_precip'].fillna(0, inplace=True)

# Log transform
test_data['log_precip'] = np.log1p(test_data['precipitation'])

print("Feature engineering tamamlandı!")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_data['std_precip'].fillna(0, inplace=True)


Feature engineering tamamlandı!


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data['std_precip'].fillna(0, inplace=True)


In [None]:
# ========================================
# 5) FEATURE LİSTESI
# ========================================
features = [
    'precipitation', 'log_precip',
    # rolling means
    *[f'ma_{w}' for w in window_sizes],
    # cumsums
    *[f'cum_{w}' for w in window_sizes],
    # trends
    *[f'trend_{w}' for w in window_sizes],
    # NDVI / NDWI / slope
    'NDVI_mean','NDVI_std','NDWI_mean','NDWI_std','Slope_mean','Slope_std',
    # extra
    'heavy_rain','consecutive_rain','std_precip'
]

X_train = train_data[features].copy()
y_train = train_data['label'].copy()

X_test = test_data[features].copy()

print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)


Train shape: (492020, 23)
Test shape: (163520, 23)


In [None]:
# ========================================
# 6) BASİT OPTUNA ARAMALI XGBOOST MODELİ
#    (GroupKFold - 5 katlı)
# ========================================
import xgboost as xgb
from sklearn.model_selection import GroupKFold

groups = train_data['event_id'].values

def objective(trial):
    # Hyperparam aralığı:
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 15),
        'seed': 42,
    }

    cv = GroupKFold(n_splits=3)
    scores = []
    for train_idx, val_idx in cv.split(X_train, y_train, groups):
        X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

        # dengesiz
        pos_count = y_tr.sum()
        neg_count = len(y_tr) - pos_count
        if pos_count == 0:
            spw = 1
        else:
            spw = neg_count / pos_count
        params['scale_pos_weight'] = spw

        dtrain = xgb.DMatrix(X_tr, label=y_tr)
        dval   = xgb.DMatrix(X_val, label=y_val)

        model = xgb.train(
            params,
            dtrain,
            num_boost_round=3000,
            evals=[(dtrain, 'train'), (dval, 'valid')],
            early_stopping_rounds=50,
            verbose_eval=False
        )
        pred_val = model.predict(dval)
        score = log_loss(y_val, pred_val)
        scores.append(score)

    return np.mean(scores)

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)
print("Best params:", study.best_params)
print("Best value (logloss):", study.best_value)


[I 2024-12-28 06:16:24,675] A new study created in memory with name: no-name-ba890aae-8c75-4aba-83ac-fea9ddbb29ba
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
[I 2024-12-28 06:21:10,282] Trial 0 finished with value: 0.1847073002360258 and parameters: {'learning_rate': 0.013104090558859523, 'max_depth': 5, 'subsample': 0.9956096147164197, 'colsample_bytree': 0.6337731043136301, 'min_child_weight': 8}. Best is trial 0 with value: 0.1847073002360258.
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
[I 2024-12-28 06:21:44,333] Trial 1 finished with value: 0.12607713051625646 and parameters: {'learning_rate': 0.09907825867464916, 'max_depth': 9, 'subsample': 0.7010446886156609, 'colsample_bytree': 0.6058547715055685, 'min_child_weight': 10}. Best is trial 1 with value: 0.12607713051625646.
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.1),
[I 2024-12-28 06:26:44,432] Trial 2 finished with value: 0.24983153432876246 an

Best params: {'learning_rate': 0.09877437270380848, 'max_depth': 12, 'subsample': 0.5641150646411577, 'colsample_bytree': 0.7282268952593725, 'min_child_weight': 14}
Best value (logloss): 0.08894538806005346


In [None]:
# ========================================
# 7) EN IYI PARAMLARLA 5-KATLI XGBOOST EĞITIMI
# ========================================
best_params = study.best_params
print("En iyi parametreler:", best_params)

# 5-Fold GroupKFold
gkf = GroupKFold(n_splits=5)
fold_models_xgb = []
fold_scores_xgb = []

for fold, (tr_idx, val_idx) in enumerate(gkf.split(X_train, y_train, groups)):
    print(f"--- XGB FOLD {fold} ---")
    X_tr, X_val = X_train.iloc[tr_idx], X_train.iloc[val_idx]
    y_tr, y_val = y_train.iloc[tr_idx], y_train.iloc[val_idx]

    pos_count = y_tr.sum()
    neg_count = len(y_tr) - pos_count
    spw = neg_count / pos_count if pos_count > 0 else 1

    current_params = best_params.copy()
    current_params.update({
        'objective':'binary:logistic',
        'eval_metric':'logloss',
        'scale_pos_weight':spw,
        'seed':42
    })

    dtr = xgb.DMatrix(X_tr, label=y_tr)
    dval= xgb.DMatrix(X_val, label=y_val)

    model_xgb = xgb.train(
        current_params,
        dtr,
        num_boost_round=5000,
        evals=[(dtr,'train'), (dval,'valid')],
        early_stopping_rounds=50,
        verbose_eval=200
    )
    val_pred = model_xgb.predict(dval)
    score = log_loss(y_val, val_pred)
    print(f"Fold {fold} logloss = {score:.4f}")
    fold_scores_xgb.append(score)
    fold_models_xgb.append(model_xgb)

print("XGBoost CV Scores:", fold_scores_xgb)
print("XGBoost Mean:", np.mean(fold_scores_xgb))


En iyi parametreler: {'learning_rate': 0.09877437270380848, 'max_depth': 12, 'subsample': 0.5641150646411577, 'colsample_bytree': 0.7282268952593725, 'min_child_weight': 14}
--- XGB FOLD 0 ---
[0]	train-logloss:0.63202	valid-logloss:0.63204
[199]	train-logloss:0.09666	valid-logloss:0.10186
Fold 0 logloss = 0.1048
--- XGB FOLD 1 ---
[0]	train-logloss:0.62814	valid-logloss:0.62828
[200]	train-logloss:0.09816	valid-logloss:0.10390
[274]	train-logloss:0.10016	valid-logloss:0.10677
Fold 1 logloss = 0.1067
--- XGB FOLD 2 ---
[0]	train-logloss:0.62718	valid-logloss:0.62726
[200]	train-logloss:0.10298	valid-logloss:0.11029
[220]	train-logloss:0.09794	valid-logloss:0.10552
Fold 2 logloss = 0.1055
--- XGB FOLD 3 ---
[0]	train-logloss:0.63937	valid-logloss:0.63936
[200]	train-logloss:0.09662	valid-logloss:0.10082
[283]	train-logloss:0.09124	valid-logloss:0.09616
Fold 3 logloss = 0.0961
--- XGB FOLD 4 ---
[0]	train-logloss:0.63772	valid-logloss:0.63793
[179]	train-logloss:0.10681	valid-logloss:0.1

In [None]:
# ========================================
# 8) LIGHTGBM ve CATBOOST (OPSİYONEL)
#    Hızlı birer model denemesi, hyperparam tuning'ini
#    istersen yine Optuna ile benzer şekilde yapabilirsin.
# ========================================

import lightgbm as lgb
import catboost as cb
import numpy as np
from sklearn.model_selection import GroupKFold
from sklearn.metrics import log_loss

def train_lightgbm_cv(X, y, groups):
    gkf_ = GroupKFold(n_splits=5)
    fold_models = []
    scores = []

    for fold, (tr_idx, val_idx) in enumerate(gkf_.split(X, y, groups)):
        X_tr, X_val = X.iloc[tr_idx], X.iloc[val_idx]
        y_tr, y_val = y.iloc[tr_idx], y.iloc[val_idx]

        # Sınıf dengesizliği varsa scale_pos_weight hesaplayalım
        pos_count = y_tr.sum()
        neg_count = len(y_tr) - pos_count
        spw = neg_count / pos_count if pos_count > 0 else 1

        lgbm_params = {
            'objective': 'binary',
            'metric': 'binary_logloss',
            'boosting_type': 'gbdt',
            'verbosity': -1,
            'seed': 42,
            'learning_rate': 0.03,
            'max_depth': 6,
            'num_leaves': 64,
            'subsample': 0.8,
            'colsample_bytree': 0.8,
            'scale_pos_weight': spw
        }

        dtrain = lgb.Dataset(X_tr, label=y_tr)
        dval   = lgb.Dataset(X_val, label=y_val, reference=dtrain)

        # Eski sürüm LightGBM'de early_stopping_rounds ve verbose_eval desteklenmeyebilir
        model_lgb = lgb.train(
            lgbm_params,
            dtrain,
            num_boost_round=3000,
            valid_sets=[dtrain, dval]   # <- Eğer valid_names parametresi de hata verirse bunu da kaldırın
            # valid_names=['train', 'valid'],  # <- Gerekirse yoruma alın
            # early_stopping_rounds=50,        # <- Eski sürümlerde sorun çıkarabilir
            # verbose_eval=200                 # <- Eski sürümlerde sorun çıkarabilir
        )

        val_pred = model_lgb.predict(X_val)
        sc = log_loss(y_val, val_pred)
        print(f"Fold {fold} logloss={sc:.4f}")
        scores.append(sc)
        fold_models.append(model_lgb)

    return fold_models, scores

# LightGBM ile eğitimi başlat
lgb_models, lgb_scores = train_lightgbm_cv(X_train, y_train, groups)
print("LightGBM CV Scores:", lgb_scores, "Mean:", np.mean(lgb_scores))

def train_catboost_cv(X, y, groups):
    gkf_ = GroupKFold(n_splits=5)
    fold_models = []
    scores = []

    for fold, (tr_idx, val_idx) in enumerate(gkf_.split(X, y, groups)):
        X_tr, X_val = X.iloc[tr_idx], X.iloc[val_idx]
        y_tr, y_val = y.iloc[tr_idx], y.iloc[val_idx]

        # Sınıf dengesizliği için scale_pos_weight
        pos_count = y_tr.sum()
        neg_count = len(y_tr) - pos_count
        spw = neg_count / pos_count if pos_count > 0 else 1

        # CatBoost erken durdurmayı (early_stopping_rounds) sorunsuz destekler
        model_cb = cb.CatBoostClassifier(
            iterations=3000,
            learning_rate=0.03,
            depth=6,
            random_seed=42,
            logging_level='Silent',
            eval_metric='Logloss',
            scale_pos_weight=spw,
            use_best_model=True,
            early_stopping_rounds=50
        )
        model_cb.fit(X_tr, y_tr, eval_set=(X_val, y_val))

        val_pred = model_cb.predict_proba(X_val)[:, 1]
        sc = log_loss(y_val, val_pred)
        print(f"Fold {fold} logloss={sc:.4f}")
        scores.append(sc)
        fold_models.append(model_cb)

    return fold_models, scores

# CatBoost ile eğitimi başlat
cb_models, cb_scores = train_catboost_cv(X_train, y_train, groups)
print("CatBoost CV Scores:", cb_scores, "Mean:", np.mean(cb_scores))


Fold 0 logloss=10.9821
Fold 1 logloss=11.1980
Fold 2 logloss=12.3746
Fold 3 logloss=14.0669
Fold 4 logloss=10.6851
LightGBM CV Scores: [10.982113785470387, 11.198046246342875, 12.374611566508982, 14.066888483532875, 10.685120688680154] Mean: 11.861356154107057
Fold 0 logloss=0.4254
Fold 1 logloss=0.4268
Fold 2 logloss=0.4711
Fold 3 logloss=0.4305
Fold 4 logloss=0.4315
CatBoost CV Scores: [0.4253680833085262, 0.42684143245557477, 0.4711166002666626, 0.43048848000390116, 0.4314994528655009] Mean: 0.43706280978003303


In [None]:
# ========================================
# 9) ENSEMBLE
#    - Basitçe 3 modelin (XGB, LGB, CB) out-of-fold tahminlerinin
#      ortalamasıyla final bir meta-model elde edebiliriz.
#    - Şimdilik test aşamasında, her modelin test tahminlerini
#      ortalayıp submission oluşturacağız.
# ========================================

import xgboost as xgb
import lightgbm as lgb
import catboost as cb
import numpy as np
from sklearn.model_selection import GroupKFold
from sklearn.metrics import log_loss

# A) XGB out-of-fold tahminlerini (CV'de) alabilirdik, ama basit yaklaşım:
#    Tüm train ile model eğit, test'te tahmin -> sonra ortalama.

# XGB final:
dtrain_full = xgb.DMatrix(X_train, label=y_train)
pos_count = y_train.sum()
neg_count = len(y_train) - pos_count
spw = neg_count / pos_count if pos_count > 0 else 1

final_xgb_params = best_params.copy()
final_xgb_params.update({
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'scale_pos_weight': spw,
    'seed': 42
})

xgb_final = xgb.train(
    final_xgb_params,
    dtrain_full,
    num_boost_round=int(1.2 * max([m.best_iteration for m in fold_models_xgb]))  # kabaca
)

dtest = xgb.DMatrix(X_test)
pred_test_xgb = xgb_final.predict(dtest)

# LGB final:
lgbm_params_final = {
    'objective': 'binary',
    'metric': 'binary_logloss',
    'boosting_type': 'gbdt',
    'verbosity': -1,
    'seed': 42,
    'learning_rate': 0.03,
    'max_depth': 6,
    'num_leaves': 64,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'scale_pos_weight': spw
}

dtrain_lgb = lgb.Dataset(X_train, label=y_train)

# Eski sürüm LightGBM’de verbose_eval veya valid_names desteklenmiyor olabilir
lgb_final = lgb.train(
    lgbm_params_final,
    dtrain_lgb,
    num_boost_round=3000,
    # valid_sets=[dtrain_lgb],  # <-- isterseniz valid_sets'i de kaldırabilirsiniz
    # valid_names=['train'],    # <-- eski sürümlerde bu da hata verebilir
    # verbose_eval=False        # <-- eski sürümlerde hata veriyorsa yoruma alın
)

pred_test_lgb = lgb_final.predict(X_test)

# CatBoost final:
cb_final = cb.CatBoostClassifier(
    iterations=3000,
    learning_rate=0.03,
    depth=6,
    random_seed=42,
    logging_level='Silent',
    eval_metric='Logloss',
    scale_pos_weight=spw,
    use_best_model=False
)
cb_final.fit(X_train, y_train)
pred_test_cb = cb_final.predict_proba(X_test)[:, 1]

# Ensemble - basit ortalama
pred_ens = (pred_test_xgb + pred_test_lgb + pred_test_cb) / 3.0

print("Ortalama ensemble test predict, örnek:", pred_ens[:10])


Ortalama ensemble test predict, örnek: [3.73925036e-05 3.73925036e-05 3.73925036e-05 3.73925036e-05
 3.73925036e-05 3.73925036e-05 3.73925036e-05 3.73925036e-05
 3.73925036e-05 7.58360522e-05]


In [None]:
# ========================================
# 10) SUBMISSION
# ========================================
submission = pd.DataFrame({
    'ID': test_data['event_id'],
    'Target': pred_ens  # 0..1 arası tahmin
})
save_path = "/content/drive/My Drive/Yarisma/submission_ensemble.csv"
submission.to_csv(save_path, index=False)
print("Submission file saved:", save_path)
submission.head(10)


Submission file saved: /content/drive/My Drive/Yarisma/submission_ensemble.csv


Unnamed: 0,ID,Target
86140,id_066zz28m11mr_X_0,3.7e-05
86141,id_066zz28m11mr_X_1,3.7e-05
86150,id_066zz28m11mr_X_10,3.7e-05
86240,id_066zz28m11mr_X_100,3.7e-05
86241,id_066zz28m11mr_X_101,3.7e-05
86242,id_066zz28m11mr_X_102,3.7e-05
86243,id_066zz28m11mr_X_103,3.7e-05
86244,id_066zz28m11mr_X_104,3.7e-05
86245,id_066zz28m11mr_X_105,3.7e-05
86246,id_066zz28m11mr_X_106,7.6e-05
