In [5]:
import pandas as pd
from sklearn.metrics import roc_auc_score
import numpy as np


def recall_at_k(y_true, y_prob, k=0.1):
    """
    Tahmin edilen olasılıkların en üst k%'sını pozitif etiketleyerek recall değerini hesaplar.

    Parametreler:
        y_true (list): Gerçek ikili etiketler.
        y_prob (list): Tahmin edilen olasılıklar.
        k (float): Pozitif etiketlenecek olasılıkların yüzdelik dilimi (varsayılan 0.1).

    Döndürür:
        float: En iyi k% tahminlerindeki recall oranı.
    """
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)
    n = len(y_true)
    m = max(1, int(np.round(k * n)))
    order = np.argsort(-y_prob, kind="mergesort")
    top = order[:m]

    tp_at_k = y_true[top].sum()
    P = y_true.sum()

    return float(tp_at_k / P) if P > 0 else 0.0


def lift_at_k(y_true, y_prob, k=0.1):
    """
    Tahmin edilen olasılıkların en üst k%'sını pozitif etiketleyerek lift (precision/prevalence) değerini hesaplar.

    Parametreler:
        y_true (list): Gerçek ikili etiketler.
        y_prob (list): Tahmin edilen olasılıklar.
        k (float): Pozitif etiketlenecek olasılıkların yüzdelik dilimi (varsayılan 0.1).

    Döndürür:
        float: En iyi k% tahminlerindeki lift değeri.
    """
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)
    n = len(y_true)
    m = max(1, int(np.round(k * n)))
    order = np.argsort(-y_prob, kind="mergesort")
    top = order[:m]

    tp_at_k = y_true[top].sum()
    precision_at_k = tp_at_k / m
    prevalence = y_true.mean()

    return float(precision_at_k / prevalence) if prevalence > 0 else 0.0


def convert_auc_to_gini(auc):
    """
    ROC AUC skorunu Gini katsayısına dönüştürür.

    Gini katsayısı, ROC AUC skorunun doğrusal bir dönüşümüdür.

    Parametreler:
        auc (float): ROC AUC skoru (0 ile 1 arasında).

    Döndürür:
        float: Gini katsayısı (-1 ile 1 arasında).
    """
    return 2 * auc - 1


def ing_hubs_datathon_metric(y_true, y_prob):
    """
    Gini, recall@10% ve lift@10% metriklerini birleştiren özel bir metrik hesaplar.

    Metrik, her bir skoru bir baseline modelin metrik değerlerine göre oranlar ve aşağıdaki ağırlıkları uygular:
    - Gini: %40
    - Recall@10%: %30
    - Lift@10%: %30

    Parametreler:
        y_true (list): Gerçek ikili etiketler.
        y_prob (list): Tahmin edilen olasılıklar.

    Döndürür:
        float: Ağırlıklandırılmış bileşik skor.
    """
    # final metrik için ağırlıklar
    score_weights = {
        "gini": 0.4,
        "recall_at_10perc": 0.3,
        "lift_at_10perc": 0.3,
    }

    # baseline modelin her bir metrik için değerleri
    baseline_scores = {
        "roc_auc": 0.6925726757936908,
        "recall_at_10perc": 0.18469015795868773,
        "lift_at_10perc": 1.847159286784029,
    }

    # y_prob tahminleri için metriklerin hesaplanması
    roc_auc = roc_auc_score(y_true, y_prob)
    recall_at_10perc = recall_at_k(y_true, y_prob, k=0.1)
    lift_at_10perc = lift_at_k(y_true, y_prob, k=0.1)

    new_scores = {
        "roc_auc": roc_auc,
        "recall_at_10perc": recall_at_10perc,
        "lift_at_10perc": lift_at_10perc,
    }

    # roc auc değerlerinin gini değerine dönüştürülmesi
    baseline_scores["gini"] = convert_auc_to_gini(baseline_scores["roc_auc"])
    new_scores["gini"] = convert_auc_to_gini(new_scores["roc_auc"])

    # baseline modeline oranlama
    final_gini_score = new_scores["gini"] / baseline_scores["gini"]
    final_recall_score = new_scores["recall_at_10perc"] / baseline_scores["recall_at_10perc"]
    final_lift_score = new_scores["lift_at_10perc"] / baseline_scores["lift_at_10perc"]

    # ağırlıklandırılmış metriğin hesaplanması
    final_score = (
        final_gini_score * score_weights["gini"] +
        final_recall_score * score_weights["recall_at_10perc"] + 
        final_lift_score * score_weights["lift_at_10perc"]
    )
    return final_score


In [2]:
import numpy as np
import pandas as pd

In [3]:
train_ref_data = pd.read_csv('/kaggle/input/ing-hubs-turkiye-datathon/referance_data.csv')
test_ref_data = pd.read_csv('/kaggle/input/ing-hubs-turkiye-datathon/referance_data_test.csv')
customers = pd.read_csv('/kaggle/input/ing-hubs-turkiye-datathon/customers.csv')
customer_history = pd.read_csv('/kaggle/input/ing-hubs-turkiye-datathon/customer_history.csv')

In [4]:
train_ref_data

Unnamed: 0,cust_id,ref_date,churn
0,0,2017-09-01,0
1,3,2018-10-01,0
2,5,2018-03-01,1
3,6,2018-04-01,1
4,7,2018-05-01,0
...,...,...,...
133282,199995,2018-09-01,0
133283,199996,2018-06-01,0
133284,199997,2018-12-01,0
133285,199998,2018-02-01,1


In [5]:
train_ref_data['split'] = "Train"
test_ref_data['split'] = "Test"

ref_data = pd.concat([train_ref_data, test_ref_data], ignore_index=True)

ref_data['ref_date'] = pd.to_datetime(ref_data['ref_date'])
ref_data['year'] = ref_data['ref_date'].dt.year
ref_data['month'] = ref_data['ref_date'].dt.month
ref_data['day'] = ref_data['ref_date'].dt.day
ref_data['weekday'] = ref_data['ref_date'].dt.weekday
ref_data['dayofyear'] = ref_data['ref_date'].dt.dayofyear

ref_data['month_sin'] = np.sin(2 * np.pi * ref_data['month'] / 12)
ref_data['month_cos'] = np.cos(2 * np.pi * ref_data['month'] / 12)

ref_data['day_sin'] = np.sin(2 * np.pi * ref_data['day'] / 31)
ref_data['day_cos'] = np.cos(2 * np.pi * ref_data['day'] / 31)

ref_data['weekday_sin'] = np.sin(2 * np.pi * ref_data['weekday'] / 7)
ref_data['weekday_cos'] = np.cos(2 * np.pi * ref_data['weekday'] / 7)

ref_data['dayofyear_sin'] = np.sin(2 * np.pi * ref_data['dayofyear'] / 365)
ref_data['dayofyear_cos'] = np.cos(2 * np.pi * ref_data['dayofyear'] / 365)

In [6]:
customers['work_sector'] = customers['work_sector'].fillna('Not Working')
customers['tenure_per_age'] = (customers['tenure']/365) / customers['age']
ref_data_v2 = ref_data.merge(customers,on=['cust_id'],how="left")

In [7]:
ref_data_v2[ref_data_v2.select_dtypes(include=['object',"category"]).columns] = ref_data_v2.select_dtypes(include=['object',"category"]).astype("category") 

In [8]:
numeric_cols = ['mobile_eft_all_cnt', 'active_product_category_nbr', 
                'mobile_eft_all_amt', 'cc_transaction_all_amt', 'cc_transaction_all_cnt']
customer_history = customer_history.sort_values(['cust_id', 'date'])
customer_first = customer_history.groupby('cust_id')[numeric_cols].first().add_suffix('_first').reset_index()
customer_last  = customer_history.groupby('cust_id')[numeric_cols].last().add_suffix('_last').reset_index()
customer_mean  = customer_history.groupby('cust_id')[numeric_cols].mean().add_suffix('_mean').reset_index()
customer_agg = customer_first.merge(customer_last, on='cust_id').merge(customer_mean, on='cust_id')
ref_data_v3 = ref_data_v2.merge(customer_agg, on='cust_id', how='left')

In [9]:
window_sizes = [3, 5, 7, 10, 15, 30, 60]
numeric_cols = ['mobile_eft_all_cnt', 'active_product_category_nbr', 
                'mobile_eft_all_amt', 'cc_transaction_all_amt', 'cc_transaction_all_cnt']

# Başlangıçta boş DataFrame oluştur
customer_last_feats = pd.DataFrame({'cust_id': customer_history['cust_id'].unique()})

for w in window_sizes:
    print(f"Processing window_size: {w}")
    
    # --- SUM feature ---
    temp_sum = (
        customer_history
        .sort_values(['cust_id', 'date'])
        .groupby('cust_id')[numeric_cols]
        .apply(lambda x: x.tail(w).sum())
        .reset_index()
    )
    temp_sum = temp_sum.rename(columns={col: f"{col}_last_{w}_event_sum" for col in numeric_cols})
    
    # --- MEAN feature (sadece 2 kolon için) ---
    temp_mean = (
        customer_history
        .sort_values(['cust_id', 'date'])
        .groupby('cust_id')[['mobile_eft_all_amt', 'cc_transaction_all_amt']]
        .apply(lambda x: x.tail(w).mean())
        .reset_index()
    )
    temp_mean = temp_mean.rename(columns={
        'mobile_eft_all_amt': f"mobile_eft_all_amt_last_{w}_event_mean",
        'cc_transaction_all_amt': f"cc_transaction_all_amt_last_{w}_event_mean"
    })
    
    # Merge işlemleri
    customer_last_feats = (
        customer_last_feats
        .merge(temp_sum, on='cust_id', how='left')
        .merge(temp_mean, on='cust_id', how='left')
    )

# Ref data ile merge
ref_data_v4 = ref_data_v3.merge(customer_last_feats, on='cust_id', how='left')

# Yeni feature'ları yazdır
new_feats = [c for c in ref_data_v4.columns if c.endswith('_event_sum') or c.endswith('_event_mean')]
print("Yeni feature'lar eklendi:", new_feats)


Processing window_size: 3
Processing window_size: 5
Processing window_size: 7
Processing window_size: 10
Processing window_size: 15
Processing window_size: 30
Processing window_size: 60
Yeni feature'lar eklendi: ['mobile_eft_all_cnt_last_3_event_sum', 'active_product_category_nbr_last_3_event_sum', 'mobile_eft_all_amt_last_3_event_sum', 'cc_transaction_all_amt_last_3_event_sum', 'cc_transaction_all_cnt_last_3_event_sum', 'mobile_eft_all_amt_last_3_event_mean', 'cc_transaction_all_amt_last_3_event_mean', 'mobile_eft_all_cnt_last_5_event_sum', 'active_product_category_nbr_last_5_event_sum', 'mobile_eft_all_amt_last_5_event_sum', 'cc_transaction_all_amt_last_5_event_sum', 'cc_transaction_all_cnt_last_5_event_sum', 'mobile_eft_all_amt_last_5_event_mean', 'cc_transaction_all_amt_last_5_event_mean', 'mobile_eft_all_cnt_last_7_event_sum', 'active_product_category_nbr_last_7_event_sum', 'mobile_eft_all_amt_last_7_event_sum', 'cc_transaction_all_amt_last_7_event_sum', 'cc_transaction_all_cnt_la

In [10]:
month_windows = [1, 3, 6, 12, 24]
numeric_cols = ['mobile_eft_all_cnt', 'active_product_category_nbr', 
                'mobile_eft_all_amt', 'cc_transaction_all_amt', 'cc_transaction_all_cnt']

# Tarih kolonlarını datetime yap
customer_history['date'] = pd.to_datetime(customer_history['date'])

customer_last_feats_month = pd.DataFrame({'cust_id': customer_history['cust_id'].unique()})

# Her müşteri için son tarih bilgisi
last_dates = (
    customer_history
    .groupby('cust_id')['date']
    .max()
    .reset_index()
    .rename(columns={'date': 'last_date'})
)
last_dates['last_date'] = pd.to_datetime(last_dates['last_date'])

# Son tarihi ekle
customer_history_with_last = customer_history.merge(last_dates, on='cust_id', how='left')

for m in month_windows:
    print(f"Processing last {m} month(s)")
    temp = customer_history_with_last.copy()
    
    # İlgili ay aralığındaki kayıtları filtrele
    temp = temp[temp['date'] >= (temp['last_date'] - pd.DateOffset(months=m))]
    
    # --- SUM feature ---
    temp_sum = temp.groupby('cust_id')[numeric_cols].sum().reset_index()
    temp_sum = temp_sum.rename(columns={col: f"{col}_last_{m}m_sum" for col in numeric_cols})
    
    # --- MEAN feature (sadece 2 kolon için) ---
    temp_mean = (
        temp.groupby('cust_id')[['mobile_eft_all_amt', 'cc_transaction_all_amt']]
        .mean()
        .reset_index()
        .rename(columns={
            'mobile_eft_all_amt': f"mobile_eft_all_amt_last_{m}m_mean",
            'cc_transaction_all_amt': f"cc_transaction_all_amt_last_{m}m_mean"
        })
    )
    
    # Merge işlemleri
    customer_last_feats_month = (
        customer_last_feats_month
        .merge(temp_sum, on='cust_id', how='left')
        .merge(temp_mean, on='cust_id', how='left')
    )

# Ref data ile birleştir
ref_data_v5 = ref_data_v4.merge(customer_last_feats_month, on='cust_id', how='left')

# Yeni feature’ları göster
new_feats = [c for c in ref_data_v5.columns if c.endswith('_sum') or c.endswith('_mean')]
print("Yeni feature'lar eklendi:", new_feats)


Processing last 1 month(s)
Processing last 3 month(s)
Processing last 6 month(s)
Processing last 12 month(s)
Processing last 24 month(s)
Yeni feature'lar eklendi: ['mobile_eft_all_cnt_mean', 'active_product_category_nbr_mean', 'mobile_eft_all_amt_mean', 'cc_transaction_all_amt_mean', 'cc_transaction_all_cnt_mean', 'mobile_eft_all_cnt_last_3_event_sum', 'active_product_category_nbr_last_3_event_sum', 'mobile_eft_all_amt_last_3_event_sum', 'cc_transaction_all_amt_last_3_event_sum', 'cc_transaction_all_cnt_last_3_event_sum', 'mobile_eft_all_amt_last_3_event_mean', 'cc_transaction_all_amt_last_3_event_mean', 'mobile_eft_all_cnt_last_5_event_sum', 'active_product_category_nbr_last_5_event_sum', 'mobile_eft_all_amt_last_5_event_sum', 'cc_transaction_all_amt_last_5_event_sum', 'cc_transaction_all_cnt_last_5_event_sum', 'mobile_eft_all_amt_last_5_event_mean', 'cc_transaction_all_amt_last_5_event_mean', 'mobile_eft_all_cnt_last_7_event_sum', 'active_product_category_nbr_last_7_event_sum', 'mobi

In [11]:
import pandas as pd
import numpy as np

month_windows = [3, 6, 9, 12, 24]
numeric_cols = ['mobile_eft_all_cnt', 'mobile_eft_all_amt', 
                'cc_transaction_all_amt', 'cc_transaction_all_cnt']

# --- Ortalama feature'lar ---
customer_avg_feats = pd.DataFrame({'cust_id': customer_history['cust_id'].unique()})

last_dates = customer_history.groupby('cust_id')['date'].max().reset_index()
last_dates = last_dates.rename(columns={'date': 'last_date'})

customer_history_with_last = customer_history.merge(last_dates, on='cust_id', how='left')

for m in month_windows:
    print(f"Processing last {m} month(s)")
    temp = customer_history_with_last.copy()
    temp = temp[temp['date'] >= (temp['last_date'] - pd.DateOffset(months=m))]

    temp_agg = temp.groupby('cust_id')[numeric_cols].sum().reset_index()

    temp_agg[f'mobile_eft_avg_amt_last_{m}m'] = temp_agg['mobile_eft_all_amt'] / temp_agg['mobile_eft_all_cnt']
    temp_agg[f'cc_transaction_avg_amt_last_{m}m'] = temp_agg['cc_transaction_all_amt'] / temp_agg['cc_transaction_all_cnt']

    temp_agg.fillna(0, inplace=True)
    temp_agg.replace([float('inf'), -float('inf')], 0, inplace=True)

    temp_agg = temp_agg[['cust_id', f'mobile_eft_avg_amt_last_{m}m', f'cc_transaction_avg_amt_last_{m}m']]

    customer_avg_feats = customer_avg_feats.merge(temp_agg, on='cust_id', how='left')

# --- Ref data merge ---
ref_data_v6 = ref_data_v5.merge(customer_avg_feats, on='cust_id', how='left')

# =====================================================================
# === BURADAN İTİBAREN VOLATILITE HESABI EKLENDİ ======================
# =====================================================================

print("\nVolatility hesaplama başlıyor...\n")

# Tarih tipleri
customer_history['date'] = pd.to_datetime(customer_history['date'])
customer_history['year_month'] = customer_history['date'].dt.to_period('M')

# Aylık bazda toplama
monthly = (
    customer_history
    .groupby(['cust_id', 'year_month'])
    [['mobile_eft_all_cnt', 'mobile_eft_all_amt', 
      'cc_transaction_all_amt', 'cc_transaction_all_cnt']]
    .sum()
    .reset_index()
)
monthly['year_month'] = monthly['year_month'].dt.to_timestamp()

# Son tarihleri tekrar ekle
last_dates = customer_history.groupby('cust_id')['date'].max().reset_index().rename(columns={'date': 'last_date'})
monthly = monthly.merge(last_dates, on='cust_id', how='left')

# Boş DF
customer_vol_feats = pd.DataFrame({'cust_id': monthly['cust_id'].unique()})

for m in month_windows:
    print(f"Processing volatility for last {m} month(s)")
    temp = monthly[monthly['year_month'] >= (monthly['last_date'] - pd.DateOffset(months=m))].copy()

    # std ve mean hesapla (amt ve cnt için ayrı ayrı)
    temp_std = (
        temp.groupby('cust_id')[['mobile_eft_all_cnt', 'mobile_eft_all_amt',
                                 'cc_transaction_all_cnt', 'cc_transaction_all_amt']]
        .std()
        .reset_index()
        .rename(columns={
            'mobile_eft_all_cnt': f'mobile_eft_cnt_volatility_last_{m}m',
            'mobile_eft_all_amt': f'mobile_eft_amt_volatility_last_{m}m',
            'cc_transaction_all_cnt': f'cc_tx_cnt_volatility_last_{m}m',
            'cc_transaction_all_amt': f'cc_tx_amt_volatility_last_{m}m'
        })
    )

    temp_std.fillna(0, inplace=True)
    customer_vol_feats = customer_vol_feats.merge(temp_std, on='cust_id', how='left')

# Son merge
ref_data_v7 = ref_data_v6.merge(customer_vol_feats, on='cust_id', how='left')

Processing last 3 month(s)
Processing last 6 month(s)
Processing last 9 month(s)
Processing last 12 month(s)
Processing last 24 month(s)

Volatility hesaplama başlıyor...

Processing volatility for last 3 month(s)
Processing volatility for last 6 month(s)
Processing volatility for last 9 month(s)
Processing volatility for last 12 month(s)
Processing volatility for last 24 month(s)


## Modelleme

In [12]:
ref_final = ref_data_v7
ref_final

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,cust_id,ref_date,churn,split,year,month,day,weekday,dayofyear,month_sin,...,cc_tx_cnt_volatility_last_9m,cc_tx_amt_volatility_last_9m,mobile_eft_cnt_volatility_last_12m,mobile_eft_amt_volatility_last_12m,cc_tx_cnt_volatility_last_12m,cc_tx_amt_volatility_last_12m,mobile_eft_cnt_volatility_last_24m,mobile_eft_amt_volatility_last_24m,cc_tx_cnt_volatility_last_24m,cc_tx_amt_volatility_last_24m
0,0,2017-09-01,0.0,Train,2017,9,1,4,244,-1.000000,...,0.000000,0.000000,1.182132,63.881875,0.000000,0.000000,1.220851,60.269572,0.000000,0.000000
1,3,2018-10-01,0.0,Train,2018,10,1,0,274,-0.866025,...,8.355969,333.977211,1.182132,152.909429,9.898200,421.184932,0.918332,124.307026,10.603144,495.617714
2,5,2018-03-01,1.0,Train,2018,3,1,3,60,1.000000,...,1.135292,2.277489,1.260850,361.012044,1.165751,2.477156,1.474223,358.930488,13.440362,79.657908
3,6,2018-04-01,1.0,Train,2018,4,1,6,91,0.866025,...,8.987028,63.529787,0.854850,145.779818,9.899495,74.914229,2.888483,371.072171,7.721615,58.448003
4,7,2018-05-01,0.0,Train,2018,5,1,1,121,0.500000,...,11.726513,800.765954,0.000000,0.000000,19.295276,1930.738768,1.458310,13.739859,30.478244,1928.227713
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176288,199951,2019-03-01,,Test,2019,3,1,4,60,1.000000,...,3.552777,174.477503,1.250641,899.336635,3.341656,251.349738,1.227464,938.653683,11.600718,242.274258
176289,199952,2019-05-01,,Test,2019,5,1,2,121,0.500000,...,5.513620,549.295120,0.660225,37.244852,9.579626,737.114421,0.812404,27.527065,16.467443,1598.406962
176290,199963,2019-05-01,,Test,2019,5,1,2,121,0.500000,...,14.712429,566.391210,2.366974,22.879275,13.418797,507.336335,2.533114,20.720623,11.518102,556.275042
176291,199964,2019-03-01,,Test,2019,3,1,4,60,1.000000,...,6.945022,855.419619,1.601282,101.654487,15.759409,901.567506,1.457166,91.121335,31.245373,1487.393394


In [None]:
DROP_COLS = ['cust_id','ref_date','split']
train_data_final = ref_final[ref_final['split'] == "Train"].drop(DROP_COLS,axis=1)
test_data_final = ref_final[ref_final['split'] == "Test"].drop(DROP_COLS + ['churn'],axis=1)
train_data_final[train_data_final.select_dtypes(exclude=['object','category']).columns] = train_data_final.select_dtypes(exclude=['object','category']).fillna(-9999)
test_data_final[test_data_final.select_dtypes(exclude=['object','category']).columns] = test_data_final.select_dtypes(exclude=['object','category']).fillna(-9999)

In [2]:
import pandas as pd
import numpy as np
train_data_final = pd.read_parquet('/kaggle/input/ing-my-feat-v1/train_data_v7.parquet')
test_data_final = pd.read_parquet('/kaggle/input/ing-my-feat-v1/test_data_v7.parquet')

In [7]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
import numpy as np
from catboost import CatBoostClassifier
import lightgbm as lgb

# ---- Veri ----
target_col = 'churn'
cat_features = ['gender', 'province', 'religion', 'work_type', 'work_sector']

df = train_data_final.copy()
X_test = test_data_final.copy()
X = df.drop(target_col, axis=1)
y = df[target_col]

# Stratified 5-fold
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# ---- Ortak Parametreler ----
catboost_params = {
    'iterations': 1500,
    'depth': 6,
    'learning_rate': 0.02,      # 🔹 Güncellendi
    'l2_leaf_reg': 3,
    'border_count': 128,
    'random_seed': 42,
    'eval_metric': 'AUC',
    'verbose': 0,
    'task_type': 'GPU'
}

lgb_params = {
    'n_estimators': 1500,
    'learning_rate': 0.02,      # 🔹 Güncellendi
    'max_depth': 6,
    'reg_lambda': 3,
    'objective': 'binary',
    'metric': 'auc',
    'boosting_type': 'gbdt',
    'random_state': 42,
    'verbosity': -1,
    'device': 'gpu'
}

# ---- Model listesi ----
models = {
    "CatBoost": ("catboost", catboost_params),
    "LightGBM": ("lgbm", lgb_params)
}

# Sonuçları saklamak için
all_test_preds = []
all_auc_means = {}
all_custom_means = {}

# ---- Eğitim ve değerlendirme ----
for model_name, (model_type, params) in models.items():
    print(f"\n\n================= {model_name} =================")

    auc_scores, custom_scores, test_preds_list = [], [], []

    for fold, (train_idx, val_idx) in enumerate(kf.split(X, y), 1):
        print(f"\n----- Fold {fold} -----")
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        # Model seçimi
        if model_type == "catboost":
            model = CatBoostClassifier(**params)
            model.fit(
                X_train, y_train,
                cat_features=cat_features,
                eval_set=(X_val, y_val),
                early_stopping_rounds=400,   # 🔹 Güncellendi
                verbose=0
            )

        elif model_type == "lgbm":
            model = lgb.LGBMClassifier(**params)
            model.fit(
                X_train, y_train,
                eval_set=[(X_val, y_val)],
                callbacks=[lgb.early_stopping(400, verbose=False)]   # 🔹 Güncellendi
            )

        # Tahminler
        y_val_pred = model.predict_proba(X_val)[:, 1]
        auc = roc_auc_score(y_val, y_val_pred)
        custom_score = ing_hubs_datathon_metric(y_val, y_val_pred)

        auc_scores.append(auc)
        custom_scores.append(custom_score)

        print(f"Fold AUC: {auc:.4f} | Custom: {custom_score:.4f}")

        test_preds_list.append(model.predict_proba(X_test)[:, 1])

    # Ortalama skorlar
    mean_auc = np.mean(auc_scores)
    mean_custom = np.mean(custom_scores)

    print(f"\n>>> {model_name} Mean CV AUC: {mean_auc:.4f}")
    print(f">>> {model_name} Mean CV Custom: {mean_custom:.4f}")

    # Test tahminlerinin ortalaması
    y_test_pred = np.mean(test_preds_list, axis=0)
    all_test_preds.append((model_name, y_test_pred))
    all_auc_means[model_name] = mean_auc
    all_custom_means[model_name] = mean_custom

# ---- Ensemble (CatBoost %60 + LightGBM %40) ----
cat_pred = dict(all_test_preds)["CatBoost"]
lgb_pred = dict(all_test_preds)["LightGBM"]

ensemble_pred = 0.6 * cat_pred + 0.4 * lgb_pred

print("\n\n================= 🧠 ENSEMBLE (CatBoost %60 + LightGBM %40) =================")
print(f"Ortalama Model AUC: {np.mean(list(all_auc_means.values())):.4f}")
print(f"Ortalama Model Custom: {np.mean(list(all_custom_means.values())):.4f}")
print("Ensemble test tahminleri hazır (ağırlıklı ortalama: 60/40).")





----- Fold 1 -----


Default metric period is 5 because AUC is/are not implemented for GPU


Fold AUC: 0.7261 | Custom: 1.2039

----- Fold 2 -----


Default metric period is 5 because AUC is/are not implemented for GPU


Fold AUC: 0.7200 | Custom: 1.1532

----- Fold 3 -----


Default metric period is 5 because AUC is/are not implemented for GPU


Fold AUC: 0.7188 | Custom: 1.1750

----- Fold 4 -----


Default metric period is 5 because AUC is/are not implemented for GPU


Fold AUC: 0.7217 | Custom: 1.1757

----- Fold 5 -----


Default metric period is 5 because AUC is/are not implemented for GPU


Fold AUC: 0.7274 | Custom: 1.1927

>>> CatBoost Mean CV AUC: 0.7228
>>> CatBoost Mean CV Custom: 1.1801



----- Fold 1 -----




Fold AUC: 0.7230 | Custom: 1.1932

----- Fold 2 -----
Fold AUC: 0.7174 | Custom: 1.1522

----- Fold 3 -----
Fold AUC: 0.7147 | Custom: 1.1542

----- Fold 4 -----
Fold AUC: 0.7198 | Custom: 1.1597

----- Fold 5 -----
Fold AUC: 0.7243 | Custom: 1.1906

>>> LightGBM Mean CV AUC: 0.7198
>>> LightGBM Mean CV Custom: 1.1700


Ortalama Model AUC: 0.7213
Ortalama Model Custom: 1.1750
Ensemble test tahminleri hazır (ağırlıklı ortalama: 60/40).


In [8]:
ensemble_pred

array([0.11811363, 0.12223906, 0.24734256, ..., 0.12028461, 0.03162989,
       0.08424188])

In [None]:
import pandas as pd
submission = pd.read_csv('/kaggle/input/ing-hubs-turkiye-datathon/sample_submission.csv')
submission['churn'] = ensemble_pred

In [None]:
submission.to_csv('catboost_60_lgbm_40_lr_002.csv',index=False)