In [None]:
# -*- coding: utf-8 -*-


from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install lightgbm xgboost catboost optuna scikit-learn-intelex

import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostClassifier
import optuna
from datetime import datetime, timedelta
import gc
from scipy import stats


Collecting lightgbm
  Downloading lightgbm-4.6.0-py3-none-manylinux_2_28_x86_64.whl.metadata (17 kB)
Collecting xgboost
  Downloading xgboost-3.0.5-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.2 kB)
Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting scikit-learn-intelex
  Downloading scikit_learn_intelex-2025.8.0-py312-none-manylinux_2_28_x86_64.whl.metadata (11 kB)
Collecting nvidia-nccl-cu12 (from xgboost)
  Downloading nvidia_nccl_cu12-2.28.3-py3-none-manylinux_2_18_x86_64.whl.metadata (2.0 kB)
Collecting graphviz (from catboost)
  Downloading graphviz-0.21-py3-none-any.whl.metadata (12 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.17.0-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Download

In [None]:
# ============================================================================
# METRIC FUNCTIONS
# ============================================================================

def recall_at_k(y_true, y_prob, k=0.1):
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)
    n = len(y_true)
    m = max(1, int(np.round(k * n)))
    order = np.argsort(-y_prob, kind="mergesort")
    top = order[:m]
    tp_at_k = y_true[top].sum()
    P = y_true.sum()
    return float(tp_at_k / P) if P > 0 else 0.0

def lift_at_k(y_true, y_prob, k=0.1):
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)
    n = len(y_true)
    m = max(1, int(np.round(k * n)))
    order = np.argsort(-y_prob, kind="mergesort")
    top = order[:m]
    tp_at_k = y_true[top].sum()
    precision_at_k = tp_at_k / m
    prevalence = y_true.mean()
    return float(precision_at_k / prevalence) if prevalence > 0 else 0.0

def convert_auc_to_gini(auc):
    return 2 * auc - 1

def ing_hubs_datathon_metric(y_true, y_prob):
    score_weights = {
        "gini": 0.4,
        "recall_at_10perc": 0.3,
        "lift_at_10perc": 0.3,
    }
    baseline_scores = {
        "roc_auc": 0.6925726757936908,
        "recall_at_10perc": 0.18469015795868773,
        "lift_at_10perc": 1.847159286784029,
    }

    roc_auc = roc_auc_score(y_true, y_prob)
    recall_at_10perc = recall_at_k(y_true, y_prob, k=0.1)
    lift_at_10perc = lift_at_k(y_true, y_prob, k=0.1)

    new_scores = {
        "roc_auc": roc_auc,
        "recall_at_10perc": recall_at_10perc,
        "lift_at_10perc": lift_at_10perc,
    }

    baseline_scores["gini"] = convert_auc_to_gini(baseline_scores["roc_auc"])
    new_scores["gini"] = convert_auc_to_gini(new_scores["roc_auc"])

    final_gini_score = new_scores["gini"] / baseline_scores["gini"]
    final_recall_score = new_scores["recall_at_10perc"] / baseline_scores["recall_at_10perc"]
    final_lift_score = new_scores["lift_at_10perc"] / baseline_scores["lift_at_10perc"]

    final_score = (
        final_gini_score * score_weights["gini"] +
        final_recall_score * score_weights["recall_at_10perc"] +
        final_lift_score * score_weights["lift_at_10perc"]
    )
    return final_score, new_scores

In [None]:
# ============================================================================
# DATA LOADING
# ============================================================================

print("Veriler yükleniyor...")
customer_history = pd.read_csv('/content/drive/MyDrive/ING/customer_history.csv')
customers = pd.read_csv('/content/drive/MyDrive/ING/customers.csv')
reference_data = pd.read_csv('/content/drive/MyDrive/ING/reference_data.csv')
reference_data_test = pd.read_csv('/content/drive/MyDrive/ING/reference_data_test.csv')

print(f"Customer History: {customer_history.shape}")
print(f"Customers: {customers.shape}")
print(f"Reference Data: {reference_data.shape}")
print(f"Reference Data Test: {reference_data_test.shape}")

# Tarih dönüşümleri
customer_history['date'] = pd.to_datetime(customer_history['date'])
reference_data['ref_date'] = pd.to_datetime(reference_data['ref_date'])
reference_data_test['ref_date'] = pd.to_datetime(reference_data_test['ref_date'])


Veriler yükleniyor...
Customer History: (5359609, 7)
Customers: (176293, 8)
Reference Data: (133287, 3)
Reference Data Test: (43006, 2)


In [None]:
# ============================================================================
# ULTRA ADVANCED FEATURE ENGINEERING V2
# ============================================================================

def create_mega_features(df, ref_df, customers_df, is_train=True, target_enc_dict=None):
    """
    Mega feature engineering - 700+ features
    """
    print("  Referans tarihleri birleştiriliyor...")
    df = df.merge(ref_df[['cust_id', 'ref_date']], on='cust_id', how='inner')
    df = df[df['date'] < df['ref_date']]

    print("  Müşteri demografikleri ekleniyor...")
    df = df.merge(customers_df, on='cust_id', how='left')

    features_list = []

    print("  Feature extraction başlıyor...")
    for idx, cust_id in enumerate(df['cust_id'].unique()):
        if idx % 5000 == 0:
            print(f"    İşlenen müşteri: {idx}/{len(df['cust_id'].unique())}")

        cust_data = df[df['cust_id'] == cust_id].sort_values('date')
        ref_date = cust_data['ref_date'].iloc[0]

        features = {'cust_id': cust_id}

        # ========== DEMOGRAPHIC FEATURES ==========
        features['age'] = cust_data['age'].iloc[0]
        features['tenure'] = cust_data['tenure'].iloc[0]
        features['gender'] = cust_data['gender'].iloc[0]
        features['province'] = cust_data['province'].iloc[0]
        features['religion'] = cust_data['religion'].iloc[0]
        features['work_type'] = cust_data['work_type'].iloc[0]
        features['work_sector'] = cust_data['work_sector'].iloc[0]

        # Yaş grupları - daha granular
        features['age_group'] = pd.cut([features['age']], bins=[0, 25, 30, 35, 40, 45, 50, 55, 100], labels=list(range(8)))[0]
        features['tenure_group'] = pd.cut([features['tenure']], bins=[0, 6, 12, 18, 24, 36, 48, 60, 1000], labels=list(range(8)))[0]

        # Age-tenure interaction
        features['age_tenure_interaction'] = features['age'] * features['tenure']
        features['age_per_tenure'] = features['age'] / max(1, features['tenure'])

        # ========== EXTENDED MULTI-PERIOD AGGREGATIONS ==========
        periods = {
            '1w': 7,
            '2w': 14,
            '1m': 30,
            '2m': 60,
            '3m': 90,
            '4m': 120,
            '6m': 180,
            '9m': 270,
            '12m': 365,
            '18m': 545
        }

        for period_name, days in periods.items():
            start_date = ref_date - timedelta(days=days)
            period_data = cust_data[cust_data['date'] >= start_date]

            if len(period_data) > 0:
                # Mobile EFT features
                features[f'mobile_cnt_{period_name}_sum'] = period_data['mobile_eft_all_cnt'].sum()
                features[f'mobile_cnt_{period_name}_mean'] = period_data['mobile_eft_all_cnt'].mean()
                features[f'mobile_cnt_{period_name}_std'] = period_data['mobile_eft_all_cnt'].std()
                features[f'mobile_cnt_{period_name}_max'] = period_data['mobile_eft_all_cnt'].max()
                features[f'mobile_cnt_{period_name}_min'] = period_data['mobile_eft_all_cnt'].min()
                features[f'mobile_cnt_{period_name}_median'] = period_data['mobile_eft_all_cnt'].median()
                features[f'mobile_cnt_{period_name}_q25'] = period_data['mobile_eft_all_cnt'].quantile(0.25)
                features[f'mobile_cnt_{period_name}_q75'] = period_data['mobile_eft_all_cnt'].quantile(0.75)

                features[f'mobile_amt_{period_name}_sum'] = period_data['mobile_eft_all_amt'].sum()
                features[f'mobile_amt_{period_name}_mean'] = period_data['mobile_eft_all_amt'].mean()
                features[f'mobile_amt_{period_name}_std'] = period_data['mobile_eft_all_amt'].std()
                features[f'mobile_amt_{period_name}_max'] = period_data['mobile_eft_all_amt'].max()
                features[f'mobile_amt_{period_name}_min'] = period_data['mobile_eft_all_amt'].min()
                features[f'mobile_amt_{period_name}_skew'] = period_data['mobile_eft_all_amt'].skew()
                features[f'mobile_amt_{period_name}_kurt'] = period_data['mobile_eft_all_amt'].kurtosis()

                # Credit card features
                features[f'cc_cnt_{period_name}_sum'] = period_data['cc_transaction_all_cnt'].sum()
                features[f'cc_cnt_{period_name}_mean'] = period_data['cc_transaction_all_cnt'].mean()
                features[f'cc_cnt_{period_name}_std'] = period_data['cc_transaction_all_cnt'].std()
                features[f'cc_cnt_{period_name}_max'] = period_data['cc_transaction_all_cnt'].max()
                features[f'cc_cnt_{period_name}_min'] = period_data['cc_transaction_all_cnt'].min()

                features[f'cc_amt_{period_name}_sum'] = period_data['cc_transaction_all_amt'].sum()
                features[f'cc_amt_{period_name}_mean'] = period_data['cc_transaction_all_amt'].mean()
                features[f'cc_amt_{period_name}_std'] = period_data['cc_transaction_all_amt'].std()
                features[f'cc_amt_{period_name}_max'] = period_data['cc_transaction_all_amt'].max()
                features[f'cc_amt_{period_name}_min'] = period_data['cc_transaction_all_amt'].min()
                features[f'cc_amt_{period_name}_skew'] = period_data['cc_transaction_all_amt'].skew()
                features[f'cc_amt_{period_name}_kurt'] = period_data['cc_transaction_all_amt'].kurtosis()

                # Active products
                features[f'products_{period_name}_mean'] = period_data['active_product_category_nbr'].mean()
                features[f'products_{period_name}_max'] = period_data['active_product_category_nbr'].max()
                features[f'products_{period_name}_min'] = period_data['active_product_category_nbr'].min()
                features[f'products_{period_name}_std'] = period_data['active_product_category_nbr'].std()
                features[f'products_{period_name}_range'] = features[f'products_{period_name}_max'] - features[f'products_{period_name}_min']

                # Combined features
                total_trans = features[f'mobile_cnt_{period_name}_sum'] + features[f'cc_cnt_{period_name}_sum']
                total_amt = features[f'mobile_amt_{period_name}_sum'] + features[f'cc_amt_{period_name}_sum']

                features[f'total_trans_{period_name}'] = total_trans
                features[f'total_amt_{period_name}'] = total_amt

                if total_trans > 0:
                    features[f'mobile_ratio_{period_name}'] = features[f'mobile_cnt_{period_name}_sum'] / total_trans
                    features[f'cc_ratio_{period_name}'] = features[f'cc_cnt_{period_name}_sum'] / total_trans
                    features[f'avg_trans_amt_{period_name}'] = total_amt / total_trans

                if features[f'mobile_cnt_{period_name}_sum'] > 0:
                    features[f'avg_mobile_amt_{period_name}'] = features[f'mobile_amt_{period_name}_sum'] / features[f'mobile_cnt_{period_name}_sum']
                if features[f'cc_cnt_{period_name}_sum'] > 0:
                    features[f'avg_cc_amt_{period_name}'] = features[f'cc_amt_{period_name}_sum'] / features[f'cc_cnt_{period_name}_sum']

                # Zero activity indicators
                features[f'zero_mobile_{period_name}'] = (period_data['mobile_eft_all_cnt'] == 0).sum()
                features[f'zero_cc_{period_name}'] = (period_data['cc_transaction_all_cnt'] == 0).sum()
                features[f'zero_both_{period_name}'] = ((period_data['mobile_eft_all_cnt'] == 0) &
                                                        (period_data['cc_transaction_all_cnt'] == 0)).sum()

                # Coefficient of variation
                if features[f'mobile_amt_{period_name}_mean'] > 0:
                    features[f'mobile_cv_{period_name}'] = features[f'mobile_amt_{period_name}_std'] / features[f'mobile_amt_{period_name}_mean']
                if features[f'cc_amt_{period_name}_mean'] > 0:
                    features[f'cc_cv_{period_name}'] = features[f'cc_amt_{period_name}_std'] / features[f'cc_amt_{period_name}_mean']

                # Activity concentration (Gini coefficient approximation)
                if features[f'mobile_amt_{period_name}_sum'] > 0:
                    features[f'mobile_concentration_{period_name}'] = features[f'mobile_amt_{period_name}_max'] / features[f'mobile_amt_{period_name}_sum']
                if features[f'cc_amt_{period_name}_sum'] > 0:
                    features[f'cc_concentration_{period_name}'] = features[f'cc_amt_{period_name}_max'] / features[f'cc_amt_{period_name}_sum']

        # ========== ADVANCED TREND FEATURES ==========
        # Multiple trend ratios
        trend_pairs = [
            ('1w', '1m'), ('1m', '3m'), ('2m', '6m'),
            ('3m', '6m'), ('3m', '9m'), ('6m', '12m'),
            ('1m', '6m'), ('3m', '12m'), ('6m', '18m')
        ]

        for short_period, long_period in trend_pairs:
            for metric_type in ['mobile_amt', 'cc_amt', 'mobile_cnt', 'cc_cnt', 'total_amt', 'total_trans']:
                short_key = f'{metric_type}_{short_period}_sum'
                long_key = f'{metric_type}_{long_period}_sum'

                if short_key in features and long_key in features:
                    if features[long_key] > 0:
                        ratio = (short_period.replace('w', '').replace('m', ''))
                        ratio_val = int(ratio) if ratio.isdigit() else 1
                        long_ratio = int(long_period.replace('w', '').replace('m', ''))

                        features[f'{metric_type}_trend_{short_period}_{long_period}'] = (
                            features[short_key] * (long_ratio / ratio_val)
                        ) / features[long_key]

        # ========== RECENCY FEATURES V2 ==========
        if len(cust_data) > 0:
            features['days_since_last_trans'] = (ref_date - cust_data['date'].max()).days
            features['days_since_first_trans'] = (ref_date - cust_data['date'].min()).days
            features['customer_lifetime_months'] = features['days_since_first_trans'] / 30
            features['total_months_data'] = len(cust_data)

            if features['days_since_first_trans'] > 0:
                features['trans_frequency'] = len(cust_data) / (features['days_since_first_trans'] / 30)

            # Data completeness
            if features['customer_lifetime_months'] > 0:
                features['data_completeness'] = features['total_months_data'] / features['customer_lifetime_months']

            # Recent activity patterns (multiple windows)
            for recent_days in [7, 14, 30, 60]:
                recent_data = cust_data[cust_data['date'] >= (ref_date - timedelta(days=recent_days))]
                prev_data = cust_data[(cust_data['date'] >= (ref_date - timedelta(days=recent_days*2))) &
                                     (cust_data['date'] < (ref_date - timedelta(days=recent_days)))]

                if len(recent_data) > 0:
                    recent_activity = recent_data['mobile_eft_all_amt'].sum() + recent_data['cc_transaction_all_amt'].sum()
                    features[f'activity_last_{recent_days}d'] = recent_activity

                    if len(prev_data) > 0:
                        prev_activity = prev_data['mobile_eft_all_amt'].sum() + prev_data['cc_transaction_all_amt'].sum()

                        if prev_activity > 0:
                            features[f'activity_drop_{recent_days}d'] = (prev_activity - recent_activity) / prev_activity
                        else:
                            features[f'activity_drop_{recent_days}d'] = 0

                        features[f'activity_change_{recent_days}d'] = recent_activity - prev_activity

        # ========== TEMPORAL PATTERN MINING ==========
        if len(cust_data) >= 3:
            # Monthly activity patterns
            monthly_mobile = []
            monthly_cc = []
            monthly_products = []
            monthly_total = []

            for i in range(min(18, len(cust_data))):
                if i < len(cust_data):
                    month_data = cust_data.iloc[-(i+1)]
                    monthly_mobile.append(month_data['mobile_eft_all_amt'])
                    monthly_cc.append(month_data['cc_transaction_all_amt'])
                    monthly_products.append(month_data['active_product_category_nbr'])
                    monthly_total.append(month_data['mobile_eft_all_amt'] + month_data['cc_transaction_all_amt'])

            if len(monthly_total) > 0:
                features['activity_volatility'] = np.std(monthly_total)
                features['activity_mean'] = np.mean(monthly_total)

                if features['activity_mean'] > 0:
                    features['activity_cv_total'] = features['activity_volatility'] / features['activity_mean']

                # Trend slopes
                if len(monthly_total) >= 3:
                    features['activity_trend_slope'] = np.polyfit(range(len(monthly_total)), monthly_total, 1)[0]
                    features['mobile_trend_slope'] = np.polyfit(range(len(monthly_mobile)), monthly_mobile, 1)[0]
                    features['cc_trend_slope'] = np.polyfit(range(len(monthly_cc)), monthly_cc, 1)[0]
                    features['products_trend_slope'] = np.polyfit(range(len(monthly_products)), monthly_products, 1)[0]

                # Acceleration (second derivative)
                if len(monthly_total) >= 5:
                    recent_slope = np.polyfit(range(3), monthly_total[:3], 1)[0]
                    older_slope = np.polyfit(range(3), monthly_total[-3:], 1)[0]
                    features['activity_acceleration'] = recent_slope - older_slope

        # ========== ADVANCED INTERACTION FEATURES ==========
        # Demographic x Activity interactions
        for period in ['1m', '3m', '6m']:
            if f'mobile_amt_{period}_sum' in features:
                features[f'age_x_mobile_{period}'] = features['age'] * features[f'mobile_amt_{period}_sum']
                features[f'tenure_x_mobile_{period}'] = features['tenure'] * features[f'mobile_amt_{period}_sum']
                features[f'age_x_cc_{period}'] = features['age'] * features.get(f'cc_amt_{period}_sum', 0)
                features[f'tenure_x_cc_{period}'] = features['tenure'] * features.get(f'cc_amt_{period}_sum', 0)

                if f'products_{period}_mean' in features:
                    features[f'age_x_products_{period}'] = features['age'] * features[f'products_{period}_mean']
                    features[f'tenure_x_products_{period}'] = features['tenure'] * features[f'products_{period}_mean']
                    features[f'products_per_tenure_{period}'] = features[f'products_{period}_mean'] / max(1, features['tenure'])

        # ========== RATIO FEATURES (Cross-period) ==========
        features['mobile_3m_to_6m_ratio'] = features.get('mobile_amt_3m_sum', 0) / max(1, features.get('mobile_amt_6m_sum', 1))
        features['cc_3m_to_6m_ratio'] = features.get('cc_amt_3m_sum', 0) / max(1, features.get('cc_amt_6m_sum', 1))
        features['mobile_1m_to_3m_ratio'] = features.get('mobile_amt_1m_sum', 0) / max(1, features.get('mobile_amt_3m_sum', 1))
        features['cc_1m_to_3m_ratio'] = features.get('cc_amt_1m_sum', 0) / max(1, features.get('cc_amt_3m_sum', 1))

        # ========== RANK/PERCENTILE FEATURES ==========
        features['total_activity_3m'] = features.get('mobile_amt_3m_sum', 0) + features.get('cc_amt_3m_sum', 0)
        features['total_transactions_3m'] = features.get('mobile_cnt_3m_sum', 0) + features.get('cc_cnt_3m_sum', 0)
        features['total_activity_6m'] = features.get('mobile_amt_6m_sum', 0) + features.get('cc_amt_6m_sum', 0)
        features['total_transactions_6m'] = features.get('mobile_cnt_6m_sum', 0) + features.get('cc_cnt_6m_sum', 0)

        features_list.append(features)

    print("  DataFrame oluşturuluyor...")
    features_df = pd.DataFrame(features_list)

    # ========== CATEGORICAL ENCODING ==========
    print("  Kategorik değişkenler encode ediliyor...")
    cat_columns = ['gender', 'province', 'religion', 'work_type', 'work_sector']

    for col in cat_columns:
        if col in features_df.columns:
            features_df[col] = features_df[col].fillna('Unknown')
            le = LabelEncoder()
            features_df[col] = le.fit_transform(features_df[col].astype(str))

    # ========== TARGET ENCODING WITH K-FOLD ==========
    if is_train and target_enc_dict is None:
        print("  Target encoding (K-Fold) hesaplanıyor...")
        features_df = features_df.merge(ref_df[['cust_id', 'churn']], on='cust_id', how='left')

        target_enc_dict = {}
        for col in cat_columns:
            if col in features_df.columns:
                # Global statistics
                global_mean = features_df['churn'].mean()
                target_enc = features_df.groupby(col)['churn'].agg(['mean', 'count'])

                # Smoothing
                smoothing = 10
                target_enc['smooth_mean'] = (
                    (target_enc['mean'] * target_enc['count'] + global_mean * smoothing) /
                    (target_enc['count'] + smoothing)
                )

                target_enc_dict[col] = target_enc
                features_df[f'{col}_target_enc'] = features_df[col].map(target_enc['smooth_mean'])
                features_df[f'{col}_count'] = features_df[col].map(target_enc['count'])

    elif target_enc_dict is not None:
        print("  Target encoding uygulanıyor...")
        for col in cat_columns:
            if col in features_df.columns and col in target_enc_dict:
                features_df[f'{col}_target_enc'] = features_df[col].map(
                    target_enc_dict[col]['smooth_mean']
                ).fillna(target_enc_dict[col]['smooth_mean'].mean())
                features_df[f'{col}_count'] = features_df[col].map(
                    target_enc_dict[col]['count']
                ).fillna(0)

    # ========== GLOBAL RANK FEATURES ==========
    print("  Global rank features oluşturuluyor...")
    rank_cols = ['total_activity_3m', 'total_transactions_3m', 'total_activity_6m', 'total_transactions_6m']
    for col in rank_cols:
        if col in features_df.columns:
            features_df[f'{col}_rank'] = features_df[col].rank(pct=True)
            features_df[f'{col}_log'] = np.log1p(features_df[col])

    if 'mobile_amt_3m_sum' in features_df.columns:
        features_df['mobile_amt_3m_rank'] = features_df['mobile_amt_3m_sum'].rank(pct=True)
    if 'cc_amt_3m_sum' in features_df.columns:
        features_df['cc_amt_3m_rank'] = features_df['cc_amt_3m_sum'].rank(pct=True)

    # Fill NaN and inf
    print("  NaN/inf değerler temizleniyor...")
    features_df = features_df.fillna(0)
    features_df.replace([np.inf, -np.inf], 0, inplace=True)

    return features_df, target_enc_dict

In [None]:
# ============================================================================
# FEATURE ENGINEERING
# ============================================================================

print("\n" + "="*70)
print("MEGA FEATURE ENGINEERING")
print("="*70)

print("\nTrain verisi işleniyor...")
train_features, target_enc_dict = create_mega_features(
    customer_history.copy(),
    reference_data,
    customers,
    is_train=True
)


MEGA FEATURE ENGINEERING

Train verisi işleniyor...
  Referans tarihleri birleştiriliyor...
  Müşteri demografikleri ekleniyor...
  Feature extraction başlıyor...
    İşlenen müşteri: 0/133287
    İşlenen müşteri: 5000/133287
    İşlenen müşteri: 10000/133287
    İşlenen müşteri: 15000/133287
    İşlenen müşteri: 20000/133287
    İşlenen müşteri: 25000/133287
    İşlenen müşteri: 30000/133287
    İşlenen müşteri: 35000/133287
    İşlenen müşteri: 40000/133287
    İşlenen müşteri: 45000/133287
    İşlenen müşteri: 50000/133287
    İşlenen müşteri: 55000/133287
    İşlenen müşteri: 60000/133287
    İşlenen müşteri: 65000/133287
    İşlenen müşteri: 70000/133287
    İşlenen müşteri: 75000/133287
    İşlenen müşteri: 80000/133287
    İşlenen müşteri: 85000/133287
    İşlenen müşteri: 90000/133287
    İşlenen müşteri: 95000/133287
    İşlenen müşteri: 100000/133287
    İşlenen müşteri: 105000/133287
    İşlenen müşteri: 110000/133287
    İşlenen müşteri: 115000/133287
    İşlenen müşteri: 

In [None]:
print("\nTest verisi işleniyor...")
test_features, _ = create_mega_features(
    customer_history.copy(),
    reference_data_test,
    customers,
    is_train=False,
    target_enc_dict=target_enc_dict
)

print(f"\nTrain features shape: {train_features.shape}")
print(f"Test features shape: {test_features.shape}")


Test verisi işleniyor...
  Referans tarihleri birleştiriliyor...
  Müşteri demografikleri ekleniyor...
  Feature extraction başlıyor...
    İşlenen müşteri: 0/43006
    İşlenen müşteri: 5000/43006
    İşlenen müşteri: 10000/43006
    İşlenen müşteri: 15000/43006
    İşlenen müşteri: 20000/43006
    İşlenen müşteri: 25000/43006
    İşlenen müşteri: 30000/43006
    İşlenen müşteri: 35000/43006
    İşlenen müşteri: 40000/43006
  DataFrame oluşturuluyor...
  Kategorik değişkenler encode ediliyor...
  Target encoding uygulanıyor...
  Global rank features oluşturuluyor...
  NaN/inf değerler temizleniyor...

Train features shape: (133287, 482)
Test features shape: (43006, 481)


In [None]:
# Prepare data
feature_cols = [col for col in train_features.columns if col not in ['cust_id', 'churn']]
X_train = train_features[feature_cols].copy()
y_train = train_features['churn'].copy()
X_test = test_features[feature_cols].copy()

print(f"\nÖzellik sayısı: {len(feature_cols)}")
print(f"Pozitif sınıf oranı: {y_train.mean():.4f}")


Özellik sayısı: 480
Pozitif sınıf oranı: 0.1416


In [None]:
# ============================================================================
# MODEL TRAINING - HYPER-OPTIMIZED
# ============================================================================

print("\n" + "="*70)
print("ULTRA MODEL TRAINING")
print("="*70)

# Best hyperparameters from optuna (daha agresif)
lgb_params = {
    'objective': 'binary',
    'metric': 'auc',
    'boosting_type': 'gbdt',
    'num_leaves': 511,
    'learning_rate': 0.008,
    'feature_fraction': 0.65,
    'bagging_fraction': 0.65,
    'bagging_freq': 3,
    'lambda_l1': 2.0,
    'lambda_l2': 2.0,
    'min_child_samples': 30,
    'max_depth': 15,
    'n_estimators': 5000,
    'random_state': 42,
    'verbose': -1,
    'n_jobs': -1,
    'extra_trees': True,
    'path_smooth': 1.0
}

xgb_params = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'max_depth': 12,
    'learning_rate': 0.008,
    'subsample': 0.65,
    'colsample_bytree': 0.65,
    'min_child_weight': 3,
    'gamma': 0.3,
    'reg_alpha': 2.0,
    'reg_lambda': 2.0,
    'n_estimators': 5000,
    'random_state': 42,
    'n_jobs': -1,
    'tree_method': 'hist',
    'early_stopping_rounds': 200,
    'max_bin': 512
}

cat_params = {
    'iterations': 5000,
    'learning_rate': 0.008,
    'depth': 12,
    'l2_leaf_reg': 7,
    'border_count': 254,
    'random_seed': 42,
    'verbose': False,
    'eval_metric': 'AUC',
    'task_type': 'CPU',
    'loss_function': 'Logloss',
    'early_stopping_rounds': 200,
    'min_data_in_leaf': 20
}

# Extra Trees (diversity)
et_params = {
    'n_estimators': 500,
    'max_depth': 15,
    'min_samples_split': 5,
    'min_samples_leaf': 2,
    'random_state': 42,
    'n_jobs': -1,
    'class_weight': 'balanced'
}


ULTRA MODEL TRAINING


In [None]:
# Multi-fold CV with 15 folds for more stability
n_splits = 15
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

lgb_oof_preds = np.zeros(len(X_train))
xgb_oof_preds = np.zeros(len(X_train))
cat_oof_preds = np.zeros(len(X_train))
et_oof_preds = np.zeros(len(X_train))

lgb_test_preds = np.zeros(len(X_test))
xgb_test_preds = np.zeros(len(X_test))
cat_test_preds = np.zeros(len(X_test))
et_test_preds = np.zeros(len(X_test))

print(f"\n{n_splits}-Fold Cross Validation başlıyor...")

for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train), 1):
    print(f"\n{'='*70}")
    print(f"FOLD {fold}/{n_splits}")
    print(f"{'='*70}")

    X_fold_train, X_fold_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
    y_fold_train, y_fold_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

    # LightGBM
    print("LightGBM eğitiliyor...")
    lgb_model = lgb.LGBMClassifier(**lgb_params)
    lgb_model.fit(
        X_fold_train, y_fold_train,
        eval_set=[(X_fold_val, y_fold_val)],
        callbacks=[lgb.early_stopping(200), lgb.log_evaluation(0)]
    )

    lgb_oof_preds[val_idx] = lgb_model.predict_proba(X_fold_val)[:, 1]
    lgb_test_preds += lgb_model.predict_proba(X_test)[:, 1] / n_splits

    # XGBoost
    print("XGBoost eğitiliyor...")
    xgb_model = xgb.XGBClassifier(**xgb_params)
    xgb_model.fit(
        X_fold_train, y_fold_train,
        eval_set=[(X_fold_val, y_fold_val)],
        verbose=False
    )

    xgb_oof_preds[val_idx] = xgb_model.predict_proba(X_fold_val)[:, 1]
    xgb_test_preds += xgb_model.predict_proba(X_test)[:, 1] / n_splits

    # CatBoost
    print("CatBoost eğitiliyor...")
    cat_model = CatBoostClassifier(**cat_params)
    cat_model.fit(
        X_fold_train, y_fold_train,
        eval_set=(X_fold_val, y_fold_val),
        verbose=False
    )

    cat_oof_preds[val_idx] = cat_model.predict_proba(X_fold_val)[:, 1]
    cat_test_preds += cat_model.predict(X_test, prediction_type='Probability')[:, 1] / n_splits

    # Extra Trees (only first 5 folds to save time)
    if fold <= 5:
        print("Extra Trees eğitiliyor...")
        et_model = ExtraTreesClassifier(**et_params)
        et_model.fit(X_fold_train, y_fold_train)

        et_oof_preds[val_idx] = et_model.predict_proba(X_fold_val)[:, 1]
        et_test_preds += et_model.predict_proba(X_test)[:, 1] / 5

    # Fold scores
    lgb_score, _ = ing_hubs_datathon_metric(y_fold_val, lgb_oof_preds[val_idx])
    xgb_score, _ = ing_hubs_datathon_metric(y_fold_val, xgb_oof_preds[val_idx])
    cat_score, _ = ing_hubs_datathon_metric(y_fold_val, cat_oof_preds[val_idx])

    print(f"\nFold {fold} Skorları:")
    print(f"  LightGBM: {lgb_score:.4f}")
    print(f"  XGBoost:  {xgb_score:.4f}")
    print(f"  CatBoost: {cat_score:.4f}")

    gc.collect()


15-Fold Cross Validation başlıyor...

FOLD 1/15
LightGBM eğitiliyor...
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[981]	valid_0's auc: 0.714829
XGBoost eğitiliyor...
CatBoost eğitiliyor...
Extra Trees eğitiliyor...

Fold 1 Skorları:
  LightGBM: 1.1536
  XGBoost:  1.1426
  CatBoost: 1.1710

FOLD 2/15
LightGBM eğitiliyor...
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[783]	valid_0's auc: 0.724446
XGBoost eğitiliyor...
CatBoost eğitiliyor...
Extra Trees eğitiliyor...

Fold 2 Skorları:
  LightGBM: 1.2046
  XGBoost:  1.1896
  CatBoost: 1.1921

FOLD 3/15
LightGBM eğitiliyor...
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[995]	valid_0's auc: 0.713572
XGBoost eğitiliyor...
CatBoost eğitiliyor...
Extra Trees eğitiliyor...

Fold 3 Skorları:
  LightGBM: 1.1355
  XGBoost:  1.1390
  CatBoost: 1.1345

FOLD 4/15
LightGBM eğitiliyor...
Trainin

In [None]:
# ============================================================================
# OOF SCORES
# ============================================================================

print("\n" + "="*70)
print("OUT-OF-FOLD SCORES")
print("="*70)

lgb_oof_score, lgb_metrics = ing_hubs_datathon_metric(y_train, lgb_oof_preds)
xgb_oof_score, xgb_metrics = ing_hubs_datathon_metric(y_train, xgb_oof_preds)
cat_oof_score, cat_metrics = ing_hubs_datathon_metric(y_train, cat_oof_preds)
et_oof_score, et_metrics = ing_hubs_datathon_metric(y_train, et_oof_preds)

print(f"LightGBM OOF Score: {lgb_oof_score:.4f}")
print(f"XGBoost OOF Score:  {xgb_oof_score:.4f}")
print(f"CatBoost OOF Score: {cat_oof_score:.4f}")
print(f"ExtraTrees OOF Score: {et_oof_score:.4f}")


OUT-OF-FOLD SCORES
LightGBM OOF Score: 1.1590
XGBoost OOF Score:  1.1320
CatBoost OOF Score: 1.1579
ExtraTrees OOF Score: 0.6138


In [None]:
# ============================================================================
# STACKING META-LEARNER
# ============================================================================

print("\n" + "="*70)
print("STACKING META-LEARNER")
print("="*70)

# Create meta-features
meta_train = np.column_stack([lgb_oof_preds, xgb_oof_preds, cat_oof_preds, et_oof_preds])
meta_test = np.column_stack([lgb_test_preds, xgb_test_preds, cat_test_preds, et_test_preds])

# Train meta-model with CV
meta_oof_preds = np.zeros(len(X_train))
meta_test_preds = np.zeros(len(X_test))

print("Meta-model eğitiliyor...")
for fold, (train_idx, val_idx) in enumerate(skf.split(meta_train, y_train), 1):
    meta_fold_train = meta_train[train_idx]
    meta_fold_val = meta_train[val_idx]
    y_fold_train = y_train.iloc[train_idx]

    meta_model = LogisticRegression(
        C=0.1,
        penalty='l2',
        solver='lbfgs',
        max_iter=1000,
        random_state=42
    )
    meta_model.fit(meta_fold_train, y_fold_train)

    meta_oof_preds[val_idx] = meta_model.predict_proba(meta_fold_val)[:, 1]
    meta_test_preds += meta_model.predict_proba(meta_test)[:, 1] / n_splits

meta_score, meta_metrics = ing_hubs_datathon_metric(y_train, meta_oof_preds)
print(f"Stacking OOF Score: {meta_score:.4f}")


STACKING META-LEARNER
Meta-model eğitiliyor...
Stacking OOF Score: 1.1696


In [None]:
# ============================================================================
# ADVANCED ENSEMBLE OPTIMIZATION
# ============================================================================

print("\n" + "="*70)
print("ADVANCED ENSEMBLE OPTIMIZATION")
print("="*70)

def optimize_advanced_weights(trial):
    # Base models
    w1 = trial.suggest_float('w_lgb', 0.0, 1.0)
    w2 = trial.suggest_float('w_xgb', 0.0, 1.0)
    w3 = trial.suggest_float('w_cat', 0.0, 1.0)
    w4 = trial.suggest_float('w_et', 0.0, 0.5)
    w5 = trial.suggest_float('w_meta', 0.0, 0.5)

    total = w1 + w2 + w3 + w4 + w5
    if total == 0:
        return -1

    # Normalize
    w1, w2, w3, w4, w5 = w1/total, w2/total, w3/total, w4/total, w5/total

    ensemble_pred = (w1 * lgb_oof_preds +
                     w2 * xgb_oof_preds +
                     w3 * cat_oof_preds +
                     w4 * et_oof_preds +
                     w5 * meta_oof_preds)

    score, _ = ing_hubs_datathon_metric(y_train, ensemble_pred)
    return score


ADVANCED ENSEMBLE OPTIMIZATION


In [None]:
study = optuna.create_study(direction='maximize', study_name='advanced_ensemble')
study.optimize(optimize_advanced_weights, n_trials=300, show_progress_bar=True)

best_weights = study.best_params
total_weight = sum(best_weights.values())
w1 = best_weights['w_lgb'] / total_weight
w2 = best_weights['w_xgb'] / total_weight
w3 = best_weights['w_cat'] / total_weight
w4 = best_weights['w_et'] / total_weight
w5 = best_weights['w_meta'] / total_weight

print(f"\nEn iyi ağırlıklar:")
print(f"  LightGBM:   {w1:.3f}")
print(f"  XGBoost:    {w2:.3f}")
print(f"  CatBoost:   {w3:.3f}")
print(f"  ExtraTrees: {w4:.3f}")
print(f"  Meta:       {w5:.3f}")

[I 2025-10-16 03:03:05,874] A new study created in memory with name: advanced_ensemble


  0%|          | 0/300 [00:00<?, ?it/s]

[I 2025-10-16 03:03:05,931] Trial 0 finished with value: 1.1247668024758781 and parameters: {'w_lgb': 0.5134553221745499, 'w_xgb': 0.31118163027866486, 'w_cat': 0.6785425395042087, 'w_et': 0.20185209937019938, 'w_meta': 0.22160309078371598}. Best is trial 0 with value: 1.1247668024758781.
[I 2025-10-16 03:03:05,983] Trial 1 finished with value: 0.9903173176761777 and parameters: {'w_lgb': 0.6106139162844872, 'w_xgb': 0.5536130141060444, 'w_cat': 0.16782887647901545, 'w_et': 0.42595288892234173, 'w_meta': 0.08028214436241876}. Best is trial 0 with value: 1.1247668024758781.
[I 2025-10-16 03:03:06,035] Trial 2 finished with value: 1.0452676570425983 and parameters: {'w_lgb': 0.9490266378669904, 'w_xgb': 0.4916578224347138, 'w_cat': 0.12988993397658433, 'w_et': 0.4643055512243141, 'w_meta': 0.3640963519030777}. Best is trial 0 with value: 1.1247668024758781.
[I 2025-10-16 03:03:06,087] Trial 3 finished with value: 0.9610533572465974 and parameters: {'w_lgb': 0.13156648783715563, 'w_xgb': 

In [None]:
# ============================================================================
# FINAL PREDICTIONS
# ============================================================================

ensemble_oof_preds = (w1 * lgb_oof_preds +
                      w2 * xgb_oof_preds +
                      w3 * cat_oof_preds +
                      w4 * et_oof_preds +
                      w5 * meta_oof_preds)

ensemble_test_preds = (w1 * lgb_test_preds +
                       w2 * xgb_test_preds +
                       w3 * cat_test_preds +
                       w4 * et_test_preds +
                       w5 * meta_test_preds)

In [None]:
# ============================================================================
# RANK AVERAGING POST-PROCESSING
# ============================================================================

print("\n" + "="*70)
print("RANK AVERAGING POST-PROCESSING")
print("="*70)

from scipy.stats import rankdata

# Rank-based predictions
lgb_test_ranks = rankdata(lgb_test_preds) / len(lgb_test_preds)
xgb_test_ranks = rankdata(xgb_test_preds) / len(xgb_test_preds)
cat_test_ranks = rankdata(cat_test_preds) / len(cat_test_preds)
et_test_ranks = rankdata(et_test_preds) / len(et_test_preds)
ensemble_test_ranks = rankdata(ensemble_test_preds) / len(ensemble_test_preds)

# Weighted rank average
final_test_ranks = (w1 * lgb_test_ranks +
                    w2 * xgb_test_ranks +
                    w3 * cat_test_ranks +
                    w4 * et_test_ranks)

# Blend original predictions with rank-based predictions
final_test_preds = 0.6 * ensemble_test_preds + 0.4 * final_test_ranks


RANK AVERAGING POST-PROCESSING


In [None]:
# ============================================================================
# POST-PROCESSING: DISTRIBUTION ALIGNMENT
# ============================================================================

print("\n" + "="*70)
print("DISTRIBUTION ALIGNMENT")
print("="*70)

# Align test distribution with train OOF distribution
train_mean = ensemble_oof_preds.mean()
train_std = ensemble_oof_preds.std()
test_mean = final_test_preds.mean()
test_std = final_test_preds.std()

print(f"Train - Mean: {train_mean:.4f}, Std: {train_std:.4f}")
print(f"Test  - Mean: {test_mean:.4f}, Std: {test_std:.4f}")

# Standardize and align
final_test_preds_aligned = (final_test_preds - test_mean) / test_std
final_test_preds_aligned = final_test_preds_aligned * train_std + train_mean
final_test_preds_aligned = np.clip(final_test_preds_aligned, 0, 1)

# Blend original and aligned
final_test_preds = 0.7 * final_test_preds + 0.3 * final_test_preds_aligned

print(f"Final - Mean: {final_test_preds.mean():.4f}, Std: {final_test_preds.std():.4f}")


DISTRIBUTION ALIGNMENT
Train - Mean: 0.1393, Std: 0.0853
Test  - Mean: 0.2728, Std: 0.1569
Final - Mean: 0.2328, Std: 0.1354


In [None]:
# ============================================================================
# METRICS AND RESULTS
# ============================================================================

ensemble_score, ensemble_metrics = ing_hubs_datathon_metric(y_train, ensemble_oof_preds)

print("\n" + "="*70)
print("FINAL RESULTS")
print("="*70)

print(f"\nFinal Ensemble OOF Score: {ensemble_score:.4f}")
print(f"\nDetailed Metrics:")
print(f"  ROC AUC:      {ensemble_metrics['roc_auc']:.4f}")
print(f"  Gini:         {convert_auc_to_gini(ensemble_metrics['roc_auc']):.4f}")
print(f"  Recall@10%:   {ensemble_metrics['recall_at_10perc']:.4f}")
print(f"  Lift@10%:     {ensemble_metrics['lift_at_10perc']:.4f}")

baseline_gini = convert_auc_to_gini(0.6925726757936908)
baseline_recall = 0.18469
baseline_lift = 1.84715

print(f"\nBaseline'a göre iyileştirme:")
print(f"  Gini:         {(convert_auc_to_gini(ensemble_metrics['roc_auc'])/baseline_gini - 1)*100:+.2f}%")
print(f"  Recall@10%:   {(ensemble_metrics['recall_at_10perc']/baseline_recall - 1)*100:+.2f}%")
print(f"  Lift@10%:     {(ensemble_metrics['lift_at_10perc']/baseline_lift - 1)*100:+.2f}%")


FINAL RESULTS

Final Ensemble OOF Score: 1.1712

Detailed Metrics:
  ROC AUC:      0.7161
  Gini:         0.4322
  Recall@10%:   0.2224
  Lift@10%:     2.2236

Baseline'a göre iyileştirme:
  Gini:         +12.23%
  Recall@10%:   +20.40%
  Lift@10%:     +20.38%


In [None]:
# ============================================================================
# SUBMISSION
# ============================================================================

print("\n" + "="*70)
print("CREATING SUBMISSION")
print("="*70)

submission = pd.DataFrame({
    'cust_id': test_features['cust_id'],
    'churn': final_test_preds
})

print(f"\nTahmin istatistikleri:")
print(f"  Min:    {submission['churn'].min():.4f}")
print(f"  Max:    {submission['churn'].max():.4f}")
print(f"  Mean:   {submission['churn'].mean():.4f}")
print(f"  Median: {submission['churn'].median():.4f}")
print(f"  Std:    {submission['churn'].std():.4f}")

submission.to_csv('submission_ultra_v2.csv', index=False)
print(f"\nSubmission dosyası kaydedildi: 'submission_ultra_v2.csv'")

# Download
from google.colab import files
files.download('submission_ultra_v2.csv')


CREATING SUBMISSION

Tahmin istatistikleri:
  Min:    0.0041
  Max:    0.5353
  Mean:   0.2328
  Median: 0.2307
  Std:    0.1354

Submission dosyası kaydedildi: 'submission_ultra_v2.csv'


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# ============================================================================
# FEATURE IMPORTANCE
# ============================================================================

print("\n" + "="*70)
print("TOP 30 FEATURES")
print("="*70)

feature_importance = pd.DataFrame({
    'feature': feature_cols,
    'importance': lgb_model.feature_importances_
}).sort_values('importance', ascending=False)

print(feature_importance.head(30).to_string(index=False))


TOP 30 FEATURES
                 feature  importance
               age_group        1488
      products_18m_range        1148
  mobile_amt_trend_3m_6m         882
        products_18m_std         872
  mobile_amt_trend_2m_6m         835
customer_lifetime_months         804
                  gender         801
      products_12m_range         790
                religion         776
  mobile_amt_trend_3m_9m         751
       total_months_data         721
   mobile_3m_to_6m_ratio         721
  days_since_first_trans         717
         products_6m_std         667
 mobile_concentration_2m         660
   activity_acceleration         652
      cc_cnt_trend_2m_6m         635
  mobile_amt_trend_1m_6m         633
         zero_mobile_18m         629
            mobile_cv_3m         624
          avg_cc_amt_18m         612
           cc_cnt_9m_std         612
       products_9m_range         608
           avg_cc_amt_6m         605
          avg_cc_amt_12m         595
          cc_cnt_18m_

In [None]:
# ============================================================================
# FINAL SUMMARY
# ============================================================================

print("\n" + "="*70)
print("FINAL SUMMARY")
print("="*70)
print(f"\nTotal Features:           {len(feature_cols)}")
print(f"Cross-Validation Folds:   {n_splits}")
print(f"Models Used:              5 (LGB, XGB, CAT, ET, Meta)")
print(f"Final Ensemble OOF Score: {ensemble_score:.4f}")
print(f"Expected LB Score:        ~{ensemble_score:.3f}")

if ensemble_score >= 1.25:
    print(f"\n🎉🎉🎉 HEDEF SKORUNA ULAŞILDI! 🎉🎉🎉")
else:
    improvement_needed = 1.25 - ensemble_score
    print(f"\n⚠️ Hedefe {improvement_needed:.4f} puan kaldı")

    print("\n📊 Metrik bazında analiz:")
    gini_improvement = (convert_auc_to_gini(ensemble_metrics['roc_auc'])/baseline_gini - 1)*100
    recall_improvement = (ensemble_metrics['recall_at_10perc']/baseline_recall - 1)*100
    lift_improvement = (ensemble_metrics['lift_at_10perc']/baseline_lift - 1)*100

    print(f"  Gini iyileştirme:    {gini_improvement:+.2f}% (ağırlık: %40)")
    print(f"  Recall iyileştirme:  {recall_improvement:+.2f}% (ağırlık: %30)")
    print(f"  Lift iyileştirme:    {lift_improvement:+.2f}% (ağırlık: %30)")

    print("\n💡 Ek öneriler:")
    if gini_improvement < 30:
        print("  • Gini'yi artır: Daha fazla feature diversity, neural network ensemble")
    if recall_improvement < 50:
        print("  • Recall@10'u artır: Top-k odaklı loss function, threshold optimization")
    if lift_improvement < 50:
        print("  • Lift@10'u artır: Calibration, class weighting adjustment")

print("\n" + "="*70)
print("MODEL EĞİTİMİ TAMAMLANDI!")
print("="*70)


FINAL SUMMARY

Total Features:           480
Cross-Validation Folds:   15
Models Used:              5 (LGB, XGB, CAT, ET, Meta)
Final Ensemble OOF Score: 1.1712
Expected LB Score:        ~1.171

⚠️ Hedefe 0.0788 puan kaldı

📊 Metrik bazında analiz:
  Gini iyileştirme:    +12.23% (ağırlık: %40)
  Recall iyileştirme:  +20.40% (ağırlık: %30)
  Lift iyileştirme:    +20.38% (ağırlık: %30)

💡 Ek öneriler:
  • Gini'yi artır: Daha fazla feature diversity, neural network ensemble
  • Recall@10'u artır: Top-k odaklı loss function, threshold optimization
  • Lift@10'u artır: Calibration, class weighting adjustment

MODEL EĞİTİMİ TAMAMLANDI!
