In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, precision_score
import numpy as np
data = pd.read_csv('bank-additional-full.csv', sep=';')
data = data.drop('duration', axis=1) 
data['y'] = data['y'].map({'no': 0, 'yes': 1})  
X = data.drop('y', axis=1)
y = data['y']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
categorical_features = X.select_dtypes(include=['object']).columns
numerical_features = X.select_dtypes(include=['int64', 'float64']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_features)
    ])
model = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(class_weight='balanced', random_state=42))
])
model.fit(X_train, y_train)
y_pred_proba = model.predict_proba(X_test)[:, 1]
auc_roc = roc_auc_score(y_test, y_pred_proba)
k = int(0.1 * len(y_test))  # Top 10%
top_k_indices = np.argsort(y_pred_proba)[-k:]
top_k_precision = precision_score(y_test.iloc[top_k_indices], np.ones(k))
top_15_df = pd.DataFrame({
    'row_id': X_test.index[:15],
    'predicted_prob': np.round(y_pred_proba[:15], 4)
}).sort_values('predicted_prob', ascending=False).head(15)
print(f"AUC_ROC: {auc_roc:.3f}")
print(f"TopK_Precision@10%: {top_k_precision:.3f}")
print("\nTop 15 customers by predicted probability:")
print(top_15_df.to_string(index=False))

AUC_ROC: 0.801
TopK_Precision@10%: 0.512

Top 15 customers by predicted probability:
 row_id  predicted_prob
  40138          0.9135
  40076          0.9063
  36380          0.8580
  27939          0.8580
  37546          0.7659
  37959          0.7480
  28763          0.5044
  14455          0.3550
  27310          0.3510
  20148          0.3354
  16213          0.3126
   8377          0.2861
  24077          0.2698
  10778          0.2655
   7334          0.2286
