In [11]:
!pip install catboost --quiet

import pandas as pd
import numpy as np
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from google.colab import files

# Read the three competition files from the working directory; the files are
# read in the same order as the names on the left-hand side.
train, test, sample_submission = map(
    pd.read_csv,
    ('train.csv', 'test.csv', 'sample_submission.csv'),
)

# Split the training frame into the target and the predictors.
# `y` is taken first so that a missing target column still raises KeyError;
# the tolerant drop below is only meant to forgive a missing `id`.
y = train['survived_2y']

# `id` is removed from the training features as well: it is never part of the
# test features (see below), so keeping it in X would train the model on a
# column that is absent at prediction time.
X = train.drop(columns=['survived_2y', 'id'], errors='ignore')

# Keep the test identifiers for the submission file; fall back to the row
# index when the test file has no `id` column.
test_ids = test['id'] if 'id' in test.columns else test.index

# Select the test features by training column name. This leaves out `id` and
# any other column the model was not trained on, puts the columns in training
# order, and raises KeyError right here if a training feature is missing from
# the test file instead of failing later at prediction time.
X_test = test[X.columns].copy()

# Hold out 20% of the rows for validation, stratified on the target, with a
# fixed random_state for the split.
split_options = {'test_size': 0.2, 'random_state': 42, 'stratify': y}
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)

# Hyper-parameters collected in one mapping so they can be read and tuned in
# a single place.
catboost_params = {
    'iterations': 1000,
    'learning_rate': 0.05,
    'depth': 6,
    'eval_metric': 'AUC',
    'random_seed': 42,
    'verbose': 100,            # log every 100th iteration
    'early_stopping_rounds': 50,
}
model = CatBoostClassifier(**catboost_params)

# The validation split is passed as the evaluation set; with
# use_best_model=True the fitted model is cut back to its best iteration on
# that set.
model.fit(X_train, y_train, eval_set=(X_val, y_val), use_best_model=True)

# Score the validation split. `[:, 1]` takes the second probability column,
# presumably the positive class (survived_2y == 1); confirm via model.classes_.
# Note: X_val / y_val were also the eval_set passed to fit, so this AUC is not
# measured on data the training procedure never saw.
val_proba = model.predict_proba(X_val)
val_preds = val_proba[:, 1]
auc = roc_auc_score(y_val, val_preds)
print(f"ROC-AUC на валидации: {auc:.4f}")

# Predict on the test features; as for validation, the second probability
# column is used as the submitted score.
test_proba = model.predict_proba(X_test)
test_preds = test_proba[:, 1]

# Assemble the submission: one identifier column and one prediction column.
submission_columns = {'id': test_ids, 'survived_2y': test_preds}
submission = pd.DataFrame(submission_columns)

# Write the file without the DataFrame index, then hand the same file to the
# Colab download helper.
submission_path = 'submission.csv'
submission.to_csv(submission_path, index=False)
files.download(submission_path)

0:	test: 0.8097357	best: 0.8097357 (0)	total: 8.99ms	remaining: 8.98s
100:	test: 0.9654115	best: 0.9654115 (100)	total: 755ms	remaining: 6.72s
200:	test: 0.9716866	best: 0.9716866 (200)	total: 1.47s	remaining: 5.85s
300:	test: 0.9739838	best: 0.9739843 (299)	total: 2.22s	remaining: 5.15s
400:	test: 0.9742918	best: 0.9743006 (399)	total: 2.95s	remaining: 4.41s
500:	test: 0.9746280	best: 0.9746543 (481)	total: 3.66s	remaining: 3.65s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.9747923916
bestIteration = 534

Shrink model to first 535 iterations.
ROC-AUC на валидации: 0.9748


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>