In [None]:
# Mount Google Drive inside the Colab VM; the CSV paths read later in this
# notebook live under this mount point.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split

# Load the train/test CSVs from the mounted Drive folder.
train = pd.read_csv('/content/drive/MyDrive/kaggle_heart_disease/train.csv')
test = pd.read_csv('/content/drive/MyDrive/kaggle_heart_disease/test.csv')

# Encode the target as 0/1. Series.map turns every label that is missing from
# the mapping into NaN without complaint, so validate the result and fail here
# with a clear message instead of handing NaN labels to the model later.
label_map = {'Absence': 0, 'Presence': 1}
encoded_target = train['Heart Disease'].map(label_map)
unmapped = encoded_target.isna()
if unmapped.any():
    unexpected = train.loc[unmapped, 'Heart Disease'].unique().tolist()
    raise ValueError(
        f"Unexpected 'Heart Disease' labels in train.csv: {unexpected}; "
        f"expected one of {list(label_map)}"
    )
train['Heart Disease'] = encoded_target

# Separate features, target and the test ids needed for the submission file.
X_orig = train.drop(['id', 'Heart Disease'], axis=1)
y = train['Heart Disease']
X_test_orig = test.drop(['id'], axis=1)
test_ids = test['id']

# Normalise column names (spaces and hyphens -> underscores) so the feature
# code can refer to them with one consistent spelling.
X_orig.columns = X_orig.columns.str.replace(' ', '_').str.replace('-', '_')
X_test_orig.columns = X_test_orig.columns.str.replace(' ', '_').str.replace('-', '_')

def add_manual_features(df):
    """Return a copy of ``df`` extended with six hand-crafted columns.

    Four pairwise products and two per-age ratios are appended, in this order:
    ``thal_x_chest``, ``exang_x_vessels``, ``stdep_x_slope``, ``age_x_maxhr``,
    ``bp_per_age`` and ``chol_per_age``. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Thallium``, ``Chest_pain_type``,
        ``Exercise_angina``, ``Number_of_vessels_fluro``, ``ST_depression``,
        ``Slope_of_ST``, ``Age``, ``Max_HR``, ``BP`` and ``Cholesterol``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the original columns followed by the six new ones.
    """
    # Shared denominator of both ratio features.
    age_plus_one = df['Age'] + 1
    return df.assign(
        thal_x_chest=df['Thallium'] * df['Chest_pain_type'],
        exang_x_vessels=df['Exercise_angina'] * df['Number_of_vessels_fluro'],
        stdep_x_slope=df['ST_depression'] * df['Slope_of_ST'],
        age_x_maxhr=df['Age'] * df['Max_HR'],
        bp_per_age=df['BP'] / age_plus_one,
        chol_per_age=df['Cholesterol'] / age_plus_one,
    )

# Apply the same feature engineering to the train and test features.
X = add_manual_features(X_orig)
X_test = add_manual_features(X_test_orig)

# Later cells hand `.values` arrays to XGBoost, which drops the column names,
# so train and test must carry identical columns in identical order; otherwise
# features would be silently misaligned at prediction time.
if list(X.columns) != list(X_test.columns):
    mismatched = sorted(set(X.columns) ^ set(X_test.columns))
    raise ValueError(
        "Train/test feature columns differ "
        f"(columns present in only one frame: {mismatched}; "
        "if empty, the column order differs)"
    )

# Row counts legitimately differ; only the column count is expected to match.
print(f"Train shape: {X.shape}, Test shape: {X_test.shape}")

Train shape: (630000, 19), Test shape: (270000, 19)


In [None]:
# Fixed XGBoost hyperparameters unpacked into every XGBClassifier built in
# this notebook.
# NOTE(review): presumably the result of a tuning run that is not shown here — confirm.
best_xgb_params = dict(
    learning_rate=0.150165,
    max_depth=4,
    subsample=0.9845,
    colsample_bytree=0.5068,
    min_child_weight=7,
    gamma=0.3314,
    reg_alpha=0.9975,
    reg_lambda=2.5372,
)

In [None]:
# Hold out 10% of the training rows (stratified on the label) as the
# validation set that drives early stopping.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)

# Convert to plain arrays (float32 features); column names are dropped here.
X_train_np = X_train.values.astype('float32')
X_val_np = X_val.values.astype('float32')
y_train_np = y_train.values.ravel()
y_val_np = y_val.values.ravel()

model = xgb.XGBClassifier(
    **best_xgb_params,
    n_estimators=1000,          # upper bound; early stopping decides the real count
    early_stopping_rounds=50,   # stop after 50 rounds without validation improvement
    random_state=42,
    n_jobs=-1
)

model.fit(
    X_train_np, y_train_np,
    eval_set=[(X_val_np, y_val_np)],
    verbose=False
)

# `best_iteration` is a 0-based index of the best boosting round, so the number
# of trees to keep is index + 1. `best_iter` is used as `n_estimators` by later
# cells, so it must hold the count, not the index.
best_iter = model.best_iteration + 1
print(f"Optimal number of trees: {best_iter}")

Optimal number of trees: 354


In [None]:
# Refit on every labelled row, reusing the tree count stored in `best_iter`
# by the early-stopping cell (no validation set is held out here).
final_model = xgb.XGBClassifier(
    n_estimators=best_iter,
    random_state=42,
    n_jobs=-1,
    **best_xgb_params,
)

# Same float32 array representation that the validation run was trained on.
X_full_np, X_test_np = (frame.values.astype(np.float32) for frame in (X, X_test))
y_full_np = y.values.ravel()

final_model.fit(X_full_np, y_full_np)

# Keep only column 1 of predict_proba: the probability of class 1.
test_preds = final_model.predict_proba(X_test_np)[:, 1]

In [None]:
import numpy as np
import pandas as pd
import xgboost as xgb

# Hyperparameters for the seed ensemble below. The values are identical to the
# `best_xgb_params` defined earlier in this notebook; they are re-declared so
# this cell carries its own copy.
best_xgb_params = dict(
    learning_rate=0.150165,
    max_depth=4,
    subsample=0.9845,
    colsample_bytree=0.5068,
    min_child_weight=7,
    gamma=0.3314,
    reg_alpha=0.9975,
    reg_lambda=2.5372,
)

# Seed-averaged ensemble: train one XGBoost model per seed on the full training
# data and average the predicted probabilities for the test set.
seeds = [42, 123, 456, 789, 101112]
predictions = []

for i, seed in enumerate(seeds):
    print(f"Training model {i+1} with seed {seed}...")
    model = xgb.XGBClassifier(
        **best_xgb_params,
        # No validation set is held out here, so early stopping cannot run.
        # Use the tree count chosen by the early-stopping cell instead of a
        # hard-coded 1000, matching how `final_model` is built.
        n_estimators=best_iter,
        random_state=seed,
        n_jobs=-1,
        eval_metric='auc'
    )
    model.fit(X, y)   # train on full data

    # Column 1 of predict_proba is the probability of class 1.
    pred = model.predict_proba(X_test)[:, 1]
    predictions.append(pred)

# Average the per-seed probabilities element-wise (one value per test row).
avg_pred = np.mean(predictions, axis=0)

# Create submission
submission = pd.DataFrame({'id': test_ids, 'Heart Disease': avg_pred})
submission.to_csv('ensemble_5xgb.csv', index=False)

# Trigger a browser download of the file from the Colab VM.
from google.colab import files
files.download('ensemble_5xgb.csv')

Training model 1 with seed 42...


KeyboardInterrupt: 

In [None]:
# Write the single-model predictions (`test_preds`) as the submission CSV and
# trigger a browser download from the Colab VM.
from google.colab import files

submission = pd.DataFrame({'id': test_ids, 'Heart Disease': test_preds})
submission.to_csv('final_submission.csv', index=False)
files.download('final_submission.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>