In [None]:
import numpy as np 
import pandas as pd 
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from catboost import CatBoostClassifier
import lightgbm as lgb
from xgboost import XGBClassifier
from sklearn.model_selection import KFold
import sklearn.metrics
from sklearn.metrics import roc_auc_score

In [None]:
# Resolve both CSV names against the current working directory, then load them.
train_path, test_path = map(os.path.abspath, ('train.csv', 'test.csv'))

# Frame names follow the notebook's convention: the training file also carries
# the `claim` column and both files carry an `id` column (both are split off in
# the next cell).
train_and_target = pd.read_csv(train_path)
test_and_id = pd.read_csv(test_path)

In [None]:
# Label column, kept separately from the feature matrix.
target = train_and_target['claim']

# Feature matrices: drop the label / identifier columns, then append a per-row
# count of missing values as an extra feature. `assign` evaluates the lambda on
# the already-dropped frame, so `nan_count` only counts missing feature values.
train = (
    train_and_target
    .drop(columns=['claim', 'id'])
    .assign(nan_count=lambda frame: frame.isnull().sum(axis=1))
)
test = (
    test_and_id
    .drop(columns='id')
    .assign(nan_count=lambda frame: frame.isnull().sum(axis=1))
)

In [None]:
# Base learners for the stacking ensemble, all configured to train on a GPU.
# NOTE(review): these settings require GPU-enabled builds of each library —
# confirm against the runtime environment.
models = [
    CatBoostClassifier(task_type='GPU', silent=True),
    lgb.LGBMClassifier(device='gpu'),
    XGBClassifier(tree_method='gpu_hist'),
]

# Keep the individual handles available under their original names.
cb_model, lgb_model, xgb_model = models

In [None]:
# Build out-of-fold (OOF) predictions for stacking.
#
# For each of the 10 folds every base model is fitted on the other 9 folds and
# predicts the held-out fold. The held-out predictions (one column per model)
# become the meta-model's features (`meta_X`); the held-out labels become its
# targets (`meta_y`). Rows of both are in fold order, not original row order.

# Fixed seed so the shuffled fold assignment is identical on every run;
# without it each Restart & Run All produces different splits and scores.
CV_SEED = 42

rounds = 0
meta_X, meta_y = [], []
kfold = KFold(n_splits = 10, shuffle = True, random_state = CV_SEED)
for train_index, test_index in kfold.split(train):
    fold_preds = []
    X_train_split, y_train_split = train.iloc[train_index], target.iloc[train_index]
    X_test_split, y_test_split = train.iloc[test_index], target.iloc[test_index]
    # Held-out labels are appended in the same row order as this fold's
    # predictions, keeping meta_X and meta_y aligned.
    meta_y.extend(y_test_split)
    for n, m in enumerate(models):
        # Re-fitting the same estimator object each fold is intentional; the
        # fit on this fold's training rows replaces the previous fit.
        m.fit(X_train_split, y_train_split)
        # Second column of predict_proba (the positive class, assuming `claim`
        # is a 0/1 target — TODO confirm).
        yhat = m.predict_proba(X_test_split)[:, 1]
        fold_preds.append(yhat.reshape(len(yhat), 1))
        print(f"Round {rounds}, Model {n}, Score: {roc_auc_score(y_test_split,yhat)}")
    # One (fold_rows, n_models) block per fold.
    meta_X.append(np.hstack(fold_preds))
    rounds += 1
meta_X = np.vstack(meta_X)
meta_y = np.asarray(meta_y)
print('Meta ', meta_X.shape, meta_y.shape)

# Fit the second-level (meta) model on the out-of-fold base-model predictions.
# NOTE(review): LinearRegression outputs are not constrained to [0, 1]; that is
# fine for ranking metrics such as ROC AUC but they are not probabilities.
meta_model = LinearRegression()
meta_model.fit(meta_X, meta_y)

# Refit every base model on the full training set and collect its predictions
# for the test set, one column per model (same column order as meta_X).
test_columns = []
for m in models:
    m.fit(train, target)
    yhat = m.predict_proba(test)[:, 1]
    test_columns.append(yhat[:, np.newaxis])

# Despite its name, `meta_train` holds the meta-features of the *test* set.
meta_train = np.concatenate(test_columns, axis=1)
final_preds = meta_model.predict(meta_train)

print(final_preds)

In [None]:
# Write the submission file: one row per test record with its identifier and
# the stacked prediction.
# The identifier header is spelled `id` to match the column name used by the
# input files (the original wrote `Id`, which a header-validating scorer would
# not recognise).
# NOTE(review): confirm against the competition's sample submission.
my_submission = pd.DataFrame({'id': test_and_id['id'], 'claim': final_preds})
my_submission.to_csv('submission.csv', index=False)