In [None]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Both competition files are read from the same input directory.
DATA_DIR = '../input/tabular-playground-series-sep-2021'
train_data = pd.read_csv(DATA_DIR + '/train.csv')
test_data = pd.read_csv(DATA_DIR + '/test.csv')


## Changing datatype of features for memory reduction.

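For example, `float64` columns are downcast to `float32`, and `int64` columns to the smallest integer type that can hold their values.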

In [None]:
def int_downcast(df):
    """Shrink every int64 column of ``df`` to the smallest integer dtype that fits.

    The value range of each affected column is printed before it is converted.
    The frame is modified in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose int64 columns should be downcast.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    int64_names = df.select_dtypes(include=['int64']).columns
    for name in int64_names:
        column = df[name]
        print(name, 'min:', column.min(), '; max:', column.max())
        df[name] = pd.to_numeric(column, downcast='integer')
    return df

# Downcast the int64 columns of the training frame in place. As the last
# expression of the cell, the returned frame is also displayed.
int_downcast(train_data)
# Leftover, disabled debugging check:
# test_data['claim'].dtype

In [None]:
def float_downcast(df):
    """Convert every float64 column of ``df`` to a smaller float dtype.

    The frame is modified in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose float64 columns should be downcast.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    for name in df.select_dtypes(include=['float64']).columns:
        df[name] = pd.to_numeric(df[name], downcast='float')
    return df

# Downcast the float64 columns of the training frame in place, then print the
# frame summary so the resulting dtypes and memory usage can be checked.
# NOTE(review): in the cells shown, only train_data is downcast; test_data
# keeps the dtypes it was loaded with.
float_downcast(train_data)
train_data.info()

## Preprocessing and Feature engineering.

In [None]:
# Raw feature columns are those whose name begins with "f".
features = [name for name in train_data.columns.values if name[0] == "f"]

# Per-row count of missing raw features, added to both frames.
for frame in (train_data, test_data):
    frame['n_missing'] = frame[features].isna().sum(axis=1)

In [None]:
# Per-row kurtosis of the raw features, added to both frames.
for frame in (train_data, test_data):
    frame['kurt'] = frame[features].kurtosis(axis=1)

In [None]:
from sklearn.impute import SimpleImputer
# Replace NaN entries with the column mean. Both arguments are passed by
# keyword: SimpleImputer's parameters are keyword-only in current scikit-learn,
# so the positional form raises a TypeError there.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')

In [None]:
# Display the training frame to inspect it, including the columns added above.
train_data

In [None]:
# Keep the label column aside before it is dropped from the feature frame.
target = train_data['claim']

In [None]:
# Display the extracted label column.
target

In [None]:
# Remove the identifier and the label so only model inputs remain.
train_data = train_data.drop(columns=['id', 'claim'])

In [None]:
# Remove the identifier column. Reassign rather than using inplace=True, to
# match how train_data is handled.
test_data = test_data.drop(columns='id')

In [None]:
 train_data['std'] = train_data[features].std(axis=1)
test_data['std'] = test_data[features].std(axis=1)

train_data['min'] = train_data[features].min(axis=1)
test_data['min'] = test_data[features].min(axis=1)

train_data['max'] = train_data[features].max(axis =1)
test_data['max'] = test_data[features].max(axis=1)


In [None]:
# Fit the imputer on the training data and rebind train_data to the imputed
# result. A later cell re-wraps the result in a DataFrame.
train_data = imputer.fit_transform(train_data)

In [None]:
# Apply the already-fitted imputer to the test data (no re-fitting on test).
test_data = imputer.transform(test_data)

In [None]:
from sklearn.preprocessing import RobustScaler

In [None]:
# Scaler with default settings; it is fitted on the training data in the next cell.
scaler = RobustScaler()

In [None]:
# Fit the scaler on the training data only, then apply the same fitted
# transform to the test data.
train_data = scaler.fit_transform(train_data)
test_data = scaler.transform(test_data)

In [None]:
# Labels for the engineered columns, in the order the columns were actually
# appended to the frames: missing count, kurtosis, std, min, max.
# The earlier order ('N_missing','std_row','min','max','kurt') gave four of
# these columns the wrong name once the arrays were wrapped back into
# DataFrames.
engineered = ['N_missing', 'kurt', 'std_row', 'min', 'max']
# Filter first so re-running this cell does not append the labels twice.
features = [name for name in features if name not in engineered] + engineered

In [None]:
# Wrap the transformed training data in a DataFrame again, labelling the
# columns positionally with the names in `features`.
train_data = pd.DataFrame(train_data,columns =features)

In [None]:
# Same re-wrapping for the test data, with the same column labels.
test_data = pd.DataFrame(test_data,columns =features)

In [None]:
from sklearn.model_selection import train_test_split


In [None]:
# Hold out 30% of the training rows for validation, with a fixed random_state.
# NOTE(review): xtrain/xval/ytrain/yval are not referenced by any later cell
# shown here; the final model is fitted on the full train_data.
xtrain,xval, ytrain,yval= train_test_split(train_data, target, test_size= 0.3 ,random_state=1)

In [None]:
! pip install catboost

In [None]:

# Remove the installed LightGBM; it is rebuilt from source with GPU support
# in the following cells.
!pip uninstall -y lightgbm
# Boost development libraries (presumably required by the GPU build; TODO confirm).
!apt-get install -y libboost-all-dev
# --recursive also fetches the repository's git submodules.
!git clone --recursive https://github.com/Microsoft/LightGBM

In [None]:
%%bash
# Build LightGBM from the cloned sources with GPU (OpenCL) support enabled.
set -e  # stop at the first failing step instead of building from a broken state
cd LightGBM
rm -rf build  # -f: do not fail when no build directory exists yet (fresh clone)
mkdir build
cd build
cmake -DUSE_GPU=1 -DOpenCL_LIBRARY=/usr/local/cuda/lib64/libOpenCL.so -DOpenCL_INCLUDE_DIR=/usr/local/cuda/include/ ..
make -j$(nproc)

In [None]:
!cd LightGBM/python-package/;python setup.py install --precompile


In [None]:
# Create the OpenCL vendors directory and write an nvidia.icd file naming the
# NVIDIA OpenCL library (presumably so the GPU build can locate the driver;
# TODO confirm).
!mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
# Delete the source checkout now that the package has been installed.
!rm -r LightGBM

In [None]:
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from xgboost import XGBClassifier

In [None]:
# Display the parameters of a default-constructed LGBMClassifier for reference.
LGBMClassifier().get_params()

In [None]:
# paramsXGB = {'max_depth': 2, 'learning_rate': 0.021537077920105466, 'n_estimators': 3000, 'min_child_weight': 150, 'gamma': 0.11611920725914951, 'alpha': 0.0021839958087869794, 'lambda': 0.0018567979557499344, 'colsample_bytree': 0.7139742731494992, 'subsample': 0.6258627743440968,
#              'tree_method': 'gpu_hist',
#              'booster': 'gbtree',
#              'random_state': 228,
#              'use_label_encoder': False,
#              'objective': 'binary:logistic',
#              'eval_metric': 'auc',
#               }

In [None]:
# Keyword arguments for the XGBoost base learner.
paramsXGB = {
    # Numeric settings (presumably from a hyper-parameter search; TODO confirm).
    'max_depth': 3,
    'learning_rate': 0.015702659398906191,
    'n_estimators': 10000,
    'min_child_weight': 25,
    'gamma': 0.00010151247994797229,
    'alpha': 7.148020356730985,
    'lambda': 0.1378423649746119,
    'colsample_bytree': 0.7969227570988136,
    'subsample': 0.6382893449313995,
    # Fixed settings: GPU histogram tree method, seed and evaluation metric.
    'tree_method': 'gpu_hist',
    'booster': 'gbtree',
    'random_state': 228,
    'use_label_encoder': False,
    'eval_metric': 'auc',
}

In [None]:
# paramsCB = {'depth': 3, 'learning_rate': 0.014530866870832323, 'iterations': 6000, 'max_bin': 265, 'min_data_in_leaf': 14, 'l2_leaf_reg': 0.004427550682515904, 'subsample': 0.5402586792667279, 'grow_policy': 'SymmetricTree', 'leaf_estimation_method': 'Gradient',
#             'bootstrap_type': 'Bernoulli',
#             'random_seed': 228,
#             'loss_function': 'Logloss',
#             'eval_metric': 'AUC',}
#             'task_type': 'GPU' }

In [None]:
# Keyword arguments for the CatBoost base learner.
paramsCB = dict(
    # Numeric and structural settings (presumably from a hyper-parameter
    # search; TODO confirm).
    depth=3,
    learning_rate=0.017585381726501453,
    iterations=11636,
    max_bin=461,
    min_data_in_leaf=162,
    l2_leaf_reg=0.02724781040038058,
    subsample=0.6892384815879177,
    grow_policy='Depthwise',
    leaf_estimation_method='Gradient',
    # Fixed settings: sampling scheme, seed, loss, metric and GPU training.
    bootstrap_type='Bernoulli',
    random_seed=228,
    loss_function='Logloss',
    eval_metric='AUC',
    task_type='GPU',
)

In [None]:
# Keyword arguments for the LightGBM base learner.
paramsLGBM = dict(
    # Numeric settings (presumably from a hyper-parameter search; TODO confirm).
    n_estimators=11990,
    max_depth=3,
    learning_rate=0.016501612373246877,
    reg_alpha=7.555087388180319,
    reg_lambda=0.9534606245427513,
    num_leaves=155,
    min_data_per_group=177,
    min_child_samples=150,
    colsample_bytree=0.22781593823447946,
    # Fixed settings: booster type, objective, seed, metric and GPU device.
    boosting_type='gbdt',
    objective='binary',
    random_state=228,
    metric='auc',
    device_type='gpu',
)

In [None]:
# model=StackNetClassifier(models, metric="auc", folds=3,
# 	restacking=False,use_retraining=True, use_proba=True, 
# 	random_state=12345,n_jobs=1, verbose=1)

In [None]:
from sklearn.ensemble import StackingClassifier

In [None]:
# First-level learners: three boosted-tree models configured from the
# parameter dictionaries, plus a logistic regression.
estimators = [
    ('lgb', LGBMClassifier(**paramsLGBM)),
    ('xgb', XGBClassifier(**paramsXGB)),
    ('cb', CatBoostClassifier(**paramsCB)),
    ('log', LogisticRegression(max_iter=200, random_state=1, solver='liblinear')),
]

# The meta-learner is itself a stack: a LightGBM and a CatBoost model whose
# outputs are combined by a logistic regression.
meta_lgb = LGBMClassifier(
    n_estimators=5000,
    objective='binary',
    random_state=1,
    metric='auc',
    device_type='gpu',
)
meta_cat = CatBoostClassifier(
    bootstrap_type='Bernoulli',
    random_seed=1,
    loss_function='Logloss',
    eval_metric='AUC',
    task_type='GPU',
    n_estimators=5000,
)
final_layer = StackingClassifier(
    estimators=[('lgb', meta_lgb), ('cat', meta_cat)],
    final_estimator=LogisticRegression(max_iter=200, solver='liblinear'),
)

In [None]:
# Full ensemble: the four base learners feed the nested `final_layer` stack.
# cv=3 and verbose=1 are passed through to StackingClassifier.
model = StackingClassifier(estimators =estimators , final_estimator = final_layer,cv=3,verbose=1)

In [None]:
# Fit the ensemble on the full, preprocessed training frame (not on the
# xtrain/ytrain split created earlier).
model.fit(train_data,target)

In [None]:
# Class probabilities for the test rows. Column 1 is written to the
# submission below.
preds=model.predict_proba(test_data)

In [None]:
# Load the sample submission file; its `claim` column is overwritten with the
# model's predictions below.
submission = pd.read_csv('../input/tabular-playground-series-sep-2021/sample_solution.csv')

In [None]:
# Write the second predict_proba column into `claim`. Item assignment is used
# instead of attribute assignment, which would only set a plain Python
# attribute (not a column) if `claim` were missing from the frame.
submission['claim'] = preds[:, 1]

In [None]:
# Display the submission frame to check it before it is written to disk.
submission

In [None]:
# Write the submission file without the DataFrame index column.
submission.to_csv('submission13.csv',index=False)