# Blending

In [1]:
# Root folder the helper functions below read their input CSV files from
# (note the trailing slash: paths built as f'{input_path}/...' contain '//').
input_path = '../input/'
# Folder the helper functions below write their CSV files to.
output_path = './'

In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn import model_selection

def load_raw_data(folder_name,train_or_test="train"):
    """Read one CSV file of a ``tbr_apr_2022_<folder_name>`` dataset.

    Parameters
    ----------
    folder_name : str
        Suffix appended to ``tbr_apr_2022_`` to form the dataset folder name.
    train_or_test : str, default "train"
        Base name (without ``.csv``) of the file to read from that folder.

    Returns
    -------
    pandas.DataFrame
        Contents of ``<input_path>/tbr_apr_2022_<folder_name>/<train_or_test>.csv``.
    """
    dataset_dir = f'{input_path}/tbr_apr_2022_{folder_name}'
    return pd.read_csv(f'{dataset_dir}/{train_or_test}.csv')

def load_label(train_or_test='train'):
    """Return the ``state`` column of a competition CSV file as an array.

    ``'train'`` selects ``train_labels.csv``; any other value selects
    ``sample_submission.csv``. Both are read from the
    ``tabular-playground-series-apr-2022`` folder under ``input_path``.
    """
    if train_or_test == 'train':
        csv_name = 'train_labels.csv'
    else:
        csv_name = 'sample_submission.csv'
    labels = pd.read_csv(input_path + "/tabular-playground-series-apr-2022/" + csv_name)
    return labels['state'].values

def competition_metric(y_true, y_score):
    """Score predictions with ``roc_auc_score``.

    Parameters
    ----------
    y_true : array-like
        Reference labels, passed as the first argument of ``roc_auc_score``.
    y_score : array-like
        Predicted scores, passed as the second argument of ``roc_auc_score``.

    Returns
    -------
    The value returned by ``roc_auc_score(y_true, y_score)``.
    """
    auc = roc_auc_score(y_true, y_score)
    return auc

def evaluate(model, X, y):
    """Score ``model`` on ``(X, y)`` with the competition metric.

    Column 1 of ``model.predict_proba(X)`` is used as the prediction score and
    compared against ``y`` by ``competition_metric``.
    """
    scores = model.predict_proba(X)[:, 1]
    return competition_metric(y, scores)


def submit(arr):
    """Write ``arr`` as the ``state`` column of ``submission.csv``.

    The competition's ``sample_submission.csv`` is read as a template, its
    ``state`` column is overwritten with ``arr`` and the result is saved to
    ``<output_path>/submission.csv`` without the index column.
    """
    template_file = f'{input_path}/tabular-playground-series-apr-2022/sample_submission.csv'
    submission = pd.read_csv(template_file)
    submission['state'] = arr
    submission.to_csv(f'{output_path}/submission.csv', index=False)


def to_csv(arr,train_or_test='train',name=None):
    """Save ``arr`` as a CSV file under ``output_path``.

    Parameters
    ----------
    arr : array-like
        Data wrapped in a ``pandas.DataFrame`` before being written.
    train_or_test : str, default 'train'
        Base name of the output file.
    name : str or None, default None
        Optional prefix. When it is a string the file is named
        ``<name>_<train_or_test>.csv``; otherwise ``<train_or_test>.csv``.

    The file is written without the index column.
    """
    # isinstance (instead of an exact type comparison) is the idiomatic check
    # and also accepts str subclasses.
    prefix = f'{name}_' if isinstance(name, str) else ''
    pd.DataFrame(arr).to_csv(f'{output_path}/{prefix}{train_or_test}.csv', index=False)



In [3]:
def short_test(x,y,n):
    """Return a reduced ``(x, y)`` pair for quick experiments.

    Keeps the rows of ``x`` whose ``sequence`` value is below ``n`` together
    with the first ``n`` entries of ``y``.
    """
    keep_rows = x.sequence < n
    x_small = x.loc[keep_rows]
    y_small = y[:n]
    return x_small, y_small

In [4]:
# Load train.csv of the tbr-apr-22-nn-pure dataset as a NumPy array; it becomes the
# first group of columns of the stacking features (`valids`) built further down.
# Presumably these are predictions of a neural-network model — TODO confirm.
df_NN_pure = pd.read_csv("../input/tbr-apr-22-nn-pure/train.csv").to_numpy()
            


In [5]:
# Number of train_v{i}.csv / test_v{i}.csv files that are averaged together.
N_DART_VERSIONS = 5
DART_DIR = "../input/tbr-apr-2022-dart-non-nn/tbr_apr_2022_dart_non_nn"


def _load_dart_parts(split, n_versions=N_DART_VERSIONS):
    """Load every ``<split>_v{i}.csv`` as an array pre-scaled by ``1 / n_versions``.

    Keeping the file count and the divisor in one parameter guarantees they
    cannot drift apart.
    """
    return [
        pd.read_csv(f"{DART_DIR}/{split}_v{i}.csv").to_numpy() / n_versions
        for i in range(n_versions)
    ]


def _sum_parts(parts):
    """Add the arrays in ``parts`` element-wise, starting from a float zero array."""
    total = np.zeros(parts[0].shape)
    for part in parts:
        total += part
    return total


# Each part is already divided by the number of versions, so the sum is their mean.
L_df_F_dart_v = _load_dart_parts("train")
df_F_dart_v = _sum_parts(L_df_F_dart_v)

# Test-side counterpart of the tbr-apr-22-nn-pure train.csv array.
df_test_NN_pure = pd.read_csv("../input/tbr-apr-22-nn-pure/test.csv").to_numpy()

L_df_test_F_dart_v = _load_dart_parts("test")
df_test_F_dart_v = _sum_parts(L_df_test_F_dart_v)

In [6]:

# Training labels: the `state` column of train_labels.csv.
y = load_label("train")
# For any argument other than 'train', load_label reads sample_submission.csv, so this
# is the `state` column of the submission template, not ground-truth test labels
# (presumably placeholder values). Later cells only use its length.
y_test = load_label('test')
# Training labels repeated twice end to end; not referenced by any later cell shown
# in this notebook.
yy = np.concatenate([y,y], axis=0)

In [7]:
# Train-side stacking features: the two train arrays joined column-wise
# (df_NN_pure columns first, then df_F_dart_v columns).
train_feature_groups = (df_NN_pure, df_F_dart_v)
valids = np.concatenate(train_feature_groups, axis=1)

In [8]:
# Drop the last column of the stacked train features. The target width is derived
# from the source arrays (rather than slicing with -1) so that re-running this cell
# is a no-op instead of silently removing one more column each time.
valids = valids[:, :df_NN_pure.shape[1] + df_F_dart_v.shape[1] - 1]

In [9]:
# Test-side stacking features: the two test arrays joined column-wise
# (df_test_NN_pure columns first, then df_test_F_dart_v columns).
test_feature_groups = (df_test_NN_pure, df_test_F_dart_v)
tests = np.concatenate(test_feature_groups, axis=1)

In [10]:
# Drop the last column of the stacked test features. The target width is derived
# from the source arrays (rather than slicing with -1) so that re-running this cell
# is a no-op instead of silently removing one more column each time.
tests = tests[:, :df_test_NN_pure.shape[1] + df_test_F_dart_v.shape[1] - 1]

In [11]:
from lightgbm import LGBMClassifier


In [12]:
import sklearn.metrics as metrics

In [13]:
# Print the ROC AUC of every individual feature column against the training labels.
for column_scores in valids.T:
    print(metrics.roc_auc_score(y, column_scores))


0.9777898181122935
0.980460950723985
0.9613977609984228
0.9790575589205852
0.9791202875330113
0.9786032020859757
0.977540991650445
0.9669728457653909
0.9763716947241221
0.964018921298494
0.9660085601807571
0.9806624445179511
0.9933647245698443
0.9909257174830474
0.9893312421660414
0.9846474227265947


In [14]:
# Shape of the array loaded from the sample-submission `state` column, i.e. the
# number of rows a submission must contain.
y_test.shape

(12218,)

In [15]:
# Fit a LightGBM meta-model on the stacked base-model features with K-fold CV.
n_splits = 5
K_fold = model_selection.KFold(n_splits=n_splits,shuffle =True, random_state=97)
test_preds_array = np.zeros(len(y_test))   # test predictions averaged over the folds
valid_preds_array = np.zeros(len(y))       # out-of-fold predictions for the training rows
scores_valid = []
scores_train = []

for fold, (train_idx, valid_idx) in enumerate(K_fold.split(valids)):
    X_train , y_train = valids[train_idx, :], y[train_idx]
    X_valid, y_valid = valids[valid_idx, :], y[valid_idx]

    # NOTE(review): LightGBM documents that bagging (`subsample`) only takes effect
    # when `subsample_freq` is > 0, which is not set here, so `subsample=0.7` is
    # presumably ignored. Left unchanged because enabling it would alter the model;
    # confirm whether bagging was intended.
    clf = LGBMClassifier(num_leaves=25,
                        objective="binary",
                        metric='auc',
                        subsample=0.7,
                        learning_rate=0.03,
                        n_estimators=10000,
                        n_jobs=-1,
                        boosting_type="dart")
    # No `early_stopping_rounds` here on purpose: LightGBM does not support early
    # stopping in dart mode, so passing it never stopped training (the logged AUC
    # peaks around iteration ~1000 while boosting keeps going). All `n_estimators`
    # trees are built either way; eval_set is kept only for progress logging.
    clf.fit(X_train,y_train,
            eval_set=[(X_valid,y_valid)],
            verbose = 100)

    # The last predict_proba column is used as the prediction score.
    valid_preds = clf.predict_proba(X_valid)[:, -1]
    train_preds = clf.predict_proba(X_train)[:, -1]
    test_preds = clf.predict_proba(tests)[:, -1]
    # Each fold contributes 1/n_splits, so the accumulated array is the fold average.
    test_preds_array += test_preds / n_splits
    valid_preds_array[valid_idx] = valid_preds

    score_valid = metrics.roc_auc_score(y_valid, valid_preds)
    score_train = metrics.roc_auc_score(y_train, train_preds)

    scores_valid.append(score_valid)
    scores_train.append(score_train)

# Report the collected scores so the quality of the blend is visible in the output.
print(f"train AUC per fold: {np.round(scores_train, 6)}")
print(f"valid AUC per fold: {np.round(scores_valid, 6)}")
print(f"mean valid AUC: {np.mean(scores_valid):.6f} (std {np.std(scores_valid):.6f})")
print(f"out-of-fold AUC: {metrics.roc_auc_score(y, valid_preds_array):.6f}")



[100]	valid_0's auc: 0.995327
[200]	valid_0's auc: 0.996863
[300]	valid_0's auc: 0.99757
[400]	valid_0's auc: 0.99774
[500]	valid_0's auc: 0.998038
[600]	valid_0's auc: 0.998188
[700]	valid_0's auc: 0.998208
[800]	valid_0's auc: 0.998242
[900]	valid_0's auc: 0.998274
[1000]	valid_0's auc: 0.998263
[1100]	valid_0's auc: 0.998288
[1200]	valid_0's auc: 0.998277
[1300]	valid_0's auc: 0.998266
[1400]	valid_0's auc: 0.998253
[1500]	valid_0's auc: 0.998243
[1600]	valid_0's auc: 0.998251
[1700]	valid_0's auc: 0.998264
[1800]	valid_0's auc: 0.998275
[1900]	valid_0's auc: 0.998273
[2000]	valid_0's auc: 0.998262
[2100]	valid_0's auc: 0.998251
[2200]	valid_0's auc: 0.998239
[2300]	valid_0's auc: 0.998229
[2400]	valid_0's auc: 0.998208
[2500]	valid_0's auc: 0.998202
[2600]	valid_0's auc: 0.998187
[2700]	valid_0's auc: 0.998172
[2800]	valid_0's auc: 0.998173
[2900]	valid_0's auc: 0.998182
[3000]	valid_0's auc: 0.998186
[3100]	valid_0's auc: 0.998188
[3200]	valid_0's auc: 0.998182
[3300]	valid_0's au



[100]	valid_0's auc: 0.996126
[200]	valid_0's auc: 0.996647
[300]	valid_0's auc: 0.99767
[400]	valid_0's auc: 0.997983
[500]	valid_0's auc: 0.998187
[600]	valid_0's auc: 0.998203
[700]	valid_0's auc: 0.998216
[800]	valid_0's auc: 0.998356
[900]	valid_0's auc: 0.998396
[1000]	valid_0's auc: 0.998399
[1100]	valid_0's auc: 0.998407
[1200]	valid_0's auc: 0.998391
[1300]	valid_0's auc: 0.998381
[1400]	valid_0's auc: 0.998389
[1500]	valid_0's auc: 0.998373
[1600]	valid_0's auc: 0.998376
[1700]	valid_0's auc: 0.998373
[1800]	valid_0's auc: 0.998385
[1900]	valid_0's auc: 0.99838
[2000]	valid_0's auc: 0.998375
[2100]	valid_0's auc: 0.998358
[2200]	valid_0's auc: 0.998368
[2300]	valid_0's auc: 0.998361
[2400]	valid_0's auc: 0.998371
[2500]	valid_0's auc: 0.998374
[2600]	valid_0's auc: 0.998385
[2700]	valid_0's auc: 0.998373
[2800]	valid_0's auc: 0.998373
[2900]	valid_0's auc: 0.998366
[3000]	valid_0's auc: 0.99836
[3100]	valid_0's auc: 0.998359
[3200]	valid_0's auc: 0.998358
[3300]	valid_0's auc



[100]	valid_0's auc: 0.996246
[200]	valid_0's auc: 0.997201
[300]	valid_0's auc: 0.998158
[400]	valid_0's auc: 0.998412
[500]	valid_0's auc: 0.998683
[600]	valid_0's auc: 0.998749
[700]	valid_0's auc: 0.998772
[800]	valid_0's auc: 0.998785
[900]	valid_0's auc: 0.998772
[1000]	valid_0's auc: 0.998748
[1100]	valid_0's auc: 0.998746
[1200]	valid_0's auc: 0.998718
[1300]	valid_0's auc: 0.998684
[1400]	valid_0's auc: 0.998661
[1500]	valid_0's auc: 0.998648
[1600]	valid_0's auc: 0.998636
[1700]	valid_0's auc: 0.998623
[1800]	valid_0's auc: 0.998607
[1900]	valid_0's auc: 0.9986
[2000]	valid_0's auc: 0.998603
[2100]	valid_0's auc: 0.998584
[2200]	valid_0's auc: 0.998561
[2300]	valid_0's auc: 0.998539
[2400]	valid_0's auc: 0.998513
[2500]	valid_0's auc: 0.998504
[2600]	valid_0's auc: 0.998492
[2700]	valid_0's auc: 0.998487
[2800]	valid_0's auc: 0.998478
[2900]	valid_0's auc: 0.998476
[3000]	valid_0's auc: 0.998466
[3100]	valid_0's auc: 0.998469
[3200]	valid_0's auc: 0.998462
[3300]	valid_0's au



[100]	valid_0's auc: 0.997554
[200]	valid_0's auc: 0.997974
[300]	valid_0's auc: 0.998763
[400]	valid_0's auc: 0.998897
[500]	valid_0's auc: 0.999018
[600]	valid_0's auc: 0.999055
[700]	valid_0's auc: 0.999067
[800]	valid_0's auc: 0.999083
[900]	valid_0's auc: 0.999085
[1000]	valid_0's auc: 0.999075
[1100]	valid_0's auc: 0.999084
[1200]	valid_0's auc: 0.9991
[1300]	valid_0's auc: 0.999099
[1400]	valid_0's auc: 0.999079
[1500]	valid_0's auc: 0.999066
[1600]	valid_0's auc: 0.999062
[1700]	valid_0's auc: 0.99905
[1800]	valid_0's auc: 0.999042
[1900]	valid_0's auc: 0.999036
[2000]	valid_0's auc: 0.999037
[2100]	valid_0's auc: 0.999039
[2200]	valid_0's auc: 0.999042
[2300]	valid_0's auc: 0.999038
[2400]	valid_0's auc: 0.99904
[2500]	valid_0's auc: 0.999041
[2600]	valid_0's auc: 0.999048
[2700]	valid_0's auc: 0.999038
[2800]	valid_0's auc: 0.999042
[2900]	valid_0's auc: 0.99904
[3000]	valid_0's auc: 0.999042
[3100]	valid_0's auc: 0.999044
[3200]	valid_0's auc: 0.999045
[3300]	valid_0's auc: 



[100]	valid_0's auc: 0.997697
[200]	valid_0's auc: 0.997879
[300]	valid_0's auc: 0.998357
[400]	valid_0's auc: 0.998464
[500]	valid_0's auc: 0.998689
[600]	valid_0's auc: 0.99884
[700]	valid_0's auc: 0.998909
[800]	valid_0's auc: 0.99893
[900]	valid_0's auc: 0.99897
[1000]	valid_0's auc: 0.99898
[1100]	valid_0's auc: 0.998988
[1200]	valid_0's auc: 0.998983
[1300]	valid_0's auc: 0.998991
[1400]	valid_0's auc: 0.998992
[1500]	valid_0's auc: 0.99899
[1600]	valid_0's auc: 0.998989
[1700]	valid_0's auc: 0.998998
[1800]	valid_0's auc: 0.999
[1900]	valid_0's auc: 0.998996
[2000]	valid_0's auc: 0.998998
[2100]	valid_0's auc: 0.998999
[2200]	valid_0's auc: 0.999007
[2300]	valid_0's auc: 0.998997
[2400]	valid_0's auc: 0.998997
[2500]	valid_0's auc: 0.998998
[2600]	valid_0's auc: 0.999008
[2700]	valid_0's auc: 0.999017
[2800]	valid_0's auc: 0.999024
[2900]	valid_0's auc: 0.999016
[3000]	valid_0's auc: 0.999013
[3100]	valid_0's auc: 0.999015
[3200]	valid_0's auc: 0.999021
[3300]	valid_0's auc: 0.9

In [16]:
# Write the fold-averaged test predictions to submission.csv.
submit(test_preds_array)
