# Setup

In [1]:
import gc
import time
import warnings
import subprocess

gc.enable()  # make sure automatic garbage collection is switched on
warnings.filterwarnings('ignore')  # silence every warning for the rest of the notebook

import numpy as np
import pandas as pd

pd.set_option('display.max_columns', None)  # never truncate columns when displaying wide frames

from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold

import xgboost as xgb
import optuna

# Global random seed; it is the default `seed` argument of `custom_cv`.
SEED = 55

In [2]:
# Stop immediately if the installed XGBoost is not exactly 2.0.2, the release
# named in the assertion as the one used by the original notebook.
assert xgb.__version__ == '2.0.2', 'XGBoost version differs from original notebook.' 

In [3]:
# Pick the compute device: if the `nvidia-smi` command runs successfully we
# use 'cuda'; any failure (command missing, non-zero exit, ...) means 'cpu'.
try:
    subprocess.check_output('nvidia-smi')
except Exception:
    DEVICE = 'cpu'
else:
    DEVICE = 'cuda'

print(f'Available device: {DEVICE}')

Available device: cuda


In [4]:
# Directory holding the competition CSV files (Kaggle input mount).
DATA_DIR = '/kaggle/input/bitgrit-ai-generated-text-classification/ai-text-competition'
train = pd.read_csv(f'{DATA_DIR}/training_set.csv')  # training rows
test = pd.read_csv(f'{DATA_DIR}/test_set.csv')  # rows to predict
sample_sub = pd.read_csv(f'{DATA_DIR}/solution_format.csv')  # submission template

# Data overview

In [5]:
# Show one random training row. The draw is seeded so the displayed row is the
# same on every Restart & Run All.
train.sample(random_state=SEED)

Unnamed: 0,ID,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_35,feature_36,feature_37,feature_38,feature_39,feature_40,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_54,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,feature_79,feature_80,feature_81,feature_82,feature_83,feature_84,feature_85,feature_86,feature_87,feature_88,feature_89,feature_90,feature_91,feature_92,feature_93,feature_94,feature_95,feature_96,feature_97,feature_98,feature_99,feature_100,feature_101,feature_102,feature_103,feature_104,feature_105,feature_106,feature_107,feature_108,feature_109,feature_110,feature_111,feature_112,feature_113,feature_114,feature_115,feature_116,feature_117,feature_118,feature_119,feature_120,feature_121,feature_122,feature_123,feature_124,feature_125,feature_126,feature_127,feature_128,feature_129,feature_130,feature_131,feature_132,feature_133,feature_134,feature_135,feature_136,feature_137,feature_138,feature_139,feature_140,feature_141,feature_142,feature_143,feature_144,feature_145,feature_146,feature_147,feature_148,feature_149,feature_150,feature_151,feature_152,feature_153,feature_154,feature_155,feature_156,feature_157,feature_158,feature_159,feature_160,feature_161,feature_162,feature_163,feature_164,feature_165,feature_166,feature_167,feature_168,feature_169,feature_170,feature_171,feature_172,feature_173,feature_
174,feature_175,feature_176,feature_177,feature_178,feature_179,feature_180,feature_181,feature_182,feature_183,feature_184,feature_185,feature_186,feature_187,feature_188,feature_189,feature_190,feature_191,feature_192,feature_193,feature_194,feature_195,feature_196,feature_197,feature_198,feature_199,feature_200,feature_201,feature_202,feature_203,feature_204,feature_205,feature_206,feature_207,feature_208,feature_209,feature_210,feature_211,feature_212,feature_213,feature_214,feature_215,feature_216,feature_217,feature_218,feature_219,feature_220,feature_221,feature_222,feature_223,feature_224,feature_225,feature_226,feature_227,feature_228,feature_229,feature_230,feature_231,feature_232,feature_233,feature_234,feature_235,feature_236,feature_237,feature_238,feature_239,feature_240,feature_241,feature_242,feature_243,feature_244,feature_245,feature_246,feature_247,feature_248,feature_249,feature_250,feature_251,feature_252,feature_253,feature_254,feature_255,feature_256,feature_257,feature_258,feature_259,feature_260,feature_261,feature_262,feature_263,feature_264,feature_265,feature_266,feature_267,feature_268,feature_269,feature_270,feature_271,feature_272,feature_273,feature_274,feature_275,feature_276,feature_277,feature_278,feature_279,feature_280,feature_281,feature_282,feature_283,feature_284,feature_285,feature_286,feature_287,feature_288,feature_289,feature_290,feature_291,feature_292,feature_293,feature_294,feature_295,feature_296,feature_297,feature_298,feature_299,feature_300,feature_301,feature_302,feature_303,feature_304,feature_305,feature_306,feature_307,feature_308,feature_309,feature_310,feature_311,feature_312,feature_313,feature_314,feature_315,feature_316,feature_317,feature_318,feature_319,feature_320,feature_321,feature_322,feature_323,feature_324,feature_325,feature_326,feature_327,feature_328,feature_329,feature_330,feature_331,feature_332,feature_333,feature_334,feature_335,feature_336,feature_337,feature_338,feature_339,feature_340,feat
ure_341,feature_342,feature_343,feature_344,feature_345,feature_346,feature_347,feature_348,feature_349,feature_350,feature_351,feature_352,feature_353,feature_354,feature_355,feature_356,feature_357,feature_358,feature_359,feature_360,feature_361,feature_362,feature_363,feature_364,feature_365,feature_366,feature_367,feature_368,feature_369,feature_370,feature_371,feature_372,feature_373,feature_374,feature_375,feature_376,feature_377,feature_378,feature_379,feature_380,feature_381,feature_382,feature_383,feature_384,feature_385,feature_386,feature_387,feature_388,feature_389,feature_390,feature_391,feature_392,feature_393,feature_394,feature_395,feature_396,feature_397,feature_398,feature_399,feature_400,feature_401,feature_402,feature_403,feature_404,feature_405,feature_406,feature_407,feature_408,feature_409,feature_410,feature_411,feature_412,feature_413,feature_414,feature_415,feature_416,feature_417,feature_418,feature_419,feature_420,feature_421,feature_422,feature_423,feature_424,feature_425,feature_426,feature_427,feature_428,feature_429,feature_430,feature_431,feature_432,feature_433,feature_434,feature_435,feature_436,feature_437,feature_438,feature_439,feature_440,feature_441,feature_442,feature_443,feature_444,feature_445,feature_446,feature_447,feature_448,feature_449,feature_450,feature_451,feature_452,feature_453,feature_454,feature_455,feature_456,feature_457,feature_458,feature_459,feature_460,feature_461,feature_462,feature_463,feature_464,feature_465,feature_466,feature_467,feature_468,feature_469,feature_470,feature_471,feature_472,feature_473,feature_474,feature_475,feature_476,feature_477,feature_478,feature_479,feature_480,feature_481,feature_482,feature_483,feature_484,feature_485,feature_486,feature_487,feature_488,feature_489,feature_490,feature_491,feature_492,feature_493,feature_494,feature_495,feature_496,feature_497,feature_498,feature_499,feature_500,feature_501,feature_502,feature_503,feature_504,feature_505,feature_506,feature_507,
feature_508,feature_509,feature_510,feature_511,feature_512,feature_513,feature_514,feature_515,feature_516,feature_517,feature_518,feature_519,feature_520,feature_521,feature_522,feature_523,feature_524,feature_525,feature_526,feature_527,feature_528,feature_529,feature_530,feature_531,feature_532,feature_533,feature_534,feature_535,feature_536,feature_537,feature_538,feature_539,feature_540,feature_541,feature_542,feature_543,feature_544,feature_545,feature_546,feature_547,feature_548,feature_549,feature_550,feature_551,feature_552,feature_553,feature_554,feature_555,feature_556,feature_557,feature_558,feature_559,feature_560,feature_561,feature_562,feature_563,feature_564,feature_565,feature_566,feature_567,feature_568,feature_569,feature_570,feature_571,feature_572,feature_573,feature_574,feature_575,feature_576,feature_577,feature_578,feature_579,feature_580,feature_581,feature_582,feature_583,feature_584,feature_585,feature_586,feature_587,feature_588,feature_589,feature_590,feature_591,feature_592,feature_593,feature_594,feature_595,feature_596,feature_597,feature_598,feature_599,feature_600,feature_601,feature_602,feature_603,feature_604,feature_605,feature_606,feature_607,feature_608,feature_609,feature_610,feature_611,feature_612,feature_613,feature_614,feature_615,feature_616,feature_617,feature_618,feature_619,feature_620,feature_621,feature_622,feature_623,feature_624,feature_625,feature_626,feature_627,feature_628,feature_629,feature_630,feature_631,feature_632,feature_633,feature_634,feature_635,feature_636,feature_637,feature_638,feature_639,feature_640,feature_641,feature_642,feature_643,feature_644,feature_645,feature_646,feature_647,feature_648,feature_649,feature_650,feature_651,feature_652,feature_653,feature_654,feature_655,feature_656,feature_657,feature_658,feature_659,feature_660,feature_661,feature_662,feature_663,feature_664,feature_665,feature_666,feature_667,feature_668,feature_669,feature_670,feature_671,feature_672,feature_673,feature_
674,feature_675,feature_676,feature_677,feature_678,feature_679,feature_680,feature_681,feature_682,feature_683,feature_684,feature_685,feature_686,feature_687,feature_688,feature_689,feature_690,feature_691,feature_692,feature_693,feature_694,feature_695,feature_696,feature_697,feature_698,feature_699,feature_700,feature_701,feature_702,feature_703,feature_704,feature_705,feature_706,feature_707,feature_708,feature_709,feature_710,feature_711,feature_712,feature_713,feature_714,feature_715,feature_716,feature_717,feature_718,feature_719,feature_720,feature_721,feature_722,feature_723,feature_724,feature_725,feature_726,feature_727,feature_728,feature_729,feature_730,feature_731,feature_732,feature_733,feature_734,feature_735,feature_736,feature_737,feature_738,feature_739,feature_740,feature_741,feature_742,feature_743,feature_744,feature_745,feature_746,feature_747,feature_748,feature_749,feature_750,feature_751,feature_752,feature_753,feature_754,feature_755,feature_756,feature_757,feature_758,feature_759,feature_760,feature_761,feature_762,feature_763,feature_764,feature_765,feature_766,feature_767,word_count,punc_num,ind
833,834,0.41402,-0.211115,-0.542835,0.896299,-0.192674,-0.425347,-0.044464,-0.207561,-0.150235,-1.290869,-0.030934,0.672473,0.463257,1.023528,0.925962,-0.533944,-0.208382,0.252772,-0.063508,-0.322795,0.621087,0.17163,0.762714,0.60621,-0.192086,-0.274257,-0.695927,0.931262,-0.386792,-3.515397,0.005377,0.462879,-0.332741,0.362495,-1.630477,0.423551,0.021005,0.651017,0.281251,0.309308,1.031338,-0.423041,0.214852,0.108982,0.60597,0.427271,0.101028,0.678833,-0.383192,0.904867,0.08037,0.870021,-0.957932,0.338204,-0.086591,-0.138038,1.078137,-0.090818,0.289745,0.62071,-0.091079,0.077216,0.180344,0.173244,-0.598949,-0.123513,0.35203,0.533909,-0.344323,0.115894,-1.519462,-1.136882,-0.61557,0.603213,-1.325865,-0.033487,0.184995,-0.113002,0.431285,0.367384,-0.363776,1.059848,0.694832,-0.423929,0.408611,-0.728513,-0.23101,-0.932097,-0.440387,0.507319,0.197318,-0.669899,0.061944,0.338764,-0.100117,0.271391,-0.467201,-1.092311,0.060493,-0.308073,0.621335,1.833385,-1.158783,0.470117,0.666723,0.474807,1.144038,-0.097792,0.364004,0.046096,-0.36752,-0.479899,0.159016,-0.218281,-0.483236,-0.886863,1.804223,-0.536402,2.189987,0.077321,1.005472,1.246561,-1.213178,0.651832,0.418483,-0.000855,-0.417139,0.043001,-0.736589,-0.193248,0.877064,0.718176,0.305983,0.49472,0.380716,-0.238301,-0.058658,0.034142,0.705011,0.170205,-0.07588,0.054413,-0.784271,0.284808,0.358358,-0.26929,0.081674,0.142388,-0.078451,-0.410475,-0.980174,-0.357513,0.000248,0.677407,0.731376,-0.004135,-0.892564,0.199987,0.398944,-0.174991,-0.259562,-0.778651,1.483641,-0.042264,0.481113,-0.150417,0.412872,-0.077124,0.334844,0.336016,0.036229,-0.100859,-1.296666,0.109232,-0.155821,0.188526,-0.47285,0.100613,0.452625,-0.012218,0.863033,-0.157464,0.592987,0.067508,0.068705,0.719633,0.510567,0.181362,0.219931,-0.265595,0.970196,-0.292108,0.193529,0.25617,-0.429508,-0.033058,0.086595,1.433937,1.134597,-1.419709,0.753579,-0.295652,-0.085913,0.170835,0.316365,0.09791,-0.164237,-0.167086,-0.237938,0.034516,0.756636,0.156071,-0.0803
15,-0.386792,0.069442,0.392459,-0.473456,-1.174651,-0.654227,-0.047667,0.481141,-0.464258,0.689747,0.042263,0.348786,1.101672,0.098807,1.375736,0.740411,0.027911,-0.079157,-0.084778,0.231215,0.055349,0.435232,0.758216,0.253321,0.503151,-0.3715,-0.016674,0.458857,1.296194,0.631635,1.16894,-0.682112,0.870992,-0.595158,0.116517,0.638597,-0.758019,0.184635,-0.125847,-1.46758,-0.563515,-0.580162,-0.447981,0.718146,0.628284,0.83056,0.109144,0.652051,-0.818344,-0.515745,-0.478372,0.07584,-0.262004,-0.054722,0.145553,0.032812,-0.380523,0.014531,0.369456,-1.18397,-0.091224,-0.765781,0.22141,0.684387,0.827938,-0.886831,0.260366,0.379912,1.107598,0.369293,-0.091749,-0.021374,-0.621916,0.291121,-0.333547,0.512281,0.385213,0.455046,-0.127441,-0.789591,0.409175,-0.29694,-0.280123,-1.824022,0.13947,-0.290617,0.41604,0.554359,0.315595,0.343314,0.233453,0.638615,0.407741,0.418619,1.064611,-0.270929,-0.512826,-0.300933,-0.672953,0.068041,0.835963,0.478261,0.981018,0.561953,-0.153128,0.398378,0.949836,0.039739,0.021524,-0.143767,-0.423817,0.315542,-0.051077,0.718249,0.18306,0.689048,0.409469,-0.071204,-0.843249,0.958536,0.316143,0.439777,0.251167,0.227383,0.098643,-0.004001,-0.09509,0.153389,0.377048,0.394949,0.631512,0.973243,-0.033557,0.341429,-1.376284,-0.079207,0.654685,-0.518306,-1.510298,0.54964,0.211154,0.154335,-0.033523,0.580713,-0.089844,-0.401882,-0.281183,0.808501,-0.263304,0.008482,-0.171718,-0.802821,0.128745,0.12259,0.938912,0.510383,-0.695606,0.790225,0.316343,-0.38773,0.420931,0.191397,-0.159797,0.191247,0.68632,0.777134,1.171477,-1.164415,-0.264297,-0.176827,0.031221,0.614688,-0.196633,0.690069,-0.69939,-1.313078,-0.284796,0.507564,-0.34526,0.382839,-1.148362,1.139614,0.593462,-0.274983,0.028374,0.883085,0.61663,0.634831,0.526421,0.505498,0.639954,-0.450291,0.801123,0.337242,0.371182,-0.165385,-0.123729,0.520124,0.580865,0.502214,1.841873,0.538769,-0.034808,-0.013899,0.537476,-0.48179,-0.72875,-0.019402,0.119541,-0.260824,-0.925817,0.039135,0.414851,0.514653,0.610804
,-0.099647,1.728938,0.061755,0.320859,-0.435901,0.531435,0.221982,0.678095,0.262474,-0.571343,-1.025581,0.809823,0.212023,-0.010581,-0.45873,-0.134558,-0.687269,-0.337856,1.089451,0.286052,0.454763,1.2003,-0.139102,-0.093876,-0.160592,-0.632902,0.760659,0.499407,0.099048,0.658605,-0.685994,-0.23032,0.11117,-0.46751,-1.140183,0.709575,-0.238591,-0.028701,0.235656,-0.046934,-0.014815,-0.082576,-0.623214,-0.483148,0.075351,-0.175206,0.022032,0.865935,0.469935,0.038166,1.458851,-0.153023,0.142607,0.358606,-0.70753,1.010455,-0.332265,0.17057,1.190524,-0.198252,0.617282,1.08017,0.786698,-0.382947,0.143503,0.369978,0.378086,0.716019,-0.748203,2.773726,-0.018383,0.966321,-0.543926,0.291932,0.566261,0.242664,0.657731,0.039383,0.491434,0.170973,0.555846,0.663868,-1.273844,-0.421569,0.369744,-0.664434,-0.657428,0.105889,-0.062712,0.28465,-0.548065,0.1271,0.181665,-0.340763,0.761905,-0.33909,0.016716,-0.41174,0.421228,-0.080052,0.557611,0.295294,-0.294127,-0.422513,-0.045168,0.217112,0.225421,-0.051739,0.134845,-0.051139,-0.116575,-0.224281,-0.884974,0.306275,-0.117023,0.705204,0.052762,-0.251307,0.13667,-0.08181,0.878977,0.36985,-0.048589,-0.627069,0.301949,0.015266,-1.066789,-0.297769,-0.032443,0.940052,0.442279,0.254881,-0.098997,0.224483,0.115401,0.550974,-0.540535,-0.142587,-0.424359,-0.637575,0.248967,0.001666,0.420118,0.103062,-1.106021,0.799821,-5.777321,-0.668695,-0.473862,1.458275,-1.110842,1.127766,0.802806,0.21882,0.283155,-0.647439,-0.476882,-0.551988,1.002572,-0.70039,0.658218,0.614094,0.063225,0.440963,-0.507736,-1.269953,0.245025,-0.397629,0.910444,0.890278,-0.503741,-0.024489,-0.219819,0.835634,0.238652,0.34729,-0.422116,-0.014529,0.099564,0.016086,-0.399335,0.043892,-0.567478,-0.576587,-0.272353,-1.205071,0.043227,0.319697,-0.541157,0.365989,-0.190721,0.524701,-0.416743,-0.139706,-0.26725,-0.304516,0.463398,-0.704375,-0.347091,0.707607,-0.125757,1.288085,-0.911992,-0.19887,-0.059182,1.175936,0.706815,-0.239567,0.147186,-0.463118,-0.536549,-0.547454,0.582501,-0
.092,0.01072,0.175563,0.174867,0.57235,-0.406864,0.149599,-0.314391,-0.132239,0.504657,-0.070219,-0.395279,0.278238,0.636049,-0.348848,0.48107,0.4495,0.786169,-0.198574,-0.194881,-0.119414,-0.008245,-0.511178,-0.398457,-0.028002,0.22562,0.658592,0.381188,-0.039984,-0.016215,-0.190175,0.139006,0.659664,-0.97361,-0.251526,-0.209068,0.636039,0.778909,-0.469332,0.452632,0.321501,-0.067667,0.620906,0.349333,0.011656,-0.36671,-0.132167,-1.021827,-0.297466,-0.357843,-0.102109,0.972442,-0.33669,-0.444353,-0.62524,0.422322,0.125375,2.399484,-0.151335,0.57014,-0.244007,-0.334735,-0.051368,0.22138,1.156465,0.627731,0.316502,0.781909,-0.209593,0.286755,-0.625945,0.058002,0.206629,1.072657,0.59065,-0.452362,0.272913,0.419389,-0.337151,-1.165218,0.470745,0.081452,1.228373,0.222479,0.879701,0.177988,0.420731,0.022618,0.10612,-0.738845,0.146839,0.14027,0.030748,0.035907,0.567382,-0.474634,-0.230101,-0.107407,0.526047,-0.235907,0.129717,0.816129,0.766018,-0.433946,0.340324,0.329732,0.015976,0.023313,0.100774,0.006856,-0.469997,0.078984,0.227086,-0.838715,0.135514,0.410342,0.019354,0.439374,0.834615,0.267236,0.638203,0.739519,0.279298,0.291648,0.439581,-0.14234,0.046414,19,9,1


In [6]:
# Show one random test row. The draw is seeded so the displayed row is the
# same on every Restart & Run All.
test.sample(random_state=SEED)

Unnamed: 0,ID,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,feature_32,feature_33,feature_34,feature_35,feature_36,feature_37,feature_38,feature_39,feature_40,feature_41,feature_42,feature_43,feature_44,feature_45,feature_46,feature_47,feature_48,feature_49,feature_50,feature_51,feature_52,feature_53,feature_54,feature_55,feature_56,feature_57,feature_58,feature_59,feature_60,feature_61,feature_62,feature_63,feature_64,feature_65,feature_66,feature_67,feature_68,feature_69,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,feature_79,feature_80,feature_81,feature_82,feature_83,feature_84,feature_85,feature_86,feature_87,feature_88,feature_89,feature_90,feature_91,feature_92,feature_93,feature_94,feature_95,feature_96,feature_97,feature_98,feature_99,feature_100,feature_101,feature_102,feature_103,feature_104,feature_105,feature_106,feature_107,feature_108,feature_109,feature_110,feature_111,feature_112,feature_113,feature_114,feature_115,feature_116,feature_117,feature_118,feature_119,feature_120,feature_121,feature_122,feature_123,feature_124,feature_125,feature_126,feature_127,feature_128,feature_129,feature_130,feature_131,feature_132,feature_133,feature_134,feature_135,feature_136,feature_137,feature_138,feature_139,feature_140,feature_141,feature_142,feature_143,feature_144,feature_145,feature_146,feature_147,feature_148,feature_149,feature_150,feature_151,feature_152,feature_153,feature_154,feature_155,feature_156,feature_157,feature_158,feature_159,feature_160,feature_161,feature_162,feature_163,feature_164,feature_165,feature_166,feature_167,feature_168,feature_169,feature_170,feature_171,feature_172,feature_173,feature_
174,feature_175,feature_176,feature_177,feature_178,feature_179,feature_180,feature_181,feature_182,feature_183,feature_184,feature_185,feature_186,feature_187,feature_188,feature_189,feature_190,feature_191,feature_192,feature_193,feature_194,feature_195,feature_196,feature_197,feature_198,feature_199,feature_200,feature_201,feature_202,feature_203,feature_204,feature_205,feature_206,feature_207,feature_208,feature_209,feature_210,feature_211,feature_212,feature_213,feature_214,feature_215,feature_216,feature_217,feature_218,feature_219,feature_220,feature_221,feature_222,feature_223,feature_224,feature_225,feature_226,feature_227,feature_228,feature_229,feature_230,feature_231,feature_232,feature_233,feature_234,feature_235,feature_236,feature_237,feature_238,feature_239,feature_240,feature_241,feature_242,feature_243,feature_244,feature_245,feature_246,feature_247,feature_248,feature_249,feature_250,feature_251,feature_252,feature_253,feature_254,feature_255,feature_256,feature_257,feature_258,feature_259,feature_260,feature_261,feature_262,feature_263,feature_264,feature_265,feature_266,feature_267,feature_268,feature_269,feature_270,feature_271,feature_272,feature_273,feature_274,feature_275,feature_276,feature_277,feature_278,feature_279,feature_280,feature_281,feature_282,feature_283,feature_284,feature_285,feature_286,feature_287,feature_288,feature_289,feature_290,feature_291,feature_292,feature_293,feature_294,feature_295,feature_296,feature_297,feature_298,feature_299,feature_300,feature_301,feature_302,feature_303,feature_304,feature_305,feature_306,feature_307,feature_308,feature_309,feature_310,feature_311,feature_312,feature_313,feature_314,feature_315,feature_316,feature_317,feature_318,feature_319,feature_320,feature_321,feature_322,feature_323,feature_324,feature_325,feature_326,feature_327,feature_328,feature_329,feature_330,feature_331,feature_332,feature_333,feature_334,feature_335,feature_336,feature_337,feature_338,feature_339,feature_340,feat
ure_341,feature_342,feature_343,feature_344,feature_345,feature_346,feature_347,feature_348,feature_349,feature_350,feature_351,feature_352,feature_353,feature_354,feature_355,feature_356,feature_357,feature_358,feature_359,feature_360,feature_361,feature_362,feature_363,feature_364,feature_365,feature_366,feature_367,feature_368,feature_369,feature_370,feature_371,feature_372,feature_373,feature_374,feature_375,feature_376,feature_377,feature_378,feature_379,feature_380,feature_381,feature_382,feature_383,feature_384,feature_385,feature_386,feature_387,feature_388,feature_389,feature_390,feature_391,feature_392,feature_393,feature_394,feature_395,feature_396,feature_397,feature_398,feature_399,feature_400,feature_401,feature_402,feature_403,feature_404,feature_405,feature_406,feature_407,feature_408,feature_409,feature_410,feature_411,feature_412,feature_413,feature_414,feature_415,feature_416,feature_417,feature_418,feature_419,feature_420,feature_421,feature_422,feature_423,feature_424,feature_425,feature_426,feature_427,feature_428,feature_429,feature_430,feature_431,feature_432,feature_433,feature_434,feature_435,feature_436,feature_437,feature_438,feature_439,feature_440,feature_441,feature_442,feature_443,feature_444,feature_445,feature_446,feature_447,feature_448,feature_449,feature_450,feature_451,feature_452,feature_453,feature_454,feature_455,feature_456,feature_457,feature_458,feature_459,feature_460,feature_461,feature_462,feature_463,feature_464,feature_465,feature_466,feature_467,feature_468,feature_469,feature_470,feature_471,feature_472,feature_473,feature_474,feature_475,feature_476,feature_477,feature_478,feature_479,feature_480,feature_481,feature_482,feature_483,feature_484,feature_485,feature_486,feature_487,feature_488,feature_489,feature_490,feature_491,feature_492,feature_493,feature_494,feature_495,feature_496,feature_497,feature_498,feature_499,feature_500,feature_501,feature_502,feature_503,feature_504,feature_505,feature_506,feature_507,
feature_508,feature_509,feature_510,feature_511,feature_512,feature_513,feature_514,feature_515,feature_516,feature_517,feature_518,feature_519,feature_520,feature_521,feature_522,feature_523,feature_524,feature_525,feature_526,feature_527,feature_528,feature_529,feature_530,feature_531,feature_532,feature_533,feature_534,feature_535,feature_536,feature_537,feature_538,feature_539,feature_540,feature_541,feature_542,feature_543,feature_544,feature_545,feature_546,feature_547,feature_548,feature_549,feature_550,feature_551,feature_552,feature_553,feature_554,feature_555,feature_556,feature_557,feature_558,feature_559,feature_560,feature_561,feature_562,feature_563,feature_564,feature_565,feature_566,feature_567,feature_568,feature_569,feature_570,feature_571,feature_572,feature_573,feature_574,feature_575,feature_576,feature_577,feature_578,feature_579,feature_580,feature_581,feature_582,feature_583,feature_584,feature_585,feature_586,feature_587,feature_588,feature_589,feature_590,feature_591,feature_592,feature_593,feature_594,feature_595,feature_596,feature_597,feature_598,feature_599,feature_600,feature_601,feature_602,feature_603,feature_604,feature_605,feature_606,feature_607,feature_608,feature_609,feature_610,feature_611,feature_612,feature_613,feature_614,feature_615,feature_616,feature_617,feature_618,feature_619,feature_620,feature_621,feature_622,feature_623,feature_624,feature_625,feature_626,feature_627,feature_628,feature_629,feature_630,feature_631,feature_632,feature_633,feature_634,feature_635,feature_636,feature_637,feature_638,feature_639,feature_640,feature_641,feature_642,feature_643,feature_644,feature_645,feature_646,feature_647,feature_648,feature_649,feature_650,feature_651,feature_652,feature_653,feature_654,feature_655,feature_656,feature_657,feature_658,feature_659,feature_660,feature_661,feature_662,feature_663,feature_664,feature_665,feature_666,feature_667,feature_668,feature_669,feature_670,feature_671,feature_672,feature_673,feature_
674,feature_675,feature_676,feature_677,feature_678,feature_679,feature_680,feature_681,feature_682,feature_683,feature_684,feature_685,feature_686,feature_687,feature_688,feature_689,feature_690,feature_691,feature_692,feature_693,feature_694,feature_695,feature_696,feature_697,feature_698,feature_699,feature_700,feature_701,feature_702,feature_703,feature_704,feature_705,feature_706,feature_707,feature_708,feature_709,feature_710,feature_711,feature_712,feature_713,feature_714,feature_715,feature_716,feature_717,feature_718,feature_719,feature_720,feature_721,feature_722,feature_723,feature_724,feature_725,feature_726,feature_727,feature_728,feature_729,feature_730,feature_731,feature_732,feature_733,feature_734,feature_735,feature_736,feature_737,feature_738,feature_739,feature_740,feature_741,feature_742,feature_743,feature_744,feature_745,feature_746,feature_747,feature_748,feature_749,feature_750,feature_751,feature_752,feature_753,feature_754,feature_755,feature_756,feature_757,feature_758,feature_759,feature_760,feature_761,feature_762,feature_763,feature_764,feature_765,feature_766,feature_767,word_count,punc_num
1100,1101,0.307808,-0.786172,-1.561329,-0.670072,-0.242286,-0.026701,-0.022787,0.163617,1.208101,-1.364719,-0.189766,1.296737,0.360455,1.575003,0.587943,0.675638,-0.723825,0.352012,0.071418,-0.909582,0.238893,0.023727,-0.177057,1.008744,-0.167005,0.320222,-0.462056,1.662165,0.229634,-3.749775,0.276218,0.673991,-0.479723,-0.46212,-1.999454,-0.913919,0.569086,0.94158,0.576404,-0.488134,0.3664,-0.968323,0.368948,0.762702,1.190127,-0.676469,0.614995,0.805395,0.042942,0.979006,-0.769091,0.015604,1.945538,0.660968,0.693836,0.045512,0.75123,-0.527835,0.676591,0.230991,0.106094,0.591942,0.70331,0.6074,-0.197197,0.162199,-0.013876,0.43851,-1.147846,0.177865,-0.064095,-1.918758,-0.736752,0.817975,-0.876155,-0.45395,0.184613,-0.560381,-0.110392,0.029972,-0.076208,0.235355,1.799777,-0.088586,-0.156762,-0.933552,-0.285036,-0.392335,-1.29177,0.556375,-0.841981,-1.027009,-0.037054,-0.46451,0.474991,0.006598,-0.22986,-0.259191,-1.183398,-0.047375,0.447286,0.739205,-0.409573,0.909124,0.839154,-0.284412,0.639457,0.57002,0.235954,1.339269,0.210084,0.6311,1.126008,-0.191556,0.234378,-0.604263,-1.292198,-0.668224,1.675276,-0.278499,1.83673,1.235197,-0.75079,1.080551,0.87019,-0.17156,-0.174968,-0.310027,0.710439,-0.308355,1.406976,0.91565,-0.63565,-0.038235,0.914507,-1.096971,0.596505,-0.262716,0.508294,0.401064,0.295432,0.691014,-0.14717,-0.700034,0.770114,0.931449,-0.619413,-0.198776,0.743409,0.133863,0.514517,0.671378,0.231927,0.253108,1.205051,0.914893,-1.212938,-0.105317,0.082389,-0.252908,-0.961513,-0.253303,0.776516,-0.299306,-0.053612,0.404241,0.264902,-0.350596,-0.77933,-0.397946,0.310922,-0.407866,-1.743524,0.125725,-0.087035,-0.031432,0.505738,0.510008,-0.29467,0.121217,0.180375,0.696264,0.122675,-0.093858,-0.768359,1.177965,1.163585,1.092394,-0.345249,-0.787289,0.03342,-0.381117,0.828632,0.051923,0.324509,-0.051287,0.258317,0.094975,1.434422,-1.001876,0.655464,0.026001,0.162589,-0.394187,1.080247,-0.282962,-0.460398,0.395018,-0.518255,0.150621,1.121299,1.212766,-0.065542,0.65
8464,0.49764,-0.238905,-1.072027,-1.798038,-0.310988,-0.612467,-0.179585,-1.726777,-0.182014,-0.299677,0.096801,0.500402,0.548008,1.819748,-1.125219,0.281703,0.569385,0.314744,-0.294608,1.239386,0.093865,0.009209,0.481006,0.665567,0.086436,-1.414618,1.591303,1.960396,0.191963,0.705839,-0.996394,0.625152,0.217575,0.56715,1.386176,1.021917,0.711997,0.122052,-0.593028,-0.481557,-0.172196,0.704344,-0.295686,1.580058,0.151449,0.450599,-0.23466,-0.468053,-0.705263,0.417446,0.455318,-0.324487,0.631933,0.457327,-0.30845,0.486788,0.126213,0.048044,-0.831789,0.551683,-0.658261,0.065106,0.968252,0.940463,-0.048947,0.127222,-0.18625,1.380204,0.715522,-1.026978,-1.311366,-1.271726,-1.099811,0.237657,-0.02802,-0.421046,0.255065,1.028917,-0.599811,0.724929,0.725926,-0.913286,-0.42178,1.43842,0.161323,0.817743,0.753328,0.59719,0.59551,0.139575,-0.752436,-0.031696,-0.080565,0.134299,-0.887254,-0.089794,0.501069,-0.831169,-0.29465,0.523953,0.679585,1.406828,1.111514,0.707558,1.008039,0.797073,-0.689165,-0.408494,-0.179714,-0.156031,1.043237,0.22355,0.822633,0.445637,-0.220862,0.825709,0.042721,-0.124025,1.347099,0.272355,0.535365,-0.535626,-0.58054,-0.182365,-0.456508,-0.348546,0.019569,-0.239851,-0.62792,-0.188808,1.300405,1.000178,0.712684,-0.465431,-0.736207,0.766349,0.396124,-1.558108,-0.222656,0.646235,0.830496,0.282447,-0.29348,0.839664,0.810179,-0.46046,0.323613,-0.48044,-1.055966,0.715987,-1.047475,0.133532,0.832001,1.145657,0.190551,0.817555,0.509947,0.398076,0.737004,0.089762,0.373677,-0.431608,0.416727,0.356137,0.518018,1.164312,-1.082152,0.015543,0.221747,1.178969,0.790418,0.088777,1.025413,-0.793541,-2.09387,-0.487555,0.495834,0.610355,1.070535,-1.317952,1.793927,-0.904483,0.212579,0.083637,0.03615,0.12599,1.330311,-0.346852,0.512352,0.090257,-0.522716,1.2332,0.453648,0.399313,-0.545159,0.428235,0.338208,0.452575,-0.524294,0.553846,-0.67891,-0.355526,-0.67132,0.617557,-1.105249,-0.514725,-0.215273,-0.148861,-0.221483,-0.326862,0.935279,0.34655,0.415074,0.185389,0.161271,
0.498708,-0.369164,0.668252,0.354004,1.356853,-0.083679,1.1065,-0.315444,-0.294432,-0.888849,0.874232,-0.636483,-0.138127,-0.677575,-0.641756,-0.037537,0.044408,1.655196,1.047784,0.489279,0.587565,-0.815423,-0.550733,0.009405,-0.705793,1.013933,0.681506,-0.641894,-0.957222,-0.359445,0.022266,-0.49834,-1.157427,-1.383441,0.07289,0.266356,-1.063681,-0.797832,0.15115,0.954761,-0.483674,-0.969356,0.657596,0.314101,-1.002267,-0.150086,0.567389,1.076071,0.298208,0.677618,0.226173,-0.046767,0.232576,-1.14115,0.926293,-1.220172,0.542743,-0.058011,0.022709,1.910938,1.07296,1.048508,-0.168474,1.134597,-0.012346,0.061242,1.206956,-0.427313,-1.439577,-0.11905,-0.039941,0.158572,0.481233,-0.528406,-0.345609,0.989375,0.11798,-0.198424,1.438674,0.966398,0.576953,-0.630815,0.68172,0.809063,-0.478146,-0.591655,-0.211116,0.00373,0.095886,-0.975099,-0.79081,0.168627,0.474245,0.494969,0.777483,0.128819,-0.524626,-0.129301,0.265128,0.086628,-0.029625,0.238718,-0.715322,0.032569,-0.880753,-1.491039,0.292804,0.084199,-0.102652,-1.542738,-0.205956,-1.408636,-0.359172,0.584839,0.395916,0.060746,-0.032919,0.450607,-0.298234,0.607139,0.225808,-0.685163,-1.320842,0.130312,-0.417995,-1.368195,0.192381,-0.181132,1.20864,0.850642,0.556653,0.083411,0.16664,-1.273346,0.363429,0.17219,-0.627423,1.005353,-0.713615,0.306941,0.418307,-0.845486,0.678971,-0.700391,1.15624,-3.575197,-0.44941,0.239369,0.230234,-1.227211,-0.645375,0.507681,0.639299,0.473194,0.132331,-0.594236,0.780919,1.219836,0.465357,-0.40448,1.192689,-1.264155,0.30042,-0.25085,-1.156979,0.39947,-0.215173,1.406709,0.703866,-0.378805,-1.044648,-0.417251,0.527026,-0.148499,-0.5438,-0.637205,0.253694,0.385764,0.276347,-0.110862,-0.661265,-0.983016,-1.221584,0.465552,-0.658297,-0.542181,0.476227,0.378827,1.133728,-0.980553,0.376026,-0.618858,0.438528,0.103531,0.291321,0.45926,0.38234,-0.334136,0.371798,0.43249,1.313176,-0.96392,-0.569255,-0.008427,0.403189,1.41061,-0.397883,0.465572,-0.73041,0.521171,-0.766606,-0.362348,1.073019,0.84802,0.372
878,0.74561,0.570786,-0.355852,0.349873,-0.564169,0.101529,0.306887,0.318301,0.359895,-0.211221,0.612701,-0.537352,-0.055698,0.240444,0.311593,0.941498,0.015375,-0.036686,0.307078,-0.882615,-0.292049,-0.332619,-0.747255,0.491202,0.010559,0.205279,0.089963,0.444725,0.428601,0.605586,-0.070249,-0.504288,-1.099142,0.097041,0.955719,0.076179,0.971924,0.838466,-0.65898,0.557972,0.298349,0.801761,0.443536,-0.138522,-0.758143,-0.916182,-0.826998,0.032774,1.198611,-0.008556,-2.616447,-1.692981,0.35,-0.530282,3.558833,-0.186301,0.873197,-0.426804,0.060297,-1.416381,-0.43935,1.021185,0.253246,0.981619,0.759514,-0.342154,-0.042723,-0.557349,-1.001715,0.729983,1.756632,1.257782,0.33888,0.951786,-0.098566,0.291582,-0.68896,0.285626,0.760059,-1.16756,-0.038282,0.833756,0.304237,-0.386746,0.323763,0.76105,0.406018,0.099372,-0.00869,0.045719,-0.769652,-0.360762,-2.513613,-0.262304,0.3845,-1.213924,-0.28269,-0.110591,1.25753,-0.3663,0.865467,0.294629,-0.811364,-0.041597,-0.023515,-0.589815,-0.463525,1.177264,-0.107322,0.081022,-0.265983,0.34464,0.380686,-0.375821,1.985067,0.644705,-1.063448,0.810812,1.015124,0.339781,0.41187,0.370197,-0.223838,-0.189811,30,4


In [7]:
# Peek at the first rows of the submission template.
sample_sub.head()

Unnamed: 0,ID,ind
0,1,0
1,2,0
2,3,0
3,4,1
4,5,1


In [8]:
# (rows, columns) of the training set, the test set and the submission template.
train.shape, test.shape, sample_sub.shape

((11144, 772), (2786, 771), (2786, 2))

In [9]:
# Total number of missing cells in train and in test (summed over all columns).
train.isna().sum().sum(), test.isna().sum().sum()

(0, 0)

In [10]:
# Remove the row identifier from both frames. `errors='ignore'` keeps this
# cell idempotent: re-running it once `ID` is already gone is a no-op instead
# of a KeyError.
train = train.drop(columns='ID', errors='ignore')
test = test.drop(columns='ID', errors='ignore')

# Model inputs: the two text statistics plus the 768 numbered `feature_*` columns.
features = ['word_count', 'punc_num'] + [f'feature_{i}' for i in range(768)]
TARGET = 'ind'

# Fail fast, with the offending names, if the expected columns are missing.
missing_in_train = sorted(set(features + [TARGET]) - set(train.columns))
missing_in_test = sorted(set(features) - set(test.columns))
assert not missing_in_train, f'train is missing columns: {missing_in_train}'
assert not missing_in_test, f'test is missing columns: {missing_in_test}'

In [11]:
# Class balance of the target, as proportions rather than raw counts.
train[TARGET].value_counts(normalize=True)

ind
0    0.901292
1    0.098708
Name: proportion, dtype: float64

# Modeling framework

In [12]:
def probs_to_labels(probs, threshold=0.5):
    """Binarise probabilities into 0/1 labels.

    Args:
        probs: array-like supporting element-wise ``>=`` and ``.astype``
            (e.g. a NumPy array or pandas Series).
        threshold: cut-off; values greater than or equal to it map to 1.

    Returns:
        Object of the same container type as ``probs`` holding integer labels.
    """
    is_positive = probs >= threshold
    return is_positive.astype('int')

In [13]:
def get_best_threshold(y_true, y_probs):
    """Grid-search the probability cut-off that maximises F1.

    Candidate thresholds are 0, 0.0005, ..., up to (but excluding) 1.

    Args:
        y_true: ground-truth binary labels.
        y_probs: predicted positive-class probabilities, aligned with y_true.

    Returns:
        The candidate threshold with the highest F1 score; when several tie,
        the smallest one is returned (argmax keeps the first maximum).
    """
    grid = np.arange(0, 1, 0.0005)
    f1_by_cutoff = np.array([
        f1_score(y_true, probs_to_labels(y_probs, cutoff))
        for cutoff in grid
    ])
    return grid[f1_by_cutoff.argmax()]

In [14]:
def custom_cv(features, model, folds=7, seed=SEED):
    """Stratified K-fold training loop with per-fold F1 threshold tuning.

    Reads the notebook-level ``train`` / ``test`` frames and ``TARGET``.
    ``model`` is refit in place on every fold, with the validation fold passed
    as ``eval_set``; ``model.best_iteration`` is read for logging, so the
    estimator is expected to expose it.

    NOTE(review): each fold's threshold is tuned on the same validation fold
    that is then scored, so the printed fold / OOF scores are optimistic.

    Args:
        features: column names used as model inputs.
        model: estimator exposing ``fit(..., eval_set=, verbose=)`` and
            ``predict_proba``.
        folds: number of stratified splits.
        seed: random state for the shuffled splitter.

    Returns:
        Tuple ``(oof_probs, oof_preds, test_probs, test_preds)``:
        the first two are Series keyed by training-row position; the last two
        are DataFrames with one ``fold{k}`` column per fold plus a ``'mean'``
        (probabilities) or ``'mode'`` (labels) column.
    """
    train_X = train[features]
    train_y = train[TARGET]
    test_X = test[features]

    # Out-of-fold results keyed by row position, stitched together after the loop.
    prob_by_row, label_by_row = {}, {}
    # Test-set results, one entry per fold.
    fold_test_probs, fold_test_labels = {}, {}
    fold_scores = []

    splitter = StratifiedKFold(n_splits=folds, shuffle=True, random_state=seed)
    for fold, (fit_rows, holdout_rows) in enumerate(splitter.split(train_X, train_y)):
        column = f'fold{fold}'
        holdout_X = train_X.iloc[holdout_rows]
        holdout_y = train_y.iloc[holdout_rows]

        model.fit(
            train_X.iloc[fit_rows], train_y.iloc[fit_rows],
            eval_set=[(holdout_X, holdout_y)],
            verbose=False)

        # Positive-class probabilities and the F1-optimal cut-off for this fold.
        holdout_probs = model.predict_proba(holdout_X)[:, 1]
        cutoff = get_best_threshold(holdout_y, holdout_probs)
        holdout_labels = probs_to_labels(holdout_probs, cutoff)

        prob_by_row.update(zip(holdout_rows, holdout_probs))
        label_by_row.update(zip(holdout_rows, holdout_labels))

        # The fold's own cut-off is reused to label the test set.
        fold_test_probs[column] = model.predict_proba(test_X)[:, 1]
        fold_test_labels[column] = probs_to_labels(fold_test_probs[column], cutoff)

        fold_f1 = f1_score(holdout_y, holdout_labels)
        fold_scores.append(fold_f1)
        print(f'Fold #{fold}: {fold_f1:.5f} ({model.best_iteration} rounds)')
        gc.collect()

    test_probs = pd.DataFrame(fold_test_probs)
    test_probs['mean'] = test_probs.mean(axis=1)  # mean of fold-wise probabilities

    test_preds = pd.DataFrame(fold_test_labels)
    # Majority vote across folds; [0] keeps the first mode column.
    test_preds['mode'] = test_preds.mode(axis=1)[0].astype('int')

    oof_probs = pd.Series(prob_by_row).sort_index()
    oof_preds = pd.Series(label_by_row).sort_index()

    print(f'\nAvg. score: {np.mean(fold_scores):.5f} +/- {np.std(fold_scores):.5f}')
    print(f'OOF score: {f1_score(train_y, oof_preds):.5f}\n')

    return oof_probs, oof_preds, test_probs, test_preds

# Experiments

### Expt 1 - eval_metric='aucpr', full features

In [15]:
# Expt 1 model: logistic objective, evaluated with PR-AUC, no class weighting.
model1 = xgb.XGBClassifier(
    # Learning task and evaluation metric.
    objective='binary:logistic',
    eval_metric='aucpr',
    # Boosting schedule: small step size, large round budget, early stopping.
    learning_rate=0.01,
    n_estimators=10000,
    early_stopping_rounds=200,
    # Tree construction and hardware.
    booster='gbtree',
    tree_method='hist',
    device=DEVICE,
    n_jobs=-1,
    # Reproducibility and logging.
    random_state=SEED,
    verbosity=0,
)

In [16]:
%%time
# Expt 1: cross-validate model1 on the full feature list (default fold count and seed).
preds1 = custom_cv(features=features, model=model1)

Fold #0: 0.62731 (856 rounds)
Fold #1: 0.61864 (3815 rounds)
Fold #2: 0.69403 (6018 rounds)
Fold #3: 0.71014 (2932 rounds)
Fold #4: 0.66421 (3651 rounds)
Fold #5: 0.66667 (2807 rounds)
Fold #6: 0.71942 (5582 rounds)

Avg. score: 0.67149 +/- 0.03610
OOF score: 0.67271

CPU times: user 10min 31s, sys: 1.5 s, total: 10min 32s
Wall time: 10min 14s


In [17]:
# Tune one global cut-off on the pooled out-of-fold probabilities (preds1[0]),
# then apply it to the OOF rows and to the fold-averaged test probabilities.
# NOTE(review): tp1 is not referenced again in the visible notebook - confirm intent.
threshold1 = get_best_threshold(train[TARGET], preds1[0])
op1 = probs_to_labels(preds1[0], threshold1)
tp1 = probs_to_labels(preds1[2]['mean'], threshold1)

print(f'Best threshold: {threshold1}')
print(f'OOF score: {f1_score(train[TARGET], op1):.5f}\n')

Best threshold: 0.1315
OOF score: 0.65788



### Expt 2 - eval_metric='auc', full features

In [18]:
# Expt 2 model: logistic objective, evaluated with ROC-AUC, no class weighting.
model2 = xgb.XGBClassifier(
    # Learning task and evaluation metric.
    objective='binary:logistic',
    eval_metric='auc',
    # Boosting schedule: small step size, large round budget, early stopping.
    learning_rate=0.01,
    n_estimators=10000,
    early_stopping_rounds=200,
    # Tree construction and hardware.
    booster='gbtree',
    tree_method='hist',
    device=DEVICE,
    n_jobs=-1,
    # Reproducibility and logging.
    random_state=SEED,
    verbosity=0,
)

In [19]:
%%time
# Expt 2: cross-validate model2 on the full feature list (default fold count and seed).
preds2 = custom_cv(features=features, model=model2)

Fold #0: 0.64516 (2024 rounds)
Fold #1: 0.61603 (3500 rounds)
Fold #2: 0.66667 (1857 rounds)
Fold #3: 0.70290 (3001 rounds)
Fold #4: 0.64727 (2097 rounds)
Fold #5: 0.66667 (1840 rounds)
Fold #6: 0.71942 (5750 rounds)

Avg. score: 0.66630 +/- 0.03273
OOF score: 0.66774

CPU times: user 8min 51s, sys: 1.11 s, total: 8min 52s
Wall time: 8min 34s


In [20]:
# Tune one global cut-off on the pooled out-of-fold probabilities (preds2[0]),
# then apply it to the OOF rows and to the fold-averaged test probabilities.
# NOTE(review): tp2 is not referenced again in the visible notebook - confirm intent.
threshold2 = get_best_threshold(train[TARGET], preds2[0])
op2 = probs_to_labels(preds2[0], threshold2)
tp2 = probs_to_labels(preds2[2]['mean'], threshold2)

print(f'Best threshold: {threshold2}')
print(f'OOF score: {f1_score(train[TARGET], op2):.5f}\n')

Best threshold: 0.1275
OOF score: 0.65650



### Expt 3 - eval_metric='aucpr', only embedding features

In [22]:
# Sanity check: features[2:] should start and end on the feature_* columns.
features[2], features[-1]

('feature_0', 'feature_767')

In [24]:
%%time
# Expt 3: same estimator as Expt 1 (model1), restricted to the feature_* columns.
preds3 = custom_cv(features=features[2:], model=model1)

Fold #0: 0.64567 (3001 rounds)
Fold #1: 0.61475 (1987 rounds)
Fold #2: 0.71429 (4172 rounds)
Fold #3: 0.66929 (1195 rounds)
Fold #4: 0.65414 (3344 rounds)
Fold #5: 0.66202 (2038 rounds)
Fold #6: 0.71062 (8824 rounds)

Avg. score: 0.66725 +/- 0.03277
OOF score: 0.66880

CPU times: user 10min 6s, sys: 1.09 s, total: 10min 8s
Wall time: 9min 49s


In [25]:
# Tune one global cut-off on the pooled out-of-fold probabilities (preds3[0]),
# then apply it to the OOF rows and to the fold-averaged test probabilities.
# NOTE(review): tp3 is not referenced again in the visible notebook - confirm intent.
threshold3 = get_best_threshold(train[TARGET], preds3[0])
op3 = probs_to_labels(preds3[0], threshold3)
tp3 = probs_to_labels(preds3[2]['mean'], threshold3)

print(f'Best threshold: {threshold3}')
print(f'OOF score: {f1_score(train[TARGET], op3):.5f}\n')

Best threshold: 0.125
OOF score: 0.64916



### Expt 4 - eval_metric='aucpr', only non-embedding features

In [26]:
%%time
# Expt 4: same estimator as Expt 1 (model1), using only word_count and punc_num.
preds4 = custom_cv(features=features[:2], model=model1)

Fold #0: 0.31351 (418 rounds)
Fold #1: 0.32558 (428 rounds)
Fold #2: 0.30380 (135 rounds)
Fold #3: 0.30978 (165 rounds)
Fold #4: 0.33952 (591 rounds)
Fold #5: 0.31325 (134 rounds)
Fold #6: 0.30693 (16 rounds)

Avg. score: 0.31605 +/- 0.01152
OOF score: 0.31508

CPU times: user 42.8 s, sys: 68 ms, total: 42.8 s
Wall time: 41.2 s


In [27]:
# Tune one global cut-off on the pooled out-of-fold probabilities (preds4[0]),
# then apply it to the OOF rows and to the fold-averaged test probabilities.
# NOTE(review): tp4 is not referenced again in the visible notebook - confirm intent.
threshold4 = get_best_threshold(train[TARGET], preds4[0])
op4 = probs_to_labels(preds4[0], threshold4)
tp4 = probs_to_labels(preds4[2]['mean'], threshold4)

print(f'Best threshold: {threshold4}')
print(f'OOF score: {f1_score(train[TARGET], op4):.5f}\n')

Best threshold: 0.1665
OOF score: 0.30280



### Expt 5 - eval_metric='aucpr' + class balancing, full features

In [34]:
# Negative-to-positive ratio: count of label 0 divided by count of label 1,
# computed over the whole training set.
class_ratio = train[TARGET].value_counts().pipe(lambda counts: counts[0] / counts[1])

In [35]:
# Expt 5 model: PR-AUC evaluation plus positive-class reweighting by class_ratio.
model5 = xgb.XGBClassifier(
    # Learning task, evaluation metric and class weighting.
    objective='binary:logistic',
    eval_metric='aucpr',
    scale_pos_weight=class_ratio,
    # Boosting schedule: small step size, large round budget, early stopping.
    learning_rate=0.01,
    n_estimators=10000,
    early_stopping_rounds=200,
    # Tree construction and hardware.
    booster='gbtree',
    tree_method='hist',
    device=DEVICE,
    n_jobs=-1,
    # Reproducibility and logging.
    random_state=SEED,
    verbosity=0,
)

In [36]:
%%time
# Expt 5: cross-validate the class-weighted PR-AUC model on the full feature list.
preds5 = custom_cv(features=features, model=model5)

Fold #0: 0.64662 (3772 rounds)
Fold #1: 0.62614 (2419 rounds)
Fold #2: 0.72535 (3745 rounds)
Fold #3: 0.68750 (4938 rounds)
Fold #4: 0.67568 (4021 rounds)
Fold #5: 0.64706 (2038 rounds)
Fold #6: 0.71739 (7608 rounds)

Avg. score: 0.67510 +/- 0.03477
OOF score: 0.67384

CPU times: user 12min 5s, sys: 1.27 s, total: 12min 7s
Wall time: 11min 48s


In [39]:
# Tune one global cut-off on the pooled out-of-fold probabilities (preds5[0]),
# then apply it to the OOF rows and to the fold-averaged test probabilities.
# NOTE(review): tp5 is not referenced again in the visible notebook - confirm intent.
threshold5 = get_best_threshold(train[TARGET], preds5[0])
op5 = probs_to_labels(preds5[0], threshold5)
tp5 = probs_to_labels(preds5[2]['mean'], threshold5)

print(f'Best threshold: {threshold5}')
print(f'OOF score: {f1_score(train[TARGET], op5):.5f}\n')

Best threshold: 0.1685
OOF score: 0.66369



### Expt 6 - eval_metric='auc' + class balancing, full features

In [37]:
# Expt 6 model: ROC-AUC evaluation plus positive-class reweighting by class_ratio.
model6 = xgb.XGBClassifier(
    # Learning task, evaluation metric and class weighting.
    objective='binary:logistic',
    eval_metric='auc',
    scale_pos_weight=class_ratio,
    # Boosting schedule: small step size, large round budget, early stopping.
    learning_rate=0.01,
    n_estimators=10000,
    early_stopping_rounds=200,
    # Tree construction and hardware.
    booster='gbtree',
    tree_method='hist',
    device=DEVICE,
    n_jobs=-1,
    # Reproducibility and logging.
    random_state=SEED,
    verbosity=0,
)

In [38]:
%%time
# Expt 6: cross-validate the class-weighted ROC-AUC model on the full feature list.
preds6 = custom_cv(features=features, model=model6)

Fold #0: 0.61600 (1113 rounds)
Fold #1: 0.61176 (1021 rounds)
Fold #2: 0.67153 (1716 rounds)
Fold #3: 0.69314 (3734 rounds)
Fold #4: 0.66667 (2222 rounds)
Fold #5: 0.65781 (2317 rounds)
Fold #6: 0.71475 (7499 rounds)

Avg. score: 0.66167 +/- 0.03489
OOF score: 0.66359

CPU times: user 8min 57s, sys: 774 ms, total: 8min 58s
Wall time: 8min 40s


In [40]:
# Tune one global cut-off on the pooled out-of-fold probabilities (preds6[0]),
# then apply it to the OOF rows and to the fold-averaged test probabilities.
# NOTE(review): tp6 is not referenced again in the visible notebook - confirm intent.
threshold6 = get_best_threshold(train[TARGET], preds6[0])
op6 = probs_to_labels(preds6[0], threshold6)
tp6 = probs_to_labels(preds6[2]['mean'], threshold6)

print(f'Best threshold: {threshold6}')
print(f'OOF score: {f1_score(train[TARGET], op6):.5f}\n')

Best threshold: 0.281
OOF score: 0.64175



### Expt 7 - best config + different folds

In [41]:
%%time
# Expt 7.1: the Expt 5 model (model5) with 5 stratified folds.
preds7_1 = custom_cv(features=features, model=model5, folds=5)

Fold #0: 0.63980 (7404 rounds)
Fold #1: 0.67532 (5597 rounds)
Fold #2: 0.65641 (2081 rounds)
Fold #3: 0.67513 (6731 rounds)
Fold #4: 0.66508 (5166 rounds)

Avg. score: 0.66235 +/- 0.01329
OOF score: 0.66230

CPU times: user 10min 14s, sys: 627 ms, total: 10min 15s
Wall time: 10min 2s


In [42]:
%%time
# Expt 7.2: the Expt 5 model (model5) with 10 stratified folds.
preds7_2 = custom_cv(features=features, model=model5, folds=10)

Fold #0: 0.66667 (3776 rounds)
Fold #1: 0.63810 (2045 rounds)
Fold #2: 0.65909 (6429 rounds)
Fold #3: 0.72889 (2220 rounds)
Fold #4: 0.64975 (3267 rounds)
Fold #5: 0.69000 (1979 rounds)
Fold #6: 0.67677 (3808 rounds)
Fold #7: 0.66667 (2410 rounds)
Fold #8: 0.64130 (2490 rounds)
Fold #9: 0.71717 (3710 rounds)

Avg. score: 0.67344 +/- 0.02901
OOF score: 0.67441

CPU times: user 14min 40s, sys: 1.09 s, total: 14min 41s
Wall time: 14min 15s


In [43]:
%%time
# Expt 7.3: the Expt 5 model (model5) with 15 stratified folds.
preds7_3 = custom_cv(features=features, model=model5, folds=15)

Fold #0: 0.68702 (2078 rounds)
Fold #1: 0.67626 (2846 rounds)
Fold #2: 0.58209 (1147 rounds)
Fold #3: 0.60870 (2525 rounds)
Fold #4: 0.72581 (4169 rounds)
Fold #5: 0.77027 (1963 rounds)
Fold #6: 0.64062 (2587 rounds)
Fold #7: 0.71724 (2474 rounds)
Fold #8: 0.63492 (780 rounds)
Fold #9: 0.68657 (1709 rounds)
Fold #10: 0.65693 (1526 rounds)
Fold #11: 0.67187 (1073 rounds)
Fold #12: 0.65625 (1600 rounds)
Fold #13: 0.72464 (3650 rounds)
Fold #14: 0.71642 (5829 rounds)

Avg. score: 0.67704 +/- 0.04785
OOF score: 0.67873

CPU times: user 17min 46s, sys: 1.47 s, total: 17min 48s
Wall time: 17min 8s


### Expt 8 - best config + best folds + different seeds

In [46]:
%%time
# Expt 8.1: 15 folds with model5, splitter seeded with 2311 instead of SEED.
preds8_1 = custom_cv(features=features, model=model5, folds=15, seed=2311)

Fold #0: 0.64103 (2098 rounds)
Fold #1: 0.61111 (2147 rounds)
Fold #2: 0.72868 (1197 rounds)
Fold #3: 0.64662 (1165 rounds)
Fold #4: 0.60000 (105 rounds)
Fold #5: 0.68702 (2300 rounds)
Fold #6: 0.75362 (2502 rounds)
Fold #7: 0.64912 (1404 rounds)
Fold #8: 0.66667 (1876 rounds)
Fold #9: 0.68750 (2414 rounds)
Fold #10: 0.72464 (2274 rounds)
Fold #11: 0.75385 (1191 rounds)
Fold #12: 0.70588 (2444 rounds)
Fold #13: 0.64286 (1528 rounds)
Fold #14: 0.63448 (1089 rounds)

Avg. score: 0.67554 +/- 0.04766
OOF score: 0.67414

CPU times: user 14min 16s, sys: 1.32 s, total: 14min 17s
Wall time: 13min 38s


In [47]:
%%time
# Expt 8.2: 15 folds with model5, splitter seeded with 152 instead of SEED.
preds8_2 = custom_cv(features=features, model=model5, folds=15, seed=152)

Fold #0: 0.66225 (3487 rounds)
Fold #1: 0.75556 (1301 rounds)
Fold #2: 0.64748 (2142 rounds)
Fold #3: 0.68148 (2342 rounds)
Fold #4: 0.64052 (1586 rounds)
Fold #5: 0.68376 (2344 rounds)
Fold #6: 0.65487 (2274 rounds)
Fold #7: 0.68531 (2522 rounds)
Fold #8: 0.63946 (1934 rounds)
Fold #9: 0.63866 (932 rounds)
Fold #10: 0.64384 (1337 rounds)
Fold #11: 0.68056 (1882 rounds)
Fold #12: 0.66667 (2609 rounds)
Fold #13: 0.65672 (2168 rounds)
Fold #14: 0.73171 (3248 rounds)

Avg. score: 0.67126 +/- 0.03289
OOF score: 0.67062

CPU times: user 16min 41s, sys: 1.49 s, total: 16min 43s
Wall time: 16min 4s


# Submission files

In [44]:
def create_submission_files(preds, config):
    """Write a submission CSV named ``{config}.csv`` in the working directory.

    Args:
        preds: sequence as returned by ``custom_cv``; only its last element is
            read, and from it the ``'mode'`` column (fold-wise majority vote).
        config: file stem for the output CSV.

    The notebook-level ``sample_sub`` template is left untouched; its TARGET
    column is replaced in a copy, which is saved without the index.
    """
    majority_vote = preds[-1]['mode']
    submission = sample_sub.assign(**{TARGET: majority_vote})
    submission.to_csv(f'{config}.csv', index=False)

In [45]:
# One CSV per experiment run so far, named after the experiment.
create_submission_files(preds=preds1, config='exp1')
create_submission_files(preds=preds2, config='exp2')
create_submission_files(preds=preds3, config='exp3')
create_submission_files(preds=preds4, config='exp4')
create_submission_files(preds=preds5, config='exp5')
create_submission_files(preds=preds6, config='exp6')
create_submission_files(preds=preds7_1, config='exp7_1')
create_submission_files(preds=preds7_2, config='exp7_2')
create_submission_files(preds=preds7_3, config='exp7_3')

In [48]:
# CSVs for the two alternative-seed runs of Expt 8.
create_submission_files(preds=preds8_1, config='exp8_1')
create_submission_files(preds=preds8_2, config='exp8_2')

**Time to submit!**