In [1]:
import pandas as pd
import numpy as np
import glob
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import StratifiedKFold
from sklearn.externals import joblib
import lightgbm as lgb
import xgboost as xgb
from scipy.sparse import vstack, csr_matrix, save_npz, load_npz
import warnings
warnings.filterwarnings("ignore")
import gc

In [8]:
dict_dtypes = {
    'IsBeta': np.int8,                                                              
    'RtpStateBitfield': np.int8,                                                    
    'IsSxsPassiveMode': np.int8,                                                    
    'DefaultBrowsersIdentifier': np.int16,                                          
    'AVProductStatesIdentifier': np.int32,                                          
    'AVProductsInstalled': np.int8,                                                 
    'AVProductsEnabled': np.int8,                                                   
    'HasTpm': np.int8,                                                              
    'CountryIdentifier': np.int16,                                                  
    'CityIdentifier': np.int32,                                                     
    'OrganizationIdentifier': np.int8,                                              
    'GeoNameIdentifier': np.int16,                                                  
    'LocaleEnglishNameIdentifier': np.int16,                                        
    'OsBuild': np.int16,                                                            
    'OsSuite': np.int16,                                                            
    'IsProtected': np.int8,                                                         
    'AutoSampleOptIn': np.int8,                                                     
    'SMode': np.int8,                                                               
    'IeVerIdentifier': np.int16,                                                    
    'Firewall': np.int8,                                                            
    'UacLuaenable': np.int32,                                                       
    'Census_OEMNameIdentifier': np.int16,                                           
    'Census_OEMModelIdentifier': np.int32,                                          
    'Census_ProcessorCoreCount': np.int16,                                          
    'Census_ProcessorManufacturerIdentifier': np.int8,                              
    'Census_ProcessorModelIdentifier': np.int16,                                    
    'Census_PrimaryDiskTotalCapacity': np.int32,                                    
    'Census_SystemVolumeTotalCapacity': np.int32,                                   
    'Census_HasOpticalDiskDrive': np.int8,                                          
    'Census_TotalPhysicalRAM': np.int32,                                            
    'Census_InternalPrimaryDiagonalDisplaySizeInInches': np.float16,                  
    'Census_InternalPrimaryDisplayResolutionHorizontal': np.int16,                  
    'Census_InternalPrimaryDisplayResolutionVertical': np.int16,                    
    'Census_InternalBatteryNumberOfCharges': np.int32,                              
    'Census_OSBuildNumber': np.int16,                                               
    'Census_OSBuildRevision': np.int32,                                             
    'Census_OSInstallLanguageIdentifier': np.int8,                                  
    'Census_OSUILocaleIdentifier': np.int16,                                        
    'Census_IsPortableOperatingSystem': np.int8,                                    
    'Census_IsFlightingInternal': np.int8,                                          
    'Census_IsFlightsDisabled': np.int8,                                            
    'Census_ThresholdOptIn': np.int8,                                               
    'Census_FirmwareManufacturerIdentifier': np.int16,                              
    'Census_FirmwareVersionIdentifier': np.int32,                                   
    'Census_IsSecureBootEnabled': np.int8,                                          
    'Census_IsWIMBootEnabled': np.int8,                                             
    'Census_IsVirtualDevice': np.int8,                                              
    'Census_IsTouchEnabled': np.int8,                                               
    'Census_IsPenCapable': np.int8,                                                 
    'Census_IsAlwaysOnAlwaysConnectedCapable': np.int8,                             
    'Wdft_IsGamer': np.int8,                                                        
    'Wdft_RegionIdentifier': np.int8,                                               
    'HasDetections': np.int8,                                                       
    'Census_InternalBatteryType_informed': np.int8,                                 
    'ProductName_index': np.int8,                                                   
    'Platform_index': np.int8,                                                      
    'Processor_index': np.int8,                                                     
    'SkuEdition_index': np.int8,                                                    
    'PuaMode_index': np.int8,                                                       
    'SmartScreen_index': np.int8,                                                   
    'Census_MDC2FormFactor_index': np.int8,                                         
    'Census_DeviceFamily_index': np.int8,                                           
    'Census_ProcessorClass_index': np.int8,                                         
    'Census_PrimaryDiskTypeName_index': np.int8,                                    
    'Census_ChassisTypeName_index': np.int8,                                        
    'Census_PowerPlatformRoleName_index': np.int8,                                    
    'Census_OSBranch_index': np.int8,                                               
    'Census_OSSkuName_index': np.int8,                                              
    'Census_OSInstallTypeName_index': np.int8,                                      
    'Census_OSWUAutoUpdateOptionsName_index': np.int8,                              
    'Census_GenuineStateName_index': np.int8,                                       
    'Census_ActivationChannel_index': np.int8,                                      
    'Census_FlightRing_index': np.int8,                                             
    'ProductName_windowsintune': np.int8,                                           
    'ProductName_mse': np.int8,                                                     
    'ProductName_mseprerelease': np.int8,                                           
    'ProductName_scep': np.int8,                                                    
    'ProductName_fep': np.int8,                                                     
    'ProductName_win8defender': np.int8,                                            
    'Platform_windows10': np.int8,                                                  
    'Platform_windows2016': np.int8,                                                
    'Platform_windows8': np.int8,                                                   
    'Platform_windows7': np.int8,                                                   
    'Processor_x86': np.int8,                                                       
    'Processor_x64': np.int8,                                                       
    'Processor_arm64': np.int8,                                                     
    'SkuEdition_Education': np.int8,                                                
    'SkuEdition_Home': np.int8,                                                     
    'SkuEdition_Pro': np.int8,                                                      
    'SkuEdition_Server': np.int8,                                                   
    'SkuEdition_Invalid': np.int8,                                                  
    'SkuEdition_Enterprise': np.int8,                                               
    'SkuEdition_Cloud': np.int8,                                                    
    'SkuEdition_Enterprise LTSB': np.int8,                                          
    'PuaMode_on': np.int8,                                                          
    'PuaMode_UNKNOWN': np.int8,                                                     
    'PuaMode_audit': np.int8,                                                       
    'SmartScreen_OFF': np.int8,                                                     
    'SmartScreen_Prompt': np.int8,                                                  
    'SmartScreen_Unknown': np.int8,                                                 
    'SmartScreen_Warn': np.int8,                                                    
    'SmartScreen_ON': np.int8,                                                      
    'SmartScreen_RequiredAdmin': np.int8,                                           
    'SmartScreen_BLOCK': np.int8,                                                   
    'SmartScreen_Deny': np.int8,                                                    
    'SmartScreen_ExistsNotSet': np.int8,                                            
    'Census_MDC2FormFactor_Convertible': np.int8,                                   
    'Census_MDC2FormFactor_PCOther': np.int8,                                       
    'Census_MDC2FormFactor_Other': np.int8,                                         
    'Census_MDC2FormFactor_LargeServer': np.int8,                                   
    'Census_MDC2FormFactor_IoTOther': np.int8,                                      
    'Census_MDC2FormFactor_Detachable': np.int8,                                    
    'Census_MDC2FormFactor_SmallServer': np.int8,                                   
    'Census_MDC2FormFactor_Notebook': np.int8,                                      
    'Census_MDC2FormFactor_ServerOther': np.int8,                                   
    'Census_MDC2FormFactor_MediumServer': np.int8,                                  
    'Census_MDC2FormFactor_Desktop': np.int8,                                       
    'Census_MDC2FormFactor_LargeTablet': np.int8,                                   
    'Census_MDC2FormFactor_AllInOne': np.int8,                                      
    'Census_MDC2FormFactor_SmallTablet': np.int8,
    'Census_DeviceFamily_Windows': np.int8,                                         
    'Census_ProcessorClass_low': np.int8,                                           
    'Census_ProcessorClass_UNKNOWN': np.int8,                                       
    'Census_ProcessorClass_high': np.int8,                                          
    'Census_ProcessorClass_mid': np.int8,                                           
    'Census_PrimaryDiskTypeName_SSD': np.int8,                                      
    'Census_PrimaryDiskTypeName_HDD': np.int8,                                      
    'Census_PrimaryDiskTypeName_UNKNOWN': np.int8,                                  
    'Census_ChassisTypeName_AllinOne': np.int8,                                     
    'Census_ChassisTypeName_Tablet': np.int8,                                       
    'Census_ChassisTypeName_PeripheralChassis': np.int8,                            
    'Census_ChassisTypeName_Numerico': np.int8,                                     
    'Census_ChassisTypeName_EmbeddedPC': np.int8,                                   
    'Census_ChassisTypeName_SpaceSaving': np.int8,                                  
    'Census_ChassisTypeName_HandHeld': np.int8,                                     
    'Census_ChassisTypeName_StickPC': np.int8,                                      
    'Census_ChassisTypeName_Convertible': np.int8,                                  
    'Census_ChassisTypeName_SubChassis': np.int8,                                   
    'Census_ChassisTypeName_Portable': np.int8,                                     
    'Census_ChassisTypeName_MiniPC': np.int8,                                       
    'Census_ChassisTypeName_UNKNOWN': np.int8,                                      
    'Census_ChassisTypeName_Desktop': np.int8,                                      
    'Census_ChassisTypeName_LowProfileDesktop': np.int8,                            
    'Census_ChassisTypeName_SealedCasePC': np.int8,                                 
    'Census_ChassisTypeName_ExpansionChassis': np.int8,                             
    'Census_ChassisTypeName_Detachable': np.int8,                                   
    'Census_ChassisTypeName_RackMountChassis': np.int8,                             
    'Census_ChassisTypeName_SubNotebook': np.int8,                                  
    'Census_ChassisTypeName_BusExpansionChassis': np.int8,                          
    'Census_ChassisTypeName_Notebook': np.int8,                                     
    'Census_ChassisTypeName_Laptop': np.int8,                                       
    'Census_ChassisTypeName_Tower': np.int8,                                        
    'Census_ChassisTypeName_BladeEnclosure': np.int8,                               
    'Census_ChassisTypeName_IoTGateway': np.int8,                                   
    'Census_ChassisTypeName_LunchBox': np.int8,                                     
    'Census_ChassisTypeName_MultisystemChassis': np.int8,                           
    'Census_ChassisTypeName_DockingStation': np.int8,                               
    'Census_ChassisTypeName_CompactPCI': np.int8,                                   
    'Census_ChassisTypeName_MiniTower': np.int8,                                    
    'Census_ChassisTypeName_MainServerChassis': np.int8,                            
    'Census_ChassisTypeName_PizzaBox': np.int8,                                     
    'Census_ChassisTypeName_Blade': np.int8,                                        
    'Census_PowerPlatformRoleName_SOHOServer': np.int8,                             
    'Census_PowerPlatformRoleName_EnterpriseServer': np.int8,                       
    'Census_PowerPlatformRoleName_AppliancePC': np.int8,                            
    'Census_PowerPlatformRoleName_UNKNOWN': np.int8,                                
    'Census_PowerPlatformRoleName_Desktop': np.int8,                                
    'Census_PowerPlatformRoleName_PerformanceServer': np.int8,                      
    'Census_PowerPlatformRoleName_Workstation': np.int8,                            
    'Census_PowerPlatformRoleName_Mobile': np.int8,                                 
    'Census_PowerPlatformRoleName_Slate': np.int8,                                  
    'Census_OSBranch_rs5_release_sigma': np.int8,                                   
    'Census_OSBranch_win7sp1_ldr': np.int8,                                         
    'Census_OSBranch_rs5_release_sign': np.int8,                                    
    'Census_OSBranch_th2_release_sec': np.int8,                                     
    'Census_OSBranch_rs5_release': np.int8,                                         
    'Census_OSBranch_rs1_release_sec': np.int8,                                     
    'Census_OSBranch_rs_shell': np.int8,                                            
    'Census_OSBranch_rs4_release': np.int8,                                         
    'Census_OSBranch_rs_prerelease_flt': np.int8,                                   
    'Census_OSBranch_win7sp1_gdr': np.int8,                                         
    'Census_OSBranch_rs_xbox': np.int8,                                             
    'Census_OSBranch_rs3_release_svc_escrow': np.int8,                              
    'Census_OSBranch_rs1_release_inmarket': np.int8,                                
    'Census_OSBranch_win8_gdr': np.int8,                                            
    'Census_OSBranch_rs1_release_srvmedia': np.int8,                                
    'Census_OSBranch_rs_onecore_base_cobalt': np.int8,                              
    'Census_OSBranch_th1_st1': np.int8,                                             
    'Census_OSBranch_winblue_ltsb': np.int8,                                        
    'Census_OSBranch_Khmer OS': np.int8,                                            
    'Census_OSBranch_winblue_ltsb_escrow': np.int8,                                 
    'Census_OSBranch_rs_onecore_stack_per1': np.int8,                               
    'Census_OSBranch_rs_onecore_sigma_dplat_d7': np.int8,                           
    'Census_OSBranch_win7sp1_ldr_escrow': np.int8,                                  
    'Census_OSBranch_rs_edge': np.int8,                                             
    'Census_OSBranch_rs3_release_svc_escrow_im': np.int8,                           
    'Census_OSBranch_rs3_release_svc': np.int8,                                     
    'Census_OSBranch_rs2_release': np.int8,                                         
    'Census_OSBranch_rs2_release_svc_d': np.int8,                                   
    'Census_OSBranch_rs5_release_sigma_dev': np.int8,                               
    'Census_OSBranch_th1': np.int8,                                                 
    'Census_OSBranch_rs1_release': np.int8,                                         
    'Census_OSBranch_rs_prerelease': np.int8,                                       
    'Census_OSBranch_rs5_release_edge': np.int8,                                    
    'Census_OSBranch_rsmaster': np.int8,                                            
    'Census_OSBranch_win8_ldr': np.int8,                                            
    'Census_OSBranch_rs_onecore_dep': np.int8,                                      
    'Census_OSBranch_th2_release': np.int8,                                         
    'Census_OSBranch_rs3_release': np.int8,                                         
    'Census_OSBranch_rs1_release_svc': np.int8,                                     
    'Census_OSBranch_rs_onecore_sigma_grfx_dev': np.int8,                           
    'Census_OSSkuName_ULTIMATE': np.int8,                                           
    'Census_OSSkuName_CORE_COUNTRYSPECIFIC': np.int8,                               
    'Census_OSSkuName_EDUCATION_N': np.int8,                                        
    'Census_OSSkuName_UNLICENSED': np.int8,                                         
    'Census_OSSkuName_ENTERPRISEG': np.int8,                                        
    'Census_OSSkuName_PRO_CHINA': np.int8,                                          
    'Census_OSSkuName_PROFESSIONAL_N': np.int8,                                     
    'Census_OSSkuName_DATACENTER_SERVER': np.int8,                                  
    'Census_OSSkuName_ENTERPRISE_S_N': np.int8,                                     
    'Census_OSSkuName_ENTERPRISE': np.int8,                                         
    'Census_OSSkuName_PRO_WORKSTATION_N': np.int8,                                  
    'Census_OSSkuName_CORE_SINGLELANGUAGE': np.int8,                                
    'Census_OSSkuName_HOME_BASIC': np.int8,                                         
    'Census_OSSkuName_SB_SOLUTION_SERVER': np.int8,                                 
    'Census_OSSkuName_DATACENTER_EVALUATION_SERVER': np.int8,                       
    'Census_OSSkuName_PROFESSIONAL': np.int8,                                       
    'Census_OSSkuName_STANDARD_SERVER': np.int8,                                    
    'Census_OSSkuName_HOME_PREMIUM': np.int8,                                       
    'Census_OSSkuName_PRO_WORKSTATION': np.int8,                                    
    'Census_OSSkuName_CORE': np.int8,                                               
    'Census_OSSkuName_STANDARD_EVALUATION_SERVER': np.int8,                         
    'Census_OSSkuName_ENTERPRISE_N': np.int8,                                       
    'Census_OSSkuName_PRO_FOR_EDUCATION': np.int8,                                  
    'Census_OSSkuName_SERVERRDSH': np.int8,                                         
    'Census_OSSkuName_CLOUD': np.int8,                                              
    'Census_OSSkuName_ENTERPRISE_S': np.int8,                                       
    'Census_OSSkuName_EDUCATION': np.int8,                                          
    'Census_OSSkuName_CORE_N': np.int8,                                             
    'Census_OSSkuName_PRO_SINGLE_LANGUAGE': np.int8,                                
    'Census_OSSkuName_CLOUDN': np.int8,                                             
    'Census_OSSkuName_UNDEFINED': np.int8,                                          
    'Census_OSSkuName_STARTER': np.int8,                                            
    'Census_OSInstallTypeName_Reset': np.int8,                                      
    'Census_OSInstallTypeName_Refresh': np.int8,                                    
    'Census_OSInstallTypeName_Clean': np.int8,                                      
    'Census_OSInstallTypeName_Other': np.int8,                                      
    'Census_OSInstallTypeName_UUPUpgrade': np.int8,                                 
    'Census_OSInstallTypeName_IBSClean': np.int8,                                   
    'Census_OSInstallTypeName_CleanPCRefresh': np.int8,                             
    'Census_OSInstallTypeName_Update': np.int8,                                     
    'Census_OSInstallTypeName_Upgrade': np.int8,                                    
    'Census_OSWUAutoUpdateOptionsName_DownloadNotify': np.int8,                     
    'Census_OSWUAutoUpdateOptionsName_UNKNOWN': np.int8,                            
    'Census_OSWUAutoUpdateOptionsName_Off': np.int8,                                
    'Census_OSWUAutoUpdateOptionsName_FullAuto': np.int8,                           
    'Census_OSWUAutoUpdateOptionsName_Notify': np.int8,                             
    'Census_OSWUAutoUpdateOptionsName_AutoInstallAndRebootAtMaintenanceTime': np.int8,
    'Census_GenuineStateName_IS_GENUINE': np.int8,                                  
    'Census_GenuineStateName_UNKNOWN': np.int8,                                     
    'Census_GenuineStateName_TAMPERED': np.int8,                                    
    'Census_GenuineStateName_INVALID_LICENSE': np.int8,                             
    'Census_GenuineStateName_OFFLINE': np.int8,                                     
    'Census_ActivationChannel_Retail': np.int8,                                     
    'Census_ActivationChannel_Volume:MAK': np.int8,                                 
    'Census_ActivationChannel_OEM:NONSLP': np.int8,                                 
    'Census_ActivationChannel_Retail:TB:Eval': np.int8,                             
    'Census_ActivationChannel_OEM:DM': np.int8,                                     
    'Census_ActivationChannel_Volume:GVLK': np.int8,                                
    'Census_FlightRing_OSG': np.int8,                                               
    'Census_FlightRing_Unknown': np.int8,                                           
    'Census_FlightRing_Canary': np.int8,                                            
    'Census_FlightRing_Retail': np.int8,                                            
    'Census_FlightRing_Invalid': np.int8,                                           
    'Census_FlightRing_WIF': np.int8,                                               
    'Census_FlightRing_Disabled': np.int8,                                          
    'Census_FlightRing_RP': np.int8,                                                
    'Census_FlightRing_WIS': np.int8,                                               
    'Census_FlightRing_CBCanary': np.int8,                                          
    'Census_FlightRing_NOT_SET': np.int8,                                           
    'Census_OSVersion_0': np.int8,                                                  
    'Census_OSVersion_1': np.int8,                                                  
    'Census_OSVersion_2': np.int16,                                                 
    'Census_OSVersion_3': np.int32,                                                 
    'EngineVersion_2': np.int16,                                                    
    'EngineVersion_3': np.int8,                                                     
    'AppVersion_1': np.int8,                                                        
    'AppVersion_2': np.int16,                                                       
    'AppVersion_3': np.int16,                                                       
    'AvSigVersion_0': np.int8,                                                      
    'AvSigVersion_1': np.int16,                                                     
    'AvSigVersion_2': np.int16,                                                     
    'OsVer_0': np.int8,                                                             
    'OsVer_1': np.int8,                                                             
    'OsVer_2': np.int16,                                                            
    'OsVer_3': np.int16,                                                            
    'OsBuildLab_diff': np.int8,                                                     
    'AvSigVersion_diff': np.int16,                                                  
    'OSVersion_diff': np.int8,                                                      
    'OSBuild_fulldiff': np.int8,                                                    
    'AvSigVersion_fulldiff': np.int8,                                               
    'OsBuildLab_difftotal': np.int16,                                               
    'DateAvSigVersion_difftotal': np.int16,                                         
    'DateOSVersion_difftotal': np.int16,                                            
    'DateAvSigVersion_fulldifftotal': np.int8,                                      
    'OsBuildLab_fulldifftotal': np.int8,                                            
    'DateAvSigVersion_ratio': np.float16,                                              
    'OsBuildLab_ratio': np.float16,                                                    
    'OSVersion_ratio': np.float16,                                                     
    'DateAvSigVersion_fullratio': np.float16,                                          
    'OsBuildLab_fullratio': np.float16,                                                
    'OsBuildLab_dayOfWeek': np.int8,                                                
    'AvSigVersion_dayOfWeek': np.int8,                                              
    'prediction_2': np.int8,                                                        
    'prediction_4': np.int8,                                                        
    'prediction_8': np.int8,                                                        
    'prediction_16': np.int8,                                                       
    'prediction_32': np.int8,                                                       
    'prediction_64': np.int8
}

In [9]:
path = '../../data/train_final_2'

In [10]:
allFiles = glob.glob(path + "/*.csv")

list_ = []
n = 0

for file_ in allFiles:
#     mask = file_['Platform_index'] != 0
    df = pd.read_csv(file_, dtype=dict_dtypes, low_memory=True)
    list_.append(df)
    if n%10==0:
        print(n)
    n+=1

train = pd.concat(list_, axis = 0, ignore_index = True)

0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390


In [11]:
train.shape

(8921483, 322)

In [8]:
train_sub = train.sample(100000)
del train
gc.collect()

10

In [9]:
train_sub.shape

(100000, 100)

In [12]:
sel_cols = [c for c in train.columns if c not in ['MachineIdentifier',
                                                      'HasDetections',
                                                      'Census_DeviceFamily_Windows.Server',
                                                      'Census_DeviceFamily_Windows.Desktop'
                                                     ]]

In [14]:
X_train = train.loc[:, sel_cols]
y_train = train.loc[:,'HasDetections']

In [15]:
del train
gc.collect()

60

In [16]:
train_ids = X_train.index

In [17]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
skf.get_n_splits(train_ids, y_train)

5

Busqueda mejores parametros

In [25]:
params = {
    'learning_rate' : [0.1, 0.05],
    'num_leaves' : [91, 103],
    'feature_fraction': [0.6, 1, 0.3], 
    'bagging_fraction': [0.8, 1],
    'max_depth': [19, -1],
    'min_data_in_leaf': [30, 20],
    'colsample_bytree': [0.9, 0.5, 0.3]
}

kFolds = 3

In [30]:
mdl_lgb = lgb.LGBMClassifier(n_jobs=-1,
                             verbose = 50,
                             n_estimators=300,
                             objective='binary',
                             bagging_freq=5,
                             boosting_type='gbdt',
                             metric='auc'
                            )

In [31]:
lgb_model = GridSearchCV(
    estimator=mdl_lgb,
    param_grid=params,
    n_jobs=1,
    cv=kFolds,
    verbose=100,
    scoring='roc_auc'
)

In [32]:
X_train2 = X_train.replace(-99, 0)

In [33]:
lgb_model.fit(X_train2, y_train)

Fitting 3 folds for each of 288 candidates, totalling 864 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=91, score=0.6977874262683735, total=   2.6s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.9s remaining:    0.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=91, score=0.6999611285293632, total=   2.5s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    5.8s remaining:    0.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, featu

[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.6998841120836143, total=   4.1s
[Parallel(n_jobs=1)]: Done  21 out of  21 | elapsed:  1.5min remaining:    0.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=103, score=0.6949604988727252, total=   4.9s
[Parallel(n_jobs=1)]: Done  22 out of  22 | elapsed:  1.6min remaining:    0.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=103, score=0.6950948577429962, total=   5.8s
[Parallel(n_j

[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.7090917336394378, total=   4.0s
[Parallel(n_jobs=1)]: Done  42 out of  42 | elapsed:  3.3min remaining:    0.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.7071373192200363, total=   3.7s
[Parallel(n_jobs=1)]: Done  43 out of  43 | elapsed:  3.3min remaining:    0.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.7084026069136267, total=   3.8s
[Parallel(n

[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.6982668351243464, total=   4.3s
[Parallel(n_jobs=1)]: Done  63 out of  63 | elapsed:  4.9min remaining:    0.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.6931194365233111, total=   4.4s
[Parallel(n_jobs=1)]: Done  64 out of  64 | elapsed:  5.0min remaining:    0.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.6953890524295835, total=   4.7s
[Parallel(n_jobs=1)]: D

[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=103, score=0.7064528736501665, total=   5.3s
[Parallel(n_jobs=1)]: Done  84 out of  84 | elapsed:  6.9min remaining:    0.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.7037600127244996, total=   4.5s
[Parallel(n_jobs=1)]: Done  85 out of  85 | elapsed:  7.0min remaining:    0.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.7067561657311543, total=   4.6s
[Parallel(n_jobs=1)]:

[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=103, score=0.6966168559703805, total=   2.9s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=103, score=0.701966810588382, total=   3.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=103, score=0.7035421790521157, total=   3.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=0.8, colsa

[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=103, score=0.7113779336927895, total=   3.2s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.70619194900662, total=   3.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.7100274800352153, total=   3.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=0.8, cols

[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.7010818144770772, total=   3.4s
[CV] bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.699906335358499, total=   3.5s
[CV] bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.6959698170113058, total=   3.6s
[CV] bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=0.8, colsamp

[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.7069299780443057, total=   3.9s
[CV] bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.7092995912811062, total=   3.9s
[CV] bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.7090917336394378, total=   4.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=0.8

[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.6987808793088537, total=   4.5s
[CV] bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.6956620613216973, total=   4.1s
[CV] bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.6951444956036976, total=   4.2s
[CV] bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.5

[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.7067540344856121, total=   4.6s
[CV] bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.7064324972147464, total=   5.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=103, score=0.7034955633589017, total=   4.7s
[CV] bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_by

[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=103, score=0.6971882438572121, total=   2.8s
[CV] bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=103, score=0.6997464307415292, total=   2.8s
[CV] bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=103, score=0.7039397031467287, total=   2.9s
[CV] bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=0.8, col

[CV]  bagging_fraction=0.8, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=103, score=0.7114512744599937, total=   3.1s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=91, score=0.6977874262683735, total=   3.4s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=91, score=0.6999611285293632, total=   3.3s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=0.8, colsamp

[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=91, score=0.7085402414547115, total=   3.6s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=91, score=0.708871722138056, total=   3.6s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=103, score=0.7056008014845019, total=   3.8s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=0.8, c

[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=103, score=0.6932279615233066, total=   4.3s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=103, score=0.6973407261345086, total=   4.3s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=103, score=0.699260189951147, total=   4.4s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=

[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=103, score=0.7060948387993914, total=   4.9s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=91, score=0.7036252195411036, total=   4.4s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=91, score=0.7067111107683839, total=   4.4s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytr

[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=91, score=0.6989922002245281, total=   2.7s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=91, score=0.7044278843785757, total=   2.7s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=103, score=0.6966168559703805, total=   2.8s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=0.8, colsam

[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=103, score=0.7067006165207801, total=   3.1s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=103, score=0.7108874124109383, total=   3.0s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=103, score=0.7113779336927895, total=   3.3s
[CV] bagging_fraction=0.8, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=0.8

[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=103, score=0.7016415544380485, total=   3.3s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.7015255935983072, total=   3.0s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.702431263713188, total=   3.1s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.9, f

[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.7092565199607237, total=   3.3s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.7089032300113415, total=   3.3s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.7067296745962094, total=   3.4s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytr

[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.6986278713424767, total=   3.9s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.6985874651758258, total=   4.0s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.7006457623591666, total=   4.1s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_

[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.7075733831941108, total=   4.6s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.7041218275219776, total=   4.1s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.7073867156052484, total=   4.1s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.9, featur

[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.7047668856226255, total=   2.3s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.7063195628014737, total=   2.3s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=103, score=0.7003593215339692, total=   2.4s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.9

[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=103, score=0.7077694727551888, total=   2.7s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=103, score=0.7113577516615237, total=   2.7s
[CV] bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.9, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=103, score=0.7117290779963219, total=   2.8s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=1, colsample_byt

[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=103, score=0.7006247415099763, total=   3.3s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=91, score=0.7071806543429324, total=   3.3s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=91, score=0.7090270726577101, total=   3.2s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.6, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=

[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=91, score=0.699560922377438, total=   3.8s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=91, score=0.7012552373829166, total=   3.8s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=103, score=0.6973462957404778, total=   3.9s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.1, max_depth=19, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fra

[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=103, score=0.7071852228995912, total=   4.4s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.05, max_depth=19, min_data_in_leaf=30, num_leaves=103, score=0.707111630127002, total=   4.5s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=91, score=0.7032823832091439, total=   4.2s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=1, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.5, featu

[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=91, score=0.7011557043133186, total=   2.3s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=91, score=0.7062785579252486, total=   2.3s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=91, score=0.7055599645697709, total=   2.4s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.5, 

[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=91, score=0.712881178215321, total=   2.6s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=103, score=0.7081671565645071, total=   2.7s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=103, score=0.711368076682157, total=   2.7s
[CV] bagging_fraction=1, colsample_bytree=0.5, feature_fraction=0.3, learning_rate=0.05, max_depth=19, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytr

[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=103, score=0.7012562958055337, total=   3.2s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.1, max_depth=19, min_data_in_leaf=20, num_leaves=103, score=0.7016415544380485, total=   3.3s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.7015255935983072, total=   2.9s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.3

[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.7066076465187935, total=   3.3s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.7092565199607237, total=   3.3s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.7089032300113415, total=   3.3s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.6, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree

[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=91, score=0.7026562505208862, total=   3.8s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.6986278713424767, total=   4.0s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.6985874651758258, total=   4.0s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.1, max_depth=-1, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_

[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.7070830779168753, total=   4.4s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=30, num_leaves=103, score=0.7075733831941108, total=   4.5s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.7041218275219776, total=   4.1s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=1, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.3, feat

[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.7001471817439489, total=   2.3s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.7047668856226255, total=   2.3s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=91 
[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.7063195628014737, total=   2.3s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.1, max_depth=-1, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.3, 

[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=91, score=0.7127098505542545, total=   2.6s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=103, score=0.7077694727551888, total=   2.6s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=103, score=0.7113577516615237, total=   2.7s
[CV] bagging_fraction=1, colsample_bytree=0.3, feature_fraction=0.3, learning_rate=0.05, max_depth=-1, min_data_in_leaf=20, num_leaves=103 
[CV]  bagging_fraction=1, colsample_by

GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=LGBMClassifier(bagging_freq=5, boosting_type='gbdt', class_weight=None,
        colsample_bytree=1.0, importance_type='split', learning_rate=0.1,
        max_depth=-1, metric='auc', min_child_samples=20,
        min_child_weight=0.001, min_split_gain=0.0, n_estimators=300,
        n_jobs=-1, num_leaves=31, objective='binary', random_state=None,
        reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=1.0,
        subsample_for_bin=200000, subsample_freq=0, verbose=50),
       fit_params=None, iid='warn', n_jobs=1,
       param_grid={'learning_rate': [0.1, 0.05], 'num_leaves': [91, 103], 'feature_fraction': [0.6, 1, 0.3], 'bagging_fraction': [0.8, 1], 'max_depth': [19, -1], 'min_data_in_leaf': [30, 20], 'colsample_bytree': [0.9, 0.5, 0.3]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='roc_auc', verbose=100)

In [34]:
lgb_model.best_estimator_
# LGBMClassifier(bagging_fraction=1, bagging_freq=5, boosting_type='gbdt',
#         class_weight=None, colsample_bytree=0.9, feature_fraction=0.3,
#         importance_type='split', learning_rate=0.05, max_depth=19,
#         metric='auc', min_child_samples=20, min_child_weight=0.001,
#         min_data_in_leaf=20, min_split_gain=0.0, n_estimators=300,
#         n_jobs=-1, num_leaves=91, objective='binary', random_state=None,
#         reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=1.0,
#         subsample_for_bin=200000, subsample_freq=0, verbose=50)

LGBMClassifier(bagging_fraction=1, bagging_freq=5, boosting_type='gbdt',
        class_weight=None, colsample_bytree=0.9, feature_fraction=0.3,
        importance_type='split', learning_rate=0.05, max_depth=19,
        metric='auc', min_child_samples=20, min_child_weight=0.001,
        min_data_in_leaf=20, min_split_gain=0.0, n_estimators=300,
        n_jobs=-1, num_leaves=91, objective='binary', random_state=None,
        reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=1.0,
        subsample_for_bin=200000, subsample_freq=0, verbose=50)

In [35]:
lgb_model.best_score_
# 0.7109413879963548

0.7109413879963548

In [18]:
lgb_model = lgb.LGBMClassifier(max_depth=-1,
                                   n_estimators=2000,
                                   learning_rate=0.05,
                                   num_leaves=91,
                                   colsample_bytree=0.4,
                                   objective='binary', 
                                   n_jobs=-1)

In [19]:
counter = 0
for train_index, test_index in skf.split(train_ids, y_train):
    
    print('Fold {}\n'.format(counter + 1))
    
#     X_fit = vstack([train[train_index[i*m:(i+1)*m]] for i in range(train_index.shape[0] // m + 1)])
#     X_val = vstack([train[test_index[i*m:(i+1)*m]]  for i in range(test_index.shape[0] //  m + 1)])
#     X_fit, X_val = csr_matrix(X_fit, dtype='float32'), csr_matrix(X_val, dtype='float32')
    X_fit, X_val = X_train.iloc[train_index, :], X_train.iloc[test_index, :]
    y_fit, y_val = y_train.iloc[train_index], y_train.iloc[test_index]
    
                              
    #xgb_model = xgb.XGBClassifier(max_depth=6,
    #                              n_estimators=30000,
    #                              colsample_bytree=0.2,
    #                              learning_rate=0.1,
    #                              objective='binary:logistic', 
    #                              n_jobs=-1)
    
                               
    lgb_model.fit(X_fit, y_fit, eval_metric='auc', 
                  eval_set=[(X_val, y_val)], 
                  verbose=100, early_stopping_rounds=20)
                  
    #xgb_model.fit(X_fit, y_fit, eval_metric='auc', 
    #              eval_set=[(X_val, y_val)], 
    #              verbose=1000, early_stopping_rounds=300)

    #lgb_train_result[test_index] += lgb_model.predict_proba(X_val)[:,1]
    #xgb_train_result[test_index] += xgb_model.predict_proba(X_val)[:,1]
    
    del X_fit, X_val, y_fit, y_val, train_index, test_index
    gc.collect()
    
    counter += 1

Fold 1



KeyboardInterrupt: 

In [17]:
for i, j in zip(X_train.columns, lgb_model.feature_importances_):
    print(i, j)

IsBeta 0
RtpStateBitfield 1365
IsSxsPassiveMode 525
DefaultBrowsersIdentifier 1447
AVProductStatesIdentifier 8784
AVProductsInstalled 1449
AVProductsEnabled 751
HasTpm 191
CountryIdentifier 8252
CityIdentifier 5093
OrganizationIdentifier 1978
GeoNameIdentifier 4427
LocaleEnglishNameIdentifier 5066
OsBuild 1157
OsSuite 872
IsProtected 615
AutoSampleOptIn 5
SMode 624
IeVerIdentifier 2749
Firewall 636
UacLuaenable 367
Census_OEMNameIdentifier 3562
Census_OEMModelIdentifier 4840
Census_ProcessorCoreCount 1442
Census_ProcessorManufacturerIdentifier 280
Census_ProcessorModelIdentifier 5333
Census_PrimaryDiskTotalCapacity 3366
Census_SystemVolumeTotalCapacity 5609
Census_HasOpticalDiskDrive 492
Census_TotalPhysicalRAM 2240
Census_InternalPrimaryDiagonalDisplaySizeInInches 4819
Census_InternalPrimaryDisplayResolutionHorizontal 2152
Census_InternalPrimaryDisplayResolutionVertical 1984
Census_InternalBatteryNumberOfCharges 3044
Census_OSBuildNumber 1194
Census_OSBuildRevision 5395
Census_OSInsta

In [20]:
joblib.dump(lgb_model, '../saved_models/lgbc_model_0.pkl')

['../saved_models/lgbc_model_0.pkl']

In [5]:
model = joblib.load('../saved_models/lgbc_model_0.pkl')

In [23]:
del X_train
del y_train
gc.collect()

0

In [6]:
path = '../data/test_final_0'

In [7]:
allFiles = glob.glob(path + "/*.csv")

list_ = []
n = 0

for file_ in allFiles:
#     mask = file_['Platform_index'] != 0
    df = pd.read_csv(file_, dtype=dict_dtypes, low_memory=True)
    list_.append(df)
    if n%10==0:
        print(n)
    n+=1

test = pd.concat(list_, axis = 0, ignore_index = True)

0
10
20
30


In [9]:
test.shape

(7853253, 100)

In [8]:
sel_cols = [c for c in test.columns if c not in ['MachineIdentifier', 'HasDetections']]

In [10]:
X_test = test.loc[:, sel_cols]
X_machines = test.loc[:,'MachineIdentifier']
del test
gc.collect()

0

In [11]:
preds = model.predict_proba(X_test)

In [12]:
len(preds)

7853253

In [13]:
preds_1 = preds[:,1]

In [14]:
len(preds_1)

7853253

In [15]:
len(X_machines)

7853253

In [16]:
df_prds = pd.DataFrame({'MachineIdentifier': X_machines, 'HasDetections': preds_1})

In [17]:
df_prds.head()

Unnamed: 0,MachineIdentifier,HasDetections
0,000037d9ea7b7cfbf476b433586e5113,0.446491
1,000055ea914f7ef72f9ad26b5cdf4195,0.389861
2,00009cf494f93eab0732fe30814791ec,0.536218
3,00009dbf12762fca1d5d6624ca4a260c,0.482819
4,0000fae061a473c3ce173f5e545c36fe,0.590998


In [18]:
df_prds.to_csv('../submissions/lgb_model_0.csv', index=None)