In [1]:
import pandas as pd
from sklearn.pipeline import Pipeline
import dill
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
import logging
import gc
import dd as dd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score
import numpy as np

#### Представим, что у нас есть пара сырых файлов паркет по которым нужно предсказать кредитную историю (сохранил 1 рандомный файл паркет из исходной выборки), у меня все прописано в модуле dd, который сразу энкодит и сохраняет датасет, который потом скармливается в модель и получает предикты (функция из модуля dd modify_data_pipe)

#### Промежуточные шаги по типу энкодинга не включены, так как изначально обрабатываются в функции dd.modify_data(), с помощью которой тренировочный фрейм был сохранен на этапе моделирования, предикт реализуется отдельной функцией

In [2]:
class PipeLine(object):
    
    def __init__(self, target, path, path_to_save_model):
        self.pipe = None
        self.target = target
        self.path = path
        self.path_to_save = path_to_save_model
        self.best_score = None
        self.columns = None
        
    def print_info(self):
        print(self.target, self.path)
        
    def pipeline_fit(self) -> None:
        
        print(f'Starting fitting for {self.target} in {self.path}')
        
        data = pd.read_csv(self.path, index_col=0)
        
        X = data.drop(self.target, axis=1)
        y = data[self.target]
        
        self.columns = list(X.columns.values)
        
        del data
        
        numerical_features = make_column_selector(dtype_include=['int64', 'float64'])
        categorical_features = make_column_selector(dtype_include=object)
        
        model_filename_xgb = f'{self.path_to_save}/XGBClassifier_cr_sc.pkl'
#         model_filename_cat = f'{self.path_to_save}/CatClassifier_cr_sc.pkl'
#         model_filename_lgbm = f'{self.path_to_save}/LGBMClassifier_cr_sc.pkl'
        
        with open(model_filename_xgb, 'rb') as file:
            XGBClassifier = dill.load(file)

#         with open(model_filename_cat, 'rb') as file:
#             CatBoostClassifier = dill.load(file)

#         with open(model_filename_lgbm, 'rb') as file:
#             LGBMClassifier = dill.load(file)
            
        models = [
        XGBClassifier,
#         CatBoostClassifier,
#         LGBMClassifier
        ]
        
        best_score = .0
        best_pipe = None
        
        for model in models:
            
            pipe = Pipeline([
                ('imputer', SimpleImputer(strategy='most_frequent')),
                ('classifier', model)
            ])
            
            score = cross_val_score(pipe, X, y, cv=3, scoring='roc_auc')
            
            if score.mean() > best_score:
                best_score = score.mean()
                best_pipe = model
                
        print(f'Best Model: {best_pipe}, roc_auc_mean: {best_score}')
        
        fitting_best_pipe = best_pipe.fit(X, y)
        
        self.pipe = fitting_best_pipe
        
        print(f'Model {self.pipe} fitting done')
        
        model_filename_save = f'{self.path_to_save}/best_model_ever.pkl'
        
        with open(model_filename_save, 'wb') as file:
            dill.dump(self.pipe, file)
            
        return self
            
        
    def predict_data(self, path_to_predict_data, path_to_save, target_path, parts):
        
        predict_data = dd.modify_data_pipe(path_to_dataset=path_to_predict_data,
                              num_parts_to_preprocess_at_once=1,
                              num_parts_total=3,
                              save_to_path=path_to_save,
                              target_path=target_path,
                              path_to_save=path_to_save,
                              num_parts_to_read=parts)
        
        empty_frame = pd.DataFrame(columns=self.columns)
        
        predict_data_full = empty_frame.reindex(columns=self.columns, fill_value=0).append(predict_data, ignore_index=True).fillna(np.uint8(0))
        
        display(predict_data_full.head(5))
        
        print('Predict data shapes', predict_data_full.shape)
        
        predictions = self.pipe.predict(predict_data_full.drop('id', axis=1))
        
        predictions = pd.DataFrame(predictions, columns=['target_pred'])
        
        return pd.concat([predictions, predict_data_full], axis=1)
        

        
        
        
        

In [3]:
pipe = PipeLine(target='flag', path='train_data_for_pipe.csv', path_to_save_model='pickle_model')

In [4]:
best_pipe = pipe.pipeline_fit()

Starting fitting for flag in train_data_for_pipe.csv


  mask |= (ar1 == a)


Best Model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=0.8, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='auc', feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=4, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=1082, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=42, ...), roc_auc_mean: 0.7670844420727212
Model XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=0.8, early_stopping_rounds=None,
              e

In [5]:
predictions = best_pipe.predict_data(path_to_predict_data='data_for_pipe/train_data',
                              target_path='data_for_pipe/process_data',
                              path_to_save='data_for_pipe/preprocess_train_data',
                                    parts=3)

Modify data!


Transforming transactions data:   0%|          | 0/3 [00:00<?, ?it/s]

['data_for_pipe/train_data\\train_data_11.pq', 'data_for_pipe/train_data\\train_data_2.pq', 'data_for_pipe/train_data\\train_data_5.pq']
Reading chunks!
Read chunk: data_for_pipe/train_data\train_data_11.pq


Reading chunks with pandas!:   0%|          | 0/1 [00:00<?, ?it/s]

Chunk data_for_pipe/train_data\train_data_11.pq
['data_for_pipe/train_data\\train_data_11.pq', 'data_for_pipe/train_data\\train_data_2.pq', 'data_for_pipe/train_data\\train_data_5.pq']
Reading chunks!
Read chunk: data_for_pipe/train_data\train_data_2.pq


Reading chunks with pandas!:   0%|          | 0/1 [00:00<?, ?it/s]

Chunk data_for_pipe/train_data\train_data_2.pq
['data_for_pipe/train_data\\train_data_11.pq', 'data_for_pipe/train_data\\train_data_2.pq', 'data_for_pipe/train_data\\train_data_5.pq']
Reading chunks!
Read chunk: data_for_pipe/train_data\train_data_5.pq


Reading chunks with pandas!:   0%|          | 0/1 [00:00<?, ?it/s]

Chunk data_for_pipe/train_data\train_data_5.pq


Transforming transactions data:   0%|          | 0/3 [00:00<?, ?it/s]

['data_for_pipe/train_data\\train_data_11.pq', 'data_for_pipe/train_data\\train_data_2.pq', 'data_for_pipe/train_data\\train_data_5.pq']
Reading chunks!
Read chunk: data_for_pipe/train_data\train_data_11.pq


Reading chunks with pandas!:   0%|          | 0/1 [00:00<?, ?it/s]

Chunk data_for_pipe/train_data\train_data_11.pq
['data_for_pipe/train_data\\train_data_11.pq', 'data_for_pipe/train_data\\train_data_2.pq', 'data_for_pipe/train_data\\train_data_5.pq']
Reading chunks!
Read chunk: data_for_pipe/train_data\train_data_2.pq


Reading chunks with pandas!:   0%|          | 0/1 [00:00<?, ?it/s]

Chunk data_for_pipe/train_data\train_data_2.pq
['data_for_pipe/train_data\\train_data_11.pq', 'data_for_pipe/train_data\\train_data_2.pq', 'data_for_pipe/train_data\\train_data_5.pq']
Reading chunks!
Read chunk: data_for_pipe/train_data\train_data_5.pq


Reading chunks with pandas!:   0%|          | 0/1 [00:00<?, ?it/s]

Chunk data_for_pipe/train_data\train_data_5.pq
['data_for_pipe/preprocess_train_data\\processed_chunk_count_agg_000.parquet', 'data_for_pipe/preprocess_train_data\\processed_chunk_count_agg_001.parquet', 'data_for_pipe/preprocess_train_data\\processed_chunk_count_agg_002.parquet']
Reading chunks!


Reading chunks in COUNT frames:   0%|          | 0/3 [00:00<?, ?it/s]

Reading chunks in WEIGHT frames:   0%|          | 0/3 [00:00<?, ?it/s]

Фрейм сохранен по пути data_for_pipe/process_data\train_data_w_target.csv


Unnamed: 0,pre_since_opened_0_x,pre_since_opened_1_x,pre_since_opened_2_x,pre_since_opened_3_x,pre_since_opened_4_x,pre_since_opened_5_x,pre_since_opened_6_x,pre_since_opened_7_x,pre_since_opened_8_x,pre_since_opened_9_x,pre_since_opened_10_x,pre_since_opened_11_x,pre_since_opened_12_x,pre_since_opened_13_x,pre_since_opened_14_x,pre_since_opened_15_x,pre_since_opened_16_x,pre_since_opened_17_x,pre_since_opened_18_x,pre_since_opened_19_x,pre_since_confirmed_0_x,pre_since_confirmed_1_x,pre_since_confirmed_2_x,pre_since_confirmed_3_x,pre_since_confirmed_4_x,pre_since_confirmed_5_x,pre_since_confirmed_6_x,pre_since_confirmed_7_x,pre_since_confirmed_8_x,pre_since_confirmed_9_x,pre_since_confirmed_10_x,pre_since_confirmed_11_x,pre_since_confirmed_12_x,pre_since_confirmed_13_x,pre_since_confirmed_14_x,pre_since_confirmed_15_x,pre_since_confirmed_16_x,pre_since_confirmed_17_x,pre_pterm_0_x,pre_pterm_1_x,pre_pterm_2_x,pre_pterm_3_x,pre_pterm_4_x,pre_pterm_5_x,pre_pterm_6_x,pre_pterm_7_x,pre_pterm_8_x,pre_pterm_9_x,pre_pterm_10_x,pre_pterm_11_x,pre_pterm_12_x,pre_pterm_13_x,pre_pterm_14_x,pre_pterm_15_x,pre_pterm_16_x,pre_pterm_17_x,pre_fterm_0_x,pre_fterm_1_x,pre_fterm_2_x,pre_fterm_3_x,pre_fterm_4_x,pre_fterm_5_x,pre_fterm_6_x,pre_fterm_7_x,pre_fterm_8_x,pre_fterm_9_x,pre_fterm_10_x,pre_fterm_11_x,pre_fterm_12_x,pre_fterm_13_x,pre_fterm_14_x,pre_fterm_15_x,pre_fterm_16_x,pre_till_pclose_0_x,pre_till_pclose_1_x,pre_till_pclose_2_x,pre_till_pclose_3_x,pre_till_pclose_4_x,pre_till_pclose_5_x,pre_till_pclose_6_x,pre_till_pclose_7_x,pre_till_pclose_8_x,pre_till_pclose_9_x,pre_till_pclose_10_x,pre_till_pclose_11_x,pre_till_pclose_12_x,pre_till_pclose_13_x,pre_till_pclose_14_x,pre_till_pclose_15_x,pre_till_pclose_16_x,pre_till_fclose_0_x,pre_till_fclose_1_x,pre_till_fclose_2_x,pre_till_fclose_3_x,pre_till_fclose_4_x,pre_till_fclose_5_x,pre_till_fclose_6_x,pre_till_fclose_7_x,pre_till_fclose_8_x,pre_till_fclose_9_x,pre_till_fclose_10_x,pre_till_fclose_11_x,pre_till_fclose_12_x,pre_till_fclose_13_x,pre_till_fclose_14_x,pre_till_fclose_15_x,pre_loans_credit_limit_0_x,pre_loans_credit_limit_1_x,pre_loans_credit_limit_2_x,pre_loans_credit_limit_3_x,pre_loans_credit_limit_4_x,pre_loans_credit_limit_5_x,pre_loans_credit_limit_6_x,pre_loans_credit_limit_7_x,pre_loans_credit_limit_8_x,pre_loans_credit_limit_9_x,pre_loans_credit_limit_10_x,pre_loans_credit_limit_11_x,pre_loans_credit_limit_12_x,pre_loans_credit_limit_13_x,pre_loans_credit_limit_14_x,pre_loans_credit_limit_15_x,pre_loans_credit_limit_16_x,pre_loans_credit_limit_17_x,pre_loans_credit_limit_18_x,pre_loans_credit_limit_19_x,pre_loans_next_pay_summ_0_x,pre_loans_next_pay_summ_1_x,pre_loans_next_pay_summ_2_x,pre_loans_next_pay_summ_3_x,pre_loans_next_pay_summ_4_x,pre_loans_next_pay_summ_5_x,pre_loans_next_pay_summ_6_x,pre_loans_outstanding_1_x,pre_loans_outstanding_2_x,pre_loans_outstanding_3_x,pre_loans_outstanding_4_x,pre_loans_outstanding_5_x,pre_loans_total_overdue_0_x,pre_loans_max_overdue_sum_1_x,pre_loans_max_overdue_sum_2_x,pre_loans_max_overdue_sum_3_x,pre_loans_credit_cost_rate_0_x,pre_loans_credit_cost_rate_1_x,pre_loans_credit_cost_rate_2_x,pre_loans_credit_cost_rate_3_x,pre_loans_credit_cost_rate_4_x,pre_loans_credit_cost_rate_5_x,pre_loans_credit_cost_rate_6_x,pre_loans_credit_cost_rate_7_x,pre_loans_credit_cost_rate_8_x,pre_loans_credit_cost_rate_9_x,pre_loans_credit_cost_rate_10_x,pre_loans_credit_cost_rate_11_x,pre_loans_credit_cost_rate_12_x,pre_loans_credit_cost_rate_13_x,pre_loans5_0_x,pre_loans5_2_x,pre_loans5_3_x,pre_loans5_5_x,pre_loans5_6_x,pre_loans5_7_x,pre_loans5_13_x,pre_loans5_16_x,pre_loans530_0_x,pre_loans530_1_x,pre_loans530_2_x,pre_loans530_3_x,pre_loans530_4_x,pre_loans530_6_x,pre_loans530_7_x,pre_loans530_10_x,pre_loans530_11_x,pre_loans530_12_x,pre_loans530_13_x,pre_loans530_14_x,pre_loans530_15_x,pre_loans530_16_x,pre_loans530_18_x,pre_loans3060_2_x,pre_loans3060_5_x,pre_loans3060_7_x,pre_loans3060_8_x,pre_loans3060_9_x,pre_loans6090_1_x,pre_loans6090_2_x,pre_loans6090_4_x,pre_loans90_8_x,pre_loans90_13_x,pre_loans90_14_x,pre_loans90_19_x,is_zero_loans5_0_x,is_zero_loans5_1_x,is_zero_loans530_0_x,is_zero_loans530_1_x,is_zero_loans3060_0_x,is_zero_loans3060_1_x,is_zero_loans6090_0_x,is_zero_loans6090_1_x,is_zero_loans90_0_x,is_zero_loans90_1_x,pre_util_0_x,pre_util_1_x,pre_util_2_x,pre_util_3_x,pre_util_4_x,pre_util_5_x,pre_util_6_x,pre_util_7_x,pre_util_8_x,pre_util_9_x,pre_util_10_x,pre_util_11_x,pre_util_12_x,pre_util_13_x,pre_util_14_x,pre_util_15_x,pre_util_16_x,pre_util_17_x,pre_util_18_x,pre_util_19_x,pre_over2limit_0_x,pre_over2limit_1_x,pre_over2limit_2_x,pre_over2limit_3_x,pre_over2limit_4_x,pre_over2limit_5_x,pre_over2limit_6_x,pre_over2limit_7_x,pre_over2limit_8_x,pre_over2limit_9_x,pre_over2limit_10_x,pre_over2limit_11_x,pre_over2limit_12_x,pre_over2limit_13_x,pre_over2limit_14_x,pre_over2limit_15_x,pre_over2limit_16_x,pre_over2limit_17_x,pre_over2limit_18_x,pre_over2limit_19_x,pre_maxover2limit_0_x,pre_maxover2limit_1_x,pre_maxover2limit_2_x,pre_maxover2limit_3_x,pre_maxover2limit_4_x,pre_maxover2limit_5_x,pre_maxover2limit_6_x,pre_maxover2limit_7_x,pre_maxover2limit_8_x,...,pre_loans530_10_y,pre_loans530_11_y,pre_loans530_12_y,pre_loans530_13_y,pre_loans530_14_y,pre_loans530_15_y,pre_loans530_16_y,pre_loans530_18_y,pre_loans3060_2_y,pre_loans3060_5_y,pre_loans3060_7_y,pre_loans3060_8_y,pre_loans3060_9_y,pre_loans6090_1_y,pre_loans6090_2_y,pre_loans6090_4_y,pre_loans90_8_y,pre_loans90_13_y,pre_loans90_14_y,pre_loans90_19_y,is_zero_loans5_0_y,is_zero_loans5_1_y,is_zero_loans530_0_y,is_zero_loans530_1_y,is_zero_loans3060_0_y,is_zero_loans3060_1_y,is_zero_loans6090_0_y,is_zero_loans6090_1_y,is_zero_loans90_0_y,is_zero_loans90_1_y,pre_util_0_y,pre_util_1_y,pre_util_2_y,pre_util_3_y,pre_util_4_y,pre_util_5_y,pre_util_6_y,pre_util_7_y,pre_util_8_y,pre_util_9_y,pre_util_10_y,pre_util_11_y,pre_util_12_y,pre_util_13_y,pre_util_14_y,pre_util_15_y,pre_util_16_y,pre_util_17_y,pre_util_18_y,pre_util_19_y,pre_over2limit_0_y,pre_over2limit_1_y,pre_over2limit_2_y,pre_over2limit_3_y,pre_over2limit_4_y,pre_over2limit_5_y,pre_over2limit_6_y,pre_over2limit_7_y,pre_over2limit_8_y,pre_over2limit_9_y,pre_over2limit_10_y,pre_over2limit_11_y,pre_over2limit_12_y,pre_over2limit_13_y,pre_over2limit_14_y,pre_over2limit_15_y,pre_over2limit_16_y,pre_over2limit_17_y,pre_over2limit_18_y,pre_over2limit_19_y,pre_maxover2limit_0_y,pre_maxover2limit_1_y,pre_maxover2limit_2_y,pre_maxover2limit_3_y,pre_maxover2limit_4_y,pre_maxover2limit_5_y,pre_maxover2limit_6_y,pre_maxover2limit_7_y,pre_maxover2limit_8_y,pre_maxover2limit_9_y,pre_maxover2limit_10_y,pre_maxover2limit_11_y,pre_maxover2limit_12_y,pre_maxover2limit_13_y,pre_maxover2limit_14_y,pre_maxover2limit_15_y,pre_maxover2limit_16_y,pre_maxover2limit_17_y,pre_maxover2limit_18_y,pre_maxover2limit_19_y,is_zero_util_0_y,is_zero_util_1_y,is_zero_over2limit_0_y,is_zero_over2limit_1_y,is_zero_maxover2limit_0_y,is_zero_maxover2limit_1_y,enc_paym_0_0_y,enc_paym_0_1_y,enc_paym_0_2_y,enc_paym_0_3_y,enc_paym_1_0_y,enc_paym_1_1_y,enc_paym_1_2_y,enc_paym_1_3_y,enc_paym_2_0_y,enc_paym_2_1_y,enc_paym_2_2_y,enc_paym_2_3_y,enc_paym_3_0_y,enc_paym_3_1_y,enc_paym_3_2_y,enc_paym_3_3_y,enc_paym_4_0_y,enc_paym_4_1_y,enc_paym_4_2_y,enc_paym_4_3_y,enc_paym_5_0_y,enc_paym_5_1_y,enc_paym_5_2_y,enc_paym_5_3_y,enc_paym_6_0_y,enc_paym_6_1_y,enc_paym_6_2_y,enc_paym_6_3_y,enc_paym_7_0_y,enc_paym_7_1_y,enc_paym_7_2_y,enc_paym_7_3_y,enc_paym_8_0_y,enc_paym_8_1_y,enc_paym_8_2_y,enc_paym_8_3_y,enc_paym_9_0_y,enc_paym_9_1_y,enc_paym_9_2_y,enc_paym_9_3_y,enc_paym_10_0_y,enc_paym_10_1_y,enc_paym_10_2_y,enc_paym_10_3_y,enc_paym_11_1_y,enc_paym_11_2_y,enc_paym_11_3_y,enc_paym_11_4_y,enc_paym_12_0_y,enc_paym_12_1_y,enc_paym_12_2_y,enc_paym_12_3_y,enc_paym_13_0_y,enc_paym_13_1_y,enc_paym_13_2_y,enc_paym_13_3_y,enc_paym_14_0_y,enc_paym_14_1_y,enc_paym_14_2_y,enc_paym_14_3_y,enc_paym_15_0_y,enc_paym_15_1_y,enc_paym_15_2_y,enc_paym_15_3_y,enc_paym_16_0_y,enc_paym_16_1_y,enc_paym_16_2_y,enc_paym_16_3_y,enc_paym_17_0_y,enc_paym_17_1_y,enc_paym_17_2_y,enc_paym_17_3_y,enc_paym_18_0_y,enc_paym_18_1_y,enc_paym_18_2_y,enc_paym_18_3_y,enc_paym_19_0_y,enc_paym_19_1_y,enc_paym_19_2_y,enc_paym_19_3_y,enc_paym_20_1_y,enc_paym_20_2_y,enc_paym_20_3_y,enc_paym_20_4_y,enc_paym_21_0_y,enc_paym_21_1_y,enc_paym_21_2_y,enc_paym_21_3_y,enc_paym_22_0_y,enc_paym_22_1_y,enc_paym_22_2_y,enc_paym_22_3_y,enc_paym_23_0_y,enc_paym_23_1_y,enc_paym_23_2_y,enc_paym_23_3_y,enc_paym_24_1_y,enc_paym_24_2_y,enc_paym_24_3_y,enc_paym_24_4_y,enc_loans_account_holder_type_0_y,enc_loans_account_holder_type_1_y,enc_loans_account_holder_type_2_y,enc_loans_account_holder_type_3_y,enc_loans_account_holder_type_4_y,enc_loans_account_holder_type_5_y,enc_loans_account_holder_type_6_y,enc_loans_credit_status_0_y,enc_loans_credit_status_1_y,enc_loans_credit_status_2_y,enc_loans_credit_status_3_y,enc_loans_credit_status_4_y,enc_loans_credit_status_5_y,enc_loans_credit_status_6_y,enc_loans_credit_type_0_y,enc_loans_credit_type_1_y,enc_loans_credit_type_2_y,enc_loans_credit_type_3_y,enc_loans_credit_type_4_y,enc_loans_credit_type_5_y,enc_loans_account_cur_0_y,enc_loans_account_cur_1_y,enc_loans_account_cur_2_y,enc_loans_account_cur_3_y,pclose_flag_0_y,pclose_flag_1_y,fclose_flag_0_y,fclose_flag_1_y,history_lenght,pre_loans5_1_y,pre_loans530_19_y,pre_loans3060_1_y,pre_loans6090_3_y,pre_loans90_2_y,pre_loans5_8_y,pre_loans5_9_y,pre_loans5_11_y,pre_loans530_5_y,pre_loans3060_6_y,pre_loans90_3_y,pre_loans90_10_y,enc_loans_credit_type_6_y,enc_loans_credit_type_7_y,pre_loans530_8_y,pre_loans530_9_y,pre_loans_total_overdue_1_y,pre_loans_max_overdue_sum_0_y,pre_loans3060_0_y,pre_loans3060_3_y,pre_loans3060_4_y,pre_loans6090_0_y,pre_loans5_10_y,pre_loans530_17_y,id
0,0,1,1,0,0,0,0,1,0,4,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,1,0,0,0,0,0,0.0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,0,2,1,0,0,0,1,0,0,0,0,0,2,2,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,2,0,2,0,1,0,0,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,0,0,0,0,1,2,1,1,0,2,0,1,4,0,2,7,0,7,0,1,1,1,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,7,0,0,0,0,0,7,7,0,0,0,0,7,3,4,0,7,0,7,0,7,0,0,0,0,0,0,0,0,0,1,1,2,1,0,0,0,1,0,1,0,0,0,6,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0001,0.0,0.0,1.0001,0.0,0.0,0.0,0.0,0.0,1.0001,1.0001,0.0,0.0,0.0,0.0,1.0001,0.3871,0.613,0.0,1.0001,0.0,1.0001,0.0,1.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.089,0.2915,0.1836,0.0191,0.0,0.0,0.0,0.182,0.0,0.2349,0.0,0.0,0.0,0.9111,0.0,0.0,0.089,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.089,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9111,0.0,0.0,0.8181,0.182,0.089,0.9111,0.089,0.9111,1.0001,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,0.8479,0.1522,0.0,0.0,1.0001,0.0,0.0,0.0,0.7652,0.2349,0.0,0.0,1.0001,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,0.6895,0.0191,0.0,0.2915,0.5564,0.1522,0.0,0.2915,0.2917,0.0,0.0,0.7084,0.2027,0.0,0.0,0.7974,0.0696,0.0,0.0,0.9305,0.0696,0.0,0.0,0.9305,0.0696,0.0,0.0,0.9305,0.0696,0.0,0.0,0.9305,0.0191,0.0,0.0,0.981,0.0191,0.0,0.0,0.981,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,1.0001,0.0,1.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3215,0.3871,0.0,0.0,1.0001,0.0,0.0,0.9111,0.089,0.6196,0.3805,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2915,0.0,0.0,0.0,0.0,0,0,0,0,0,0,2750000.0
1,2,0,1,1,1,0,0,1,0,0,0,1,0,0,1,2,0,0,0,0,0,1,1,0,2,0,0,0,1,0,1,1,1,0,0,0.0,0,2,0,0,0,0,0,0,5,1,1,0,0,0,0,1,1,1,0,0,1,4,1,0,0,0,0,1,0,0,0,0,0,1,1,0,1,5,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,1,0,0,5,0,0,1,1,0,0,1,1,0,0,0,0,0,2,0,0,1,0,1,0,1,0,1,0,0,3,1,0,0,0,0,0,1,8,0,0,1,0,1,0,8,0,1,10,0,10,0,0,0,7,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,10,0,0,0,0,0,10,10,0,0,0,0,10,0,10,0,10,0,10,0,10,0,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,5,0,1,0,0,0,7,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.1564,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1431,0.0,0.0,0.0,0.0,0.0,0.0,0.4867,0.0,0.2138,0.0,0.0,0.0,0.8569,0.0,0.0,0.1431,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1431,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8569,0.0,0.0,0.5133,0.4867,0.1431,0.8569,0.1431,0.8569,1.0,0.0,0.0,0.0,0.7919,0.0,0.0,0.2081,0.7523,0.0,0.0,0.2477,0.7523,0.0,0.0,0.2477,0.7523,0.0,0.0,0.2477,0.7523,0.0,0.0,0.2477,0.5632,0.0,0.0,0.4368,0.3494,0.0,0.0,0.6506,0.3494,0.0,0.0,0.6506,0.3409,0.0,0.0,0.6591,0.1564,0.0,0.0,0.8436,0.1564,0.0,0.0,0.8436,0.1564,0.0,0.0,0.8436,0.1564,0.0,0.0,0.8436,0.1564,0.0,0.0,0.8436,0.1564,0.0,0.0,0.8436,0.1564,0.0,0.0,0.8436,0.1564,0.0,0.0,0.8436,0.1564,0.0,0.0,0.8436,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4098,0.5902,0.0,0.0,0.0,0.2157,0.0,0.0,0.1431,0.6412,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,2750001.0
2,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,2,0,2,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,2,2,0,0,0,0,2,0,2,0,2,0,2,0,2,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.7252,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2748,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.2748,0.0,0.0,0.7252,0.2748,0.0,0.0,0.7252,0.2748,0.0,0.0,0.7252,0.2748,0.0,0.0,0.7252,0.2748,0.0,0.0,0.7252,0.2748,0.0,0.0,0.7252,0.2748,0.0,0.0,0.7252,0.2748,0.0,0.0,0.7252,0.2748,0.0,0.0,0.7252,0.2748,0.0,0.0,0.7252,0.2748,0.0,0.0,0.7252,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7252,0.2748,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.7252,0.2748,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,2750002.0
3,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,2,0,0,1,0,1,0,1,1,0,3,0,3,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,3,0,0,0,0,0,3,3,0,0,0,1,2,0,3,0,3,0,3,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0001,0.0,0.0,1.0001,0.0,0.0,0.0,0.0,0.0,1.0001,1.0001,0.0,0.0,0.0,0.3182,0.6819,0.0,1.0001,0.0,1.0001,0.0,1.0001,0.0,1.0001,0.0,0.0,0.0,0.5613,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1206,0.0,0.3182,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0001,0.0,0.0,0.8795,0.1206,0.0,1.0001,0.0,1.0001,0.6819,0.3182,0.0,0.0,0.1206,0.3182,0.0,0.5613,0.1206,0.3182,0.0,0.5613,0.4388,0.0,0.0,0.5613,0.4388,0.0,0.0,0.5613,0.4388,0.0,0.0,0.5613,0.4388,0.0,0.0,0.5613,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.3182,0.0,0.0,0.6819,0.0,1.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4388,0.5613,0.0,0.0,1.0001,0.0,0.0,1.0001,0.0,0.5613,0.4388,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,2750003.0
4,1,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,0,3,1,0,0,0,1,0,0,0,0,0,1,0,0,2,2,1,1,0.0,0,1,1,0,1,2,0,0,0,2,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,3,2,0,0,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,2,0,1,0,0,3,1,0,0,0,1,1,1,1,0,0,1,0,3,0,0,0,0,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,3,0,0,0,9,0,0,0,0,0,0,9,0,0,9,0,9,0,1,1,1,0,2,0,0,1,0,2,1,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,9,0,0,0,0,0,9,9,0,0,0,0,9,2,7,0,9,0,9,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0001,0.0,0.0,1.0001,0.0,0.0,0.0,0.0,0.0,1.0001,1.0001,0.0,0.0,0.0,0.0,1.0001,0.0612,0.9389,0.0,1.0001,0.0,1.0001,0.0,1.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,1.0001,0.0,1.0001,0.0,1.0001,1.0001,0.0,0.0,0.0,1.0001,0.0,0.0,0.0,0.9389,0.0612,0.0,0.0,1.0001,0.0,0.0,0.0,0.6565,0.0,0.0,0.3436,0.6565,0.0,0.0,0.3436,0.6565,0.0,0.0,0.3436,0.6279,0.0,0.0,0.3722,0.4949,0.0,0.0,0.5052,0.4949,0.0,0.0,0.5052,0.4445,0.0,0.0,0.5556,0.4445,0.0,0.0,0.5556,0.4445,0.0,0.0,0.5556,0.199,0.0,0.0,0.8011,0.199,0.0,0.0,0.8011,0.199,0.0,0.0,0.8011,0.199,0.0,0.0,0.8011,0.199,0.0,0.0,0.8011,0.199,0.0,0.0,0.8011,0.199,0.0,0.0,0.8011,0.199,0.0,0.0,0.8011,0.199,0.0,0.0,0.8011,0.199,0.0,0.0,0.8011,0.199,0.0,0.0,0.8011,0.199,0.0,0.0,0.8011,0.0,1.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.199,0.8011,0.0,0.0,0.0,0.0,0.0,0.0,0.199,0.4575,0.3436,0.0,1.0001,0.0,0.0,1.0001,0.0,1.0001,0.0,9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,2750004.0


Predict data shapes (750000, 840)


In [7]:
predictions.target_pred.value_counts()

0    749881
1       119
Name: target_pred, dtype: int64