I am publishing this kernel only at the request of Luis Teyerin; many thanks for hosting this interesting contest.
Please do not judge too harshly the lack of detailed comments and the amount of wheel-reinventing in the code.

For those who are too lazy to explore the code, I will briefly describe the approach:

1. Features: nothing remarkable; they are about the same as in many other kernels in this competition.
2. Model. 
    * I built two pretty good base models: LogisticRegression (0.437) and LGBMClassifier (0.434). 
    * Then I strengthened each of them with something like bagging (FoldsEstimator in the code): training on different subsamples and averaging the results.
    *  The final step was to blend the two models obtained through averaging (0.446 on LB). The distinctive part of this last step was weighting each prediction in proportion to its class probabilities, plus an additional hyperparameter that regulates the weights of the base classifiers in the final decision.

I borrowed the idea of averaging classes from Ilya V. Shchurov: https://www.kaggle.com/ischurov/more-feature-eng-lgb-5-fold-early-stopping# and developed it a bit by trying different averaging options. The same kernel also gave me the idea of using some new features (variance/min/max).

I hid the source code of some tools to not complicate the code.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed.
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load.

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this cell (by clicking run or pressing Shift+Enter) lists the files in the input directory.

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
from sklearn.base import clone, BaseEstimator, TransformerMixin
from sklearn.model_selection import StratifiedKFold
import numpy as np
from collections import Counter


class FoldsEstimator(BaseEstimator, TransformerMixin):
    """Bagging-like wrapper: one clone of ``base_clf`` is fitted per CV fold.

    Each clone is trained on the *train* part of one split produced by ``cv``
    and the per-clone outputs are combined at prediction time.

    :param base_clf: estimator to clone (or, in ``'off'`` mode, to fit directly)
    :param cv: splitter with a ``split(X, y)`` method; defaults to a shuffled
        5-fold ``StratifiedKFold`` with ``random_state=25``
    :param predict_mode: how ``predict`` combines the clones: ``'vote'``,
        ``'mean'``, ``'prob'``, ``'proba_wmean'`` (alias
        ``'predict_weighted_mean'``) or ``'off'`` (no folds, use ``base_clf``)
    :param proba_mode: how ``predict_proba`` combines the clones:
        ``'mean'`` or ``'max'``

    Several modes (``'vote'`` tie-breaking, ``'prob'``) map probability column
    ``j`` to class label ``j + 1``, i.e. they assume labels ``1..K``.
    """

    def __init__(self, base_clf, cv=None, predict_mode='vote', proba_mode='mean'):
        self.base_clf = base_clf
        cv = cv if cv is not None else StratifiedKFold(n_splits=5, shuffle=True, random_state=25)
        self.cv = cv
        self.clfs = []
        self.predict_mode = predict_mode
        self.proba_mode = proba_mode

    @staticmethod
    def _take_rows(data, rows):
        """Select ``rows`` positionally from a pandas object or an array-like."""
        if hasattr(data, 'iloc'):
            return data.iloc[rows]
        return data[rows]

    def _members(self):
        """Return the fitted estimators whose outputs are to be combined.

        After an ``'off'``-mode fit there are no fold clones, so the directly
        fitted ``base_clf`` is used as the only member.
        """
        if self.predict_mode == 'off' and not self.clfs:
            return [self.base_clf]
        return self.clfs

    def fit(self, X, y):
        """Fit ``base_clf`` directly (``'off'``) or one clone per CV fold.

        :param X: feature matrix (array-like or DataFrame)
        :param y: target (array-like or Series)
        :returns: self
        """
        if self.predict_mode == 'off':
            self.base_clf.fit(X, y)
        else:
            self.clfs = []
            # Only the train part of each split is used; the held-out part is ignored.
            for train_idx, _ in self.cv.split(X, y):
                clf = clone(self.base_clf)
                clf.fit(self._take_rows(X, train_idx), self._take_rows(y, train_idx))
                self.clfs.append(clf)
        return self

    def predict(self, X, mode=None):  # vote, mean, proba_wmean, prob, off
        """Predict labels using ``mode`` (defaults to ``self.predict_mode``).

        :raises ValueError: if the mode is not one of the supported names
        :returns: numpy array of predictions
        """
        mode = mode if mode is not None else self.predict_mode
        handlers = {
            'vote': self.predict_vote,
            'mean': self.predict_mean,
            'prob': self.predict_by_proba,
            'proba_wmean': self.predict_weighted_mean,
            'predict_weighted_mean': self.predict_weighted_mean,
            'off': self.predict_simple,
        }
        if mode not in handlers:
            raise ValueError(f'Unknown predict mode: {mode}')
        return np.array(handlers[mode](X))

    def predict_proba(self, X):
        """Combine member probabilities by element-wise mean or max.

        :raises ValueError: if ``proba_mode`` is neither ``'mean'`` nor ``'max'``
        """
        probas = self._predict_probas(X)
        if self.proba_mode == 'mean':
            return probas.mean(axis=0)
        if self.proba_mode == 'max':
            return probas.max(axis=0)
        raise ValueError(f'Unknown proba mode: {self.proba_mode}')

    def _predicts(self, X):
        """Return member predictions stacked column-wise: (n_samples, n_members)."""
        return np.hstack(tuple(clf.predict(X)[:, np.newaxis] for clf in self._members()))

    def _predict_probas(self, X):
        """Return member probabilities: (n_members, n_samples, n_classes)."""
        return np.array([clf.predict_proba(X) for clf in self._members()])

    def predict_vote(self, X):
        """Majority vote over members; ties go to the tied label with the
        highest combined probability (column ``label - 1``)."""
        predicts = self._predicts(X)
        mean_probas = self.predict_proba(X)
        res = []
        for i, votes in enumerate(predicts):
            ranked = Counter(votes).most_common()
            max_votes = ranked[0][1]
            alts = [label for label, count in ranked if count == max_votes]
            if len(alts) == 1:
                prediction = alts[0]
            else:
                probas = mean_probas[i, :][[label - 1 for label in alts]]
                prediction = alts[np.argmax(probas)]
            res.append(prediction)
        return res

    def predict_mean(self, X):
        """Round the arithmetic mean of the members' predicted labels."""
        return np.round(self._predicts(X).mean(axis=1)).astype(int)

    def predict_by_proba(self, X):
        """Pick the class with the highest combined probability (column index + 1)."""
        proba = self.predict_proba(X)
        return np.argmax(proba, axis=1) + 1

    def predict_weighted_mean(self, X):
        """Round the mean of member labels weighted by each member's top probability.

        Note: unlike the other modes this returns a column of shape (n_samples, 1).
        """
        predicts = self._predicts(X)
        probas = self._predict_probas(X)
        # probas: (n_members, n_samples, n_classes) -> weights: (n_samples, n_members)
        weights = probas.max(axis=2).T
        res = np.round((predicts * weights).sum(axis=1) / weights.sum(axis=1)).astype('int')[:, np.newaxis]
        return res

    def predict_simple(self, X):
        """Delegate to the directly fitted ``base_clf`` (``'off'`` mode)."""
        return self.base_clf.predict(X)


class MetaEstimator(BaseEstimator, TransformerMixin):
    """Blend the label predictions of several estimators by a weighted average.

    The weight of estimator ``j`` for sample ``i`` is the product of a
    per-sample weight (``predicts_weight``) and a per-estimator weight
    (``estimators_weight``), normalised so each row sums to one.

    :param estimators: list of estimators exposing ``fit``/``predict`` (and
        ``predict_proba`` when ``predicts_weight='proba'``)
    :param estimators_weight: ``'equal'`` or an array-like with one weight per estimator
    :param predicts_weight: ``'equal'`` or ``'proba'`` (weight by each
        estimator's highest class probability for the sample)
    :param mode: stored but not read anywhere in this class
    """

    def __init__(self, estimators, estimators_weight='equal', predicts_weight='equal', mode='mean'):
        self.estimators = estimators
        self.estimators_weight = estimators_weight  # equal, array of weights
        self.predicts_weight = predicts_weight  # equal, proba
        self.mode = mode

    def fit(self, X, y):
        """Fit every wrapped estimator on the same data and return self."""
        for est in self.estimators:
            est.fit(X, y)
        return self

    def _uniform_row(self):
        """Return a (1, n_estimators) row of equal weights."""
        n_estimators = len(self.estimators)
        return np.full((1, n_estimators), 1 / n_estimators)

    def get_weights(self, X):
        """Return row-normalised weights of shape (n_samples, n_estimators)."""
        weights = self.get_predict_weights(X) * self.get_estimator_weights()
        return weights / weights.sum(axis=1)[:, np.newaxis]

    def get_estimator_weights(self):
        """Return the per-estimator weights as an array.

        The string check is done with ``isinstance`` first: comparing an
        ndarray of weights with ``'equal'`` is element-wise and cannot be used
        as an ``if`` condition.

        :raises ValueError: for an unknown string setting
        """
        if isinstance(self.estimators_weight, str):
            if self.estimators_weight == 'equal':
                return self._uniform_row()
            raise ValueError(f"Unknown estimators_weight method: {self.estimators_weight}")
        return np.asarray(self.estimators_weight)

    def get_predict_weights(self, X):
        """Return the per-sample weights of shape (n_samples, n_estimators).

        :raises ValueError: for an unknown ``predicts_weight`` setting
        """
        if isinstance(self.predicts_weight, str):
            if self.predicts_weight == 'equal':
                return np.repeat(self._uniform_row(), X.shape[0], axis=0)
            if self.predicts_weight == 'proba':
                return self.get_weights_by_prob(X)
        raise ValueError(f"Unknown predict_weight method: {self.predicts_weight}")

    def get_weights_by_prob(self, X):
        """Weight each estimator by its top class probability, normalised per sample."""
        probs = self._predict_probas(X).max(axis=2).T
        return probs / probs.sum(axis=1)[:, np.newaxis]

    def predict(self, X):
        """Return the rounded weighted average of the estimators' predicted labels."""
        # ravel() lets members return either (n_samples,) or (n_samples, 1).
        res = np.column_stack([np.ravel(est.predict(X)) for est in self.estimators])
        weights = self.get_weights(X)
        return np.round((res * weights).sum(axis=1)).astype('int')

    def _predicts(self, X):
        """Return raw predictions of shape (n_estimators, n_samples)."""
        return np.array(tuple(list(clf.predict(X)) for clf in self.estimators))

    def _predict_probas(self, X):
        """Return probabilities of shape (n_estimators, n_samples, n_classes)."""
        return np.array(tuple(clf.predict_proba(X) for clf in self.estimators))


In [None]:
# -*- coding: utf-8 -*-
"""
Created on Fri Jul  6 08:16:44 2018

@author: Konstantin V. Grishanov
"""
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.base import TransformerMixin, BaseEstimator
from copy import deepcopy
from scipy.stats import norm
from datetime import datetime
from sklearn.model_selection import StratifiedKFold
from sklearn.base import clone
from collections import Counter



class XBaseTransformer(BaseEstimator, TransformerMixin):
    """
    Base class for Transformers

    Works column by column on a DataFrame. ``fit`` stores one fitted copy of
    ``transformer`` per column in ``self.transformers``; ``transform`` applies
    them to a deep copy of the input. Subclasses customise behaviour by
    overriding the hook methods (``before_*``/``after_*``, ``prepare_var``,
    ``fit_var``, ``transform_var``, ``get_one_var``, ...).

    :param none_value: constant used to replace missing values
    :param none_value_strategy: how the replacement is chosen; here only
        ``'mode'`` is special-cased, anything else uses ``none_value``
    :param transformer: prototype object that is deep-copied for every column
    :param select_columns: list of columns to process; ``None`` means all
        columns of the DataFrame passed to ``fit``
    """
    def __init__(
            self,
            none_value=None,
            none_value_strategy='const',
            transformer=None,
            select_columns=None,
    ):
        self.none_value = none_value
        self.none_value_strategy = none_value_strategy
        # Per-column replacement for missing values, filled in by fit_none_value.
        self.none_value_dict = {}

        # Columns processed by fit/transform, set by set_columns.
        self.columns = []
        # Per-column fitted transformer, filled in by fit_var.
        self.transformers = {}
        self.transformer = transformer
        self.select_columns = select_columns

        # Debug switches: `debug` enables dprint output, `debug_csv` makes
        # prepare_transform_result dump the result to a file.
        self.debug = False
        self.debug_csv = False
        self.debug_name = 'new'


    def dprint(self, *argv):
        """
        Print the arguments only when ``self.debug`` is truthy
        """
        if self.debug:
            print(*argv)

    def fit(self, X, y=None):
        """
        Fitting transformer

        For every selected column the hooks run in this order:
        fit_none_value, before_fit_var, fit_var, after_fit_var.
        
        :param X: DataFrame of data to transform
        :param y: Series of target variable
        :returns: self
        """
        #print('Fit:', X.shape)
        self.dprint(datetime.now(), 'Start fit...' + str(self.__class__), X.shape)
        self.set_columns(X, y)
        for v in self.columns:
            x = self.get_one_var(X, v)
            self.fit_none_value(x, v)
            self.before_fit_var(x, v)
            self.fit_var(x, v)
            self.after_fit_var(x, v)
        self.dprint(datetime.now(), 'End fit ' + str(self.__class__))
        return self

    def transform(self, X, y=None):
        """
        Transform X by fitted parameters

        Works on a deep copy of X, so the input is not modified. The column
        is re-read through get_one_var before each hook, so the after-hook
        sees the already transformed values.
        
        :param X: DataFrame of data to transform
        :param y: Series of target variable
        :returns: DataFrame
        """
        #print('Transform:', X.shape)
        self.dprint(datetime.now(), 'Start transform...' + str(self.__class__), X.shape)
        df = X.copy(deep=True)
        df = self.before_transform(df)
        for v in self.columns:
            self.before_transform_var(self.get_one_var(df, v), v)
            df[v] = self.transform_var(self.get_one_var(df, v), v)
            self.after_transform_var(self.get_one_var(df, v), v)
        self.dprint(datetime.now(), 'End transform' + str(self.__class__))
        return self.prepare_transform_result(df)

    def prepare_var(self, x, varname):
        """
        Prepare one var of Dataframe to transform or fitting
        By default called from fit_prepare_var and transform_prepare_var

        Missing values are replaced with the column's stored none value;
        when that value is None the Series is returned unchanged.
        
        :param x: Series to transform
        :param varname: colname in DataFrame
        :returns: Series transformed x
        """
        none_value = self.get_none_value(varname)
        if none_value is None:
            return x
        return x.fillna(none_value)

    def fit_var(self, x, varname):
        """
        Fit one var

        Creates a fresh transformer for the column, fits it on the prepared
        values and stores the result of ``fit`` in ``self.transformers``.
        
        :param x: Series
        :param varname: string
        """
        self.transformers[varname] = self.create_transformer(x, varname).fit(
            self.fit_prepare_var(x, varname)
        )
        
    def before_fit_var(self, x, varname):
        """
        Hook called for each column right before fit_var; no-op by default

        :param x: Series
        :param varname: string
        """
        pass
    
    def after_fit_var(self, x, varname):
        """
        Hook called for each column right after fit_var; no-op by default

        :param x: Series
        :param varname: string
        """
        pass

    def fit_prepare_var(self, x, varname=None):
        """
        Prepare one var for fit
        
        :param x: Series
        :param varname: string varname
        :returns: Series
        """
        # print('fit ' + varname)
        return self.prepare_var(x, varname)

    def transform_var(self, x, varname):
        """
        Transform one var with the transformer fitted for that column

        :param x: Series
        :param varname: string
        :returns: whatever the stored transformer's ``transform`` returns
        """
        # print(self.__class__, varname)
        return self.transformers[varname].transform(
            self.transform_prepare_var(x, varname)
        )
 
    def before_transform_var(self, x, varname):
        """
        Hook called for each column right before transform_var; no-op by default
        """
        pass
    
    def after_transform_var(self, x, varname):
        """
        Hook called for each column right after transform_var; no-op by default
        """
        pass

    def transform_prepare_var(self, x, varname=None):
        """
        Prepare one var for transform

        :param x: Series
        :param varname: string varname
        :returns: Series
        """
        # print('transform')
        return self.prepare_var(x, varname)

    def create_transformer(self, x, varname):
        """
        Return an independent deep copy of the prototype transformer
        """
        return deepcopy(self.transformer)

    def set_columns(self, X, y=None):
        """
        Choose the columns to process: ``select_columns`` if given,
        otherwise every column of X
        """
        self.columns = (
            list(X.columns) if self.select_columns is None
            else self.select_columns
        )

    def get_one_var(self, X, v):
        """
        Extract the Series for column ``v`` from X
        """
        return X[v]


    def prepare_transform_result(self, X):
        """
        Restrict the transformed DataFrame to the processed columns

        When ``debug_csv`` is set the result is also written to the file
        named by ``debug_filename('vars')``.

        :param X: transformed DataFrame
        :returns: DataFrame with only ``self.columns``
        """
        cols = self.columns
        #cols.sort()
        if self.debug_csv:
            X[cols].to_csv(self.debug_filename('vars'))
        return X[cols]

    def before_transform(self, X):
        """
        Hook called once on the copied DataFrame before any column is
        transformed; returns X unchanged by default
        """
        return X

    def calc_default_value(self, x, varname, strategy, default_value):
        """
        Compute a replacement value for one column

        :param x: Series
        :param varname: string (not used in this base implementation)
        :param strategy: ``'mode'`` takes the first mode of x; any other
            value falls back to ``default_value``
        :param default_value: value returned for non-'mode' strategies
        """
        if strategy == 'mode':
            return x.mode()[0]
        return default_value

    def fit_none_value(self, x, varname):
        """
        Compute and store the missing-value replacement for one column
        """
        self.none_value_dict[varname] = self.calc_default_value(
            x, varname, self.none_value_strategy, self.none_value
        )

    def get_none_value(self, varname):
        """
        Return the stored missing-value replacement for one column
        (raises KeyError if the column was not fitted)
        """
        return self.none_value_dict[varname]

    def debug_filename(self, name):
        """
        Build a debug file name of the form ``<class path>-<name>-new.txt``
        """
        # str(cls) looks like "<class 'pkg.Name'>"; the slice drops the
        # leading "<class '" (8 chars) and the trailing "'>" (2 chars).
        return str(self.__class__)[8:-2] + '-' + name + '-new.txt'





class XLabelEncoder(XBaseTransformer):
    """
    Label-encodes nominal columns, folding rare and unseen labels together

    Values are stringified; labels whose count does not exceed
    ``quantity_treshold`` (and labels never seen during fit) are replaced by
    the column's "other" label before the wrapped transformer is applied.
    """

    def __init__(
            self,
            none_value='__NONE__',
            none_value_strategy='const',
            transformer=LabelEncoder(),
            select_columns=None,
            quantity_treshold=0,
            other_label='__OTHER__',
            other_label_strategy='const'
    ):
        super().__init__(
            none_value=none_value,
            none_value_strategy=none_value_strategy,
            transformer=transformer,
            select_columns=select_columns
        )

        # Per-column state filled during fit.
        self.value_dicts = {}
        self.other_label_dict = {}

        self.other_label_strategy = other_label_strategy
        self.other_label = other_label
        self.quantity_treshold = quantity_treshold

    def prepare_var(self, x, varname):
        """
        Fill missing values, stringify, and map every value through the
        column's value dictionary (unknown values get the "other" value)
        """
        def encode(raw):
            known = self.value_dicts[varname]
            if raw in known:
                return known[raw]
            return self.get_other_value(varname)

        as_text = x.fillna(self.get_none_value(varname)).astype('str')
        return as_text.apply(encode)

    def get_other_value(self, varname):
        """
        Return the replacement stored for rare/unknown labels of a column
        """
        return self.other_label_dict[varname]

    def init_value_dict(self, x, varname):
        """
        Build the identity mapping of sufficiently frequent labels and the
        column's "other" label
        """
        as_text = x.fillna(self.get_none_value(varname)).astype('str')
        counts = as_text.value_counts()

        frequent = {}
        for label, quantity in counts.items():
            if quantity > self.quantity_treshold:
                frequent[label] = label

        self.init_other_label_dict(x, varname, frequent, counts)
        self.value_dicts[varname] = frequent

    def init_other_label_dict(self, x, varname, value_dict, counts):
        """
        Store the "other" label: the column mode when no label was dropped
        as rare, otherwise whatever ``other_label_strategy`` yields
        """
        nothing_dropped = len(counts) == len(value_dict)
        strategy = 'mode' if nothing_dropped else self.other_label_strategy
        chosen = self.calc_default_value(x, varname, strategy, self.other_label)
        self.other_label_dict[varname] = str(chosen)

    def before_fit_var(self, x, varname):
        """
        Prepare the label dictionaries before the column is fitted
        """
        self.init_value_dict(x, varname)


class XLabelZcoder(XLabelEncoder):
    """
    Encodes each label by a lower confidence bound of the share of class 1

    The target is read from column ``class_label_varname`` of the frame given
    to ``fit`` and is looked up under the keys ``-1`` and ``1``.
    """
    def __init__(
            self,
            none_value='__NONE__',
            none_value_strategy='const',
            select_columns=None,
            quantity_treshold=0,
            other_label='__OTHER__',
            other_label_strategy='const',
            class_label_varname='Target',
            alpha=0.05
    ):
        super().__init__(
            none_value=none_value,
            none_value_strategy=none_value_strategy,
            transformer=None,
            select_columns=select_columns,
            quantity_treshold=quantity_treshold,
            other_label=other_label,
            other_label_strategy=other_label_strategy
        )

        self.class_label_varname = class_label_varname
        self.alpha = alpha
        # Set by fit: target Series, normal quantile, bound for the whole sample.
        self.y = None
        self.z = None
        self.default_pleft = None

    def fit(self, X, y=None):
        """
        Compute the quantile and the overall bound, then fit column by column
        """
        self.z = norm.ppf(1 - self.alpha)
        target = X[self.class_label_varname]
        self.y = target
        class_sizes = target.value_counts()
        self.default_pleft = self.uleft(class_sizes[-1], class_sizes[1])
        return super().fit(X, y)

    def zleft(self, n0, n1):
        """
        Lower bound ``p - z * sqrt(p * (1 - p) / n)`` for ``p = n1 / (n0 + n1)``
        """
        total = n0 + n1
        p = n1 / total
        spread = np.sqrt(p * (1 - p) / total)
        return p - self.z * spread

    def uleft(self, n0, n1):
        """
        Lower bound of the score-type interval for ``p = n1 / (n0 + n1)``
        """
        z2 = self.z ** 2
        n = n0 + n1
        p = n1 / n
        centre = p + z2 / 2 / n
        margin = np.sqrt(p * (1 - p) / n + z2 / 4 / n ** 2)
        return 1 / (1 + z2 / n) * (centre - margin)

    def fit_var(self, x, varname):
        """
        Fit one var

        Replaces the column's identity value dictionary by a mapping
        label -> bound, and the "other" label by its bound (or the overall
        bound when that label does not occur in the column).

        :param x: Series
        :param varname: string
        """
        prepared = self.fit_prepare_var(x, varname)
        cross_tab = pd.crosstab(prepared, self.y)

        bound_by_label = {}
        for label in cross_tab.index:
            row = cross_tab.loc[label]
            bound_by_label[label] = self.uleft(row[-1], row[1])

        known = self.value_dicts[varname]
        mapping = {}
        for raw in known:
            mapping[raw] = bound_by_label[known[raw]]

        fallback_label = self.get_other_value(varname)
        if fallback_label in bound_by_label:
            fallback = bound_by_label[fallback_label]
        else:
            fallback = self.default_pleft

        mapping[fallback_label] = fallback
        self.other_label_dict[varname] = fallback
        self.value_dicts[varname] = mapping



    def transform_var(self, x, varname):
        """
        The prepared values already are the encoding; no transformer is applied
        """
        return self.transform_prepare_var(x, varname)



class XScaler(XBaseTransformer):
    """
    Scales numeric columns one by one and optionally adds "is missing" flags

    For every column that contained missing values during fit, a 0/1 flag
    column named ``<column><is_none_vars_suffix>`` is produced by transform
    (returned only when ``return_is_none_vars`` is true).

    :param none_value: constant replacement for missing values
    :param none_value_strategy: 'const', 'mean', 'median', 'mode' or 'flag_var'
    :param transformer: prototype scaler, deep-copied per column
    :param select_columns: columns to process (None = all)
    :param is_none_vars_suffix: suffix of the generated flag columns
    :param scale_none_value: fit the scaler on filled values (True) or only on
        the non-missing values (False); overridden by fit for some strategies
    :param return_is_none_vars: include the flag columns in the output
    """
    def __init__(
            self,
            none_value=0,
            none_value_strategy='const',
            transformer=MinMaxScaler(),
            select_columns=None,
            is_none_vars_suffix='_none',
            scale_none_value=False,  # TODO: remove by tying it to none_value_strategy
            return_is_none_vars=True  # TODO: remove by tying it to none_value_strategy
    ):
        super().__init__(
            none_value=none_value,
            none_value_strategy=none_value_strategy,
            transformer=transformer,
            select_columns=select_columns
        )

        self.return_is_none_vars = return_is_none_vars
        self.scale_none_value = scale_none_value

        self.is_none_vars_suffix = is_none_vars_suffix

        self.is_none_columns = []
        self.is_none_df = None

    @staticmethod
    def _as_column(values):
        """Return the values as an (n, 1) ndarray, the shape scalers expect.

        Converting first avoids ``Series[:, np.newaxis]``, which pandas no
        longer supports.
        """
        return np.asarray(values).reshape(-1, 1)

    def fit_prepare_var(self, x, varname):
        """Values used to fit the scaler: filled, or with missing rows dropped."""
        if self.scale_none_value:
            return self._as_column(x.fillna(self.get_none_value(varname)))
        return self._as_column(x[x.notnull()])

    def transform_prepare_var(self, x, varname):
        """Values passed to the scaler at transform time (missing values filled)."""
        return self._as_column(x.fillna(self.get_none_value(varname)))

    def prepare_transform_result(self, X):
        """Return the scaled columns, plus the flag columns when requested."""
        cols = self.columns + (self.is_none_columns if self.return_is_none_vars else [])
        if self.debug_csv:
            X[cols].to_csv(self.debug_filename('vars'))
        return X[cols]

    def before_transform(self, X):
        """Add the 0/1 missing-value flag columns before any value is filled."""
        suffix_len = len(self.is_none_vars_suffix)
        for v_is_none in self.is_none_columns:
            # Explicit end index so an empty suffix does not yield an empty name.
            v = v_is_none[:len(v_is_none) - suffix_len]
            X[v_is_none] = X[v].isnull() * 1
        self.X_tr = X
        return X

    def set_columns(self, X, y=None):
        """Select the columns and reset the list of flag columns."""
        super().set_columns(X, y)
        self.is_none_columns = []

    def get_is_none_varname(self, varname):
        """Return the name of the flag column belonging to ``varname``."""
        return varname + self.is_none_vars_suffix

    def transform_var(self, x, varname):
        """Scale one column.

        When the column has a flag and the scaler was fitted without the
        filled values, rows that were missing get the raw none value instead
        of its scaled image.
        """
        x_tr = super().transform_var(x, varname).ravel()

        if self.get_is_none_varname(varname) in self.is_none_columns:
            x_is_none = x.isnull() * 1
            if self.return_is_none_vars:
                if self.is_none_df is None:
                    self.is_none_df = pd.DataFrame()
                self.is_none_df[self.get_is_none_varname(varname)] = x_is_none

            if not self.scale_none_value:
                x_tr = x_tr * (1 - x_is_none) + self.get_none_value(varname) * x_is_none

        return x_tr

    def calc_default_value(self, x, varname, strategy, default_value):
        """Extend the base strategies with 'mean', 'median' and 'flag_var' (-> 0)."""
        if strategy == 'mean':
            return x.mean()
        elif strategy == 'median':
            return x.median()
        elif strategy == 'flag_var':
            return 0
        return super().calc_default_value(x, varname, strategy, default_value)

    def fit_none_value(self, x, varname):
        """Register a flag column when the column has missing values, then
        store the replacement value."""
        obs_count = len(x)
        if x.count() < obs_count:
            is_nan_name = varname + self.is_none_vars_suffix
            self.is_none_columns.append(is_nan_name)
        super().fit_none_value(x, varname)

    def fit(self, X, y=None):
        """Derive the scaling/flag switches from the strategy, then fit.

        'mean'/'median'/'mode' force ``scale_none_value`` on; 'flag_var'
        forces it off and forces the flag columns to be returned.
        """
        if self.none_value_strategy in ['mean', 'median', 'mode']:
            self.scale_none_value = True
        elif self.none_value_strategy == 'flag_var':
            self.scale_none_value = False
            self.return_is_none_vars = True
        return super().fit(X, y)


class XVarSelector(BaseEstimator, TransformerMixin):
    """
    Stateless column selector

    :param varnames: column names (mode 'names') or column positions (any
        other mode); None returns the input untouched
    :param mode: 'names' selects by label, anything else by position
    """
    def __init__(self, varnames=None, mode='names'):
        self.mode = mode
        self.varnames = varnames

    def fit(self, X, y=None):
        """Nothing to learn; returns self"""
        return self

    def _select_by_names(self, X, y=None):
        """Label-based selection via ``X[varnames]``"""
        return X[self.varnames]

    def _select_by_colnum(self, X, y=None):
        """Positional selection on the array form of X"""
        as_array = np.array(X)
        return as_array[:, self.varnames]

    def transform(self, X, y=None):
        """Return the selected columns, or X itself when no selection is set"""
        if self.varnames is None:
            return X
        select = self._select_by_names if self.mode == 'names' else self._select_by_colnum
        return select(X, y)


class XNominalPairs(XLabelEncoder):
    """
    Label-encodes pairs of nominal columns

    Every entry of ``select_columns`` must have the form ``'<col1>_<col2>'``
    (exactly one underscore); the encoded value is built from the two source
    columns joined with ``'_'``.
    """
    def __init__(
            self,
            select_columns,
            none_value='__NONE__',
            none_value_strategy='const',
            transformer=LabelEncoder(),
            quantity_treshold=0,
            other_label='__OTHER__',
            other_label_strategy='const'
    ):
        super().__init__(
            none_value=none_value,
            none_value_strategy=none_value_strategy,
            transformer=transformer,
            select_columns=select_columns,
            quantity_treshold=quantity_treshold,
            other_label=other_label,
            other_label_strategy=other_label_strategy
        )


    def get_one_var(self, X, varname):
        """
        Build the combined Series for a pair name, filling missing parts
        with ``none_value``; optionally dumps it to a debug file
        """
        left_name, right_name = varname.split('_')
        left = X[left_name].fillna(self.none_value)
        right = X[right_name].fillna(self.none_value)
        combined = left + '_' + right
        if self.debug_csv:
            combined.to_csv(self.debug_filename(varname))
        return combined


class XEstimatorVar(BaseEstimator, TransformerMixin):
    """
    Turns an estimator into a one-column feature: the second column of its
    ``predict_proba`` output

    :param est: estimator with ``fit`` and ``predict_proba``; it is fitted in
        place (not cloned)
    """
    def __init__(self, est):
        self.est_model = None
        self.est = est

    def fit(self, X, y):
        """Fit the wrapped estimator and keep what its ``fit`` returns"""
        fitted = self.est.fit(X, y)
        self.est_model = fitted
        return self

    def transform(self, X):
        """Return probability column 1 as an (n_samples, 1) array"""
        second_column = self.est_model.predict_proba(X)[:, 1]
        return second_column[:, np.newaxis]


class XFilter(BaseEstimator, TransformerMixin):
    """
    Row filter: keeps the rows selected by ``condition(X)``

    :param condition: callable taking X and returning an indexer usable as
        ``X[...]`` (e.g. a boolean mask); None keeps every row
    """
    def __init__(self, condition=None):
        self.condition = condition

    def fit(self, X, y=None):
        """Nothing to learn; returns self

        ``y`` is optional because ``TransformerMixin.fit_transform`` calls
        ``fit(X)`` without a target when none is given.
        """
        return self

    def transform(self, X):
        """Return the filtered X (X itself when no condition is set)"""
        if self.condition is None:
            return X
        return X[self.condition(X)]


class XLambda(BaseEstimator, TransformerMixin):
    """
    Applies element-wise functions to groups of DataFrame columns

    :param transforms: iterable of ``(function, columns)`` pairs; each
        function is mapped over every element of the listed columns, in the
        given order. None (the default) means no transformation.
    :param copy: work on a copy of X (True) or modify X in place (False)
    """
    def __init__(self, transforms=None, copy=True):
        # None instead of a literal [] default: a mutable default would be
        # one list object shared by every instance created without arguments.
        self.transforms = transforms
        self.copy = copy

    def fit(self, X, y=None):
        """Nothing to learn; returns self

        ``y`` is optional because ``TransformerMixin.fit_transform`` calls
        ``fit(X)`` without a target when none is given.
        """
        return self

    def transform(self, X):
        """Return X with every configured function applied"""
        if self.copy:
            X = X.copy()
        transforms = [] if self.transforms is None else self.transforms
        for lambda_, cols in transforms:
            X[cols] = X[cols].apply(lambda x: x.map(lambda_))
        return X


In [None]:
import re
import numpy as np
import pandas as pd


class FeatureGenerator:
    """Adds one generated column (or several) to a DataFrame in place.

    :param name: column name (or list of names); auto-generated as
        ``new_feat_<n>`` when omitted
    :param transformer: callable ``X -> values`` used by the default
        ``_generate``; the attribute is only set when one is given
    :param var_type: free-form type tag, used by FeatureGeneratorList.new_feats
    :param fillna: if not None, missing values of the new column are
        replaced by it after generation
    """
    # Number of auto-generated names handed out so far (shared by all instances).
    feature_counter = 0
    feature_prefix = 'new_feat_'

    def __init__(self, name=None, transformer=None, var_type='numeric', fillna=None):
        self.name = name or self._name()
        self.var_type = var_type
        self.fillna = fillna
        if transformer is not None:
            self.transformer = transformer

    def fit(self, X, y=None, X_test=None):
        """Learn whatever the generator needs; no-op here. Returns self."""
        return self

    def fit_generate(self, X, y=None, X_test=None):
        """Fit and then generate the feature on X (in place, returns None)."""
        self.fit(X, y, X_test).generate(X, y)

    def generate(self, X, y=None):
        """Write the feature into X in place, then apply ``fillna``.

        An already existing column is reported with a message and overwritten.
        """
        if self.check_column_exists(X):
            # raise self.FeatureGeneratorException('Column name exists!')
            print(f'Column {self.name} exists!')
        self._generate(X, y)
        self._fillna(X, y)

    def _fillna(self, X, y=None):
        """Replace missing values of the generated column when ``fillna`` is set."""
        if self.fillna is not None:
            X[self.name] = X[self.name].fillna(self.fillna)

    def _generate(self, X, y=None):
        """Default generation: store ``transformer(X)`` under ``name``."""
        X[self.name] = self.transformer(X)

    def check_column_exists(self, X):
        """Return True if any of this generator's names is already a column of X."""
        names = [self.name] if isinstance(self.name, str) else self.name
        return any(name in X.columns for name in names)

    def _name(self):
        """Return the next unused automatic name.

        The counter is incremented on the class itself: ``self.x += 1`` would
        only create an instance attribute and every instance would get the
        same ``..._0`` name.
        """
        name = self.feature_prefix + str(FeatureGenerator.feature_counter)
        FeatureGenerator.feature_counter += 1
        return name

    class FeatureGeneratorException(Exception):
        pass


class FeatureGeneratorList:
    """Runs a sequence of feature generators in the order given."""

    def __init__(self, feature_generators):
        self.feature_generators = feature_generators

    def fit(self, X, y=None, X_test=None):
        """Fit every generator in turn and return self."""
        for generator in self.feature_generators:
            generator.fit(X, y, X_test)
        return self

    def generate(self, X, y=None):
        """Let every generator add its feature(s) to X."""
        for generator in self.feature_generators:
            generator.generate(X, y)

    def fit_generate(self, X, y=None, X_test=None):
        """Fit-and-generate each generator on X, then generate on X_test.

        Generators are processed one at a time, so each one sees the columns
        produced by its predecessors in both frames.
        """
        has_test = X_test is not None
        for generator in self.feature_generators:  # type: FeatureGenerator
            generator.fit_generate(X, y, X_test)
            if has_test:
                generator.generate(X_test)

    def new_feats(self, var_type=None):
        """Return the names of generated features, optionally only those
        whose generator has the given ``var_type``."""
        collected = []
        for generator in self.feature_generators:
            if var_type is not None and generator.var_type != var_type:
                continue
            produced = generator.name
            if type(produced) is str:
                collected.append(produced)
            else:
                collected = collected + produced
        return collected


class NormByGroup(FeatureGenerator):
    """Normalises ``source_var`` within the groups defined by ``group_var``.

    :param source_var: column to normalise
    :param group_var: column defining the groups
    :param name: name of the new column (default: see ``_name``)
    :param transformer: passed to the base class; not used by this generator
    :param mode: ``'index'`` gives ``value / group mean``; any other value
        gives ``(value - group mean * exclude_mean) / group std``
    :param exclude_mean: whether the group mean is subtracted in the
        non-'index' mode
    :param test_x: stored as ``self.test_x``; not read in this class
    """

    def __init__(self, source_var, group_var, name=None, transformer=None, mode='norm', exclude_mean=True, test_x=None):
        # These must be set before super().__init__, which may call _name().
        self.source_var = source_var
        self.group_var = group_var
        self.mode = mode
        self.mean = {}
        self.std = {}
        self.exclude_mean = exclude_mean
        self.test_x = test_x
        super().__init__(name=name, transformer=transformer)

    def fit(self, X, y=None, test_X=None):
        """Compute per-group mean and sample std (ddof=1) over X, stacked
        with ``test_X`` when it is given. Returns self."""
        # pd.concat replaces DataFrame.append, which pandas has removed.
        X_ = X if test_X is None else pd.concat([X, test_X])
        grouped = X_.groupby(self.group_var)[self.source_var]
        # dict(Series) keeps numpy scalars, so division by a zero mean/std
        # yields inf/nan rather than raising ZeroDivisionError.
        self.mean = dict(grouped.mean())
        self.std = dict(grouped.std(ddof=1))
        return self

    def _lambda(self, v, group):
        """Normalise a single value using the statistics of its group."""
        if self.mode == 'index':
            return v / self.mean[group]
        return (v - self.mean[group] * self.exclude_mean) / self.std[group]

    def _generate(self, X, y=None):
        """Write the normalised values into ``X[self.name]``."""
        X[self.name] = [
            self._lambda(v, group) for v, group in zip(X[self.source_var], X[self.group_var])
        ]

    def _name(self):
        """Default column name: ``<source_var>_<mode>_by_<group_var>``."""
        return self.source_var + f'_{self.mode}_by_' + self.group_var


class LatentFeats(FeatureGenerator):
    """Household-level features from a model fitted on household-head rows.

    The model is fitted and applied only on rows where ``parentesco1 == 1``;
    its outputs are then broadcast to every row sharing the same ``idhogar``.

    :param n_feats: number passed to the ``transformer`` factory; in
        'transform' mode also the number of generated columns
    :param source_feats: input columns for the model (missing values -> 0)
    :param name: prefix of the generated columns ('transform' mode) or the
        column name itself (other modes)
    :param transformer: factory called as ``transformer(n_feats)``; the result
        must offer ``fit`` and ``transform`` (or ``predict`` in other modes)
    :param test_x: stored as ``self.test_X``; not read in this class
    :param mode: ``'transform'`` uses ``transform`` and creates ``n_feats``
        columns; anything else uses ``predict`` and creates one column
    :param var_type: type tag forwarded to the base class
    """

    def __init__(self, n_feats, source_feats, name=None, transformer=None, test_x=None, mode='transform', var_type='numeric'):
        # These must be set before super().__init__, which calls _name().
        self.n_feats = n_feats
        self.mode = mode
        self.source_feats = source_feats
        self.index_var = 'idhogar'
        self.index_indicator = 'parentesco1'
        self.name_prefix = name
        self.test_X = test_x
        super().__init__(transformer=transformer(n_feats), var_type=var_type)

    def fit(self, X, y=None, test_X=None):
        """Fit the model on the head rows of X, stacked with ``test_X`` when
        it is given. Returns self."""
        # pd.concat replaces DataFrame.append, which pandas has removed.
        X_ = X if test_X is None else pd.concat([X, test_X])
        self.transformer.fit(X_[self.source_feats][X_[self.index_indicator] == 1].fillna(0))
        return self

    def _generate(self, X, y=None):
        """Compute the model output per head row and map it onto all rows
        of the same household."""
        # The head rows do not change while columns are added, so select them once.
        heads = X[X[self.index_indicator] == 1]
        head_feats = heads[self.source_feats].fillna(0)
        head_ids = heads[self.index_var]
        if self.mode == 'transform':
            values = self.transformer.transform(head_feats)
            for i, varname in enumerate(self.name):
                X[varname] = X[self.index_var].map(dict(zip(head_ids, values[:, i])))
        else:
            values = self.transformer.predict(head_feats)
            X[self.name_prefix] = X[self.index_var].map(dict(zip(head_ids, values)))

    def _name(self):
        """Return ``['<prefix>_0', ...]`` in 'transform' mode, else the prefix."""
        if self.mode == 'transform':
            return [f'{self.name_prefix}_{i}' for i in range(0, self.n_feats)]
        return self.name_prefix


class OneHotDecoder(FeatureGenerator):
    """Collapses a group of indicator columns into a single coded column.

    The i-th source column (1-based) contributes the code ``i``; the result
    per row is the dot product of the codes with the row values.

    :param columns: explicit list of source columns, or None to pick them
        with ``regexp`` during fit
    :param regexp: pattern matched (``re.match``) against column names
    """

    def __init__(self, name=None, transformer=None, columns=None, regexp=None):
        super().__init__(name=name, transformer=transformer, var_type='nominal')
        self.codes = None
        self.regexp = regexp
        self.columns = columns

    def fit(self, X, y=None, test_X=None):
        """Resolve the source columns if needed and assign codes 1..n."""
        if self.columns is None:
            matched = []
            for column in X.columns:
                if re.match(self.regexp, column):
                    matched.append(column)
            self.columns = matched
        n_columns = len(self.columns)
        self.codes = list(range(1, n_columns + 1))
        return self

    def _generate(self, X, y=None):
        """Write the decoded integer code of every row into ``X[self.name]``."""
        decoded = []
        for row in X[self.columns].values:
            decoded.append(int(np.dot(self.codes, row)))
        X[self.name] = decoded


In [None]:
from sklearn.exceptions import DataConversionWarning, UndefinedMetricWarning
import warnings
# Silence noisy warning categories for the rest of the notebook
# (registered in the same order as before).
for _ignored_category in (
        DataConversionWarning,
        FutureWarning,
        UndefinedMetricWarning,
        DeprecationWarning,
):
    warnings.filterwarnings(action='ignore', category=_ignored_category)

In [None]:
from sklearn.cluster import KMeans
from lightgbm import LGBMClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold, GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from statsmodels.stats.weightstats import zconfint
from sklearn.linear_model import LogisticRegression

### Reading data and preparing feat generators

In [None]:
# Training data; ``hh_head`` flags the rows where parentesco1 == 1.
df = pd.read_csv('../input/train.csv')
hh_head = df.parentesco1.eq(1)

# Missing values in these four columns are replaced by zero.
df[['meaneduc', 'SQBmeaned', 'v18q1', 'rez_esc']] = (
    df[['meaneduc', 'SQBmeaned', 'v18q1', 'rez_esc']].fillna(0)
)

# Housing-material / utilities indicator columns (input of the latent features).
material_feats = list(
    df.filter(
        regex=r'^(?:pared|piso|techo|abasta|sanitario|energcocinar|elimbasu|epared|etecho).*$'
    ).columns
) + ['public', 'planpri', 'noelec', 'coopele']

# Typical years of schooling per age, with all ages >= 65 pooled into one bucket.
# NOTE: despite the ``_mean`` suffix the statistic stored here is the median.
education_by_age_mean = df.groupby(df.age.where(df.age < 65, 65))['escolari'].median()
education_by_age_mean = {
    age: education_by_age_mean[min(age, 65)] for age in range(df.age.max() + 1)
}

# Ordered list of feature generators. Several entries read columns produced by
# earlier entries (e.g. ``educ_balanced``, ``region``, ``rent_by_room``), so the
# order of this list is significant.
feature_generators = FeatureGeneratorList([
    # One-hot decode: collapse each one-hot column group into one nominal column.
    # Patterns are raw strings so that ``\d`` reaches ``re`` verbatim instead of
    # being parsed as an (invalid) string escape sequence.
    OneHotDecoder(name='region', regexp=r'^lugar\d+$'),
    OneHotDecoder(name='outside_wall', regexp=r'^pared.*$'),
    OneHotDecoder(name='floor', regexp=r'^piso.*$'),
    OneHotDecoder(name='roof', regexp=r'^techo.*$'),
    OneHotDecoder(name='water_provision', regexp=r'^abastagua.*$'),
    OneHotDecoder(name='electricity', columns=['public', 'planpri', 'noelec', 'coopele']),
    OneHotDecoder(name='toilet', regexp=r'^sanitario\d+$'),
    OneHotDecoder(name='cook_energy', regexp=r'^energcocinar\d+$'),
    OneHotDecoder(name='rubbish', regexp=r'^elimbasu\d+$'),
    OneHotDecoder(name='wall_state', regexp=r'^epared\d+$'),
    OneHotDecoder(name='roof_state', regexp=r'^etecho\d+$'),
    OneHotDecoder(name='family_state', regexp=r'^estadocivil\d+$'),
    OneHotDecoder(name='family_head', regexp=r'^parentesco\d+$'),
    OneHotDecoder(name='education', regexp=r'^instlevel\d+$'),
    OneHotDecoder(name='property_state', regexp=r'^tipovivi\d+$'),

    # Rent feats:
    FeatureGenerator(
        name='rent_by_room',
        transformer=lambda df: df.v2a1/df.rooms
    ),
    FeatureGenerator(
        name='rent_by_18',
        transformer=lambda df: df.idhogar.map(
            dict(df.groupby('idhogar')['v2a1'].mean()/df[df.age>=18].groupby('idhogar')['idhogar'].count())
        ),
    ),

    # Household-level aggregates (computed per ``idhogar`` and mapped back to rows):
    FeatureGenerator(
        name='dis_any',
        transformer=lambda df: df.idhogar.map(dict(df.groupby('idhogar')['dis'].any()*1)),
        var_type='binary'
    ),
    FeatureGenerator(
        name='dis_sum',
        transformer=lambda df: df.idhogar.map(dict(df.groupby('idhogar')['dis'].sum())),
    ),
    FeatureGenerator(
        name='age_mean',
        transformer=lambda df: df.idhogar.map(dict(df.groupby('idhogar')['age'].mean())),
    ),
    FeatureGenerator(
        name='age_std',
        transformer=lambda df: df.idhogar.map(
            dict(df.groupby('idhogar')['age'].std(1))
        )
    ),
    FeatureGenerator(
        name='age_median',
        transformer=lambda df: df.idhogar.map(dict(df.groupby('idhogar')['age'].median())),
    ),

    # Mobile phones per household member, for several definitions of "member":
    FeatureGenerator(
        name='mobilification',
        transformer=lambda df: df.idhogar.map(
            dict(df.groupby('idhogar')['qmobilephone'].mean() / df.groupby('idhogar')['tamhog'].mean())
        ),
    ),
    FeatureGenerator(
        name='mobilification_10',
        transformer=lambda df: df.idhogar.map(
            dict(df.groupby('idhogar')['qmobilephone'].mean() / df[df.age > 10].groupby('idhogar')['idhogar'].count())
        ),
    ),
    FeatureGenerator(
        name='mobilification_18',
        transformer=lambda df: df.idhogar.map(
            dict(df.groupby('idhogar')['qmobilephone'].mean() / df[df.age > 18].groupby('idhogar')['idhogar'].count())
        ),
    ),
    FeatureGenerator(
        name='mobilification_adult',
        transformer=lambda df: df.idhogar.map(
            dict(
                df.groupby('idhogar')['qmobilephone'].mean() / df[
                    (df.age >= 14) & (df.age<66)
                ].groupby('idhogar')['idhogar'].count())
        ),
    ),
    FeatureGenerator(
        name='bedrooms_share',
        transformer=lambda df: df.idhogar.map(
            dict(df.groupby('idhogar')['bedrooms'].mean() / df.groupby('idhogar')['rooms'].mean())
        ),
    ),
    LatentFeats(5, material_feats, 'material_cluster', KMeans, mode='predict'),

    # Education relative to the per-age reference level (``education_by_age_mean``):
    FeatureGenerator(
        name='educ_balanced',
        transformer=lambda df: df.escolari - df.age.map(education_by_age_mean)
    ),
    FeatureGenerator(
        name='educ_balanced_std',
        transformer=lambda df: df.idhogar.map(
            dict(df.groupby('idhogar')['educ_balanced'].std(1))
        )
    ),
    FeatureGenerator(
        name='max_educ_years_18',
        transformer=lambda df: df.idhogar.map(
            dict(df[df.age>=18].groupby('idhogar')['escolari'].max())
        ),
    ),
    FeatureGenerator(
        name='min_educ_years',
        transformer=lambda df: df.idhogar.map(
            dict(df.groupby('idhogar')['escolari'].min())
        ),
    ),
    FeatureGenerator(
        name='educ_balanced_max_18',
        transformer=lambda df: df.idhogar.map(
            dict(df[df.age>=18].groupby('idhogar')['educ_balanced'].max())
        ),
    ),
    FeatureGenerator(
        name='educ_balanced_mean',
        transformer=lambda df: df.idhogar.map(
            dict(df.groupby('idhogar')['educ_balanced'].mean())
        ),
    ),

    # "Progress" features: younger-generation level minus older-generation level.
    FeatureGenerator(
        name='educ_balanced_25_45',
        transformer=lambda df: df.idhogar.map(
            dict(df[(df.age>=25) & (df.age<=45)].groupby('idhogar')['educ_balanced'].mean())
        ),
        fillna=0
    ),
    FeatureGenerator(
        name='educ_balanced_46_',
        transformer=lambda df: df.idhogar.map(
            dict(df[(df.age>=46)].groupby('idhogar')['educ_balanced'].mean())
        ),
        fillna=0
    ),
    FeatureGenerator(
        name='educ_balanced_progress1',
        transformer=lambda df: df.educ_balanced_25_45 - df.educ_balanced_46_,
        fillna=0
    ),
    FeatureGenerator(
        name='educ_balanced_10_30',
        transformer=lambda df: df.idhogar.map(
            dict(df[(df.age>=10) & (df.age<=30)].groupby('idhogar')['educ_balanced'].mean())
        ),
        fillna=0
    ),
    FeatureGenerator(
        name='educ_balanced_31_',
        transformer=lambda df: df.idhogar.map(
            dict(df[(df.age>=31)].groupby('idhogar')['educ_balanced'].mean())
        ),
        fillna=0
    ),
    FeatureGenerator(
        name='educ_balanced_progress2',
        transformer=lambda df: df.educ_balanced_10_30 - df.educ_balanced_31_,
        fillna=0
    ),
    LatentFeats(10, material_feats, 'material_pca', PCA),

    # means / indexes by means by region groups:
    NormByGroup('v2a1', 'region', name='rent_norm_by_region'),
    NormByGroup('rent_by_room', 'region'),
    NormByGroup('v2a1', 'region', name='rent_index_by_region', mode='index'),
    NormByGroup('rent_by_room', 'region', mode='index'),
    NormByGroup('meaneduc', 'region'),
    NormByGroup('meaneduc', 'region', mode='index'),
    NormByGroup('educ_balanced', 'region'),
    NormByGroup('educ_balanced', 'region', mode='index'),
])

# Inclusive age brackets used for the household composition shares.
age_groups = [(0, 10), (11, 18), (19, 65), (66, 200)]


def age_int_transformer(int_, sex=None):
    """Build a transformer computing the household share of an age bracket.

    Parameters
    ----------
    int_ : sequence
        ``(lower, upper)`` age bounds, both inclusive.
    sex : str or None
        Name of a 0/1 indicator column (e.g. ``'male'``). When given, only
        members whose indicator equals 1 are counted in the numerator.

    Returns
    -------
    callable
        ``func_(df) -> Series`` aligned with ``df``: for every row, the number
        of members of the row's household (``idhogar``) that fall in the
        bracket (and match ``sex``), divided by the number of household
        members with a non-null age.

    Notes
    -----
    The sex filter is applied as an explicit boolean mask. Flipping the sign of
    the age for the other sex does not work for age 0 (``0 * -1 == 0``), which
    would count newborns of the other sex in any bracket starting at 0.
    """
    lower, upper = int_[0], int_[1]

    def func_(df):
        household = df.idhogar
        # ``between`` is inclusive on both ends; missing ages never match.
        in_group = df.age.between(lower, upper)
        if sex is not None:
            in_group = in_group & (df[sex] == 1)
        share = (
            in_group.astype(int).groupby(household).sum()
            / df.age.groupby(household).count()
        )
        return household.map(share.to_dict())

    return func_

# Household age-bracket shares: overall first, then restricted to males, then
# to females (same order in which the generators were registered before).
feature_generators.feature_generators.extend([
    FeatureGenerator(
        name=f'{prefix}_{gr[0]}_{gr[1]}_share',
        transformer=age_int_transformer(gr, sex_column),
    )
    for prefix, sex_column in (('age', None), ('male', 'male'), ('female', 'female'))
    for gr in age_groups
])


# feature_generators.fit_generate(df, df.Target)

### Features selected for the base models

In [None]:
# LGBMClassifier feats
# gb_num_vars: numeric columns, scaled by the 'scaled' branch of gb_pipeline.
gb_num_vars = ['age_mean', 'educ_balanced_mean', 'educ_balanced_index_by_region', 'age_std', 'age', 'educ_balanced_std', 'age_median', 'mobilification', 'meaneduc', 'educ_balanced_progress2', 'educ_balanced_norm_by_region', 'educ_balanced_progress1', 'educ_balanced_max_18', 'overcrowding', 'male_19_65_share', 'edjefe', 'max_educ_years_18', 'age_19_65_share', 'educ_balanced_25_45', 'rooms', 'v2a1', 'educ_balanced_10_30', 'bedrooms_share', 'min_educ_years', 'dependency', 'r4t2', 'rent_by_room', 'edjefa', 'mobilification_10', 'female_19_65_share', 'educ_balanced_31_', 'r4t1', 'educ_balanced_46_', 'r4h2', 'age_11_18_share', 'bedrooms', 'rent_by_room_norm_by_region', 'age_66_200_share', 'educ_balanced', 'female_66_200_share', 'rent_by_18', 'escolari', 'rent_norm_by_region', 'age_0_10_share', 'r4h3', 'tamviv', 'hogar_mayor', 'rent_by_room_index_by_region', 'female_0_10_share', 'male_11_18_share', 'r4m3', 'dis_sum']
# gb_cat_vars: columns selected as-is by the 'nominal' branch of gb_pipeline.
gb_cat_vars = ['region', 'roof_state', 'v18q', 'outside_wall', 'family_state', 'cook_energy', 'dis', 'floor', 'education', 'wall_state', 'property_state', 'male', 'toilet', 'area1', 'rubbish']

# LogisticRegression feats
# lr_num_vars: numeric columns, standardized by the 'scaled' branch of lr_pipeline.
lr_num_vars =  ['v18q1', 'r4h2', 'r4t2', 'tamviv', 'hogar_nin', 'hogar_adul', 'hogar_mayor', 'dependency', 'edjefa', 'meaneduc', 'qmobilephone', 'SQBescolari', 'SQBhogar_total', 'SQBhogar_nin', 'SQBdependency', 'SQBmeaned', 'age_mean', 'mobilification_adult', 'educ_balanced_mean', 'rent_by_room_norm_by_region', 'rent_by_room_index_by_region', 'meaneduc_index_by_region', 'male_0_10_share', 'male_19_65_share', 'male_66_200_share', 'female_66_200_share']
# lr_bin_vars: columns selected as-is by the 'binary' branch of lr_pipeline.
lr_bin_vars =  ['v14a', 'refrig', 'v18q', 'eviv1', 'eviv3', 'dis', 'estadocivil2', 'estadocivil4', 'estadocivil6', 'instlevel3', 'instlevel8', 'tipovivi2', 'tipovivi3', 'computer', 'television', 'lugar1', 'lugar3', 'lugar6', 'area1']
# lr_log_vars: columns passed through np.log(1 + x) in the 'clean' step of lr_pipeline.
# NOTE(review): some of these names do not appear in lr_num_vars / lr_bin_vars, so
# presumably their log-transformed values are not fed to the classifier - confirm.
lr_log_vars =  ['v2a1', 'rent_by_room_index_by_region', 'SQBedjefe', 'rent_by_room', 'SQBhogar_total', 'SQBmeaned', 'rent_index_by_region', 'age']

### Base models pipelines

In [None]:
# Logistic-regression base model: clean -> select/scale features -> bagged classifier.
lr_pipeline = Pipeline([
    # Step 1: value clean-up.
    ('clean', XLambda(transforms=[
        (
            # 'no' -> 0, 'yes' -> 1, anything else is parsed as a float.
            lambda x: float(x) if x not in ('no', 'yes') else int(x == 'yes'),
            ['edjefe', 'edjefa', 'dependency'],
        ),
        (
            # log(1 + x) for the columns listed in lr_log_vars.
            lambda x: np.log(1 + x),
            lr_log_vars,
        ),
    ])),
    # Step 2: standardized numeric columns side by side with raw binary columns.
    ('feats', FeatureUnion(transformer_list=[
        (
            'scaled',
            XScaler(
                select_columns=lr_num_vars,
                transformer=StandardScaler(),
                return_is_none_vars=False,
            ),
        ),
        ('binary', XVarSelector(lr_bin_vars)),
    ])),
    # Step 3: we use some sort of bagging for classifier improvements.
    ('clf', FoldsEstimator(
        LogisticRegression(
            penalty='l2',
            C=0.1,
            class_weight='balanced',
            random_state=25,
        ),
        predict_mode='mean',
    )),
])

# LightGBM base model: clean -> select/scale features -> bagged classifier.
gb_pipeline = Pipeline(steps=[
    ('clean', XLambda(transforms=[
        (
            # 'no' -> 0, 'yes' -> 1, anything else is parsed as a float.
            lambda x: 0 if x=='no' else 1 if x=='yes' else float(x),
            ['edjefe', 'edjefa', 'dependency']
        ), (
            # No columns are log-transformed for the tree model (empty list).
            lambda x: np.log(1 + x),
            []
        )
    ])),
    ('feats', FeatureUnion(transformer_list=[
        ('scaled', XScaler(
            select_columns = gb_num_vars,
            transformer=MinMaxScaler(),
            none_value = -100, return_is_none_vars=False,
        )),
        ('nominal', XVarSelector(gb_cat_vars))
    ])),

    # We use some sort of bagging for classifier improvements.
    ('clf', FoldsEstimator(
        LGBMClassifier(learning_rate=0.05,
                        n_estimators=200,
                        num_leaves=20,
                        max_depth=15,
                        class_weight='balanced',
                        # 'multinominal' is not a LightGBM objective name. For a target
                        # with more than two classes the sklearn wrapper substitutes
                        # 'multiclass' for any non one-vs-all objective, so this spells
                        # out the objective that was effectively in use.
                        objective='multiclass',
                        # NOTE(review): 'boosting' is the core-parameter spelling; the
                        # sklearn-level name is 'boosting_type'. Left untouched so the
                        # effective booster does not change - confirm which one wins.
                        boosting='dart'),
        cv=StratifiedKFold(
            n_splits=10,
            shuffle=True,
            random_state=39
        ), predict_mode='mean'
    ))
])

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin, clone
class XEstimatorVar(BaseEstimator, TransformerMixin):
    """Transformer adapter that exposes an estimator's predictions as a feature.

    ``fit`` fits the wrapped estimator ``est`` on ``(X, y)`` and keeps whatever
    ``est.fit`` returns; ``transform`` returns that model's predictions with an
    extra axis inserted at position 1 (an ``(n,)`` prediction vector becomes an
    ``(n, 1)`` column).
    """

    def __init__(self, est):
        self.est = est
        self.est_model = None

    def fit(self, X, y):
        fitted = self.est.fit(X, y)
        self.est_model = fitted
        return self

    def transform(self, X):
        predictions = np.array(self.est_model.predict(X))
        return predictions[:, np.newaxis]

In [None]:
class WeightedMeanEstimator(BaseEstimator, TransformerMixin):
    """Blend two prediction columns into integer labels by a weighted sum.

    Parameters
    ----------
    w : sequence of two numbers
        Weights applied to column 0 and column 1 of ``X`` respectively.
    """

    def __init__(self, w=(0.5, 0.5)):
        # Immutable default: a list default would be one object shared by every
        # instance created without an explicit ``w``.
        self.w = w

    def fit(self, X, y=None):
        """No-op; there is nothing to learn. Returns ``self``."""
        return self

    def predict(self, X):
        """Return ``round(w[0] * X[:, 0] + w[1] * X[:, 1])`` as a list of ints.

        ``X`` must support 2-D NumPy-style indexing (``X[:, 0]``).
        """
        blended = X[:, 0] * self.w[0] + X[:, 1] * self.w[1]
        return [int(value) for value in np.round(blended, 0)]

### Final mix of classifiers

In [None]:
# Final blend of the two bagged base models (logistic regression first, LightGBM
# second), starting from equal estimator weights.
sum_pipeline = MetaEstimator(
    estimators=[lr_pipeline, gb_pipeline],
    estimators_weight=np.array([0.5, 0.5]),
    predicts_weight='proba',
)

In [None]:
# Grid search over the blend weights: [w, 1 - w] for w in 0.2, 0.3, ..., 0.8,
# scored by macro F1 on 5 shuffled stratified folds.
cv = GridSearchCV(
    sum_pipeline,
    param_grid={
        'estimators_weight': [[w, 1 - w] for w in np.linspace(0.2, 0.8, 7)],
    },
    scoring='f1_macro',
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=25),
)

# Disabled by default; switch the condition on to run the weight search.
if False:
    df_test = pd.read_csv('../input/test.csv')
    feature_generators.fit_generate(df, df.Target, X_test=df_test)
    cv.fit(df[hh_head], df.Target[hh_head])
    print(cv.best_score_, cv.best_params_)
    print()
    # Per-fold test scores of the best parameter combination.
    split_scores = [cv.cv_results_[f'split{i}_test_score'][cv.best_index_] for i in range(5)]
    print(split_scores)
    print(zconfint(split_scores))

In [None]:
# Load the test set, then fit the feature generators and build the new feats on
# the train dataset (the test frame is handed over as ``X_test``).
df_test = pd.read_csv('../input/test.csv')
feature_generators.fit_generate(df, df.Target, X_test=df_test)

# Train the blended model on the rows selected by ``hh_head`` only.
sum_pipeline.fit(df.loc[hh_head], df.loc[hh_head, 'Target'])

In [None]:
# Generate the new feats on the test dataset, predict every test row and write
# the (Id, Target) submission file.
feature_generators.generate(df_test, None)
res = df_test[['Id']].copy()
res['Target'] = sum_pipeline.predict(df_test)
res.to_csv('submission.csv', index=False)

### Creating submissions
One of these submissions reached 6th place on the LB with a score of 0.446.

In [None]:
# One submission file per blend weight. Here ``w`` is the weight of the second
# estimator and ``1 - w`` the weight of the first; the file name carries ``w``.
for w in np.linspace(0.2, 0.8, 7):
    sum_pipeline.estimators_weight = [1 - w, w]
    res['Target'] = sum_pipeline.predict(df_test)
    rounded_w = np.round(w, 2)  # strips floating-point noise from the file name
    res.to_csv(f'submission-{rounded_w}.csv', index=False)