# Заполнение пропущенных данных с помощью обученной модели

In [None]:
# Impute missing ``bmi`` values in ``df`` with a decision tree trained on the
# rows where ``bmi`` is known, using ``age`` and ``gender`` as features.
X = df[['age', 'gender', 'bmi']].copy()
# Signed dtype on purpose: the -1 code for 'Other' does not fit into uint8
# (it would wrap around to 255 or raise, depending on the NumPy version).
X['gender'] = X['gender'].replace({'Male': 0, 'Female': 1, 'Other': -1}).astype(np.int8)

bmi_missing = X['bmi'].isna()
Missing = X[bmi_missing]
# Explicit copy so that ``pop`` below modifies an independent frame, not a slice.
X = X[~bmi_missing].copy()
Y = X.pop('bmi')

DT_bmi_pipe = Pipeline(steps=[
    ('scale', StandardScaler()),
    ('lr', DecisionTreeRegressor(random_state=42)),  # step name kept for compatibility
])
DT_bmi_pipe.fit(X, Y)

if Missing.empty:
    # scikit-learn estimators reject zero-sample input, so there is nothing
    # to predict (e.g. when this cell is re-run after ``bmi`` was filled).
    predicted_bmi = pd.Series(index=Missing.index, dtype=float)
else:
    predicted_bmi = pd.Series(
        DT_bmi_pipe.predict(Missing[['age', 'gender']]),
        index=Missing.index,
    )
    df.loc[Missing.index, 'bmi'] = predicted_bmi

### pipe version

In [None]:
class FillNa(BaseEstimator, TransformerMixin):
    """Fill missing values in the selected columns of a DataFrame.

    Parameters
    ----------
    columns : list of column labels
        Columns whose missing values are handled.
    fill_type : str
        Strategy to apply:

        * ``'model'``  - for every column in ``columns`` that has gaps at fit
          time, train a ``StandardScaler`` + ``DecisionTreeRegressor``
          pipeline on the rows where the column is present, using all columns
          outside ``columns`` as features, and predict the gaps in
          ``transform``.
        * ``'mean'`` / ``'median'`` - fill gaps with the column statistic
          learned in ``fit``.
        * ``'drop'`` - drop every row that has a gap in any of ``columns``.
          NOTE: this changes the number of rows, so a separately held target
          vector will no longer be aligned with the result.

        Any other value leaves the data unchanged.

    Attributes
    ----------
    pipes : dict
        Fitted regression pipelines keyed by column (``'model'`` only).
    fill_values : dict
        Learned fill value per column (``'mean'`` / ``'median'`` only).
    """

    def __init__(self, columns, fill_type):
        self.columns = columns
        self.fill_type = fill_type
        self.pipes = {}
        self.fill_values = {}

    def fit(self, df, y=None):
        """Learn whatever the chosen strategy needs from ``df``; return ``self``."""
        # Reset learned state so that refitting never reuses stale models/values.
        self.pipes = {}
        self.fill_values = {}

        if self.fill_type in ('mean', 'median'):
            for col in self.columns:
                # Series.mean() / Series.median() skip NaN by default.
                self.fill_values[col] = getattr(df[col], self.fill_type)()
        elif self.fill_type == 'model':
            # Features are all columns that are not themselves being filled.
            features = df.drop(self.columns, axis=1)
            for fillcol in self.columns:
                known = df[fillcol].notna()
                if known.all():
                    # No gaps seen during fit: no model is trained for this column.
                    continue
                pipe = Pipeline(steps=[
                    ('scale', StandardScaler()),
                    ('lr', DecisionTreeRegressor(random_state=42)),
                ])
                pipe.fit(features[known], df.loc[known, fillcol])
                self.pipes[fillcol] = pipe
        return self

    def transform(self, df):
        """Return a copy of ``df`` with missing values handled per ``fill_type``."""
        # Work on a copy so the caller's frame is never modified.
        df = df.copy()

        if self.fill_type == 'model':
            features = df.drop(self.columns, axis=1)
            for fillcol, pipe in self.pipes.items():
                missing = df[fillcol].isna()
                if not missing.any():
                    # scikit-learn estimators reject zero-sample input.
                    continue
                df.loc[missing, fillcol] = pipe.predict(features[missing])
        elif self.fill_type in ('mean', 'median'):
            for col in self.columns:
                if col in self.fill_values:
                    value = self.fill_values[col]
                else:
                    # Not fitted for this column: fall back to the statistic
                    # of the frame being transformed.
                    value = getattr(df[col], self.fill_type)()
                df[col] = df[col].fillna(value)
        elif self.fill_type == 'drop':
            df = df.dropna(subset=list(self.columns))

        return df

# pipe encoder

In [None]:
class ColEncoder(BaseEstimator, TransformerMixin):
    """Encode the selected DataFrame columns with one ``LabelEncoder`` per column.

    Parameters
    ----------
    columns : list of column labels
        Columns to encode.

    Attributes
    ----------
    encoders : dict
        Fitted ``LabelEncoder`` for each column, filled in by ``fit``.
    """

    def __init__(self, columns):
        self.columns = columns
        self.encoders = {}

    def fit(self, df, y=None):
        """Fit a separate ``LabelEncoder`` on every selected column; return ``self``."""
        # Rebuild the mapping from scratch so refitting drops stale encoders.
        self.encoders = {col: LabelEncoder().fit(df[col]) for col in self.columns}
        return self

    def transform(self, df):
        """Return a copy of ``df`` with the selected columns label-encoded."""
        # Copy first: encoding in place would leave the caller's frame holding
        # integer codes, and a second transform of that frame would then fail
        # on labels the encoders have never seen.
        df = df.copy()
        for col in self.columns:
            df[col] = self.encoders[col].transform(df[col])
        return df