In [None]:
class Col_Extractor(TransformerMixin, BaseEstimator):
    """Select a fixed set of columns from a DataFrame.

    Parameters
    ----------
    cols : list
        Column labels to keep. Always pass a list, even for one column.
    """

    def __init__(self, cols):
        self.cols = cols

    def fit(self, X, y=None, **fit_params):
        """Do nothing and return ``self``; there is nothing to learn."""
        return self

    def transform(self, X, **transform_params):
        """Return ``X[self.cols]``.

        Raises
        ------
        AssertionError
            If ``X`` is not a ``pandas.DataFrame``.
        KeyError
            If the column lookup fails. The message lists the requested
            labels that are absent from ``X``, in the order requested.
        """
        assert isinstance(X, pd.DataFrame)

        try:
            return X[self.cols]
        except KeyError:
            # Wrap a scalar label so a string is not split into characters.
            if pd.api.types.is_list_like(self.cols):
                requested = self.cols
            else:
                requested = [self.cols]
            # A list (not a set) keeps the report in a stable, readable order.
            cols_error = [col for col in requested if col not in X.columns]
            raise KeyError("The DataFrame does not include the columns: %s" % cols_error)

In [None]:
class ModelTransformer(TransformerMixin):
    """Expose a model's predictions as the output of a transformer step.

    ``model`` is any object providing ``fit`` and ``predict``.
    """

    def __init__(self, model):
        self.model = model

    def fit(self, *args, **kwargs):
        """Fit the wrapped model with the given arguments; return ``self``."""
        wrapped = self.model
        wrapped.fit(*args, **kwargs)
        return self

    def transform(self, X, **transform_params):
        """Return the wrapped model's predictions for ``X`` as a DataFrame."""
        predictions = self.model.predict(X)
        return pd.DataFrame(predictions)

In [None]:
# Expects X_train to be passed as a pandas DataFrame.

class Preprocessor(TransformerMixin, BaseEstimator):
    """Clean raw text cells: strip HTML tags, normalise, keep emoticons.

    Works element-wise on a DataFrame; every cell must be a string.
    """

    def fit(self, X, y=None, **fit_params):
        """Do nothing and return ``self``; the transformer is stateless."""
        return self

    @staticmethod
    def _clean(cell):
        """Return the cleaned text for a single cell."""
        text = re.sub(r'<[^>]*>', '', cell)
        # Capture emoticons now; the non-word substitution below removes them.
        emoticons = re.findall(r'(?::|;|=)(?:-)?(?:\)|\(|D|P)', text)
        words = re.sub(r'[\W]+', ' ', text.lower())
        # Re-append this cell's own emoticons, with any '-' nose removed.
        return words + ' '.join(emoticons).replace('-', '')

    def transform(self, X, **transform_params):
        """Return a DataFrame shaped like ``X`` with every cell cleaned.

        Each cell is lower-cased, HTML tags are removed, runs of non-word
        characters collapse to one space, and the emoticons found in that
        cell are appended at the end.
        """
        return X.applymap(self._clean)
        

class To_array(TransformerMixin, BaseEstimator):
    """Turn a DataFrame into a flattened array of its values."""

    def fit(self, X, y=None, **fit_params):
        """Do nothing and return ``self``."""
        return self

    def transform(self, X, **transform_params):
        """Return ``X.values`` flattened with ``ravel``."""
        values = X.values
        return values.ravel()
    

# Feature Engineering

In [None]:
class num_stop_words(TransformerMixin, BaseEstimator):
    """Count, per cell, the tokens that appear in the global ``stop_words``."""

    def fit(self, X, y=None, **fit_params):
        """Do nothing and return ``self``."""
        return self

    def transform(self, X, **transform_params):
        """Return a DataFrame of stop-word counts, one per cell of ``X``.

        Each cell is converted to ``str``, lower-cased and split on
        whitespace before its tokens are checked against ``stop_words``.
        """
        def count_stop_words(cell):
            tokens = str(cell).lower().split()
            return sum(1 for token in tokens if token in stop_words)

        return X.applymap(count_stop_words)

In [None]:
class mean_word_len(TransformerMixin, BaseEstimator):
    """Compute the mean token length of each text cell."""

    def fit(self, X, y=None, **fit_params):
        """Do nothing and return ``self``."""
        return self

    def transform(self, X, **transform_params):
        """Return a DataFrame holding the mean token length of each cell.

        Cells are lower-cased and split on whitespace. A cell with no
        tokens (empty or whitespace-only) yields ``0.0`` rather than the
        NaN and RuntimeWarning that ``np.mean`` produces for an empty list.
        """
        def average_length(cell):
            # lower() is kept because it can change the length of a few
            # Unicode characters, so dropping it could alter results.
            lengths = [len(word) for word in cell.lower().split()]
            if not lengths:
                return 0.0
            return np.mean(lengths)

        return X.applymap(average_length)

In [None]:
class num_words_title(TransformerMixin, BaseEstimator):
    """Count the title-cased tokens in each text cell."""

    def fit(self, X, y=None, **fit_params):
        """Do nothing and return ``self``."""
        return self

    def transform(self, X, **transform_params):
        """Return a DataFrame of counts of tokens for which ``istitle()`` holds."""
        def count_title_words(cell):
            return sum(1 for word in cell.split() if word.istitle())

        return X.applymap(count_title_words)

In [None]:
class num_uni_words(TransformerMixin, BaseEstimator):
    """Count the distinct whitespace-separated tokens in each text cell."""

    def fit(self, X, y=None, **fit_params):
        """Do nothing and return ``self``."""
        return self

    def transform(self, X, **transform_params):
        """Return a DataFrame with the number of unique tokens per cell."""
        def count_distinct(cell):
            distinct = set(cell.split())
            return len(distinct)

        return X.applymap(count_distinct)