In [1]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [2]:
import sklearn

# Ask scikit-learn to display estimators using the plain-text option.
sklearn.set_config(display="text")

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_X_y, check_is_fitted

## Interactions

In [7]:
class interact_features(BaseEstimator, TransformerMixin):
    """Add pairwise interaction (product) columns to a numeric feature matrix.

    For input columns ``x0, ..., x{n-1}`` one product column ``xi * xj`` is
    produced for every unordered pair of distinct columns (``i < j``).

    Parameters
    ----------
    interaction_only : bool, default=False
        If True, the output holds only the interaction columns; otherwise the
        original columns come first, followed by the interaction columns.
    include_intercept : bool, default=False
        If True, a column of ones is prepended to the output.

    Attributes
    ----------
    n_features_in_ : int
        Number of input columns recorded by ``fit``.
    feature_names_in_ : ndarray
        Column names recorded by ``fit``; only present when the training
        input carried column names (e.g. a DataFrame).
    """

    def __init__(self, interaction_only = False, include_intercept = False):
        self.interaction_only = interaction_only
        self.include_intercept = include_intercept

    def _pair_indices(self):
        """Return index arrays ``(i, j)`` for every column pair with ``i < j``."""
        # k=1 starts above the diagonal: no column is paired with itself and
        # each unordered pair appears exactly once, in row-major order
        # (0,1), (0,2), ..., (1,2), ...
        return np.triu_indices(self.n_features_in_, k=1)

    def fit(self, X, y=None):
        """Validate ``X`` and record its number of columns (and names, if any).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data; at least two columns are required.
        y : ignored
            Present for API compatibility.

        Returns
        -------
        self : interact_features
            The fitted transformer.
        """
        # `_validate_data` itself calls `_check_feature_names` with the same
        # `reset` flag, so a separate call is not needed.
        # https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/base.py#L495
        # NOTE(review): `_validate_data` is private scikit-learn API; confirm it
        # is still available in the installed version.
        self._validate_data(X=X, reset=True, ensure_min_features=2)

        return self

    def transform(self, X, y=None):
        """Build the interaction columns for ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features_in_)
            Data with the same columns as seen in ``fit``.
        y : ignored
            Present for API compatibility.

        Returns
        -------
        ndarray of shape (n_samples, n_output_features)
            Columns ordered as: optional intercept, original columns (unless
            ``interaction_only``), then one product column per pair ``i < j``.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called.
        """
        check_is_fitted(self, "n_features_in_")

        # Use the validated array that scikit-learn hands back instead of
        # converting the raw input a second time.
        X = self._validate_data(X=X, reset=False)

        left, right = self._pair_indices()
        interactions = X[:, left] * X[:, right]

        blocks = [interactions] if self.interaction_only else [X, interactions]
        if self.include_intercept:
            blocks.insert(0, np.ones((X.shape[0], 1)))

        return np.column_stack(blocks)

    def get_feature_names_out(self, input_features=None):
        """Return the output column names, aligned with ``transform``.

        Parameters
        ----------
        input_features : sequence of str or None, default=None
            Names to use for the input columns. If None, the names recorded
            during ``fit`` are used, falling back to ``x0, x1, ...``.

        Returns
        -------
        list of str
            ``"1"`` for the intercept (if enabled), the input names (unless
            ``interaction_only``), then ``"<a> * <b>"`` for each pair.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called.
        ValueError
            If ``input_features`` does not have ``n_features_in_`` entries.
        """
        check_is_fitted(self, "n_features_in_")

        if input_features is not None:
            feat_names = [str(name) for name in input_features]
            if len(feat_names) != self.n_features_in_:
                raise ValueError(
                    "input_features should have length equal to number of "
                    "features seen during fit ("
                    + str(self.n_features_in_) + "), got "
                    + str(len(feat_names)) + "."
                )
        elif hasattr(self, "feature_names_in_"):
            feat_names = list(self.feature_names_in_)
        else:
            feat_names = ["x"+str(i) for i in range(self.n_features_in_)]

        left, right = self._pair_indices()
        new_feat_names = [
            feat_names[i] + " * " + feat_names[j]
            for i, j in zip(left.tolist(), right.tolist())
        ]

        if not self.interaction_only:
            new_feat_names = feat_names + new_feat_names

        if self.include_intercept:
            new_feat_names = ["1"] + new_feat_names

        return new_feat_names

In [19]:
# Toy inputs for the examples: a two-column frame, a one-column frame,
# and the two-column frame's values as a bare array (no column names).
X = pd.DataFrame(dict(abc=range(1, 6), xyz=range(5, 0, -1)))
Y = pd.DataFrame(dict(x1=range(1, 6)))
Z = X.to_numpy(copy=True)

## Example 1

In [20]:
# Display the example frame (columns `abc` and `xyz`).
X

Unnamed: 0,abc,xyz
0,1,5
1,2,4
2,3,3
3,4,2
4,5,1


In [27]:
# Fit on the DataFrame; include_intercept=True requests the leading column of ones.
it = interact_features(include_intercept=True).fit(X)

In [28]:
# Output columns, left to right: intercept, abc, xyz, abc * xyz.
it.transform(X)

array([[1., 1., 5., 5.],
       [1., 2., 4., 8.],
       [1., 3., 3., 9.],
       [1., 4., 2., 8.],
       [1., 5., 1., 5.]])

In [29]:
# Names come from the DataFrame's columns, with "1" for the intercept.
it.get_feature_names_out()

['1', 'abc', 'xyz', 'abc * xyz']

## Example 2

In [17]:
# Construct only; fit() is deliberately not called here.
it = interact_features()

In [18]:
# Expected to fail: check_is_fitted raises NotFittedError on an unfitted instance.
it.get_feature_names_out()

NotFittedError: This interact_features instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

## Example 3

In [24]:
# Fit on the plain array Z (no column names) with default options.
it = interact_features().fit(Z)

In [25]:
# Defaults (no intercept, interaction_only=False): original columns, then their product.
it.transform(Z)

array([[1, 5, 5],
       [2, 4, 8],
       [3, 3, 9],
       [4, 2, 8],
       [5, 1, 5]])

In [26]:
# With no feature_names_in_ recorded, names fall back to x0, x1, ...
it.get_feature_names_out()

['x0', 'x1', 'x0 * x1']

In [30]:
# IPython help lookup for the private validation helper that fit/transform call.
?sklearn.base.BaseEstimator._validate_data

Signature:
sklearn.base.BaseEstimator._validate_data(
    self,
    X='no_validation',
    y='no_validation',
    reset=True,
    validate_separately=False,
    **check_params,
)
Docstring:
Validate input data and set or check the `n_features_in_` attribute.

Parameters
----------
X : {array-like, sparse matrix, dataframe} of shape                 (n_samples, n_features), default='no validation'
    The input samples.
    If `'no_validation'`, no validation is performed on `X`. This is
    use