In [3]:
import pandas as pd
# Timestamps every 10 hours from 2020-01-06 up to 2020-01-10, as a Series so the
# `.dt` accessor is available.
# Lowercase "h" is the current pandas alias for hours; the uppercase "H" spelling
# is deprecated (FutureWarning since pandas 2.2) and rejected by newer releases.
s = pd.date_range("2020-01-06", "2020-01-10", freq="10h").to_series()

# Calendar features derived from each timestamp, exposed as plain NumPy arrays.
features = {
    "dayofweek": s.dt.dayofweek.to_numpy(),
    "quarter": s.dt.quarter.to_numpy(),
}
features

{'dayofweek': array([0, 0, 0, 1, 1, 2, 2, 2, 3, 3], dtype=int64),
 'quarter': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)}

In [5]:
import numpy as np
# Synthetic demo data: 100 rows x 2 columns of uniform floats in [0, 1).
# The values come from a locally seeded Generator so the frame is identical on
# every "Restart Kernel -> Run All", without touching NumPy's global random state.
df = pd.DataFrame(
    np.random.default_rng(seed=42).random((100, 2)),
    columns=[f"f_{i}" for i in range(1, 3)],
)
df

Unnamed: 0,f_1,f_2
0,0.683987,0.745357
1,0.105413,0.617193
2,0.805041,0.487346
3,0.577839,0.237197
4,0.592090,0.473080
...,...,...
95,0.640502,0.347923
96,0.311399,0.736096
97,0.024502,0.841237
98,0.801801,0.805443


In [7]:
from sklearn.preprocessing import PolynomialFeatures
# Degree-2 polynomial expansion of the columns of `df`: interaction_only=False keeps
# the squared terms alongside the pairwise products, and include_bias=False omits
# the constant column.
pf = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
poly_feats = pf.fit_transform(df)

# Number of generated feature columns (second axis of the transformed matrix).
num = poly_feats.shape[1]

In [9]:
# Wrap the polynomial feature matrix in a DataFrame whose columns are named
# f_1 .. f_<num>, one per generated feature.
df_transformed = pd.DataFrame(
    data=poly_feats,
    columns=[f"f_{position + 1}" for position in range(num)],
)
df_transformed

Unnamed: 0,f_1,f_2,f_3,f_4,f_5
0,0.683987,0.745357,0.467838,0.509814,0.555556
1,0.105413,0.617193,0.011112,0.065060,0.380927
2,0.805041,0.487346,0.648091,0.392333,0.237506
3,0.577839,0.237197,0.333898,0.137062,0.056262
4,0.592090,0.473080,0.350570,0.280106,0.223804
...,...,...,...,...,...
95,0.640502,0.347923,0.410243,0.222845,0.121050
96,0.311399,0.736096,0.096969,0.229219,0.541837
97,0.024502,0.841237,0.000600,0.020612,0.707680
98,0.801801,0.805443,0.642884,0.645805,0.648739


In [12]:
# Discretise f_1 into 10 equal-width bins; labels=False stores the integer bin
# index rather than an Interval category.
# NOTE: this adds a column to `df` in place, so any cell that consumes `df` will
# see the extra column if it is re-run after this one.
df["f_bin_10"] = pd.cut(x=df["f_1"], bins=10, labels=False)

In [15]:
from sklearn.feature_selection import chi2,f_classif,f_regression,mutual_info_classif,mutual_info_regression,SelectKBest,SelectPercentile

In [16]:
class UnivariateFeatureSelection:
    """Pick a univariate feature selector from a few simple arguments.

    The wrapped selector is stored on ``self.selection`` and is either a
    ``SelectKBest`` (when ``n_features`` is an ``int``) or a ``SelectPercentile``
    (when ``n_features`` is a ``float``).

    Parameters
    ----------
    n_features : int or float
        ``int``   -> passed as ``k`` to ``SelectKBest``.
        ``float`` -> treated as a fraction and converted to a whole-number
        percentile (``0.29`` -> ``29``) for ``SelectPercentile``.
    problem_type : str
        ``"classification"`` enables the classification scorers. Any other value
        falls back to the regression scorers (no validation is performed).
    scoring : str
        Name of the scoring function. For classification: ``"f_classif"``,
        ``"chi2"``, ``"mutual_info_classif"``. Otherwise: ``"f_regression"``,
        ``"mutual_info_regression"``.

    Raises
    ------
    ValueError
        If ``scoring`` is not valid for the chosen problem type.
    TypeError
        If ``n_features`` is neither ``int`` nor ``float``.
    """

    def __init__(self, n_features, problem_type, scoring):
        if problem_type == "classification":
            valid_scoring = {
                "f_classif": f_classif,
                "chi2": chi2,
                "mutual_info_classif": mutual_info_classif,
            }
        else:
            valid_scoring = {
                "f_regression": f_regression,
                "mutual_info_regression": mutual_info_regression,
            }

        if scoring not in valid_scoring:
            # ValueError is still an Exception, so existing handlers keep working.
            raise ValueError("Invalid scoring function")
        score_func = valid_scoring[scoring]

        if isinstance(n_features, int):
            self.selection = SelectKBest(score_func, k=n_features)
        elif isinstance(n_features, float):
            # Round instead of truncating: 0.29 * 100 == 28.999999999999996, so a
            # plain int() would silently select the 28th percentile instead of 29.
            self.selection = SelectPercentile(
                score_func,
                percentile=int(round(n_features * 100)),
            )
        else:
            raise TypeError("Invalid type of feature")

    def fit(self, X, y):
        """Fit the wrapped selector on ``X``/``y`` and return what its ``fit`` returns."""
        return self.selection.fit(X, y)

    def transform(self, X):
        """Delegate to the wrapped selector's ``transform`` and return its result."""
        return self.selection.transform(X)

    def fit_transform(self, X, y):
        """Delegate to the wrapped selector's ``fit_transform`` and return its result."""
        return self.selection.fit_transform(X, y)