The FunctionTransformer takes as input a single function that will be applied to the data. This function can be any Python callable that takes a single argument (the whole input array or DataFrame, not one sample at a time), such as a lambda function, a NumPy function, or a user-defined function. The function should return the transformed data.

In [1]:
import numpy as np
from sklearn.preprocessing import FunctionTransformer

# Create a small 2x2 example dataset
x = np.array([[1,2], [3,4]])

# Wrap np.log1p (element-wise log(1 + value)) in a transformer
log_transform = FunctionTransformer(np.log1p)

# Apply the transformation to the dataset
x_transformed = log_transform.transform(x)

# View the transformed data
print(x_transformed)

[[0.69314718 1.09861229]
 [1.38629436 1.60943791]]


In [2]:
# 1. Custom Feature Engineering
import numpy as np
from sklearn.preprocessing import FunctionTransformer

# Create a dataset
x = np.array([[1,2], [3,4]])

# Define a custom feature engineering function
def holi(x):
    """Append the element-wise square of ``x`` to ``x`` itself.

    The original values and their squares are joined with ``np.hstack``, so a
    2-D input of shape (n, m) comes back with shape (n, 2 * m).
    """
    squared = x ** 2
    return np.hstack([x, squared])

# Create a FunctionTransformer to apply the custom function
custom_transformer = FunctionTransformer(holi)

# Apply the transformation; the result holds the original columns followed
# by their squares
x_transformed = custom_transformer.transform(x)

# View the transformed data
print(x_transformed)

[[ 1  2  1  4]
 [ 3  4  9 16]]


In [3]:
# 2. Custom Feature Engineering
import numpy as np
from sklearn.preprocessing import FunctionTransformer

# Create a dataset
x = np.array([[1,2], [3,4]])

# Define a custom feature engineering function
def my_scaling(x):
    """Divide every entry of ``x`` by the largest value found in ``x``.

    The maximum is taken with ``np.max`` and no ``axis`` argument, i.e. over
    the input as a whole rather than per column.
    """
    largest = np.max(x)
    scaled = x / largest
    return scaled

# Create a FunctionTransformer to apply the custom function
custom_transformer = FunctionTransformer(my_scaling)

# Apply the transformation; every value is divided by the maximum of x
x_transformed = custom_transformer.transform(x)

# View the transformed data
print(x_transformed)

[[0.25 0.5 ]
 [0.75 1.  ]]


In [4]:
# 3. Custom Feature Engineering
import numpy as np
from sklearn.preprocessing import FunctionTransformer

# Create a dataset
x = np.array([[1,2], [3, np.nan]])

# Define a custom feature engineering function
def my_cleaning(x):
    """Return a copy of ``x`` in which every NaN entry is replaced by 0.

    The replacement is done on a copy, so the object passed in is not
    modified and still contains its NaN values after the call.

    Parameters
    ----------
    x : numpy.ndarray or pandas.DataFrame
        Numeric data that may contain NaN.

    Returns
    -------
    Same type as ``x``
        A new object with NaN replaced by 0.
    """
    # Copy first: assigning through the boolean mask on ``x`` itself would
    # overwrite the caller's data.
    cleaned = x.copy()
    cleaned[np.isnan(cleaned)] = 0
    return cleaned

# Create a FunctionTransformer to apply the custom function
custom_transformer = FunctionTransformer(my_cleaning)

# Apply the transformation; NaN entries come back as 0
x_transformed = custom_transformer.transform(x)

# View the transformed data
print(x_transformed)

[[1. 2.]
 [3. 0.]]


In [5]:
import pandas as pd
import numpy as np

In [6]:
# NOTE(review): absolute, machine-specific Windows path; point it at the
# local copy of placement.csv before running this cell on another machine.
df = pd.read_csv("D:\\DataSet\\placement.csv")
# Preview the first three rows
df.head(3)

Unnamed: 0,cgpa,resume_score,placed
0,8.14,6.52,1
1,6.17,5.17,0
2,8.27,8.86,1


In [7]:
x = df.drop(columns = ["placed"])
y = df["placed"]

In [8]:
from sklearn.preprocessing import FunctionTransformer

# Wrap np.log1p (element-wise log(1 + value)) in a transformer
log_transform = FunctionTransformer(np.log1p)

# x holds the feature columns (everything except "placed")
x_transformed = log_transform.transform(x)

# Display the transformed features
x_transformed

Unnamed: 0,cgpa,resume_score
0,2.212660,2.017566
1,1.969906,1.819699
2,2.226783,2.288486
3,2.064328,2.112635
4,2.142416,2.116256
...,...,...
95,1.991976,1.998774
96,2.222459,2.170196
97,2.034706,2.172476
98,2.212660,1.891605


In [9]:
# 1. Custom Feature Engineering
import numpy as np
from sklearn.preprocessing import FunctionTransformer

# Create a dataset
df = pd.read_csv("D:\\DataSet\\covid_toy.csv")

# Define a custom feature engineering function
int_list = []
float_list = []
object_list = []
def separate(df):
    """Sort the column names of ``df`` into the module-level dtype lists.

    ``int_list``, ``float_list`` and ``object_list`` are refilled in place
    with the names of the int64, float64 and object columns respectively.
    Because the lists are refilled rather than appended to, calling the
    function again does not produce duplicate names.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column dtypes are inspected.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, unchanged, so that a ``FunctionTransformer`` wrapping
        this function yields the data instead of ``None``.
    """
    # Slice assignment keeps the same list objects, so references taken
    # elsewhere to these lists keep seeing the current contents.
    int_list[:] = df.select_dtypes(include = ["int64"]).columns.tolist()
    float_list[:] = df.select_dtypes(include = ["float64"]).columns.tolist()
    object_list[:] = df.select_dtypes(include = ["object"]).columns.tolist()
    return df

# Create a FunctionTransformer to apply the custom function
custom_transformer = FunctionTransformer(separate)

# Run separate() on the frame; the call matters for its side effect of
# filling int_list, float_list and object_list
x_transformed = custom_transformer.transform(df)

# Show the column names collected for each dtype
print("Integer data type columns name :",int_list)
print("Float data type columns name :",float_list)
print("object data type columns name :",object_list)

Integer data type columns name : ['age']
Float data type columns name : ['fever']
object data type columns name : ['gender', 'cough', 'city', 'has_covid']


In [10]:
x.dtypes

cgpa            float64
resume_score    float64
dtype: object

In [11]:
df.head(3)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No


In [12]:
from sklearn.preprocessing import LabelEncoder

# Fit a separate encoder for every object column and keep each one, so the
# integer codes can later be mapped back with inverse_transform. A single
# shared encoder only remembers the classes of the last column it was fit on.
label_encoders = {}
for i in object_list:
    le = LabelEncoder()
    df[i] = le.fit_transform(df[i])
    label_encoders[i] = le

df.head(3)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,1,103.0,0,2,0
1,27,1,100.0,0,1,1
2,42,1,101.0,0,1,0


In [13]:
# Per-column count of missing values, laid out as a table with the column
# name in "index" and the count in "missing"
missing_df = (
    df.isnull()
    .sum()
    .reset_index(name="missing")
)
missing_df

Unnamed: 0,index,missing
0,age,0
1,gender,0
2,fever,10
3,cough,0
4,city,0
5,has_covid,0


In [14]:
# Names of the columns that contain at least one missing value, in the
# frame's column order
missing_col_list = df.columns[df.isnull().any()].tolist()

print(missing_col_list)

['fever']


In [15]:
from sklearn.impute import SimpleImputer

# Integer and float columns get the same treatment (mean imputation), so a
# single membership test replaces the two identical branches. Columns that
# appear in neither list are left untouched, as before.
numeric_columns = set(int_list) | set(float_list)
for i in missing_col_list:
    if i in numeric_columns:
        si = SimpleImputer(strategy="mean")
        df[i] = si.fit_transform(df[[i]])

# Confirm how many missing values remain in each column
df.isnull().sum()

age          0
gender       0
fever        0
cough        0
city         0
has_covid    0
dtype: int64