## Function Transformer

`FunctionTransformer` is a tool in scikit-learn, a popular Python library for machine learning, that wraps an arbitrary function (for example `np.log1p`, or one you define yourself) so that it can be applied to the input data as a transformer. It is useful for performing custom transformations of the input data inside a machine learning pipeline.

In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.preprocessing import FunctionTransformer

In [3]:
## Small 2x2 sample array
x = np.array([
    [1, 2],
    [3, 4],
])

## Wrap NumPy's log1p, i.e. log(1 + x), in a transformer
log_transform = FunctionTransformer(func=np.log1p)

## Apply the wrapped function to the array
## (transform() is called directly here, without a prior fit())
x_transformed = log_transform.transform(x)

print(x_transformed)

[[0.69314718 1.09861229]
 [1.38629436 1.60943791]]


In [None]:
## Same 2x2 sample array as before
x = np.array([
    [1, 2],
    [3, 4],
])

## Defining a custom feature engineering Function 
def squ(x):
    """Append the element-wise squares of ``x`` to ``x`` itself.

    For a 2-D array of shape (n, k) the result has shape (n, 2k): the
    original columns followed by their squares.
    """
    squared = x ** 2
    return np.hstack([x, squared])

## Wrap the custom function in a FunctionTransformer
sq_transform = FunctionTransformer(func=squ)

## Fit and transform in one step; the result holds the original columns plus their squares
x_new = sq_transform.fit_transform(x)
x_new

array([[ 1,  2,  1,  4],
       [ 3,  4,  9, 16]])

In [5]:
## Creating the dataset (2x2 sample array)
x = np.array([
    [1, 2],
    [3, 4],
])

# define a custom scaling function
def my_scaling(x):
    """Divide every element of ``x`` by the maximum returned by ``np.max(x)``.

    For a NumPy array, ``np.max`` without an axis is the single largest
    value in the whole array, so the largest element maps to 1.
    """
    peak = np.max(x)
    return x / peak

# wrap the custom scaling function in a FunctionTransformer
custom_transformer = FunctionTransformer(func=my_scaling)

# run the input data through the transformer
x_transformed = custom_transformer.transform(x)

# show the scaled values
print(x_transformed)

[[0.25 0.5 ]
 [0.75 1.  ]]


In [6]:
# Data Cleaning

# 2x2 sample array whose last entry is missing (NaN)
x = np.array([
    [1, 2],
    [3, np.nan],
])

# define a custom cleaning function
def my_cleaning(x):
    """Return a copy of ``x`` in which every NaN entry is replaced by 0.

    The input itself is left unmodified. Assigning through the mask directly
    on ``x`` would overwrite the caller's data, so the raw values would be
    lost after the transformer runs.

    Parameters
    ----------
    x : array-like supporting ``.copy()`` and boolean-mask assignment
        (e.g. a NumPy array).

    Returns
    -------
    Same type as ``x``
        A new object with NaN entries set to 0.
    """
    # Work on a copy so the caller's array keeps its NaN values.
    cleaned = x.copy()
    cleaned[np.isnan(cleaned)] = 0
    return cleaned

# wrap the custom cleaning function in a FunctionTransformer
custom_transformer = FunctionTransformer(func=my_cleaning)

# run the input data through the transformer
x_transformed = custom_transformer.transform(x)

# show the cleaned values
print(x_transformed)

[[1. 2.]
 [3. 0.]]


### Applying on a Dataset

In [10]:
# Data folder: taken from the DATA_DIR environment variable when it is set,
# otherwise the original local folder is used. Setting DATA_DIR lets the
# notebook run on another machine without editing this cell.
data_dir = Path(os.environ.get("DATA_DIR", r"C:\Users\singh\OneDrive\Desktop\Python\Data"))
df1 = pd.read_csv(data_dir / "placement.csv")
df1.head()

Unnamed: 0,cgpa,resume_score,placed
0,8.14,6.52,1
1,6.17,5.17,0
2,8.27,8.86,1
3,6.88,7.27,1
4,7.52,7.3,1


In [11]:
# Target: the `placed` column
y = df1['placed']
# Features: every remaining column
x = df1.drop(columns=['placed'])

In [12]:
# Apply log(1 + value) element-wise to every feature column
log_transform = FunctionTransformer(func=np.log1p)
x_new = log_transform.transform(x)
x_new

Unnamed: 0,cgpa,resume_score
0,2.212660,2.017566
1,1.969906,1.819699
2,2.226783,2.288486
3,2.064328,2.112635
4,2.142416,2.116256
...,...,...
95,1.991976,1.998774
96,2.222459,2.170196
97,2.034706,2.172476
98,2.212660,1.891605


In [21]:
# Data folder: taken from the DATA_DIR environment variable when it is set,
# otherwise the original local folder is used. Setting DATA_DIR lets the
# notebook run on another machine without editing this cell.
data_dir = Path(os.environ.get("DATA_DIR", r"C:\Users\singh\OneDrive\Desktop\Python\Data"))
df2 = pd.read_csv(data_dir / "insurance - insurance.csv")
df2.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [22]:
from sklearn.preprocessing import LabelEncoder

# Encode each categorical column with its OWN encoder and keep the fitted
# encoders. Re-fitting one shared encoder overwrites its `classes_`, so the
# integer -> label mapping of the earlier columns could no longer be
# recovered (e.g. via inverse_transform).
# NOTE(review): scikit-learn documents LabelEncoder as intended for target
# labels; OrdinalEncoder is the counterpart for feature columns. Consider
# switching if this cell is reworked.
categorical_cols = ['sex', 'smoker', 'region']
label_encoders = {}
for col in categorical_cols:
    lb = LabelEncoder()
    df2[col] = lb.fit_transform(df2[col])
    label_encoders[col] = lb
# After the loop `lb` is the encoder fitted on 'region', the last column encoded.

In [20]:
# Apply log(1 + value) element-wise to every column of df2.
# NOTE(review): this includes the label-encoded sex / smoker / region columns,
# whose integer codes are arbitrary category ids. Consider restricting the
# transform to the genuinely numeric columns.
log_transform = FunctionTransformer(func=np.log1p)
df2_new = log_transform.transform(df2)
df2_new

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,2.995732,0.000000,3.363842,0.000000,0.693147,1.386294,9.734236
1,2.944439,0.693147,3.548755,0.693147,0.000000,1.098612,7.453882
2,3.367296,0.693147,3.526361,1.386294,0.000000,1.098612,8.400763
3,3.526361,0.693147,3.165686,0.000000,0.000000,0.693147,9.998137
4,3.496508,0.693147,3.397189,0.000000,0.000000,0.693147,8.260455
...,...,...,...,...,...,...,...
1333,3.931826,0.693147,3.464798,1.386294,0.000000,0.693147,9.268755
1334,2.944439,0.000000,3.494080,0.000000,0.000000,0.000000,7.699381
1335,2.944439,0.000000,3.633631,0.000000,0.000000,1.098612,7.396847
1336,3.091042,0.000000,3.288402,0.000000,0.000000,1.386294,7.605365
