In [7]:
import numpy as np
from sklearn.preprocessing import FunctionTransformer

# Create a dataset
X = np.array([[1, 2],
              [3, 4]])

# Define the transformation function (log1p = log(1 + x))
log_transform = FunctionTransformer(np.log1p)

# Apply the transformation to the dataset
X_transformed = log_transform.transform(X)

print(X)
print(X_transformed)


[[1 2]
 [3 4]]
[[0.69314718 1.09861229]
 [1.38629436 1.60943791]]


In [5]:
from sklearn.preprocessing import FunctionTransformer
import numpy as np

# create a dataset
Y = np.array([[1, 2], [3, 4]])

# define a custom feature engineering function
def squ(Y):
    return np.hstack((Y, Y ** 2))

# create a FunctionTransformer to apply the custom function
custom_transformer = FunctionTransformer(squ)

# apply the transformer to the input data
Y_transformed = custom_transformer.transform(Y)

# view the transformed data
print(Y_transformed)

[[ 1  2  1  4]
 [ 3  4  9 16]]


In [13]:
from sklearn.preprocessing import FunctionTransformer
import numpy as np

# create a dataset
Z = np.array([[1, 2], [3, 4]])

# define a custom scaling function
def my_scaling(Z):
    return Z / np.max(Z)

# create a FunctionTransformer to apply the custom function
custom_transformer = FunctionTransformer(my_scaling)

# apply the transformer to the input data
Z_transformed = custom_transformer.transform(Z)

# view the transformed data
print(Z)
print(Z_transformed)

[[1 2]
 [3 4]]
[[0.25 0.5 ]
 [0.75 1.  ]]


In [17]:
from sklearn.preprocessing import FunctionTransformer
import numpy as np

# create a dataset with missing values
A = np.array([[1, 2], [3, np.nan]])

# define a custom cleaning function
def my_cleaning(A):
    A[np.isnan(A)] = 0
    return A

# create a FunctionTransformer to apply the custom function
custom_transformer = FunctionTransformer(my_cleaning)

# apply the transformer to the input data
A_transformed = custom_transformer.transform(A)

# view the transformed data
print(A)
print(A_transformed)

[[1. 2.]
 [3. 0.]]
[[1. 2.]
 [3. 0.]]


In [19]:
import numpy as np
import pandas as pd
df=pd.read_csv("placement.csv")
df.head(3)

Unnamed: 0,cgpa,resume_score,placed
0,8.14,6.52,1
1,6.17,5.17,0
2,8.27,8.86,1


In [25]:
x=df.drop(columns=["placed"])
y=df["placed"]

In [27]:
from sklearn.preprocessing import FunctionTransformer
log_transform=FunctionTransformer(np.log1p)
L_transformed=log_transform.transform(x)
L_transformed

Unnamed: 0,cgpa,resume_score
0,2.212660,2.017566
1,1.969906,1.819699
2,2.226783,2.288486
3,2.064328,2.112635
4,2.142416,2.116256
...,...,...
95,1.991976,1.998774
96,2.222459,2.170196
97,2.034706,2.172476
98,2.212660,1.891605


In [214]:
import numpy as np
import pandas as pd

In [216]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder

In [218]:
df1=pd.read_csv("covid_toy.csv")
df1.head()

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


In [220]:
df1.isnull().sum()

age           0
gender        0
fever        10
cough         0
city          0
has_covid     0
dtype: int64

In [222]:
df1['fever'] = df1['fever'].fillna(df1['fever'].mean())


In [224]:
df1.isnull().sum()

age          0
gender       0
fever        0
cough        0
city         0
has_covid    0
dtype: int64

In [226]:
s=df1.drop(columns=["has_covid"])
t=df1["has_covid"]

In [228]:
from sklearn.model_selection import train_test_split

In [230]:
s_train,s_test,t_train,t_test=train_test_split(s,t,test_size=0.2,random_state=42)

In [232]:
s_train.head(2)

Unnamed: 0,age,gender,fever,cough,city
55,81,Female,101.0,Mild,Mumbai
88,5,Female,100.0,Mild,Kolkata


In [274]:
from sklearn.compose import ColumnTransformer


transformer = ColumnTransformer(
    transformers=[
        ("tnf1", SimpleImputer(), ["fever"]),
        ("tnf2", OrdinalEncoder(categories=[["Mild", "Strong"]]), ["cough"]),
        ("tnf3", OneHotEncoder(sparse_output=False), ["gender", "city"])
    ],
    remainder="drop"
)

transformer.set_output(transform="pandas")


In [276]:
a=transformer.fit_transform(s_train)

In [278]:
a

Unnamed: 0,tnf1__fever,tnf2__cough,tnf3__gender_Female,tnf3__gender_Male,tnf3__city_Bangalore,tnf3__city_Delhi,tnf3__city_Kolkata,tnf3__city_Mumbai
55,101.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
88,100.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
26,100.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
42,100.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
69,103.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...
60,102.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0
71,104.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
14,104.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
92,102.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0


In [282]:



import numpy as np
from sklearn.preprocessing import FunctionTransformer
transformer.set_output(transform="pandas")

a = transformer.fit_transform(s_train)   # all columns numeric now
log_transformA = FunctionTransformer(np.log1p)
A_transformed = log_transformA.fit_transform(a)
print(A_transformed.head())


    tnf1__fever  tnf2__cough  tnf3__gender_Female  tnf3__gender_Male  \
55     4.624973          0.0             0.693147           0.000000   
88     4.615121          0.0             0.693147           0.000000   
26     4.615121          0.0             0.693147           0.000000   
42     4.615121          0.0             0.000000           0.693147   
69     4.644391          0.0             0.693147           0.000000   

    tnf3__city_Bangalore  tnf3__city_Delhi  tnf3__city_Kolkata  \
55                   0.0          0.000000            0.000000   
88                   0.0          0.000000            0.693147   
26                   0.0          0.000000            0.693147   
42                   0.0          0.693147            0.000000   
69                   0.0          0.693147            0.000000   

    tnf3__city_Mumbai  
55           0.693147  
88           0.000000  
26           0.000000  
42           0.000000  
69           0.000000  
