## ColumnTransformer

In [37]:
import pandas as pd
import numpy as np

df = pd.read_csv('csvFile/titanic/train.csv')
df.shape

(891, 12)

In [38]:
#removing the unwanted columns
df.drop(columns=['PassengerId', 'Name', 'Ticket'], inplace=True)


In [39]:
df.head(2)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked
0,0,3,male,22.0,1,0,7.25,,S
1,1,1,female,38.0,1,0,71.2833,C85,C


In [40]:
df.isnull().sum()

Survived      0
Pclass        0
Sex           0
Age         177
SibSp         0
Parch         0
Fare          0
Cabin       687
Embarked      2
dtype: int64

**Planning**
```
0.Filling the Null of Age with mean (SimpleImputer)
1.Applying OneHotEncoding on ['Sex', 'Embarked']
2.Applying Scaling on Age and Fare (standardization)
```

In [73]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

transform = ColumnTransformer(transformers=[
    ('t1',SimpleImputer(),[3]),
    ('t2', OneHotEncoder(), [2, 8]),
    ('t3', StandardScaler(), [3, 6])
], remainder='passthrough')

In [74]:
df.head(8)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked
0,0,3,male,22.0,1,0,7.25,,S
1,1,1,female,38.0,1,0,71.2833,C85,C
2,1,3,female,26.0,0,0,7.925,,S
3,1,1,female,35.0,1,0,53.1,C123,S
4,0,3,male,35.0,0,0,8.05,,S
5,0,3,male,,0,0,8.4583,,Q
6,0,1,male,54.0,0,0,51.8625,E46,S
7,0,3,male,2.0,3,1,21.075,,S


In [75]:

new_df= pd.DataFrame(transform.fit_transform(df))
new_df.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,22.0,0.0,1.0,0.0,0.0,1.0,0.0,-0.530377,-0.502445,0,3,1,0,
1,38.0,1.0,0.0,1.0,0.0,0.0,0.0,0.571831,0.786845,1,1,1,0,C85
2,26.0,1.0,0.0,0.0,0.0,1.0,0.0,-0.254825,-0.488854,1,3,0,0,
3,35.0,1.0,0.0,0.0,0.0,1.0,0.0,0.365167,0.42073,1,1,1,0,C123
4,35.0,0.0,1.0,0.0,0.0,1.0,0.0,0.365167,-0.486337,0,3,0,0,
5,29.699118,0.0,1.0,0.0,1.0,0.0,0.0,,-0.478116,0,3,0,0,
6,54.0,0.0,1.0,0.0,0.0,1.0,0.0,1.674039,0.395814,0,1,0,0,E46
7,2.0,0.0,1.0,0.0,0.0,1.0,0.0,-1.908136,-0.224083,0,3,3,1,
8,27.0,1.0,0.0,0.0,0.0,1.0,0.0,-0.185937,-0.424256,1,3,0,2,
9,14.0,1.0,0.0,1.0,0.0,0.0,0.0,-1.08148,-0.042956,1,2,1,0,
