## **Encoding features using Column Transformer**

In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder

In [3]:
data=pd.read_csv('/content/AER_credit_card_data.csv')

In [4]:
data.sample(5)

Unnamed: 0,card,reports,age,income,share,expenditure,owner,selfemp,dependents,months,majorcards,active
1301,no,0,23.91667,1.636,0.000733,0.0,no,no,1,13,1,0
437,yes,0,27.16667,2.6796,0.007277,15.58333,no,no,4,36,1,4
476,yes,0,38.25,2.3,0.067609,129.4167,yes,no,0,3,1,3
1309,yes,1,40.16667,2.0,0.254827,424.6292,yes,no,1,121,1,10
1060,yes,0,35.16667,4.815,0.042952,171.5125,yes,no,2,48,1,18


In [5]:
data.shape

(1319, 12)

In [6]:
data.isnull().sum() #there are 10  missing values in fever col.

card           0
reports        0
age            0
income         0
share          0
expenditure    0
owner          0
selfemp        0
dependents     0
months         0
majorcards     0
active         0
dtype: int64

### **Train-test Split step**

In [7]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(data.drop(columns=['reports']),data['selfemp'],
                                                test_size=0.2)

In [8]:
X_train

Unnamed: 0,card,age,income,share,expenditure,owner,selfemp,dependents,months,majorcards,active
250,yes,24.50000,2.6000,0.069059,149.545000,no,yes,0,21,1,2
268,yes,46.91667,3.4000,0.031019,87.636670,no,no,0,42,1,4
556,yes,49.16667,1.7000,0.000706,0.000000,no,no,1,14,0,6
1158,yes,35.83333,2.5000,0.079149,164.810000,yes,no,1,42,0,11
521,yes,41.66667,2.7000,0.043561,97.512500,yes,no,2,28,1,11
...,...,...,...,...,...,...,...,...,...,...,...
147,yes,35.66667,3.5822,0.104998,313.352500,yes,no,1,84,1,10
725,no,23.50000,2.5000,0.000480,0.000000,no,no,0,36,0,0
53,yes,23.75000,2.8000,0.014875,34.125000,no,no,2,48,1,8
92,yes,43.41667,2.4000,0.034359,68.384170,no,no,0,12,1,0


## **2. Mentos Zindagi (Using Column Transformer)**

In [9]:
from sklearn.compose import ColumnTransformer

In [10]:
transformer = ColumnTransformer(transformers=[
    ('tnf3',OneHotEncoder(sparse=False,drop='first'),['card','owner', 'selfemp' ])
],remainder='passthrough')

In [11]:
X_train=transformer.fit_transform(X_train)



In [12]:
X_test=transformer.transform(X_test)

In [13]:
X_train_convert=pd.DataFrame(X_train)


In [14]:
X_test_convert=pd.DataFrame(X_test)

In [15]:
X_train_convert.sample(10), X_test_convert.sample(5)

(      0    1    2         3       4         5           6    7      8    9   \
 184  1.0  0.0  0.0  20.58333  2.7000  0.118307  266.190800  0.0  147.0  1.0   
 267  1.0  0.0  0.0  32.83333  2.5500  0.047929  101.683300  2.0    6.0  1.0   
 687  1.0  0.0  0.0  20.16667  2.1000  0.153850  269.153300  1.0   27.0  0.0   
 290  1.0  1.0  0.0  60.08333  6.1000  0.094348  479.436700  1.0  244.0  1.0   
 209  1.0  0.0  0.0  23.08333  2.0400  0.179497  304.895000  0.0   12.0  1.0   
 10   1.0  0.0  0.0  37.08333  3.0000  0.003667    8.333333  0.0   60.0  1.0   
 716  1.0  0.0  0.0  21.16667  2.4000  0.100833  201.582500  0.0    2.0  1.0   
 302  1.0  0.0  0.0  23.16667  3.9200  0.226991  741.504200  0.0   16.0  1.0   
 141  0.0  0.0  0.0  37.50000  2.6000  0.000462    0.000000  0.0   40.0  1.0   
 797  0.0  1.0  0.0  40.00000  4.9875  0.000241    0.000000  2.0   32.0  1.0   
 
        10  
 184  22.0  
 267  18.0  
 687   6.0  
 290  14.0  
 209   9.0  
 10    9.0  
 716   5.0  
 302   0.0  
 