In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import preprocessing

In [2]:
# Seed NumPy's legacy global RNG so any later np.random call is repeatable.
np.random.seed(1)

# Ten-row toy customer table. Row 6 has no Age and row 4 has no Salary
# (both np.nan); those two gaps are what the fillna cells below impute.
df = pd.DataFrame(
    dict(
        Country=['France', 'Spain', 'Germany', 'Spain', 'Germany',
                 'France', 'Spain', 'France', 'Germany', 'France'],
        Age=[44, 27, 30, 38, 40, 35, np.nan, 48, 50, 37],
        Salary=[72000, 48000, 54000, 61000, np.nan,
                58000, 52000, 79000, 83000, 67000],
        Purchased=['No', 'Yes', 'No', 'No', 'Yes',
                   'Yes', 'No', 'Yes', 'No', 'Yes'],
    )
)

In [3]:
# Show the raw frame as built above, including the NaN in Age and in Salary.
df

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,No
1,Spain,27.0,48000.0,Yes
2,Germany,30.0,54000.0,No
3,Spain,38.0,61000.0,No
4,Germany,40.0,,Yes
5,France,35.0,58000.0,Yes
6,Spain,,52000.0,No
7,France,48.0,79000.0,Yes
8,Germany,50.0,83000.0,No
9,France,37.0,67000.0,Yes


In [7]:
# One-hot encode Country into Country_France / Country_Germany /
# Country_Spain indicator columns; the other columns pass through unchanged.
# dtype=int pins the indicators to 0/1 integers: without it pandas >= 2.0
# emits booleans (True/False) and older releases emit uint8, so the frame
# would render and behave differently depending on the installed version.
data = pd.get_dummies(df, columns=['Country'], dtype=int)

In [6]:
# Show the frame with Country expanded into one indicator column per value.
data

Unnamed: 0,Age,Salary,Purchased,Country_France,Country_Germany,Country_Spain
0,44.0,72000.0,No,1,0,0
1,27.0,48000.0,Yes,0,0,1
2,30.0,54000.0,No,0,1,0
3,38.0,61000.0,No,0,0,1
4,40.0,,Yes,0,1,0
5,35.0,58000.0,Yes,1,0,0
6,,52000.0,No,0,0,1
7,48.0,79000.0,Yes,1,0,0
8,50.0,83000.0,No,0,1,0
9,37.0,67000.0,Yes,1,0,0


In [8]:
# Impute the missing Age with the column mean (Series.mean skips NaN).
# The result is assigned back rather than using fillna(inplace=True) on
# data['Age']: that form mutates an intermediate Series, which pandas flags
# as chained assignment and which leaves `data` untouched once
# Copy-on-Write is active (the default from pandas 3.0).
data['Age'] = data['Age'].fillna(data['Age'].mean())

In [9]:
# Impute the missing Salary with the column mean (Series.mean skips NaN).
# The result is assigned back rather than using fillna(inplace=True) on
# data['Salary']: that form mutates an intermediate Series, which pandas
# flags as chained assignment and which leaves `data` untouched once
# Copy-on-Write is active (the default from pandas 3.0).
data['Salary'] = data['Salary'].fillna(data['Salary'].mean())

In [9]:
# Show the frame after the Age and Salary gaps were filled with column means.
data

Unnamed: 0,Age,Salary,Purchased,Country_France,Country_Germany,Country_Spain
0,44.0,72000.0,No,1,0,0
1,27.0,48000.0,Yes,0,0,1
2,30.0,54000.0,No,0,1,0
3,38.0,61000.0,No,0,0,1
4,40.0,63777.777778,Yes,0,1,0
5,35.0,58000.0,Yes,1,0,0
6,38.777778,52000.0,No,0,0,1
7,48.0,79000.0,Yes,1,0,0
8,50.0,83000.0,No,0,1,0
9,37.0,67000.0,Yes,1,0,0


### Standard Scaler

In [10]:
from sklearn.preprocessing import StandardScaler


In [11]:
# Standardize Age and Salary (StandardScaler: per-column zero mean and unit
# variance) and write the result back into `data`.
# NOTE: Age/Salary are overwritten in place, so every later scaler cell
# starts from these standardized values rather than the original figures.
standardized = StandardScaler().fit_transform(data[['Age', 'Salary']])
data[['Age', 'Salary']] = standardized
data

Unnamed: 0,Age,Salary,Purchased,Country_France,Country_Germany,Country_Spain
0,0.758874,0.7494733,No,1,0,0
1,-1.711504,-1.438178,Yes,0,0,1
2,-1.275555,-0.8912655,No,0,1,0
3,-0.113024,-0.2532004,No,0,0,1
4,0.177609,6.632192e-16,Yes,0,1,0
5,-0.548973,-0.5266569,Yes,1,0,0
6,0.0,-1.07357,No,0,0,1
7,1.34014,1.387538,Yes,1,0,0
8,1.630773,1.752147,No,0,1,0
9,-0.25834,0.2937125,Yes,1,0,0


### MaxAbsScaler

In [12]:
from sklearn.preprocessing import MaxAbsScaler

In [13]:
# Divide Age and Salary by each column's largest absolute value
# (MaxAbsScaler), so the results lie in [-1, 1].
# NOTE: the input is whatever `data` currently holds, i.e. the columns as
# left by the preceding cells, and it is overwritten in place.
max_abs = MaxAbsScaler()
data[['Age', 'Salary']] = max_abs.fit_transform(data[['Age', 'Salary']])
data

Unnamed: 0,Age,Salary,Purchased,Country_France,Country_Germany,Country_Spain
0,0.443396,0.4277457,No,1,0,0
1,-1.0,-0.8208092,Yes,0,0,1
2,-0.745283,-0.5086705,No,0,1,0
3,-0.066038,-0.1445087,No,0,0,1
4,0.103774,3.78518e-16,Yes,0,1,0
5,-0.320755,-0.300578,Yes,1,0,0
6,0.0,-0.6127168,No,0,0,1
7,0.783019,0.7919075,Yes,1,0,0
8,0.95283,1.0,No,0,1,0
9,-0.150943,0.1676301,Yes,1,0,0


### MinMaxScaler

In [15]:
from sklearn.preprocessing import MinMaxScaler

In [16]:
# Rescale Age and Salary linearly so each column's minimum maps to 0 and
# its maximum to 1 (MinMaxScaler's default feature range).
# NOTE: the input is whatever `data` currently holds, i.e. the columns as
# left by the preceding cells, and it is overwritten in place.
min_max = MinMaxScaler()
data[['Age', 'Salary']] = min_max.fit_transform(data[['Age', 'Salary']])
data

Unnamed: 0,Age,Salary,Purchased,Country_France,Country_Germany,Country_Spain
0,0.73913,0.685714,No,1,0,0
1,0.0,0.0,Yes,0,0,1
2,0.130435,0.171429,No,0,1,0
3,0.478261,0.371429,No,0,0,1
4,0.565217,0.450794,Yes,0,1,0
5,0.347826,0.285714,Yes,1,0,0
6,0.512077,0.114286,No,0,0,1
7,0.913043,0.885714,Yes,1,0,0
8,1.0,1.0,No,0,1,0
9,0.434783,0.542857,Yes,1,0,0


### RobustScaler

In [17]:
from sklearn.preprocessing import RobustScaler

In [18]:
# Center Age and Salary on the column median and scale by the
# interquartile range (RobustScaler defaults).
# NOTE: the input is whatever `data` currently holds, i.e. the columns as
# left by the preceding cells, and it is overwritten in place.
robust = RobustScaler()
data[['Age', 'Salary']] = robust.fit_transform(data[['Age', 'Salary']])
data

Unnamed: 0,Age,Salary,Purchased,Country_France,Country_Germany,Country_Spain
0,0.748148,0.610229,No,1,0,0
1,-1.518519,-0.91358,Yes,0,0,1
2,-1.118519,-0.532628,No,0,1,0
3,-0.051852,-0.088183,No,0,0,1
4,0.214815,0.088183,Yes,0,1,0
5,-0.451852,-0.27866,Yes,1,0,0
6,0.051852,-0.659612,No,0,0,1
7,1.281481,1.054674,Yes,1,0,0
8,1.548148,1.308642,No,0,1,0
9,-0.185185,0.292769,Yes,1,0,0


### QuantileTransformer

In [19]:
from sklearn.preprocessing import QuantileTransformer

In [23]:
# Map Age and Salary onto their empirical quantiles (the default output
# distribution is uniform on [0, 1]). n_quantiles=10 matches the ten rows
# of the frame; random_state=0 fixes the transformer's internal RNG.
qt = QuantileTransformer(n_quantiles=10, random_state=0)
# fit_transform returns a new array and does not modify its input, so the
# result must be written back -- otherwise `data` is displayed unchanged
# and the following cells never see the quantile-transformed values.
data[['Age', 'Salary']] = qt.fit_transform(data[['Age', 'Salary']])
data

Unnamed: 0,Age,Salary,Purchased,Country_France,Country_Germany,Country_Spain
0,0.777778,0.777778,No,1,0,0
1,0.0,0.0,Yes,0,0,1
2,0.111111,0.222222,No,0,1,0
3,0.444444,0.444444,No,0,0,1
4,0.666667,0.555556,Yes,0,1,0
5,0.222222,0.333333,Yes,1,0,0
6,0.555556,0.111111,No,0,0,1
7,0.888889,0.888889,Yes,1,0,0
8,1.0,1.0,No,0,1,0
9,0.333333,0.666667,Yes,1,0,0


### Power Transformer Scaler

In [24]:
from sklearn.preprocessing import PowerTransformer

In [25]:
# Apply PowerTransformer with its defaults (Yeo-Johnson transform followed
# by standardization) to Age and Salary.
# NOTE: the input is whatever `data` currently holds, i.e. the columns as
# left by the preceding cells, and it is overwritten in place.
power = PowerTransformer()
data[['Age', 'Salary']] = power.fit_transform(data[['Age', 'Salary']])
data

Unnamed: 0,Age,Salary,Purchased,Country_France,Country_Germany,Country_Spain
0,0.874886,0.874886,No,1,0,0
1,-1.610567,-1.610567,Yes,0,0,1
2,-1.230735,-0.860716,No,0,1,0
3,-0.145869,-0.145869,No,0,0,1
4,0.54069,0.200617,Yes,0,1,0
5,-0.860716,-0.499397,Yes,1,0,0
6,0.200617,-1.230735,No,0,0,1
7,1.203665,1.203665,Yes,1,0,0
8,1.527427,1.527427,No,0,1,0
9,-0.499397,0.54069,Yes,1,0,0


### Unit Vector Scaler / Normalizer

In [26]:
from sklearn.preprocessing import Normalizer
# Normalizer works per row, not per column: each (Age, Salary) pair is
# divided by its own L2 norm so the pair has unit length.
# NOTE: the input is whatever `data` currently holds, i.e. the columns as
# left by the preceding cells, and it is overwritten in place.
scaler = Normalizer(norm='l2')
unit_rows = scaler.fit_transform(data[['Age', 'Salary']])
data[['Age', 'Salary']] = unit_rows
data

Unnamed: 0,Age,Salary,Purchased,Country_France,Country_Germany,Country_Spain
0,0.707107,0.707107,No,1,0,0
1,-0.707107,-0.707107,Yes,0,0,1
2,-0.819481,-0.573106,No,0,1,0
3,-0.707107,-0.707107,No,0,0,1
4,0.937545,0.347865,Yes,0,1,0
5,-0.864952,-0.501854,Yes,1,0,0
6,0.160882,-0.986974,No,0,0,1
7,0.707107,0.707107,Yes,1,0,0
8,0.707107,0.707107,No,0,1,0
9,-0.678499,0.734601,Yes,1,0,0
