In [43]:
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 

In [44]:
# Toy customer dataset, written one row per record: (Country, Age, Salary, Purchased).
# Missing values are entered as the placeholder string 'NaN', so 'Age' and 'Salary'
# contain a mix of integers and strings until they are cast to float.
# NOTE(review): the fifth record's country is ' Germany' with a leading space, so it is a
# different category from 'Germany' when the column is one-hot encoded -- confirm intent.
records = [
    ('France', 44, 72000, 'No'),
    ('Spain', 27, 48000, 'Yes'),
    ('Germany', 30, 54000, 'No'),
    ('Spain', 38, 61000, 'No'),
    (' Germany', 40, 'NaN', 'Yes'),
    ('France', 35, 58000, 'Yes'),
    ('Spain', 'NaN', 52000, 'No'),
    ('France', 48, 79000, 'Yes'),
    ('Germany', 50, 83000, 'No'),
    ('France', 57, 67000, 'Yes'),
]
df = pd.DataFrame(records, columns=['Country', 'Age', 'Salary', 'Purchased'])

In [45]:
# Cast the numeric columns to float; the 'NaN' placeholder strings become real NaN values.
df = df.astype(dict.fromkeys(['Age', 'Salary'], 'float'))
df.dtypes


Country       object
Age          float64
Salary       float64
Purchased     object
dtype: object

## **METHOD 1:** Normalisation Method - **Simple Feature Scaling (Min-Max)**

The data is rescaled so that the new values lie in the range [0, 1]: \
 **Xnorm = (X - Xmin) / (Xmax - Xmin)**

First, we have to fill the **NaN** values using the **mean method** (each missing value is replaced by the mean of its column).

In [46]:
# Impute the missing 'Salary' and 'Age' entries with the mean of their own column.
# The result is assigned back to df rather than calling fillna(..., inplace=True) on a
# column selection: that form acts on a temporary object, is deprecated in pandas 2.x,
# and leaves df unchanged when Copy-on-Write is enabled.
df = df.fillna({
    'Salary': df['Salary'].mean(),
    'Age': df['Age'].mean(),
})
df

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,No
1,Spain,27.0,48000.0,Yes
2,Germany,30.0,54000.0,No
3,Spain,38.0,61000.0,No
4,Germany,40.0,63777.777778,Yes
5,France,35.0,58000.0,Yes
6,Spain,41.0,52000.0,No
7,France,48.0,79000.0,Yes
8,Germany,50.0,83000.0,No
9,France,57.0,67000.0,Yes


Transform the Purchased values: Yes ==> 1, No ==> 0

In [47]:
# Encode the target as integers: 'No' -> 0, any other value (here 'Yes') -> 1.
df['Purchased'] = df['Purchased'].ne('No').astype('int64')

Transform countries into columns of 0 and 1 (Dummy method)

In [48]:
# One-hot encode 'Country' into one indicator column per distinct value.
# dtype=int pins the indicators to 0/1; pandas >= 2.0 would otherwise return
# True/False booleans.
df = pd.get_dummies(df, columns=['Country'], dtype=int)

In [49]:
# Display the encoded frame to check the new indicator columns.
# NOTE(review): a separate 'Country_ Germany' column appears because one input value
# is ' Germany' with a leading space.
df

Unnamed: 0,Age,Salary,Purchased,Country_ Germany,Country_France,Country_Germany,Country_Spain
0,44.0,72000.0,0,0,1,0,0
1,27.0,48000.0,1,0,0,0,1
2,30.0,54000.0,0,0,0,1,0
3,38.0,61000.0,0,0,0,0,1
4,40.0,63777.777778,1,1,0,0,0
5,35.0,58000.0,1,0,1,0,0
6,41.0,52000.0,0,0,0,0,1
7,48.0,79000.0,1,0,1,0,0
8,50.0,83000.0,0,0,0,1,0
9,57.0,67000.0,1,0,1,0,0


In [55]:
# List the column labels of the encoded frame.
df.columns

Index(['Age', 'Salary', 'Purchased', 'Country_ Germany', 'Country_France',
       'Country_Germany', 'Country_Spain'],
      dtype='object')

In [50]:
# Min-max scaler: with its default settings it rescales each column to the [0, 1] range.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [56]:
# Learn each column's min and max from df and rescale it in a single call.
scaled_features = scaler.fit_transform(df)

In [57]:
# Wrap the scaled array in a DataFrame. The column labels and index are taken from df
# itself, so there is no hand-typed list to keep in sync with the get_dummies output.
df_minmax = pd.DataFrame(data=scaled_features, columns=df.columns, index=df.index)

In [58]:
# Display the min-max scaled frame.
df_minmax

Unnamed: 0,Age,Salary,Purchased,Country_ Germany,Country_France,Country_Germany,Country_Spain
0,0.566667,0.685714,0.0,0.0,1.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,0.0,1.0
2,0.1,0.171429,0.0,0.0,0.0,1.0,0.0
3,0.366667,0.371429,0.0,0.0,0.0,0.0,1.0
4,0.433333,0.450794,1.0,1.0,0.0,0.0,0.0
5,0.266667,0.285714,1.0,0.0,1.0,0.0,0.0
6,0.466667,0.114286,0.0,0.0,0.0,0.0,1.0
7,0.7,0.885714,1.0,0.0,1.0,0.0,0.0
8,0.766667,1.0,0.0,0.0,0.0,1.0,0.0
9,1.0,0.542857,1.0,0.0,1.0,0.0,0.0


## **METHOD 2:** **Standardization - Z-score**
Standardization is the process of rescaling the features so that they have the properties of a standard **Gaussian distribution**, with mean µ = 0 and standard deviation σ = 1.

**Z = (X - Xmean) / Sigma**

In [59]:
# Standard (z-score) scaler used for METHOD 2.
from sklearn.preprocessing import StandardScaler
sc_x = StandardScaler()

In [60]:
# Fit a standard scaler on df and standardize every column in one step.
# The scaler has its own name so that sc_x only ever holds the scaled data; calling
# fit_transform on sc_x itself fails on a re-run, once sc_x has become an array.
# NOTE(review): this also standardizes the 0/1 'Purchased' and country indicator
# columns -- confirm that is intended.
standard_scaler = StandardScaler()
sc_x = standard_scaler.fit_transform(df)
sc_x

array([[ 3.44577291e-01,  7.49473254e-01, -1.00000000e+00,
        -3.33333333e-01,  1.22474487e+00, -5.00000000e-01,
        -6.54653671e-01],
       [-1.60802736e+00, -1.43817841e+00,  1.00000000e+00,
        -3.33333333e-01, -8.16496581e-01, -5.00000000e-01,
         1.52752523e+00],
       [-1.26345007e+00, -8.91265492e-01, -1.00000000e+00,
        -3.33333333e-01, -8.16496581e-01,  2.00000000e+00,
        -6.54653671e-01],
       [-3.44577291e-01, -2.53200424e-01, -1.00000000e+00,
        -3.33333333e-01, -8.16496581e-01, -5.00000000e-01,
         1.52752523e+00],
       [-1.14859097e-01,  6.63219199e-16,  1.00000000e+00,
         3.00000000e+00, -8.16496581e-01, -5.00000000e-01,
        -6.54653671e-01],
       [-6.89154581e-01, -5.26656882e-01,  1.00000000e+00,
        -3.33333333e-01,  1.22474487e+00, -5.00000000e-01,
        -6.54653671e-01],
       [ 0.00000000e+00, -1.07356980e+00, -1.00000000e+00,
        -3.33333333e-01, -8.16496581e-01, -5.00000000e-01,
         1.5275252

In [62]:
# Wrap the standardized values in a DataFrame, reusing df's column labels and index
# instead of a hand-typed list that must match the get_dummies output exactly.
sc_x = pd.DataFrame(data=sc_x, columns=df.columns, index=df.index)
sc_x

Unnamed: 0,Age,Salary,Purchased,Country_ Germany,Country_France,Country_Germany,Country_Spain
0,0.344577,0.7494733,-1.0,-0.333333,1.224745,-0.5,-0.654654
1,-1.608027,-1.438178,1.0,-0.333333,-0.816497,-0.5,1.527525
2,-1.26345,-0.8912655,-1.0,-0.333333,-0.816497,2.0,-0.654654
3,-0.344577,-0.2532004,-1.0,-0.333333,-0.816497,-0.5,1.527525
4,-0.114859,6.632192e-16,1.0,3.0,-0.816497,-0.5,-0.654654
5,-0.689155,-0.5266569,1.0,-0.333333,1.224745,-0.5,-0.654654
6,0.0,-1.07357,-1.0,-0.333333,-0.816497,-0.5,1.527525
7,0.804014,1.387538,1.0,-0.333333,1.224745,-0.5,-0.654654
8,1.033732,1.752147,-1.0,-0.333333,-0.816497,2.0,-0.654654
9,1.837746,0.2937125,1.0,-0.333333,1.224745,-0.5,-0.654654
