In [1]:
import numpy as np
import pandas as pd

In [2]:
# Toy data set: three numeric variables with five observations each.
V1, V2, V3 = (
    np.array(values)
    for values in (
        [1, 3, 6, 5, 7],
        [7, 7, 5, 8, 12],
        [6, 12, 5, 6, 14],
    )
)

# Assemble the columns into a frame and cast every column to float in one
# chained expression instead of rebinding `df` in a second statement.
df = pd.DataFrame(dict(V1=V1, V2=V2, V3=V3)).astype(float)
df

Unnamed: 0,V1,V2,V3
0,1.0,7.0,6.0
1,3.0,7.0,12.0
2,6.0,5.0,5.0
3,5.0,8.0,6.0
4,7.0,12.0,14.0


## Standardization

In [3]:
from sklearn import preprocessing

In [4]:
# Standardize each column (z-score): subtract the column mean and divide by the
# column standard deviation. The printed values match the population form
# (ddof=0), e.g. V1's first entry is (1 - 4.4) / 2.154 ~= -1.578.
# The result is a plain NumPy array, so the V1/V2/V3 labels are not carried over.
preprocessing.scale(df)

array([[-1.57841037, -0.34554737, -0.70920814],
       [-0.64993368, -0.34554737,  0.92742603],
       [ 0.74278135, -1.2094158 , -0.98198051],
       [ 0.27854301,  0.08638684, -0.70920814],
       [ 1.2070197 ,  1.81412369,  1.47297076]])

In [5]:
# Re-display df: the values are the same as before the scale() call, showing
# that scale() returned a new array and left the frame untouched.
df

Unnamed: 0,V1,V2,V3
0,1.0,7.0,6.0
1,3.0,7.0,12.0
2,6.0,5.0,5.0
3,5.0,8.0,6.0
4,7.0,12.0,14.0


In [6]:
# Per-column means of the raw data; these are the values that the
# standardization above subtracts from each column.
df.mean()

V1    4.4
V2    7.8
V3    8.6
dtype: float64

## Normalization

In [7]:
# Rescale each ROW (sample) to unit Euclidean length; unlike scale(), this works
# across a row rather than down a column. Example: row 0 is [1, 7, 6] with norm
# sqrt(86) ~= 9.27, giving 1 / 9.27 ~= 0.108 as its first entry.
preprocessing.normalize(df)

array([[0.10783277, 0.75482941, 0.64699664],
       [0.21107926, 0.49251828, 0.84431705],
       [0.64699664, 0.53916387, 0.53916387],
       [0.4472136 , 0.71554175, 0.53665631],
       [0.35491409, 0.60842415, 0.70982818]])

## `Max` - `Min` Transformation

In [8]:
# Construct (and only display) a MinMaxScaler targeting the range [10, 20]; the
# instance is not bound to a name, so it is discarded after this cell.
# NOTE(review): the scaler actually fitted below uses (10, 200), not (10, 20) --
# confirm which range was intended.
preprocessing.MinMaxScaler(feature_range = (10, 20))

0,1,2
,"feature_range  feature_range: tuple (min, max), default=(0, 1) Desired range of transformed data.","(10, ...)"
,"copy  copy: bool, default=True Set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array).",True
,"clip  clip: bool, default=False Set to True to clip transformed values of held-out data to provided `feature_range`. Since this parameter will clip values, `inverse_transform` may not be able to restore the original data. .. note::  Setting `clip=True` does not prevent feature drift (a distribution  shift between training and test data). The transformed values are clipped  to the `feature_range`, which helps avoid unintended behavior in models  sensitive to out-of-range inputs (e.g. linear models). Use with care,  as clipping can distort the distribution of test data. .. versionadded:: 0.24",False


In [9]:
# Scaler configured to map every column onto the interval [10, 200].
scaler = preprocessing.MinMaxScaler(feature_range = (10, 200))

In [10]:
# Learn each column's min and max from df, then transform: every column's
# minimum lands on 10 and its maximum on 200, with the other values placed
# proportionally in between (e.g. V1 = 3 -> 10 + (3 - 1) / (7 - 1) * 190 ~= 73.33).
scaler.fit_transform(df)

array([[ 10.        ,  64.28571429,  31.11111111],
       [ 73.33333333,  64.28571429, 157.77777778],
       [168.33333333,  10.        ,  10.        ],
       [136.66666667,  91.42857143,  31.11111111],
       [200.        , 200.        , 200.        ]])

## Data Standardization - Transformation

In [11]:
# Show the untransformed frame again as the reference point for this section.
df

Unnamed: 0,V1,V2,V3
0,1.0,7.0,6.0
1,3.0,7.0,12.0
2,6.0,5.0,5.0
3,5.0,8.0,6.0
4,7.0,12.0,14.0


In [12]:
# Column-wise standardization (zero mean, unit variance per column). This is the
# same call, with the same output, as in the "Standardization" section earlier
# in the notebook.
preprocessing.scale(df)

array([[-1.57841037, -0.34554737, -0.70920814],
       [-0.64993368, -0.34554737,  0.92742603],
       [ 0.74278135, -1.2094158 , -0.98198051],
       [ 0.27854301,  0.08638684, -0.70920814],
       [ 1.2070197 ,  1.81412369,  1.47297076]])

### Normalization

In [13]:
# Row-wise normalization to unit Euclidean length. This is the same call, with
# the same output, as in the "Normalization" section earlier in the notebook.
preprocessing.normalize(df)

array([[0.10783277, 0.75482941, 0.64699664],
       [0.21107926, 0.49251828, 0.84431705],
       [0.64699664, 0.53916387, 0.53916387],
       [0.4472136 , 0.71554175, 0.53665631],
       [0.35491409, 0.60842415, 0.70982818]])

### Binarize Transformation

In [14]:
# Binarizer with a cut-off of 5.
# NOTE(review): Binarizer is documented as stateless, so .fit() should learn
# nothing from df and is here only to follow the estimator API -- confirm
# against the installed scikit-learn version.
binarizer = preprocessing.Binarizer(threshold = 5).fit(df)

In [16]:
# Values strictly greater than the threshold become 1.0, all others 0.0.
# A value equal to the threshold maps to 0: row 2 is [6, 5, 5] -> [1, 0, 0].
binarizer.transform(df)

array([[0., 1., 1.],
       [0., 1., 1.],
       [1., 0., 0.],
       [0., 1., 1.],
       [1., 1., 1.]])

### 0 - 1 Transformation

In [19]:
# NOTE(review): this import sits mid-notebook; numpy and pandas are imported in
# the first cell. Consider moving it there so a fresh-kernel run shows all
# dependencies up front.
import seaborn as sns
# Load seaborn's "tips" example dataset.
tips = sns.load_dataset("tips")
# From here on `df` refers to a copy of tips, no longer the small V1/V2/V3 frame
# used in the sections above.
df = tips.copy()
# Separate working copy that receives the encoded columns in the cells below,
# leaving `df` and `tips` as loaded.
df_l = df.copy()

In [20]:
# Inspect the working copy before any encoded columns are added.
df_l

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [21]:
# Integer-encode `sex` through the categorical accessor: each value is replaced
# by the code of its category. In the output of the next cell this gives
# Male -> 0 and Female -> 1.
df_l["new_sex"] = df_l["sex"].cat.codes

In [22]:
# Confirm that the new_sex column was appended to the working copy.
df_l

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,new_sex
0,16.99,1.01,Female,No,Sun,Dinner,2,1
1,10.34,1.66,Male,No,Sun,Dinner,3,0
2,21.01,3.50,Male,No,Sun,Dinner,3,0
3,23.68,3.31,Male,No,Sun,Dinner,2,0
4,24.59,3.61,Female,No,Sun,Dinner,4,1
...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,0
240,27.18,2.00,Female,Yes,Sat,Dinner,2,1
241,22.67,2.00,Male,Yes,Sat,Dinner,2,0
242,17.82,1.75,Male,No,Sat,Dinner,2,0


In [23]:
# Encoder that turns distinct labels into integer codes (fitted in the next cell).
lbe = preprocessing.LabelEncoder()

In [25]:
# Fit the encoder on `sex` and store the resulting codes in a second column.
# Note that the mapping is the reverse of new_sex: here Female -> 0 and Male -> 1
# (see the output), whereas cat.codes produced Male -> 0 and Female -> 1. The
# two columns must therefore not be used interchangeably.
df_l["more_new_sex"] = lbe.fit_transform(df_l["sex"])

df_l

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,new_sex,more_new_sex
0,16.99,1.01,Female,No,Sun,Dinner,2,1,0
1,10.34,1.66,Male,No,Sun,Dinner,3,0,1
2,21.01,3.50,Male,No,Sun,Dinner,3,0,1
3,23.68,3.31,Male,No,Sun,Dinner,2,0,1
4,24.59,3.61,Female,No,Sun,Dinner,4,1,0
...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,0,1
240,27.18,2.00,Female,Yes,Sat,Dinner,2,1,0
241,22.67,2.00,Male,Yes,Sat,Dinner,2,0,1
242,17.82,1.75,Male,No,Sat,Dinner,2,0,1
