# 09. Normalization & Standardization

## 09.1. Min-Max Normalization

### Preparation

In [1]:
# Import modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Build reproducible sample data: 100 daily timestamps and 100 random integers
np.random.seed(85)  # fixed seed so every run draws the same values

values = np.random.randint(low=0, high=100, size=100)  # integers in [0, 100)
date = pd.date_range('2023-01-01', periods=100, freq='D')

date, values

(DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',
                '2023-01-05', '2023-01-06', '2023-01-07', '2023-01-08',
                '2023-01-09', '2023-01-10', '2023-01-11', '2023-01-12',
                '2023-01-13', '2023-01-14', '2023-01-15', '2023-01-16',
                '2023-01-17', '2023-01-18', '2023-01-19', '2023-01-20',
                '2023-01-21', '2023-01-22', '2023-01-23', '2023-01-24',
                '2023-01-25', '2023-01-26', '2023-01-27', '2023-01-28',
                '2023-01-29', '2023-01-30', '2023-01-31', '2023-02-01',
                '2023-02-02', '2023-02-03', '2023-02-04', '2023-02-05',
                '2023-02-06', '2023-02-07', '2023-02-08', '2023-02-09',
                '2023-02-10', '2023-02-11', '2023-02-12', '2023-02-13',
                '2023-02-14', '2023-02-15', '2023-02-16', '2023-02-17',
                '2023-02-18', '2023-02-19', '2023-02-20', '2023-02-21',
                '2023-02-22', '2023-02-23', '2023-02-24', '2023-

In [3]:
# Create a dataframe with one row per day: the timestamp and its value
data = pd.DataFrame(dict(Date=date, Values=values))

data

Unnamed: 0,Date,Values
0,2023-01-01,32
1,2023-01-02,13
2,2023-01-03,68
3,2023-01-04,51
4,2023-01-05,13
...,...,...
95,2023-04-06,36
96,2023-04-07,12
97,2023-04-08,2
98,2023-04-09,19


### Min-Max Normalization

In [4]:
# Find the smallest and largest entries of the 'Values' column
min_value, max_value = data['Values'].min(), data['Values'].max()

min_value, max_value

(0, 98)

In [5]:
# Calculate min-max normalization: (x - min) / (max - min).
# The minimum maps to 0 and the maximum maps to 1.
value_range = max_value - min_value
if value_range == 0:
    # All entries are identical, so the formula would divide by zero;
    # store 0.0 for every row instead of an undefined result.
    data['Value_Normalized'] = 0.0
else:
    data['Value_Normalized'] = (data['Values'] - min_value) / value_range

data

Unnamed: 0,Date,Values,Value_Normalized
0,2023-01-01,32,0.326531
1,2023-01-02,13,0.132653
2,2023-01-03,68,0.693878
3,2023-01-04,51,0.520408
4,2023-01-05,13,0.132653
...,...,...,...
95,2023-04-06,36,0.367347
96,2023-04-07,12,0.122449
97,2023-04-08,2,0.020408
98,2023-04-09,19,0.193878


## 09.2. Standardization

### Preparation

In [6]:
# Install libraries
!pip install scikit-learn

Defaulting to user installation because normal site-packages is not writeable


In [7]:
# Import modules
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [8]:
# Build the sample series: 100 consecutive days and a fixed 10-value pattern
# repeated 10 times (100 values in total).
# The seed call is kept as-is; nothing in this cell draws random numbers.
np.random.seed(85)

values = 10 * [10, 12, 11, 15, 9, 13, 8, 14, 10, 16]
date = pd.date_range('2023-01-01', periods=100, freq='D')

date, values

(DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',
                '2023-01-05', '2023-01-06', '2023-01-07', '2023-01-08',
                '2023-01-09', '2023-01-10', '2023-01-11', '2023-01-12',
                '2023-01-13', '2023-01-14', '2023-01-15', '2023-01-16',
                '2023-01-17', '2023-01-18', '2023-01-19', '2023-01-20',
                '2023-01-21', '2023-01-22', '2023-01-23', '2023-01-24',
                '2023-01-25', '2023-01-26', '2023-01-27', '2023-01-28',
                '2023-01-29', '2023-01-30', '2023-01-31', '2023-02-01',
                '2023-02-02', '2023-02-03', '2023-02-04', '2023-02-05',
                '2023-02-06', '2023-02-07', '2023-02-08', '2023-02-09',
                '2023-02-10', '2023-02-11', '2023-02-12', '2023-02-13',
                '2023-02-14', '2023-02-15', '2023-02-16', '2023-02-17',
                '2023-02-18', '2023-02-19', '2023-02-20', '2023-02-21',
                '2023-02-22', '2023-02-23', '2023-02-24', '2023-

In [9]:
# Create a dataframe with one row per day: the timestamp and its value
data = pd.DataFrame(dict(Date=date, Values=values))

data

Unnamed: 0,Date,Values
0,2023-01-01,10
1,2023-01-02,12
2,2023-01-03,11
3,2023-01-04,15
4,2023-01-05,9
...,...,...
95,2023-04-06,13
96,2023-04-07,8
97,2023-04-08,14
98,2023-04-09,10


### Standardization

In [10]:
# Get X: the 'Values' column reshaped into a 2-D array with a single column
# (one row per sample), which is the layout passed to the scaler afterwards.
X = np.reshape(data['Values'].values, (-1, 1))

In [11]:
# Standardization: first learn the scaling statistics from X,
# then apply them to X itself.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

X_scaled

array([[-0.71374643],
       [ 0.07930516],
       [-0.31722063],
       [ 1.26888254],
       [-1.11027222],
       [ 0.47583095],
       [-1.50679801],
       [ 0.87235674],
       [-0.71374643],
       [ 1.66540833],
       [-0.71374643],
       [ 0.07930516],
       [-0.31722063],
       [ 1.26888254],
       [-1.11027222],
       [ 0.47583095],
       [-1.50679801],
       [ 0.87235674],
       [-0.71374643],
       [ 1.66540833],
       [-0.71374643],
       [ 0.07930516],
       [-0.31722063],
       [ 1.26888254],
       [-1.11027222],
       [ 0.47583095],
       [-1.50679801],
       [ 0.87235674],
       [-0.71374643],
       [ 1.66540833],
       [-0.71374643],
       [ 0.07930516],
       [-0.31722063],
       [ 1.26888254],
       [-1.11027222],
       [ 0.47583095],
       [-1.50679801],
       [ 0.87235674],
       [-0.71374643],
       [ 1.66540833],
       [-0.71374643],
       [ 0.07930516],
       [-0.31722063],
       [ 1.26888254],
       [-1.11027222],
       [ 0

In [12]:
# Store the standardized values in a new column.
# X_scaled is a single-column 2-D array; flatten it to 1-D so there is
# exactly one value per dataframe row.
data['Value_scaled'] = X_scaled.ravel()

data

Unnamed: 0,Date,Values,Value_scaled
0,2023-01-01,10,-0.713746
1,2023-01-02,12,0.079305
2,2023-01-03,11,-0.317221
3,2023-01-04,15,1.268883
4,2023-01-05,9,-1.110272
...,...,...,...
95,2023-04-06,13,0.475831
96,2023-04-07,8,-1.506798
97,2023-04-08,14,0.872357
98,2023-04-09,10,-0.713746
