# Feature Scaling Methods for ML

In [1]:
# Importing modules
import numpy as np
import pandas as pd

In [2]:
# Raw dataset as numpy arrays: one row per sample, three feature columns in x,
# one target value per sample in y
x = np.array(
    [
        [1000, 10, 100],
        [2000, 20, 200],
        [3000, 30, 300],
        [4000, 40, 400],
        [5000, 50, 500],
        [10000, 100, 1000],
    ]
)
y = np.array([1, 2, 3, 4, 5, 10])

# Same data as pandas dataframes: features and target kept separate so that
# only the features get scaled, then joined side by side for display
df_x = pd.DataFrame(x, columns=["x1", "x2", "x3"])
df_y = pd.DataFrame({"y": y})
raw_df = pd.concat([df_x, df_y], axis=1)

print(raw_df)

      x1   x2    x3   y
0   1000   10   100   1
1   2000   20   200   2
2   3000   30   300   3
3   4000   40   400   4
4   5000   50   500   5
5  10000  100  1000  10


## 1. Max Normalization

In [3]:
# Max Normalization
def max_norm(df):
    """Scale every column of ``df`` by that column's maximum value.

    Each value becomes ``value / column_max``, so the largest entry of a
    column maps to 1.0.

    A column whose maximum is exactly 0 cannot be scaled this way (it would
    divide by zero and produce NaN/inf), so such a column is divided by 1
    instead and comes back with its values unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric feature columns to scale. Not modified.

    Returns
    -------
    pandas.DataFrame
        New dataframe with the same index and columns as ``df``.
    """
    # pandas series with max value for each column
    max_series = df.max()

    # guard against division by zero: keep non-zero maxima, use 1 elsewhere
    safe_max_series = max_series.where(max_series != 0, 1)

    # dividing each column by its (safe) max value
    norm_df = df.divide(safe_max_series, axis=1)

    return norm_df

# Only the feature columns are scaled; the target column y is appended as-is
max_norm_df = pd.concat(
    [max_norm(df_x), df_y],
    axis=1,
)

print(max_norm_df)

    x1   x2   x3   y
0  0.1  0.1  0.1   1
1  0.2  0.2  0.2   2
2  0.3  0.3  0.3   3
3  0.4  0.4  0.4   4
4  0.5  0.5  0.5   5
5  1.0  1.0  1.0  10


## 2. Mean Normalization

In [4]:
# Mean Normalization
def mean_norm(df):
    """Scale every column of ``df`` by that column's mean value.

    Each value becomes ``value / column_mean``. Note that this only rescales
    by the mean: the data is not centred and is not divided by its range.

    A column whose mean is exactly 0 cannot be scaled this way (it would
    divide by zero and produce NaN/inf), so such a column is divided by 1
    instead and comes back with its values unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric feature columns to scale. Not modified.

    Returns
    -------
    pandas.DataFrame
        New dataframe with the same index and columns as ``df``.
    """
    # pandas series with mean value for each column
    mean_series = df.mean()

    # guard against division by zero: keep non-zero means, use 1 elsewhere
    safe_mean_series = mean_series.where(mean_series != 0, 1.0)

    # dividing each column by its (safe) mean value
    norm_df = df.divide(safe_mean_series, axis=1)

    return norm_df

# Only the feature columns are scaled; the target column y is appended as-is
mean_norm_df = pd.concat(
    [mean_norm(df_x), df_y],
    axis=1,
)

print(mean_norm_df)

     x1    x2    x3   y
0  0.24  0.24  0.24   1
1  0.48  0.48  0.48   2
2  0.72  0.72  0.72   3
3  0.96  0.96  0.96   4
4  1.20  1.20  1.20   5
5  2.40  2.40  2.40  10


## 3. Z-score Normalization

In [5]:
# Z-score Normalization
def z_score_norm(df):
    """Standardize every column of ``df`` to a z-score.

    Each value becomes ``(value - column_mean) / column_std``, where
    ``column_std`` is what ``DataFrame.std()`` returns with its default
    arguments (the sample standard deviation).

    A column whose standard deviation is 0 (all values equal) or NaN (e.g. a
    single-row dataframe) cannot be rescaled, so it is only centred: it is
    divided by 1 and comes back as 0.0 instead of NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric feature columns to standardize. Not modified.

    Returns
    -------
    pandas.DataFrame
        New dataframe with the same index and columns as ``df``.
    """
    # pandas series with mean value for each column
    mean_series = df.mean()
    # pandas series with std value for each column
    std_series = df.std()

    # guard against division by zero: keep positive stds, use 1 where the std
    # is 0 or NaN (the comparison is False for NaN, so NaN is replaced too)
    safe_std_series = std_series.where(std_series > 0, 1.0)

    # subtracting the column mean from each column and creating new df
    df_minus_mean = df.subtract(mean_series, axis=1)
    # dividing each column of new df by its (safe) std value
    norm_df = df_minus_mean.divide(safe_std_series, axis=1)

    return norm_df

# Only the feature columns are standardized; the target column y is appended as-is
z_score_norm_df = pd.concat(
    [z_score_norm(df_x), df_y],
    axis=1,
)

print(z_score_norm_df)

         x1        x2        x3   y
0 -0.993146 -0.993146 -0.993146   1
1 -0.679521 -0.679521 -0.679521   2
2 -0.365896 -0.365896 -0.365896   3
3 -0.052271 -0.052271 -0.052271   4
4  0.261354  0.261354  0.261354   5
5  1.829479  1.829479  1.829479  10
