## Import

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler,MaxAbsScaler
import numpy as np


## Create Sample Data

In [2]:

# Toy dataset: three features that live on very different numeric scales,
# which is what makes feature scaling worth demonstrating.
data = dict(
    Income=[15000, 200000, 45000, 70000, 30000],  # widest range; 200000 sits far above the other values
    Age=[25, 45, 30, 60, 22],                      # moderate range (22-60)
    Experience=[1, 15, 5, 20, 2],                  # small range (1-20)
)
df = pd.DataFrame(data)
df

Unnamed: 0,Income,Age,Experience
0,15000,25,1
1,200000,45,15
2,45000,30,5
3,70000,60,20
4,30000,22,2


In [3]:
# Feature matrix: all three columns take part in the scaling demo.
feature_cols = ['Income', 'Age', 'Experience']
X = df[feature_cols]

## Split Data (Essential Step for preventing Data Leakage)

In [4]:
 # Split BEFORE fitting any scaler so that no statistic is ever learned from test rows.
 # The fixed random_state keeps the split (and every output below) reproducible.
 X_train, X_test = train_test_split(X, random_state=42, test_size=0.3)


In [5]:
# Display the training split. The row labels are the original DataFrame index
# values (shuffled by the split), not a fresh 0..n-1 range.
X_train

Unnamed: 0,Income,Age,Experience
2,45000,30,5
0,15000,25,1
3,70000,60,20


## StandardScaler

In [6]:
scaler_std = StandardScaler()

# The scaling statistics are learned from the training split only
# (fit_transform fits and scales X_train in one call) ...
X_train_std = scaler_std.fit_transform(X_train)
# ... and the already-fitted scaler is then applied, unchanged, to the test split.
X_test_std = scaler_std.transform(X_test)

# The scaler returns a plain array; wrap it so the printout keeps the column names.
separator = "-" * 50
train_std_df = pd.DataFrame(X_train_std, columns=X_train.columns)

print(separator)
print("StandardScaler - Original Training Data :")
print(X_train.reset_index(drop=True))
print("StandardScaler - Scaled Training Data :")
print(train_std_df)
print(separator)


--------------------------------------------------
StandardScaler - Original Training Data :
   Income  Age  Experience
0   45000   30           5
1   15000   25           1
2   70000   60          20
StandardScaler - Scaled Training Data :
     Income       Age  Experience
0  0.074125 -0.539164   -0.448327
1 -1.260124 -0.862662   -0.937410
2  1.185999  1.401826    1.385737
--------------------------------------------------


## RobustScaler (Good for data with outliers)

In [7]:
scaler_robust = RobustScaler()

# The scaling statistics are learned from the training split only
# (fit_transform fits and scales X_train in one call) ...
X_train_robust = scaler_robust.fit_transform(X_train)
# ... and the already-fitted scaler is then applied, unchanged, to the test split.
X_test_robust = scaler_robust.transform(X_test)

# The scaler returns a plain array; wrap it so the printout keeps the column names.
separator = "-" * 50
train_robust_df = pd.DataFrame(X_train_robust, columns=X_train.columns)

print(separator)
print("RobustScaler - Original Training Data :")
print(X_train.reset_index(drop=True))
print("RobustScaler - Scaled Training Data :")
print(train_robust_df)
print(separator)


--------------------------------------------------
RobustScaler - Original Training Data :
   Income  Age  Experience
0   45000   30           5
1   15000   25           1
2   70000   60          20
RobustScaler - Scaled Training Data :
     Income       Age  Experience
0  0.000000  0.000000    0.000000
1 -1.090909 -0.285714   -0.421053
2  0.909091  1.714286    1.578947
--------------------------------------------------


## MinMaxScaler

In [8]:
# MinMaxScaler maps each feature into [0, 1] by default; pass e.g.
# feature_range=(-1, 1) to target a different interval.
minmax_scaler = MinMaxScaler()

# Per-feature min and max are learned from the training split only ...
X_train_scaled_mm = minmax_scaler.fit_transform(X_train)
# ... and the test split is scaled with those same learned training values.
X_test_scaled_mm = minmax_scaler.transform(X_test)

# The scaler returns a plain array; wrap it so the printout keeps the column names.
separator = "-" * 50
train_mm_df = pd.DataFrame(X_train_scaled_mm, columns=X_train.columns)

print(separator)
print("MinMax_scaler - Original Training Data :")
print(X_train.reset_index(drop=True))
print("MinMax_scaler - Scaled Training Data :")
print(train_mm_df)
print(separator)

--------------------------------------------------
MinMax_scaler - Original Training Data :
   Income  Age  Experience
0   45000   30           5
1   15000   25           1
2   70000   60          20
MinMax_scaler - Scaled Training Data :
     Income       Age  Experience
0  0.545455  0.142857    0.210526
1  0.000000  0.000000    0.000000
2  1.000000  1.000000    1.000000
--------------------------------------------------


## MaxAbsScaler

In [9]:
# MaxAbsScaler divides each feature by its largest absolute value.
maxabs_scaler = MaxAbsScaler()

# The per-feature max absolute value is learned from the training split only ...
X_train_scaled_ma = maxabs_scaler.fit_transform(X_train)
# ... and the test split is scaled with those same learned training values.
X_test_scaled_ma = maxabs_scaler.transform(X_test)

# The scaler returns a plain array; wrap it so the printout keeps the column names.
separator = "-" * 50
train_ma_df = pd.DataFrame(X_train_scaled_ma, columns=X_train.columns)

print(separator)
print("MaxAbsScaler - Original Training Data :")
print(X_train.reset_index(drop=True))
print("\nMaxAbsScaler - Scaled Training Data :")
print(train_ma_df)
print(separator)

--------------------------------------------------
MaxAbsScaler - Original Training Data :
   Income  Age  Experience
0   45000   30           5
1   15000   25           1
2   70000   60          20

MaxAbsScaler - Scaled Training Data :
     Income       Age  Experience
0  0.642857  0.500000        0.25
1  0.214286  0.416667        0.05
2  1.000000  1.000000        1.00
--------------------------------------------------
