# Scaling Techniques in Machine Learning
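This notebook demonstrates three feature-scaling techniques (standard, min-max, and robust scaling) in Python, using scikit-learn on the Iris dataset.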

In [2]:
# Step 1: Import required libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.datasets import load_iris

In [3]:
# Step 2: Load a sample dataset (Iris Dataset)
# The raw measurements become the rows of a DataFrame; the dataset's own
# feature names are used as the column labels.
iris = load_iris()
df = pd.DataFrame(
    data=iris.data,
    columns=iris.feature_names,
)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [4]:
# Step 3: Apply Standard Scaling (removes mean and scales to unit variance)
standard_scaler = StandardScaler()
# By default fit_transform returns a plain NumPy array, so row labels and
# column names have to be reattached. Reusing df.index keeps every scaled row
# aligned with its source row if this frame is later joined with `df`
# column-wise, even when `df` does not carry a default 0..n-1 index.
df_standard = pd.DataFrame(
    standard_scaler.fit_transform(df),
    index=df.index,
    columns=[f"{col}_std" for col in df.columns],
)
df_standard.head()

Unnamed: 0,sepal length (cm)_std,sepal width (cm)_std,petal length (cm)_std,petal width (cm)_std
0,-0.900681,1.019004,-1.340227,-1.315444
1,-1.143017,-0.131979,-1.340227,-1.315444
2,-1.385353,0.328414,-1.397064,-1.315444
3,-1.506521,0.098217,-1.283389,-1.315444
4,-1.021849,1.249201,-1.340227,-1.315444


In [5]:
# Step 4: Apply Min-Max Scaling (scales values between 0 and 1)
minmax_scaler = MinMaxScaler()
# By default fit_transform returns a plain NumPy array, so row labels and
# column names have to be reattached. Reusing df.index keeps every scaled row
# aligned with its source row if this frame is later joined with `df`
# column-wise, even when `df` does not carry a default 0..n-1 index.
df_minmax = pd.DataFrame(
    minmax_scaler.fit_transform(df),
    index=df.index,
    columns=[f"{col}_minmax" for col in df.columns],
)
df_minmax.head()

Unnamed: 0,sepal length (cm)_minmax,sepal width (cm)_minmax,petal length (cm)_minmax,petal width (cm)_minmax
0,0.222222,0.625,0.067797,0.041667
1,0.166667,0.416667,0.067797,0.041667
2,0.111111,0.5,0.050847,0.041667
3,0.083333,0.458333,0.084746,0.041667
4,0.194444,0.666667,0.067797,0.041667


In [6]:
# Step 5: Apply Robust Scaling (uses median and IQR, good for outliers)
robust_scaler = RobustScaler()
# By default fit_transform returns a plain NumPy array, so row labels and
# column names have to be reattached. Reusing df.index keeps every scaled row
# aligned with its source row if this frame is later joined with `df`
# column-wise, even when `df` does not carry a default 0..n-1 index.
df_robust = pd.DataFrame(
    robust_scaler.fit_transform(df),
    index=df.index,
    columns=[f"{col}_robust" for col in df.columns],
)
df_robust.head()

Unnamed: 0,sepal length (cm)_robust,sepal width (cm)_robust,petal length (cm)_robust,petal width (cm)_robust
0,-0.538462,1.0,-0.842857,-0.733333
1,-0.692308,0.0,-0.842857,-0.733333
2,-0.846154,0.4,-0.871429,-0.733333
3,-0.923077,0.2,-0.814286,-0.733333
4,-0.615385,1.2,-0.842857,-0.733333


In [7]:
# Step 6: Combine all scaled data with original
# Frames are placed side by side (column-wise), original features first,
# followed by the standard-, min-max- and robust-scaled versions.
df_combined = pd.concat(
    [df, df_standard, df_minmax, df_robust],
    axis="columns",
)
df_combined.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),sepal length (cm)_std,sepal width (cm)_std,petal length (cm)_std,petal width (cm)_std,sepal length (cm)_minmax,sepal width (cm)_minmax,petal length (cm)_minmax,petal width (cm)_minmax,sepal length (cm)_robust,sepal width (cm)_robust,petal length (cm)_robust,petal width (cm)_robust
0,5.1,3.5,1.4,0.2,-0.900681,1.019004,-1.340227,-1.315444,0.222222,0.625,0.067797,0.041667,-0.538462,1.0,-0.842857,-0.733333
1,4.9,3.0,1.4,0.2,-1.143017,-0.131979,-1.340227,-1.315444,0.166667,0.416667,0.067797,0.041667,-0.692308,0.0,-0.842857,-0.733333
2,4.7,3.2,1.3,0.2,-1.385353,0.328414,-1.397064,-1.315444,0.111111,0.5,0.050847,0.041667,-0.846154,0.4,-0.871429,-0.733333
3,4.6,3.1,1.5,0.2,-1.506521,0.098217,-1.283389,-1.315444,0.083333,0.458333,0.084746,0.041667,-0.923077,0.2,-0.814286,-0.733333
4,5.0,3.6,1.4,0.2,-1.021849,1.249201,-1.340227,-1.315444,0.194444,0.666667,0.067797,0.041667,-0.615385,1.2,-0.842857,-0.733333
