##  Data Standardization is the process of transforming features so that they have: Mean = 0 ,Standard Deviation = 1
### 🔍 Why Use Standardization?
#### -Ensures all features are on the same scale.
#### -Helps algorithms that rely on distance (like KNN, SVM, logistic regression).
#### -Improves training stability and speed in gradient-based models (like neural networks).

In [51]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [59]:
scaler = StandardScaler()
le =LabelEncoder()

In [61]:
df = pd.read_csv('iris.csv')

In [63]:
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [67]:
df['encoded'] = le.fit_transform(df['Species'])

In [71]:
df.drop('Species',axis=1,inplace=True)

In [73]:
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,encoded
0,1,5.1,3.5,1.4,0.2,0
1,2,4.9,3.0,1.4,0.2,0
2,3,4.7,3.2,1.3,0.2,0
3,4,4.6,3.1,1.5,0.2,0
4,5,5.0,3.6,1.4,0.2,0


In [75]:
X = df.drop('encoded',axis=1)
Y = df['encoded']

In [77]:
X.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,1,5.1,3.5,1.4,0.2
1,2,4.9,3.0,1.4,0.2
2,3,4.7,3.2,1.3,0.2
3,4,4.6,3.1,1.5,0.2
4,5,5.0,3.6,1.4,0.2


In [79]:
Y.head()

0    0
1    0
2    0
3    0
4    0
Name: encoded, dtype: int32

In [103]:
X_train, X_test,Y_train,Y_test = train_test_split(X,Y,test_size = 0.2 , random_state=2)

In [105]:
X_train.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
126,127,6.2,2.8,4.8,1.8
23,24,5.1,3.3,1.7,0.5
64,65,5.6,2.9,3.6,1.3
117,118,7.7,3.8,6.7,2.2
84,85,5.4,3.0,4.5,1.5


In [107]:
X_train.shape

(120, 5)

In [109]:
X_train.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
126,127,6.2,2.8,4.8,1.8
23,24,5.1,3.3,1.7,0.5
64,65,5.6,2.9,3.6,1.3
117,118,7.7,3.8,6.7,2.2
84,85,5.4,3.0,4.5,1.5


In [111]:
X_test.shape

(30, 5)

In [113]:
X_test.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
6,7,4.6,3.4,1.4,0.3
3,4,4.6,3.1,1.5,0.2
113,114,5.7,2.5,5.0,2.0
12,13,4.8,3.0,1.4,0.1
24,25,4.8,3.4,1.9,0.2


In [115]:
Y_train.shape

(120,)

In [119]:
Y_train.head()

126    2
23     0
64     1
117    2
84     1
Name: encoded, dtype: int32

In [121]:
Y_test.shape

(30,)

In [123]:
Y_test.head()

6      0
3      0
113    2
12     0
24     0
Name: encoded, dtype: int32

In [125]:
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

### After we scaled they changed to numpy array so if we want to convert to dataframe we need to apply pd.DataFrame()

In [129]:
X_train_scaled

array([[ 1.15368203,  0.37346331, -0.57939897,  0.54063457,  0.74225155],
       [-1.24247154, -0.9845851 ,  0.55297687, -1.24706373, -0.99115027],
       [-0.28866284, -0.36729037, -0.3529238 , -0.15137768,  0.07555854],
       [ 0.94430939,  2.22534751,  1.68535271,  1.63632062,  1.27560595],
       [ 0.1766097 , -0.61420826, -0.12644864,  0.36763151,  0.34223574],
       [-1.45184418, -0.12037247,  2.13830304, -1.53540217, -1.39116607],
       [ 1.29326379,  0.6203812 , -0.57939897,  1.00197606,  1.27560595],
       [ 0.33945509,  0.25000437, -0.12644864,  0.42529919,  0.20889714],
       [-0.54456273, -0.49074931, -1.71177481,  0.07929307,  0.07555854],
       [ 1.50263643,  1.23767594,  0.10002653,  0.71363763,  1.40894455],
       [ 0.03702794,  0.12654542, -0.3529238 ,  0.36763151,  0.34223574],
       [ 0.47903685,  0.37346331, -0.3529238 ,  0.25229613,  0.07555854],
       [ 1.54916369,  1.11421699,  0.3265017 ,  1.17497913,  1.40894455],
       [ 0.38598234, -1.10804404, -1.7

In [131]:
X_test_scaled

array([[-1.63795319, -1.60187983,  0.77945204, -1.4200668 , -1.25782747],
       [-1.70774408, -1.60187983,  0.10002653, -1.36239911, -1.39116607],
       [ 0.85125488, -0.24383142, -1.25882447,  0.65596994,  1.00892875],
       [-1.49837143, -1.35496194, -0.12644864, -1.4200668 , -1.52450467],
       [-1.21920791, -1.35496194,  0.77945204, -1.13172836, -1.39116607],
       [ 1.22347291,  1.60805278, -0.12644864,  1.11731144,  0.47557434],
       [-1.19594428, -1.10804404, -0.12644864, -1.30473142, -1.39116607],
       [ 0.73493675,  0.99075804, -1.25882447,  1.11731144,  0.74225155],
       [ 1.20020928,  0.6203812 , -0.57939897,  1.00197606,  1.14226735],
       [-0.73067175, -1.35496194, -0.12644864, -1.4200668 , -1.25782747],
       [-0.66088087, -0.7376672 ,  1.45887754, -1.36239911, -1.39116607],
       [-0.80046263, -1.84879772,  0.3265017 , -1.47773448, -1.39116607],
       [-0.96330802, -1.10804404,  0.3265017 , -1.53540217, -1.39116607],
       [-1.66121682, -0.61420826,  1.9

In [138]:
X_train_scaleddf= pd.DataFrame(X_train_scaled,columns = X.columns)

In [140]:
X_train_scaleddf.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,1.153682,0.373463,-0.579399,0.540635,0.742252
1,-1.242472,-0.984585,0.552977,-1.247064,-0.99115
2,-0.288663,-0.36729,-0.352924,-0.151378,0.075559
3,0.944309,2.225348,1.685353,1.636321,1.275606
4,0.17661,-0.614208,-0.126449,0.367632,0.342236


In [142]:
X_test_scaleddf = pd.DataFrame(X_test_scaled,columns=X.columns)

In [144]:
X_test_scaleddf.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,-1.637953,-1.60188,0.779452,-1.420067,-1.257827
1,-1.707744,-1.60188,0.100027,-1.362399,-1.391166
2,0.851255,-0.243831,-1.258824,0.65597,1.008929
3,-1.498371,-1.354962,-0.126449,-1.420067,-1.524505
4,-1.219208,-1.354962,0.779452,-1.131728,-1.391166
