In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file beneath the read-only Kaggle input directory,
# in the order os.walk visits them, and print each full path on its own line.
input_file_paths = (
    os.path.join(folder, entry)
    for folder, _subfolders, entries in os.walk('/kaggle/input')
    for entry in entries
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Standardization
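Standardization refers to rescaling each feature so that it has a mean of 0 and a standard deviation of 1. It shifts and scales the values but does not change the shape of their distribution, so the result is only standard normal if the feature was normally distributed to begin with. It's useful for algorithms that are sensitive to the scale of the input features or work best when features are centered around 0 with comparable variance, such as logistic regression, linear regression, and support vector machines. Standardization is performed using the formula: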

z= (x−μ)/σ

where 
x is the original feature value, 
μ is the mean of the feature, and 
σ is the standard deviation of the feature.

# Normalization
Normalization, or Min-Max scaling, rescales the features to a fixed range, typically [0, 1], without distorting differences in the ranges of values or losing information. It is useful for algorithms that do not assume any distribution of the data, like k-nearest neighbors and neural networks. Normalization is performed using the formula:

x' = (x−min(x))/(max(x)−min(x))
    
where 
x is the original value, 
min(x) is the minimum value in the feature column, and 
max(x) is the maximum value in the feature column.

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import pandas as pd

# Load the Iris dataset: feature matrix X and class labels y.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardization: the scaler is fitted on the training split only, and the
# same fitted scaler is then applied to the test split.
scaler = StandardScaler()
X_train_standardized = scaler.fit_transform(X_train)
X_test_standardized = scaler.transform(X_test)

# Normalization (Min-Max scaling): same fit-on-train / apply-to-test pattern.
min_max_scaler = MinMaxScaler()
X_train_normalized = min_max_scaler.fit_transform(X_train)
X_test_normalized = min_max_scaler.transform(X_test)

# Wrap the scaled training arrays in DataFrames so the columns carry the
# Iris feature names when printed.
X_train_standardized_df = pd.DataFrame(
    X_train_standardized, columns=iris.feature_names
)
X_train_normalized_df = pd.DataFrame(
    X_train_normalized, columns=iris.feature_names
)

# Print a heading followed by the first five rows of each scaled training set.
for heading, scaled_df in (
    ("Standardized training data (first 5 rows):", X_train_standardized_df),
    ("\nNormalized training data (first 5 rows):", X_train_normalized_df),
):
    print(heading)
    print(scaled_df.head())

Standardized training data (first 5 rows):
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0          -1.473937          1.203658          -1.562535         -1.312603
1          -0.133071          2.992376          -1.276006         -1.045633
2           1.085898          0.085709           0.385858          0.289218
3          -1.230143          0.756479          -1.218701         -1.312603
4          -1.717731          0.309299          -1.390618         -1.312603

Normalized training data (first 5 rows):
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0           0.088235          0.666667           0.000000          0.041667
1           0.411765          1.000000           0.087719          0.125000
2           0.705882          0.458333           0.596491          0.541667
3           0.147059          0.583333           0.105263          0.041667
4           0.029412          0.500000           0.052632          0.041667
