In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Seed NumPy's legacy global RNG so the sampled column is the same on every run.
np.random.seed(0)

# One integer feature "A": ten draws from [0, 100) (upper bound exclusive).
data = dict(A=np.random.randint(low=0, high=100, size=10))
df = pd.DataFrame(data)
df

Unnamed: 0,A
0,44
1,47
2,64
3,67
4,67
5,9
6,83
7,21
8,36
9,87


In [3]:
from scipy.stats import zscore

# Standardize column A with SciPy's zscore and store the result next to the raw values.
df["A_zscore"] = zscore(df["A"])
df

Unnamed: 0,A,A_zscore
0,44,-0.34898
1,47,-0.22581
2,64,0.472149
3,67,0.595318
4,67,0.595318
5,9,-1.785954
6,83,1.252221
7,21,-1.293277
8,36,-0.677431
9,87,1.416447


In [4]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler

In [5]:
# Standardize A with scikit-learn: learn the column statistics first, then apply them.
sc = StandardScaler()
sc.fit(df[['A']])
df['Standard'] = sc.transform(df[['A']])
df

Unnamed: 0,A,A_zscore,Standard
0,44,-0.34898,-0.34898
1,47,-0.22581,-0.22581
2,64,0.472149,0.472149
3,67,0.595318,0.595318
4,67,0.595318,0.595318
5,9,-1.785954,-1.785954
6,83,1.252221,1.252221
7,21,-1.293277,-1.293277
8,36,-0.677431,-0.677431
9,87,1.416447,1.416447


In [6]:
# Min-max scaling: with the default settings the column minimum maps to 0 and the maximum to 1.
mms = MinMaxScaler()
mms.fit(df[['A']])
df['Minmax'] = mms.transform(df[['A']])
df

Unnamed: 0,A,A_zscore,Standard,Minmax
0,44,-0.34898,-0.34898,0.448718
1,47,-0.22581,-0.22581,0.487179
2,64,0.472149,0.472149,0.705128
3,67,0.595318,0.595318,0.74359
4,67,0.595318,0.595318,0.74359
5,9,-1.785954,-1.785954,0.0
6,83,1.252221,1.252221,0.948718
7,21,-1.293277,-1.293277,0.153846
8,36,-0.677431,-0.677431,0.346154
9,87,1.416447,1.416447,1.0


In [7]:
# Max-abs scaling: divide A by its largest absolute value so the extreme value becomes 1.
mas = MaxAbsScaler()
mas.fit(df[['A']])
df['MaxAbs'] = mas.transform(df[['A']])
df

Unnamed: 0,A,A_zscore,Standard,Minmax,MaxAbs
0,44,-0.34898,-0.34898,0.448718,0.505747
1,47,-0.22581,-0.22581,0.487179,0.54023
2,64,0.472149,0.472149,0.705128,0.735632
3,67,0.595318,0.595318,0.74359,0.770115
4,67,0.595318,0.595318,0.74359,0.770115
5,9,-1.785954,-1.785954,0.0,0.103448
6,83,1.252221,1.252221,0.948718,0.954023
7,21,-1.293277,-1.293277,0.153846,0.241379
8,36,-0.677431,-0.677431,0.346154,0.413793
9,87,1.416447,1.416447,1.0,1.0


In [8]:
# Robust scaling with scikit-learn's defaults (median centering, interquartile-range scaling).
rb = RobustScaler()
rb.fit(df[['A']])
df['Robust'] = rb.transform(df[['A']])
df

Unnamed: 0,A,A_zscore,Standard,Minmax,MaxAbs,Robust
0,44,-0.34898,-0.34898,0.448718,0.505747,-0.396552
1,47,-0.22581,-0.22581,0.487179,0.54023,-0.293103
2,64,0.472149,0.472149,0.705128,0.735632,0.293103
3,67,0.595318,0.595318,0.74359,0.770115,0.396552
4,67,0.595318,0.595318,0.74359,0.770115,0.396552
5,9,-1.785954,-1.785954,0.0,0.103448,-1.603448
6,83,1.252221,1.252221,0.948718,0.954023,0.948276
7,21,-1.293277,-1.293277,0.153846,0.241379,-1.189655
8,36,-0.677431,-0.677431,0.346154,0.413793,-0.672414
9,87,1.416447,1.416447,1.0,1.0,1.086207


In [9]:
# Summary statistics of A used by the mean-normalization formula below.
a_mean = df['A'].mean()
a_max = df['A'].max()
a_min = df['A'].min()

# Mean normalization: (x - mean) / (max - min).
# Vectorized Series arithmetic replaces the row-wise apply(lambda ...); the values are the same.
df['Mean_Norm'] = (df['A'] - a_mean) / (a_max - a_min)
df

Unnamed: 0,A,A_zscore,Standard,Minmax,MaxAbs,Robust,Mean_Norm
0,44,-0.34898,-0.34898,0.448718,0.505747,-0.396552,-0.108974
1,47,-0.22581,-0.22581,0.487179,0.54023,-0.293103,-0.070513
2,64,0.472149,0.472149,0.705128,0.735632,0.293103,0.147436
3,67,0.595318,0.595318,0.74359,0.770115,0.396552,0.185897
4,67,0.595318,0.595318,0.74359,0.770115,0.396552,0.185897
5,9,-1.785954,-1.785954,0.0,0.103448,-1.603448,-0.557692
6,83,1.252221,1.252221,0.948718,0.954023,0.948276,0.391026
7,21,-1.293277,-1.293277,0.153846,0.241379,-1.189655,-0.403846
8,36,-0.677431,-0.677431,0.346154,0.413793,-0.672414,-0.211538
9,87,1.416447,1.416447,1.0,1.0,1.086207,0.442308


In [10]:
# Measure of Central Tendency

In [11]:
class standard_scaler:
    """Minimal pandas-based z-score scaler.

    ``fit`` learns the per-column mean and population standard deviation
    (``ddof=0``); ``transform`` applies ``(x - mean) / std`` column by column,
    matching columns by position.

    Attributes:
        mean_: per-column means learned by ``fit`` (empty list before fitting).
        std_: per-column population standard deviations learned by ``fit``.
        cols: column labels of the frame passed to ``fit``.
    """

    def __init__(self) -> None:
        self.mean_ = list()
        self.std_ = list()
        self.cols = list()

    def fit(self, train_df):
        """Learn column means and population standard deviations.

        Args:
            train_df: DataFrame of numeric columns to learn statistics from.

        Returns:
            The scaler itself, so calls can be chained (``fit(df).transform(df)``).
        """
        self.mean_ = train_df.mean().values
        self.std_ = train_df.std(ddof=0).values
        self.cols = train_df.columns
        return self

    def transform(self, test_df):
        """Return a standardized copy of ``test_df``; the input is not modified.

        Args:
            test_df: DataFrame with the same number of columns as the frame
                passed to ``fit``. Columns are matched by position, not label.

        Returns:
            A new DataFrame with every column centered and scaled.

        Raises:
            ValueError: if the column count differs from the fitted frame.
        """
        if len(self.cols) != len(test_df.columns):
            raise ValueError("Number of columns in test data must match the number of columns in training data.")

        means = np.asarray(self.mean_, dtype=float)
        stds = np.asarray(self.std_, dtype=float)
        # A constant column has std == 0; dividing by it would produce NaN/inf.
        # Divide by 1 instead so such columns come out as all zeros.
        safe_stds = np.where(stds == 0, 1.0, stds)

        # A 1-D array broadcasts across the columns positionally, and the
        # arithmetic builds a new frame, so the caller's data stays untouched.
        return (test_df - means) / safe_stds

    def fit_transform(self, train_df):
        """Fit on ``train_df`` and return its standardized copy in one call."""
        return self.fit(train_df).transform(train_df)

In [12]:
# Fit the hand-written scaler on column A, then store its standardized values.
scaler = standard_scaler()

# Double brackets keep a one-column DataFrame; fit/transform read `.columns` from their input.
scaler.fit(df[['A']])
df['Scaler'] = scaler.transform(df[['A']])

In [13]:
# Mean and scale learned by scikit-learn's StandardScaler, shown for comparison.
sc.mean_, sc.scale_

(array([52.5]), array([24.35672392]))

In [14]:
# Mean and population std learned by the hand-written scaler, shown for comparison.
scaler.mean_, scaler.std_

(array([52.5]), array([24.35672392]))