In [6]:
import pandas as pd
import numpy as np

In [7]:
# Small single-column price sample used by the scaling examples.
data = dict(price=[110, 105, 115, 120, 110, 110, 130, 150, 100, 105])
df = pd.DataFrame(data=data)

In [8]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,price
0,110
1,105
2,115
3,120
4,110


In [9]:
# Normalization
# Standardization
# Log Transformation
# Robust Scaler
# Max Absolute Scaler

# Normalization - Manual

In [11]:
# Min-max normalization by hand: (x - min) / (max - min), so the smallest
# price maps to 0 and the largest to 1.
min_price, max_price = df['price'].min(), df['price'].max()
df['price_scaled_manual'] = df['price'].sub(min_price).div(max_price - min_price)

In [12]:
# Preview the first five rows, now including the manually scaled column.
df.head(n=5)

Unnamed: 0,price,price_scaled_manual
0,110,0.2
1,105,0.1
2,115,0.3
3,120,0.4
4,110,0.2


# Normalization - Sklearn (MinMaxScaler)

In [14]:
from sklearn.preprocessing import MinMaxScaler
# feature_range=(0, 1) is the MinMaxScaler default, spelled out for the reader.
scaler = MinMaxScaler(feature_range=(0, 1))

In [15]:
# Fit the scaler on the price column (passed as a one-column frame, since
# scikit-learn expects 2-D input) and store the scaled values.
df['price_scaled_sklearn'] = scaler.fit_transform(df.loc[:, ['price']])
df.head(n=5)

Unnamed: 0,price,price_scaled_manual,price_scaled_sklearn
0,110,0.2,0.2
1,105,0.1,0.1
2,115,0.3,0.3
3,120,0.4,0.4
4,110,0.2,0.2


# Standardization (Manual)

In [25]:
# Rebind `data` and `df` to a fresh single-column price frame.
data = dict(price=[110, 105, 115, 120, 110, 110, 130, 150, 100, 105])
df = pd.DataFrame(data=data)

In [51]:
# Z-score by hand: subtract the mean, divide by the standard deviation.
# ddof=0 selects the population standard deviation (pandas defaults to ddof=1).
df['price_scaled_stand'] = df['price'].sub(df['price'].mean()).div(df['price'].std(ddof=0))

In [52]:
# Preview the first five rows with the standardized column.
df.head(n=5)

Unnamed: 0,price,price_scaled_stand
0,110,-0.39161
1,105,-0.74762
2,115,-0.035601
3,120,0.320408
4,110,-0.39161


In [53]:
from sklearn.preprocessing import StandardScaler

In [54]:
# Both flags are the StandardScaler defaults: centre on the mean and scale to unit variance.
std = StandardScaler(with_mean=True, with_std=True)

In [55]:
# Fit the standard scaler on the price column (one-column frame for 2-D input)
# and store the standardized values.
df['price_scaled_std_sklearn'] = std.fit_transform(df.loc[:, ['price']])
df.head(n=5)

Unnamed: 0,price,price_scaled_stand,price_scaled_std_sklearn
0,110,-0.39161,-0.39161
1,105,-0.74762,-0.74762
2,115,-0.035601,-0.035601
3,120,0.320408,0.320408
4,110,-0.39161,-0.39161


# Robust Scaler

In [50]:
# Rebind `data` and `df` to a fresh single-column price frame.
# NOTE(review): this cell's execution count (50) is lower than the cells before
# it (51-55), and the saved outputs below still list the standardization
# columns, so those outputs appear to come from out-of-order execution.
data = dict(price=[110, 105, 115, 120, 110, 110, 130, 150, 100, 105])
df = pd.DataFrame(data=data)

In [56]:
# Quartiles of the price column; q2 is the median.
q1 = df['price'].quantile(0.25)
q2 = df['price'].quantile(0.50)
q3 = df['price'].quantile(0.75)
iqr = q3 - q1  # interquartile range

# Robust scaling by hand: centre on the median and divide by the IQR.
# Reuses q2 rather than recomputing the 0.50 quantile inline.
df['price_scaled_robust'] = (df['price'] - q2) / iqr
df.head()

Unnamed: 0,price,price_scaled_stand,price_scaled_std_sklearn,price_scaled_robust
0,110,-0.39161,-0.39161,0.0
1,105,-0.74762,-0.74762,-0.4
2,115,-0.035601,-0.035601,0.4
3,120,0.320408,0.320408,0.8
4,110,-0.39161,-0.39161,0.0


In [59]:
from sklearn.preprocessing import RobustScaler
# These are the RobustScaler defaults: centre on the median and scale by the
# 25th-75th percentile range.
rob = RobustScaler(with_centering=True, with_scaling=True, quantile_range=(25.0, 75.0))

In [61]:
# Fit the robust scaler on the price column (one-column frame for 2-D input)
# and store the scaled values.
df['price_scaled_robust_sklearn'] = rob.fit_transform(df.loc[:, ['price']])
df.head(n=5)

Unnamed: 0,price,price_scaled_stand,price_scaled_std_sklearn,price_scaled_robust,price_scaled_robust_sklearn
0,110,-0.39161,-0.39161,0.0,0.0
1,105,-0.74762,-0.74762,-0.4,-0.4
2,115,-0.035601,-0.035601,0.4,0.4
3,120,0.320408,0.320408,0.8,0.8
4,110,-0.39161,-0.39161,0.0,0.0


# Log Transform

In [69]:
# Rebind `data` and `df` to a fresh single-column price frame.
data = dict(price=[110, 105, 115, 120, 110, 110, 130, 150, 100, 105])
df = pd.DataFrame(data=data)

In [70]:
# Preview the first five rows of the rebuilt frame.
df.head(n=5)

Unnamed: 0,price
0,110
1,105
2,115
3,120
4,110


In [71]:
from sklearn.preprocessing import FunctionTransformer
# func=None is the FunctionTransformer default; with no function supplied the
# transformer passes its input through unchanged.
ft = FunctionTransformer(func=None)

In [72]:
# FunctionTransformer takes the callable through `func`, and fit_transform
# receives the data. The previous form handed np.log1p to an identity
# transformer as if it were data, got the function back unchanged, and then
# called it by hand, so the transformer itself did nothing.
ft = FunctionTransformer(func=np.log1p)
df['log_cost'] = ft.fit_transform(df[['price']])

In [73]:
# Preview the first five rows with the log-transformed column.
df.head(n=5)

Unnamed: 0,price,log_cost
0,110,4.70953
1,105,4.663439
2,115,4.75359
3,120,4.795791
4,110,4.70953


In [74]:
# np.log1p is a vectorised ufunc, so apply it to the whole column at once
# instead of calling it per element through apply/lambda.
df['log_manual'] = np.log1p(df['price'])
df

Unnamed: 0,price,log_cost,log_manual
0,110,4.70953,4.70953
1,105,4.663439,4.663439
2,115,4.75359,4.75359
3,120,4.795791,4.795791
4,110,4.70953,4.70953
5,110,4.70953,4.70953
6,130,4.875197,4.875197
7,150,5.01728,5.01728
8,100,4.615121,4.615121
9,105,4.663439,4.663439


# Max absolute scaler

In [77]:
from sklearn.preprocessing import MaxAbsScaler
# copy=True is the MaxAbsScaler default. Note this rebinds `scaler`, which an
# earlier cell used for the MinMaxScaler.
scaler = MaxAbsScaler(copy=True)

In [78]:
# Divide each price by the largest absolute price (one-column frame for 2-D input).
df['scaler_abs'] = scaler.fit_transform(df.loc[:, ['price']])

In [79]:
# Preview the first five rows with the max-abs scaled column.
df.head(n=5)

Unnamed: 0,price,log_cost,log_manual,scaler_abs
0,110,4.70953,4.70953,0.733333
1,105,4.663439,4.663439,0.7
2,115,4.75359,4.75359,0.766667
3,120,4.795791,4.795791,0.8
4,110,4.70953,4.70953,0.733333
