In [46]:
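# Optional one-time setup: uncomment if scikit-learn is missing (%pip installs into this kernel's environment).
# %pip install scikit-learn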

<h1>DataFrame Creation</h1>

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Nine sample prices; this single "price" column is the input for the frame.
data = dict(price=[110, 105, 115, 120, 110, 130, 150, 100, 105])
df = pd.DataFrame(data=data)

In [3]:
# Show the raw frame: one "price" column holding the nine sample values.
df

Unnamed: 0,price
0,110
1,105
2,115
3,120
4,110
5,130
6,150
7,100
8,105


<h1>Normalization</h1>

In [4]:
from sklearn.preprocessing import MinMaxScaler

In [5]:
# Min-max normalization: rescale price linearly so the smallest value maps to 0
# and the largest to 1 (MinMaxScaler's default feature range).
scaler = MinMaxScaler()
# fit_transform needs 2-D input (hence the double brackets) and returns one
# column; flatten it to 1-D before storing it as a DataFrame column.
df["min_max_scaled_data"] = np.ravel(scaler.fit_transform(df[["price"]]))

In [6]:
# Show the frame to compare "price" with the new "min_max_scaled_data" column.
df

Unnamed: 0,price,min_max_scaled_data
0,110,0.2
1,105,0.1
2,115,0.3
3,120,0.4
4,110,0.2
5,130,0.6
6,150,1.0
7,100,0.0
8,105,0.1


<h1>Standardization</h1>

In [7]:
from sklearn.preprocessing import StandardScaler

In [8]:
# Standardization (z-score): subtract the column mean and divide by its
# standard deviation. StandardScaler uses the population std (ddof=0).
scaler = StandardScaler()
# fit_transform needs 2-D input (hence the double brackets) and returns one
# column; flatten it to 1-D before storing it as a DataFrame column.
df["standard_scaled_data"] = np.ravel(scaler.fit_transform(df[["price"]]))

In [9]:
# Show the frame to compare "price" with the new "standard_scaled_data" column.
df

Unnamed: 0,price,min_max_scaled_data,standard_scaled_data
0,110,0.2,-0.416356
1,105,0.1,-0.757011
2,115,0.3,-0.075701
3,120,0.4,0.264954
4,110,0.2,-0.416356
5,130,0.6,0.946264
6,150,1.0,2.308884
7,100,0.0,-1.097666
8,105,0.1,-0.757011


<h1>Log Transform</h1>

In [10]:
# Log transform: log1p(x) = ln(1 + x), which compresses large prices more than
# small ones. Operate on the "price" Series (single brackets) rather than a
# one-column DataFrame so a plain Series is assigned to the new column.
df["log_transformed_data"] = np.log1p(df["price"])

In [11]:
# Show the frame to compare "price" with the new "log_transformed_data" column.
df

Unnamed: 0,price,min_max_scaled_data,standard_scaled_data,log_transformed_data
0,110,0.2,-0.416356,4.70953
1,105,0.1,-0.757011,4.663439
2,115,0.3,-0.075701,4.75359
3,120,0.4,0.264954,4.795791
4,110,0.2,-0.416356,4.70953
5,130,0.6,0.946264,4.875197
6,150,1.0,2.308884,5.01728
7,100,0.0,-1.097666,4.615121
8,105,0.1,-0.757011,4.663439


In [12]:
# from scipy.stats import boxcox

In [13]:
# df = pd.DataFrame(data)

In [14]:
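# NOTE: scipy.stats.boxcox returns a (transformed, lambda) tuple when lmbda is
# not given, so take element [0] before assigning the result to a column.
# df["transformed_data"] = boxcox(df["price"]+1)[0]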

<h1>Max Absolute Scaler</h1>

In [15]:
from sklearn.preprocessing import MaxAbsScaler

In [16]:
# Max-abs scaling: divide every price by the largest absolute price, so the
# maximum maps to 1.0 and no centering is applied.
scaler = MaxAbsScaler()
# fit_transform needs 2-D input (hence the double brackets) and returns one
# column; flatten it to 1-D before storing it as a DataFrame column.
df["max_abs_scaled_data"] = np.ravel(scaler.fit_transform(df[["price"]]))

In [17]:
# Show the frame to compare "price" with the new "max_abs_scaled_data" column.
df

Unnamed: 0,price,min_max_scaled_data,standard_scaled_data,log_transformed_data,max_abs_scaled_data
0,110,0.2,-0.416356,4.70953,0.733333
1,105,0.1,-0.757011,4.663439,0.7
2,115,0.3,-0.075701,4.75359,0.766667
3,120,0.4,0.264954,4.795791,0.8
4,110,0.2,-0.416356,4.70953,0.733333
5,130,0.6,0.946264,4.875197,0.866667
6,150,1.0,2.308884,5.01728,1.0
7,100,0.0,-1.097666,4.615121,0.666667
8,105,0.1,-0.757011,4.663439,0.7


<h1>Robust Scaler</h1>

In [18]:
from sklearn.preprocessing import RobustScaler

In [19]:
# Robust scaling: subtract the median and divide by the interquartile range
# (RobustScaler's defaults), so extreme prices influence the scale less.
scaler = RobustScaler()
# fit_transform needs 2-D input (hence the double brackets) and returns one
# column; flatten it to 1-D before storing it as a DataFrame column.
df["robust_scaled_data"] = np.ravel(scaler.fit_transform(df[["price"]]))

In [20]:
# Show the frame to compare "price" with the new "robust_scaled_data" column.
df

Unnamed: 0,price,min_max_scaled_data,standard_scaled_data,log_transformed_data,max_abs_scaled_data,robust_scaled_data
0,110,0.2,-0.416356,4.70953,0.733333,0.0
1,105,0.1,-0.757011,4.663439,0.7,-0.333333
2,115,0.3,-0.075701,4.75359,0.766667,0.333333
3,120,0.4,0.264954,4.795791,0.8,0.666667
4,110,0.2,-0.416356,4.70953,0.733333,0.0
5,130,0.6,0.946264,4.875197,0.866667,1.333333
6,150,1.0,2.308884,5.01728,1.0,2.666667
7,100,0.0,-1.097666,4.615121,0.666667,-0.666667
8,105,0.1,-0.757011,4.663439,0.7,-0.333333


<h1>Excel File Generation</h1>

In [24]:
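# Optional one-time setup: uncomment if openpyxl is missing; pandas uses it to write .xlsx files (%pip installs into this kernel's environment).
# %pip install openpyxl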

In [25]:
# Export the frame to an Excel workbook in the current working directory;
# index=False keeps the row index out of the sheet.
df.to_excel(excel_writer="ml_scaled_data.xlsx", index=False)