In [97]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler, RobustScaler

In [98]:
# Load the sales records from a CSV file located relative to the notebook's working directory.
data = pd.read_csv("100_Sales.csv")

In [99]:
# Peek at the first five rows to confirm the CSV parsed as expected.
data.head()

Unnamed: 0,Region,Country,Item_Type,Sales_Channel,Order_Priority,Ship_Date,Unit_Cost,Total_Revenue,Total_Profit,Unnamed: 9,Unnamed: 10
0,Australia and Oceania,Tuvalu,Baby Food,Offline,H,27/06/2010,159.42,2533654.0,951410.5,,
1,Central America and the Caribbean,Grenada,Cereal,Online,C,15/09/2012,117.11,576782.8,248406.36,,
2,Europe,Russia,Office Supplies,Offline,L,05/08/2014,524.96,1158502.59,224598.75,,
3,Sub_Saharan Africa,Sao Tome and Principe,Fruits,Online,C,07/05/2014,6.92,75591.66,19525.82,,
4,Sub_Saharan Africa,Rwanda,Office Supplies,Offline,L,02/06/2013,524.96,3296425.02,639077.5,,


In [100]:
# Inspect dtypes and non-null counts to find the numeric columns and any empty ones.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Region          100 non-null    object 
 1   Country         100 non-null    object 
 2   Item_Type       100 non-null    object 
 3   Sales_Channel   100 non-null    object 
 4   Order_Priority  100 non-null    object 
 5   Ship_Date       100 non-null    object 
 6   Unit_Cost       100 non-null    float64
 7   Total_Revenue   100 non-null    float64
 8   Total_Profit    100 non-null    float64
 9   Unnamed: 9      0 non-null      float64
 10  Unnamed: 10     0 non-null      float64
dtypes: float64(5), object(6)
memory usage: 8.7+ KB


In [101]:
# Names of the three populated numeric columns that will be scaled.
# Note: despite its name, `newdata` holds column labels, not data.
newdata = [
    "Unit_Cost",
    "Total_Revenue",
    "Total_Profit",
]

In [102]:
# Keep only the numeric columns listed in `newdata`.
# This rebinds `data`, so the full raw frame is no longer reachable
# without re-running the load cell.
data = data.loc[:, newdata]

In [103]:
# Min-max scaler with default settings: rescales each column to the [0, 1] range.
min_max_scaler = MinMaxScaler()

In [104]:
# Learn each column's min and max from `data` and rescale it in one step.
# The result carries no column labels; they are re-attached in the next cell.
min_max_normalized = min_max_scaler.fit_transform(data)

In [105]:
# Re-attach the column labels to the scaled values.
# The same name is reused: it held the raw scaler output before this cell
# and holds a labelled DataFrame after it.
min_max_normalized = pd.DataFrame(
    data=min_max_normalized,
    columns=newdata,
)

In [106]:
# Display the min-max scaled frame (pandas elides the middle rows when rendering).
min_max_normalized

Unnamed: 0,Unit_Cost,Total_Revenue,Total_Profit
0,0.294379,0.422014,0.552844
1,0.212706,0.095443,0.143803
2,1.000000,0.192523,0.129950
3,0.000000,0.011802,0.010629
4,1.000000,0.549308,0.371114
...,...,...,...
95,0.055826,0.015382,0.037213
96,0.000000,0.008945,0.008056
97,0.162169,0.037367,0.053815
98,0.096035,0.077846,0.083357


In [107]:
# Max-abs scaler: divides each column by its largest absolute value.
max_abs_scaler = MaxAbsScaler()

In [108]:
# Fit on `data` and scale it in one step.
# The result carries no column labels; they are re-attached in the next cell.
max_abs_normalized = max_abs_scaler.fit_transform(data)

In [109]:
# Wrap the max-abs scaled values in a DataFrame labelled with the source columns.
max_abs_scaler_normalized = pd.DataFrame(
    data=max_abs_normalized,
    columns=newdata,
)

In [110]:
# Display the max-abs scaled frame (pandas elides the middle rows when rendering).
max_abs_scaler_normalized

Unnamed: 0,Unit_Cost,Total_Revenue,Total_Profit
0,0.303680,0.422483,0.553171
1,0.223084,0.096178,0.144429
2,1.000000,0.193179,0.130587
3,0.013182,0.012605,0.011353
4,1.000000,0.549674,0.371574
...,...,...,...
95,0.068272,0.016181,0.037917
96,0.013182,0.009750,0.008781
97,0.173213,0.038149,0.054507
98,0.107951,0.078595,0.084028


In [111]:
# Z-score scaler: centers each column on its mean and divides by its standard deviation.
z_score = StandardScaler()

In [112]:
# Fit on `data` and standardize it in one step.
# The result carries no column labels; they are re-attached in the next cell.
z_score_normal = z_score.fit_transform(data)

In [113]:
# Label the standardized values with the source column names, then display them.
z_score_normalized = pd.DataFrame(
    data=z_score_normal,
    columns=newdata,
)
z_score_normalized

Unnamed: 0,Unit_Cost,Total_Revenue,Total_Profit
0,-0.168895,0.798622,1.168192
1,-0.394831,-0.548427,-0.442948
2,1.783101,-0.147989,-0.497510
3,-0.983250,-0.893431,-0.967494
4,1.783101,1.323690,0.452390
...,...,...,...
95,-0.828816,-0.878666,-0.862785
96,-0.983250,-0.905216,-0.977629
97,-0.534633,-0.787981,-0.797392
98,-0.717583,-0.621012,-0.681031


In [114]:
# Robust scaler with default settings: centers each column on its median and
# scales by its interquartile range, which limits the influence of outliers.
robust_scaler = RobustScaler()

In [115]:
# Fit on `data` and scale it in one step.
# The result carries no column labels; they are re-attached in the next cell.
robust_normalized = robust_scaler.fit_transform(data)

In [116]:
# Wrap the robust-scaled values in a DataFrame labelled with the source columns.
robust_scaler_normalized = pd.DataFrame(
    data=robust_normalized,
    columns=newdata,
)

In [117]:
# Display the robust-scaled frame (pandas elides the middle rows when rendering).
robust_scaler_normalized

Unnamed: 0,Unit_Cost,Total_Revenue,Total_Profit
0,0.229219,0.916646,1.284334
1,0.043233,-0.090325,-0.082354
2,1.836059,0.209017,-0.128638
3,-0.441140,-0.348230,-0.527313
4,1.836059,1.309154,0.677137
...,...,...,...
95,-0.314014,-0.337192,-0.438491
96,-0.441140,-0.357040,-0.535911
97,-0.071849,-0.269402,-0.383020
98,-0.222449,-0.144586,-0.284314


In [118]:
# Mean-center revenue: each row's deviation from the column average.
# Despite its name, the result is a per-row Series, not a single mean value.
total_mean_revenue = data["Total_Revenue"].sub(data["Total_Revenue"].mean())

In [127]:
# Display the mean-centered revenue Series (pandas elides the middle rows when rendering).
total_mean_revenue

0     1.160166e+06
1    -7.967049e+05
2    -2.149851e+05
3    -1.297896e+06
4     1.922937e+06
          ...     
95   -1.276447e+06
96   -1.315017e+06
97   -1.144709e+06
98   -9.021508e+05
99    2.213117e+06
Name: Total_Revenue, Length: 100, dtype: float64

# Results

In [132]:
# Print the first ten rows of every result under its own heading.
# Each heading string carries its own leading/trailing newlines, so the
# spacing of the printed report matches one print call per section.
report_sections = [
    ("\nMin-Max Normalization:\n\n", min_max_normalized),
    ("\n\nMax-Abs Normalization:\n\n", max_abs_scaler_normalized),
    ("\n\nRobust Normalization:\n\n", robust_scaler_normalized),
    ("\n\nZ-Score Normalization:\n\n", z_score_normalized),
    ("\n\nTotal_Mean_Revenue:\n", total_mean_revenue),
]
for heading, values in report_sections:
    print(heading, values.head(10))


Min-Max Normalization:

    Unit_Cost  Total_Revenue  Total_Profit
0   0.294379       0.422014      0.552844
1   0.212706       0.095443      0.143803
2   1.000000       0.192523      0.129950
3   0.000000       0.011802      0.010629
4   1.000000       0.549308      0.371114
5   0.294379       0.125886      0.165145
6   0.956721       0.466137      0.403019
7   0.162169       0.206977      0.296136
8   0.096035       0.081979      0.087775
9   0.212706       0.225512      0.339110


Max-Abs Normalization:

    Unit_Cost  Total_Revenue  Total_Profit
0   0.303680       0.422483      0.553171
1   0.223084       0.096178      0.144429
2   1.000000       0.193179      0.130587
3   0.013182       0.012605      0.011353
4   1.000000       0.549674      0.371574
5   0.303680       0.126596      0.165756
6   0.957292       0.466570      0.403455
7   0.173213       0.207621      0.296651
8   0.107951       0.082724      0.088442
9   0.223084       0.226141      0.339593


Robust Normalization: