In [1]:
import pandas as pd

In [2]:
from sklearn.preprocessing import LabelEncoder

In [3]:
# Load the dataset from a CSV path relative to the working directory,
# then preview the first five rows to check that the columns parsed.
csv_path = 'agora.csv'
df = pd.read_csv(csv_path)
df.head(5)

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


## Label Encoding for Area

In [4]:
encoder = LabelEncoder()

In [5]:
df['Area'] = encoder.fit_transform(df['Area'])

In [6]:
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,1,192261.83
1,162597.7,151377.59,443898.53,0,191792.06
2,153441.51,101145.55,407934.54,2,191050.39
3,144372.41,118671.85,383199.62,1,182901.99
4,142107.34,91391.77,366168.42,2,166187.94


# Normalization / Min Max Scaler

In [7]:
from sklearn.preprocessing import MinMaxScaler

In [8]:
min_max  = MinMaxScaler()

In [9]:
min_df = df.copy()

In [10]:
min_df['Marketing Spend'] = min_max.fit_transform(min_df[['Marketing Spend']])

In [11]:
min_df['Administration'] = min_max.fit_transform(min_df[['Administration']])

In [12]:
min_df['Transport'] = min_max.fit_transform(min_df[['Transport']])

In [13]:
min_df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,0.692617,0.651744,1.0,1,192261.83
1,0.983359,0.761972,0.940893,0,191792.06
2,0.927985,0.379579,0.864664,2,191050.39
3,0.873136,0.512998,0.812235,1,182901.99
4,0.859438,0.305328,0.776136,2,166187.94


# Standardization

In [14]:
stand_df = df.copy()

In [15]:
from sklearn.preprocessing import StandardScaler

In [16]:
stand_sc = StandardScaler()

In [17]:
stand_df['Marketing Spend'] = stand_sc.fit_transform(stand_df[['Marketing Spend']])

In [18]:
stand_df['Administration'] = stand_sc.fit_transform(stand_df[['Administration']])

In [19]:
stand_df['Transport'] = stand_sc.fit_transform(stand_df[['Transport']])

In [20]:
stand_df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,0.897913,0.560753,2.165287,1,192261.83
1,1.95586,1.082807,1.929843,0,191792.06
2,1.754364,-0.728257,1.626191,2,191050.39
3,1.554784,-0.096365,1.417348,1,182901.99
4,1.504937,-1.079919,1.27355,2,166187.94


# Max Absolute Scaler

In [21]:
max_abs_df = df.copy()

In [22]:
abs_val = max_abs_df['Marketing Spend'].abs()
max_abs_df['Marketing Spend'] = max_abs_df['Marketing Spend'] / abs_val.max()

In [23]:
abs_val = max_abs_df['Administration'].abs()
max_abs_df['Administration'] = max_abs_df['Administration'] / abs_val.max()

In [24]:
abs_val = max_abs_df['Transport'].abs()
max_abs_df['Transport'] = max_abs_df['Transport'] / abs_val.max()

In [25]:
max_abs_df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,0.692617,0.749527,1.0,1,192261.83
1,0.983359,0.828805,0.940893,0,191792.06
2,0.927985,0.553781,0.864664,2,191050.39
3,0.873136,0.649738,0.812235,1,182901.99
4,0.859438,0.500378,0.776136,2,166187.94


# Robust¶

In [26]:
from sklearn.preprocessing import RobustScaler

In [27]:
robust_df = df.copy()

In [28]:
robust_sc = RobustScaler()

In [29]:
robust_df['Marketing Spend'] = robust_sc.fit_transform(robust_df[['Marketing Spend']])

In [30]:
robust_df['Administration'] = robust_sc.fit_transform(robust_df[['Administration']])

In [31]:
robust_df['Transport'] = robust_sc.fit_transform(robust_df[['Transport']])

In [32]:
robust_df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,0.67253,0.345355,1.552016,1,192261.83
1,1.452113,0.697565,1.383714,0,191792.06
2,1.303634,-0.52429,1.166654,2,191050.39
3,1.156567,-0.097977,1.017368,1,182901.99
4,1.119836,-0.761543,0.914576,2,166187.94
