# All about Feature Transformation Techniques

In [1]:
import pandas as pd
import seaborn as sna

In [2]:
df = pd.read_csv('supershops.csv')

In [3]:
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


# MinMaxScaler / Normalization

In [4]:
from sklearn.preprocessing import MinMaxScaler

In [5]:
min_max_sc = MinMaxScaler() # creating Object min_max_sc

In [6]:
# model fit / train
ms_fit = min_max_sc.fit(df[['Marketing Spend']])

min_max_sc

MinMaxScaler()

In [7]:
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [11]:
df['Marketing spend'] = min_max_sc.transform(df[['Marketing Spend']]) # data transform
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Marketing spend
0,114523.61,136897.8,471784.1,Dhaka,192261.83,0.692617
1,162597.7,151377.59,443898.53,Ctg,191792.06,0.983359
2,153441.51,101145.55,407934.54,Rangpur,191050.39,0.927985
3,144372.41,118671.85,383199.62,Dhaka,182901.99,0.873136
4,142107.34,91391.77,366168.42,Rangpur,166187.94,0.859438


In [12]:
# at a time fit and transform
df['Administration'] = min_max_sc.fit_transform(df[['Administration']])
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Marketing spend
0,114523.61,0.651744,471784.1,Dhaka,192261.83,0.692617
1,162597.7,0.761972,443898.53,Ctg,191792.06,0.983359
2,153441.51,0.379579,407934.54,Rangpur,191050.39,0.927985
3,144372.41,0.512998,383199.62,Dhaka,182901.99,0.873136
4,142107.34,0.305328,366168.42,Rangpur,166187.94,0.859438


In [13]:
mmx = MinMaxScaler(feature_range=(3,5)) 

In [14]:
df['Administration'] = mmx.fit_transform(df[['Administration']])
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Marketing spend
0,114523.61,4.303488,471784.1,Dhaka,192261.83,0.692617
1,162597.7,4.523943,443898.53,Ctg,191792.06,0.983359
2,153441.51,3.759158,407934.54,Rangpur,191050.39,0.927985
3,144372.41,4.025997,383199.62,Dhaka,182901.99,0.873136
4,142107.34,3.610656,366168.42,Rangpur,166187.94,0.859438


In [15]:
mmx2 = MinMaxScaler(feature_range=(2,4)) 

In [17]:
df['Transport'] = mmx2.fit_transform(df[['Transport']])
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Marketing spend
0,114523.61,4.303488,4.0,Dhaka,192261.83,0.692617
1,162597.7,4.523943,3.881787,Ctg,191792.06,0.983359
2,153441.51,3.759158,3.729327,Rangpur,191050.39,0.927985
3,144372.41,4.025997,3.62447,Dhaka,182901.99,0.873136
4,142107.34,3.610656,3.552271,Rangpur,166187.94,0.859438


In [18]:
df2 = pd.read_csv('supershops.csv')
df2.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


# Staandard Scaler / standardization

In [20]:
from sklearn.preprocessing import StandardScaler

In [21]:
st = StandardScaler() # creating Object st for StandardScaler

In [22]:
# at a time fit and transform
df['Marketing Spend'] = st.fit_transform(df[['Marketing Spend']])

In [23]:
st

StandardScaler()

In [24]:
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Marketing spend
0,0.897913,4.303488,4.0,Dhaka,192261.83,0.692617
1,1.95586,4.523943,3.881787,Ctg,191792.06,0.983359
2,1.754364,3.759158,3.729327,Rangpur,191050.39,0.927985
3,1.554784,4.025997,3.62447,Dhaka,182901.99,0.873136
4,1.504937,3.610656,3.552271,Rangpur,166187.94,0.859438


In [25]:
# at a time fit and transform
df['Transport'] = st.fit_transform(df[['Transport']])
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Marketing spend
0,0.897913,4.303488,2.165287,Dhaka,192261.83,0.692617
1,1.95586,4.523943,1.929843,Ctg,191792.06,0.983359
2,1.754364,3.759158,1.626191,Rangpur,191050.39,0.927985
3,1.554784,4.025997,1.417348,Dhaka,182901.99,0.873136
4,1.504937,3.610656,1.27355,Rangpur,166187.94,0.859438


In [28]:
# at a time fit and transform
df['Administration'] = st.fit_transform(df[['Administration']])
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Marketing spend
0,0.897913,0.560753,2.165287,Dhaka,192261.83,0.692617
1,1.95586,1.082807,1.929843,Ctg,191792.06,0.983359
2,1.754364,-0.728257,1.626191,Rangpur,191050.39,0.927985
3,1.554784,-0.096365,1.417348,Dhaka,182901.99,0.873136
4,1.504937,-1.079919,1.27355,Rangpur,166187.94,0.859438


# Robust Scaler

In [29]:
df3 = pd.read_csv('supershops.csv')
df3.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [30]:
from sklearn.preprocessing import RobustScaler

In [31]:
rbs =RobustScaler()

In [32]:
rbs

RobustScaler()

In [33]:
# at a time fit and transform
df3['Marketing Spend'] = rbs.fit_transform(df[['Marketing Spend']])

In [34]:
df3.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,0.67253,136897.8,471784.1,Dhaka,192261.83
1,1.452113,151377.59,443898.53,Ctg,191792.06
2,1.303634,101145.55,407934.54,Rangpur,191050.39
3,1.156567,118671.85,383199.62,Dhaka,182901.99
4,1.119836,91391.77,366168.42,Rangpur,166187.94


In [35]:
# at a time fit and transform
df3['Administration'] = rbs.fit_transform(df[['Administration']])
df3.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,0.67253,0.345355,471784.1,Dhaka,192261.83
1,1.452113,0.697565,443898.53,Ctg,191792.06
2,1.303634,-0.52429,407934.54,Rangpur,191050.39
3,1.156567,-0.097977,383199.62,Dhaka,182901.99
4,1.119836,-0.761543,366168.42,Rangpur,166187.94


In [37]:
# at a time fit and transform
df3['Transport	'] = rbs.fit_transform(df[['Transport']])
df3.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Transport\t
0,0.67253,0.345355,471784.1,Dhaka,192261.83,1.552016
1,1.452113,0.697565,443898.53,Ctg,191792.06,1.383714
2,1.303634,-0.52429,407934.54,Rangpur,191050.39,1.166654
3,1.156567,-0.097977,383199.62,Dhaka,182901.99,1.017368
4,1.119836,-0.761543,366168.42,Rangpur,166187.94,0.914576


# Max Absolute Scaler

In [38]:
df4 =pd.read_csv('supershops.csv')
df4.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [41]:
from sklearn.preprocessing import MaxAbsScaler

mas =MaxAbsScaler()

mas

MaxAbsScaler()

In [42]:
# at a time fit and transform
df4['Transport	'] = mas.fit_transform(df[['Transport']])
df4.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Transport\t
0,114523.61,136897.8,471784.1,Dhaka,192261.83,1.0
1,162597.7,151377.59,443898.53,Ctg,191792.06,0.891264
2,153441.51,101145.55,407934.54,Rangpur,191050.39,0.751028
3,144372.41,118671.85,383199.62,Dhaka,182901.99,0.654577
4,142107.34,91391.77,366168.42,Rangpur,166187.94,0.588166


In [43]:
# at a time fit and transform
df4['Marketing Spend'] = mas.fit_transform(df[['Marketing Spend']])
df4.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Transport\t
0,0.445303,136897.8,471784.1,Dhaka,192261.83,1.0
1,0.969971,151377.59,443898.53,Ctg,191792.06,0.891264
2,0.870043,101145.55,407934.54,Rangpur,191050.39,0.751028
3,0.771065,118671.85,383199.62,Dhaka,182901.99,0.654577
4,0.746344,91391.77,366168.42,Rangpur,166187.94,0.588166


In [44]:
# at a time fit and transform
df4['Administration'] = mas.fit_transform(df[['Administration']])
df4.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit,Transport\t
0,0.445303,0.221993,471784.1,Dhaka,192261.83,1.0
1,0.969971,0.428666,443898.53,Ctg,191792.06,0.891264
2,0.870043,-0.288305,407934.54,Rangpur,191050.39,0.751028
3,0.771065,-0.038149,383199.62,Dhaka,182901.99,0.654577
4,0.746344,-0.427523,366168.42,Rangpur,166187.94,0.588166
