## Importing Libraries

In [132]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [133]:
df = pd.read_csv('supershops.csv')
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [134]:
x = df.drop('Profit', axis=1)
x.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area
0,114523.61,136897.8,471784.1,Dhaka
1,162597.7,151377.59,443898.53,Ctg
2,153441.51,101145.55,407934.54,Rangpur
3,144372.41,118671.85,383199.62,Dhaka
4,142107.34,91391.77,366168.42,Rangpur


## Measure of Central Tendency

In [135]:
df.isnull().sum()

Marketing Spend    0
Administration     0
Transport          1
Area               0
Profit             0
dtype: int64

In [136]:
#The row with NaN value
df[df.isnull().any(axis=1)]

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
19,86419.7,153514.11,,Dhaka,122776.86


In [137]:
df.Transport.mean()

215331.7324489796

In [138]:
df.Transport.median()

214634.81

In [139]:
df.Transport = df.Transport.fillna(df.Transport.mean())

In [140]:
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94


In [141]:
df.isnull().sum()

Marketing Spend    0
Administration     0
Transport          0
Area               0
Profit             0
dtype: int64

## Label Encoding

### 1.1 Label Encoding using Function

In [142]:
def label_encoder(df, col):
    label_map = {}
    encoded_value = []

    for instance in df[col]:
        if instance not in label_map:
            label_map[instance] = len(label_map)
        encoded_value.append(label_map[instance])
    return encoded_value, label_map

encoded_value, label_map = label_encoder(x, 'Area')
x.Area = encoded_value

x.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area
0,114523.61,136897.8,471784.1,0
1,162597.7,151377.59,443898.53,1
2,153441.51,101145.55,407934.54,2
3,144372.41,118671.85,383199.62,0
4,142107.34,91391.77,366168.42,2


### 1.2 Label Encoding using Sklearn

In [143]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

df1 = pd.read_csv('supershops.csv')

df1.Area = le.fit_transform(df1[['Area']])
df1.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,1,192261.83
1,162597.7,151377.59,443898.53,0,191792.06
2,153441.51,101145.55,407934.54,2,191050.39
3,144372.41,118671.85,383199.62,1,182901.99
4,142107.34,91391.77,366168.42,2,166187.94


### 1.3 Label Encoding using For Loop

In [144]:
from pandas.core.dtypes.common import is_numeric_dtype
from sklearn.preprocessing import LabelEncoder

df1 = pd.read_csv('supershops.csv') 

for col in df1.columns:
    if is_numeric_dtype(df1[col]):
        continue
    else:
        df1[col] = LabelEncoder().fit_transform(df1[col])

df1.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,1,192261.83
1,162597.7,151377.59,443898.53,0,191792.06
2,153441.51,101145.55,407934.54,2,191050.39
3,144372.41,118671.85,383199.62,1,182901.99
4,142107.34,91391.77,366168.42,2,166187.94


## One Hot Encoding

### 2.1 One Hot Encoding using Generating Dummy Variables

In [145]:
df1 = df.copy()
df1 = pd.get_dummies(df, prefix='Area', drop_first=True).astype(int).head()
df1.head()

# note: if any error occurs, please remove astype(int) from the above code or again rull all the code from the beginning

Unnamed: 0,Marketing Spend,Administration,Transport,Profit,Area_Dhaka,Area_Rangpur
0,114523,136897,471784,192261,1,0
1,162597,151377,443898,191792,0,0
2,153441,101145,407934,191050,0,1
3,144372,118671,383199,182901,1,0
4,142107,91391,366168,166187,0,1


### 2.2 One Hot Encoding using For Loop

In [146]:
from pandas.core.dtypes.common import is_numeric_dtype
import pandas as pd

df = pd.read_csv('supershops.csv')

for col in df.columns:
    if is_numeric_dtype(df[col]):
        continue
    else:
        df = pd.concat([df,pd.get_dummies(df[col], prefix=col, drop_first=True).astype(int)], axis=1)
        df.drop(col, axis=1, inplace=True)
df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Profit,Area_Dhaka,Area_Rangpur
0,114523.61,136897.8,471784.1,192261.83,1,0
1,162597.7,151377.59,443898.53,191792.06,0,0
2,153441.51,101145.55,407934.54,191050.39,0,1
3,144372.41,118671.85,383199.62,182901.99,1,0
4,142107.34,91391.77,366168.42,166187.94,0,1


## Ordinal Encoding

### 3.1 Ordinal Encoding using Sklearn

In [147]:
df = pd.read_csv('supershops.csv')


from sklearn.preprocessing import OrdinalEncoder
ordinal = OrdinalEncoder(categories=[df.Area.unique()])
df.Area = ordinal.fit_transform(df[['Area']])

df.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,0.0,192261.83
1,162597.7,151377.59,443898.53,1.0,191792.06
2,153441.51,101145.55,407934.54,2.0,191050.39
3,144372.41,118671.85,383199.62,0.0,182901.99
4,142107.34,91391.77,366168.42,2.0,166187.94


### 3.2 Ordinal Encoding using For Loop

In [148]:
from pandas.core.dtypes.common import is_numeric_dtype
from sklearn.preprocessing import OrdinalEncoder
import pandas as pd

df2 = pd.read_csv('supershops.csv')

for col in df2.columns:
    if is_numeric_dtype(df2[col]):
        continue
    else:
        column_data = df2[col].values.reshape(-1, 1)
        ordinal = OrdinalEncoder(categories=[df2[col].unique()])
        df2[col] = ordinal.fit_transform(column_data)
df2.head()

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,0.0,192261.83
1,162597.7,151377.59,443898.53,1.0,191792.06
2,153441.51,101145.55,407934.54,2.0,191050.39
3,144372.41,118671.85,383199.62,0.0,182901.99
4,142107.34,91391.77,366168.42,2.0,166187.94
