# Importing Libraries

In [4]:
import pandas as pd
import numpy as np

# Fetching The Data

In [5]:
# Load the raw dataset from "Data.csv" (path is relative to the working directory).
data = pd.read_csv("Data.csv")

In [6]:
data.head()

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,No
1,Spain,27.0,48000.0,Yes
2,Germany,30.0,54000.0,No
3,Spain,38.0,61000.0,No
4,Germany,40.0,,Yes


In [7]:
# Feature matrix: every column except the last one, as a NumPy array.
X = data.iloc[:, :-1].values
# Target vector: the last column only.
Y = data.iloc[:, -1].values

In [8]:
X

array([['France', 44.0, 72000.0],
       ['Spain', 27.0, 48000.0],
       ['Germany', 30.0, 54000.0],
       ['Spain', 38.0, 61000.0],
       ['Germany', 40.0, nan],
       ['France', 35.0, 58000.0],
       ['Spain', nan, 52000.0],
       ['France', 48.0, 79000.0],
       ['Germany', 50.0, 83000.0],
       ['France', 37.0, 67000.0]], dtype=object)

In [9]:
Y

array(['No', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes'],
      dtype=object)

# Feature Scaling Using Library Scalers (scikit-learn)

### Standard Scaler

In [10]:
from sklearn.preprocessing import StandardScaler

In [11]:
ss = StandardScaler()

In [14]:
# Fit on columns 1 and 2 of X only (the numeric columns); column 0 holds country names.
ss.fit(X[:, 1:3])

In [15]:
# Standardize the same two columns; missing values stay NaN in the result.
ss.transform(X[:, 1:3])

array([[ 0.71993143,  0.71101276],
       [-1.62367514, -1.36437583],
       [-1.21009751, -0.84552869],
       [-0.10722383, -0.24020701],
       [ 0.16849459,         nan],
       [-0.52080146, -0.49963059],
       [        nan, -1.01847774],
       [ 1.27136827,  1.31633443],
       [ 1.54708669,  1.66223253],
       [-0.24508304,  0.27864014]])

### MinMax Scaler

In [17]:
from sklearn.preprocessing import MinMaxScaler

In [18]:
mm = MinMaxScaler()

In [19]:
# Fit on columns 1 and 2 of X only (the numeric columns); column 0 holds country names.
mm.fit(X[:, 1:3])

In [23]:
# Rescale the same two columns; missing values stay NaN in the result.
mm.transform(X[:, 1:3])

array([[0.73913043, 0.68571429],
       [0.        , 0.        ],
       [0.13043478, 0.17142857],
       [0.47826087, 0.37142857],
       [0.56521739,        nan],
       [0.34782609, 0.28571429],
       [       nan, 0.11428571],
       [0.91304348, 0.88571429],
       [1.        , 1.        ],
       [0.43478261, 0.54285714]])

# Feature Scaling Using My Methods

### Standard Scaler

In [149]:
class MyStandardScaler():
    """Standardize each column to zero mean and unit variance.

    Missing values (NaN) are ignored while fitting and are passed through
    unchanged by ``transform``. The standard deviation is the population
    standard deviation (``ddof=0``), as computed by ``np.nanstd``.

    Attributes
    ----------
    metrics : list of [mean, std]
        One ``[mean, std]`` pair per column, set by ``fit``.
    """

    def __init__(self):
        pass

    def fit(self, X):
        """Compute the per-column mean and standard deviation of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric data; object-dtype arrays holding numbers are accepted.
            NaN entries are skipped.
        """
        values = np.asarray(X, dtype=float)
        # Iterate over columns (rows of the transpose) so each statistic is
        # computed on a single 1-D column.
        self.metrics = [
            [float(np.nanmean(column)), float(np.nanstd(column))]
            for column in values.T
        ]

    def transform(self, X):
        """Scale ``X`` with the statistics learned by ``fit``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data with the same number of columns that was passed to ``fit``.

        Returns
        -------
        list of list of float
            ``(x - mean) / std`` for every entry, row by row. Columns whose
            fitted standard deviation is zero are only centered (scaled by 1),
            so a constant column maps to 0.0 instead of NaN/inf.
        """
        values = np.asarray(X, dtype=float)
        stats = np.asarray(self.metrics, dtype=float).reshape(-1, 2)
        means = stats[:, 0]
        scales = stats[:, 1].copy()
        # A zero standard deviation would cause a division by zero; fall back
        # to a scale of 1 so the column is centered but not rescaled.
        scales[scales == 0] = 1.0
        return ((values - means) / scales).tolist()

In [150]:
mss = MyStandardScaler()

In [151]:
mss.fit(X[:, 1:3])

In [152]:
# Same columns as the scikit-learn run; the result is a nested Python list, not an ndarray.
mss.transform(X[:, 1:3])

[[0.7199314321591973, 0.711012758872281],
 [-1.6236751448696793, -1.3643758345927564],
 [-1.2100975136292893, -0.845528686226497],
 [-0.1072238303215827, -0.24020701313252774],
 [0.16849459050534396, nan],
 [-0.5208014615619727, -0.4996305873156574],
 [nan, -1.0184777356819168],
 [1.2713682738130507, 1.3163344319662502],
 [1.5470866946399773, 1.6622325308770898],
 [-0.24508304073504605, 0.2786401352337316]]

### MinMax Scaler

In [162]:
class MyMinMaxScaler():
    """Rescale each column linearly so its fitted minimum maps to 0 and its
    fitted maximum maps to 1.

    Missing values (NaN) are ignored while fitting and are passed through
    unchanged by ``transform``.

    Attributes
    ----------
    metrics : list of [max, min]
        One ``[max, min]`` pair per column, set by ``fit``.
    """

    def __init__(self):
        pass

    def fit(self, X):
        """Compute the per-column maximum and minimum of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric data; object-dtype arrays holding numbers are accepted.
            NaN entries are skipped.
        """
        values = np.asarray(X, dtype=float)
        # The builtin max()/min() give order-dependent results when NaN is
        # present (a leading NaN is never replaced), so use the NaN-aware
        # NumPy reductions instead.
        self.metrics = [
            [float(np.nanmax(column)), float(np.nanmin(column))]
            for column in values.T
        ]

    def transform(self, X):
        """Scale ``X`` with the bounds learned by ``fit``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data with the same number of columns that was passed to ``fit``.

        Returns
        -------
        list of list of float
            ``(x - min) / (max - min)`` for every entry, row by row. Columns
            whose fitted range is zero are only shifted (scaled by 1), so a
            constant column maps to 0.0 instead of dividing by zero.
        """
        values = np.asarray(X, dtype=float)
        bounds = np.asarray(self.metrics, dtype=float).reshape(-1, 2)
        minimums = bounds[:, 1]
        ranges = bounds[:, 0] - minimums
        # Guard against max == min, which would otherwise divide by zero.
        ranges[ranges == 0] = 1.0
        return ((values - minimums) / ranges).tolist()

In [163]:
mmm = MyMinMaxScaler()

In [164]:
# Fit on columns 1 and 2 of X only (the numeric columns).
# NOTE(review): the saved output under this cell shows a list of [max, min] pairs,
# but fit() as defined in this notebook has no return statement - the output looks
# stale; re-run the notebook from a fresh kernel to refresh it.
mmm.fit(X[:, 1:3])

[[50.0, 27.0], [83000.0, 48000.0]]

In [165]:
# Same columns as the scikit-learn run; the result is a nested Python list, not an ndarray.
mmm.transform(X[:, 1:3])

[[0.7391304347826086, 0.6857142857142857],
 [0.0, 0.0],
 [0.13043478260869565, 0.17142857142857143],
 [0.4782608695652174, 0.37142857142857144],
 [0.5652173913043478, nan],
 [0.34782608695652173, 0.2857142857142857],
 [nan, 0.11428571428571428],
 [0.9130434782608695, 0.8857142857142857],
 [1.0, 1.0],
 [0.43478260869565216, 0.5428571428571428]]