# Data Normalization Min-Max

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Location of the CSV used for the normalization examples.
GAJI2_CSV_URL = 'https://raw.githubusercontent.com/diazzzz/Buku-Machine-Learning/main/Data%20Normalization%20and%20Discretization/datasetgaji2.csv'

# Read the CSV into a DataFrame; the bare name on the last line makes the
# notebook render the table as the cell output.
data = pd.read_csv(GAJI2_CSV_URL)
data

Unnamed: 0,Nama,Status Rumah,Gaji Orang Tua,Umur
0,aldi taher,Kontrak,2100000,24
1,kekeyi,Milik,4000000,23
2,rangga sasana,Kontrak,1400000,21
3,vicky prasetyo,Kontrak,700000,28
4,Zuko,Milik,650000,27
5,dyrga,Kontrak,450000,22


In [4]:
# Min-max normalization of the two numeric columns: each column is rescaled
# independently so that its smallest value becomes 0 and its largest becomes 1.
numeric_cols = ['Gaji Orang Tua', 'Umur']
scaler = MinMaxScaler()
scaled = scaler.fit_transform(data[numeric_cols])

# The result is a plain 2-D array (one row per record, one column per feature).
print(scaled)

[[0.46478873 0.42857143]
 [1.         0.28571429]
 [0.26760563 0.        ]
 [0.07042254 1.        ]
 [0.05633803 0.85714286]
 [0.         0.14285714]]


# Standardization (Z-Score Normalization)

In [5]:
from sklearn.preprocessing import StandardScaler

In [7]:
# Z-score standardization of the same two numeric columns: each column is
# centred on its mean and divided by its standard deviation.
# Note: this rebinds `scaler` and `scaled` from the min-max cell.
numeric_cols = ['Gaji Orang Tua', 'Umur']
scaler = StandardScaler()
scaled = scaler.fit_transform(data[numeric_cols])

# The result is a plain 2-D array (one row per record, one column per feature).
print(scaled)

[[ 0.44733685 -0.06551218]
 [ 1.99268231 -0.45858525]
 [-0.12200096 -1.24473139]
 [-0.69133876  1.5067801 ]
 [-0.73200575  1.11370703]
 [-0.89467369 -0.85165832]]


# Binning atau Discretization

## 1. Binning Manual

In [8]:
from sklearn.pipeline import Pipeline
import numpy as np

In [9]:
# Location of the CSV used for the binning examples.
GAJI_CSV_URL = 'https://raw.githubusercontent.com/diazzzz/Buku-Machine-Learning/main/Data%20Normalization%20and%20Discretization/datasetgaji.csv'

# Rebinds `data` to the dataset used by the binning cells that follow;
# head() previews only the first five rows.
data = pd.read_csv(GAJI_CSV_URL)
data.head()

Unnamed: 0,Nama,Status Rumah,Gaji Orang Tua,Keterangan
0,aldi taher,Kontrak,2100000,
1,kekeyi,Milik,4000000,
2,rangga sasana,Kontrak,1400000,
3,vicky prasetyo,Kontrak,700000,
4,Zuko,Milik,650000,


In [12]:
# Hand-picked bin edges. pd.cut uses right-closed intervals by default, so:
#   (0, 1400000]       -> 'Kecil'
#   (1400000, 4000000] -> 'Besar'
batas_bin = [0, 1400000, 4000000]
kategori = ['Kecil', 'Besar']

# Label every salary with the category of the interval it falls into and
# store the result as a new column.
gaji_manual = pd.cut(data['Gaji Orang Tua'], bins=batas_bin, labels=kategori)
data['gaji_binned_1'] = gaji_manual
print(data)

             Nama Status Rumah  Gaji Orang Tua  Keterangan gaji_binned_1
0      aldi taher      Kontrak         2100000         NaN         Besar
1          kekeyi        Milik         4000000         NaN         Besar
2   rangga sasana      Kontrak         1400000         NaN         Kecil
3  vicky prasetyo      Kontrak          700000         NaN         Kecil
4            Zuko        Milik          650000         NaN         Kecil
5           dyrga      Kontrak          450000         NaN         Kecil


## 2. Binning Linspace

In [13]:
# Equal-width binning: three evenly spaced edges between the smallest and the
# largest salary define two bins of identical width.
#
# Series.min()/.max() are used instead of the builtin min()/max(): the pandas
# reductions skip missing values, whereas the builtins compare element by
# element and give order-dependent results as soon as a NaN is present.
gaji = data['Gaji Orang Tua']
bins = np.linspace(gaji.min(), gaji.max(), 3)
kategori = ['Kecil', 'Besar']

# include_lowest=True closes the first interval on the left, so the row that
# holds the minimum salary gets a label instead of becoming NaN.
data['gaji_binned'] = pd.cut(gaji,
                             bins=bins,
                             labels=kategori,
                             include_lowest=True)
print(data)

             Nama Status Rumah  Gaji Orang Tua  Keterangan gaji_binned_1  \
0      aldi taher      Kontrak         2100000         NaN         Besar   
1          kekeyi        Milik         4000000         NaN         Besar   
2   rangga sasana      Kontrak         1400000         NaN         Kecil   
3  vicky prasetyo      Kontrak          700000         NaN         Kecil   
4            Zuko        Milik          650000         NaN         Kecil   
5           dyrga      Kontrak          450000         NaN         Kecil   

  gaji_binned  
0       Kecil  
1       Besar  
2       Kecil  
3       Kecil  
4       Kecil  
5       Kecil  


## 3. Teknik Quantile

In [16]:
# Quantile-based binning: q=2 cuts the salaries at the median, so both bins
# receive the same number of rows regardless of how the values are spread.
# `kategori` is the two-label list defined in the earlier binning cells.
gaji_kuantil = pd.qcut(data['Gaji Orang Tua'], q=2, labels=kategori)
data['gaji_binned_3'] = gaji_kuantil
print(data)

             Nama Status Rumah  Gaji Orang Tua  Keterangan gaji_binned_1  \
0      aldi taher      Kontrak         2100000         NaN         Besar   
1          kekeyi        Milik         4000000         NaN         Besar   
2   rangga sasana      Kontrak         1400000         NaN         Kecil   
3  vicky prasetyo      Kontrak          700000         NaN         Kecil   
4            Zuko        Milik          650000         NaN         Kecil   
5           dyrga      Kontrak          450000         NaN         Kecil   

  gaji_binned gaji_binned_3  
0       Kecil         Besar  
1       Besar         Besar  
2       Kecil         Besar  
3       Kecil         Kecil  
4       Kecil         Kecil  
5       Kecil         Kecil  
