In [39]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

plt.rcParams["figure.figsize"] = (10, 7)
plt.rcParams["font.size"] = 16

## **Prediksi karyawan left (1) atau tidak (0)**

*Cek Resiko dengan Sederhana*

- Salah prediksi karyawan stay - diprediksi stay, padahal resign/left
  - Resource bisa berkurang sehingga produktifitas menurun
  - Mengeluarkan cost untuk pesangon karyawan
  - Ada kemungkinan beberapa target tidak terkejar

- Salah prediksi karyawan left - diprediksi left, padahal stay
  - Sudah membuka job opening lebih dulu, HR bisa buang-buang waktu.
  - Mungkin melakukan restrukturisasi tim.

---
- Lebih besar resiko dari salah prediksi karyawan stay
- Apabila stay = negative (0) & left = positive (1)
- Maka kita ingin memperkecil nilai False Negative
- Dengan kata lain memperbesar nilai Recall

$$
\text{Recall} = \cfrac{TP}{TP + FN}
$$

catatan: apabila ingin mengurangi kasus left (positive)nya, maka bisa dikurangi *Precision*-nya

$$
\text{Precision} = \cfrac{TP}{TP + FP}
$$

In [40]:
# load data

def importData(filename):
    # Read Data
    data = pd.read_csv(filename)
    print(f"shape awal                    : {data.shape}, (#observasi, #fitur)")

    # Drop duplicate
    data = data.drop_duplicates()
    print(f"shape setelah drop duplikat   : {data.shape}, (#observasi, #fitur)")

    return data


In [41]:
filename = "data/data_hr.csv"
data = importData(filename = filename)

data.head()

shape awal                    : (14999, 10), (#observasi, #fitur)
shape setelah drop duplikat   : (11991, 10), (#observasi, #fitur)


Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,division,salary
0,0.38,0.53,2,157,3,0,1,0,sales,low
1,0.8,0.86,5,262,6,0,1,0,sales,medium
2,0.11,0.88,7,272,4,0,1,0,sales,medium
3,0.72,0.87,5,223,5,0,1,0,sales,low
4,0.37,0.52,2,159,3,0,1,0,sales,low


In [42]:
def splitInputOutput(data, target_column):
    X = data.drop(columns = target_column)
    y = data[target_column]

    return X, y

In [43]:
target_column = "left"
X, y = splitInputOutput(data = data,
                        target_column = target_column)

X.head, y.head

(<bound method NDFrame.head of        satisfaction_level  last_evaluation  number_project  \
 0                    0.38             0.53               2   
 1                    0.80             0.86               5   
 2                    0.11             0.88               7   
 3                    0.72             0.87               5   
 4                    0.37             0.52               2   
 ...                   ...              ...             ...   
 11995                0.90             0.55               3   
 11996                0.74             0.95               5   
 11997                0.85             0.54               3   
 11998                0.33             0.65               3   
 11999                0.50             0.73               4   
 
        average_montly_hours  time_spend_company  Work_accident  \
 0                       157                   3              0   
 1                       262                   6              0   
 2         

Split train-test

In [44]:
# Cek kondisi proporsi kelas target
y.value_counts(normalize = True)

# Tidak balance

0    0.833959
1    0.166041
Name: left, dtype: float64

In [45]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    stratify = y,
                                                    test_size = 0.2,
                                                    random_state = 123)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((9592, 9), (2399, 9), (9592,), (2399,))

Splitting Categorical and Numerical Column

In [46]:
X_train.head()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years,division,salary
9492,0.44,0.56,3,123,3,0,0,accounting,medium
11555,0.98,0.44,4,154,6,1,0,sales,medium
11301,0.76,0.99,3,133,4,0,0,sales,low
5578,0.47,0.55,2,156,2,0,0,management,medium
6241,0.84,0.68,4,151,2,0,0,technical,low


In [47]:
categorical_col = ["Work_accident", "promotion_last_5years", "division", "salary"]
numerical_col = list(set(X_train.columns) - set(categorical_col))

categorical_col, numerical_col

(['Work_accident', 'promotion_last_5years', 'division', 'salary'],
 ['satisfaction_level',
  'average_montly_hours',
  'time_spend_company',
  'last_evaluation',
  'number_project'])

In [48]:
def splitNumCat(data, num_col, cat_col):
    data_num = data[num_col]
    data_cat = data[cat_col]

    return data_num, data_cat
    

In [49]:
X_train_num, X_train_cat = splitNumCat(data = X_train,
                                       num_col = numerical_col,
                                       cat_col = categorical_col)

Handling Numerical Data

In [50]:
# Check missing value
X_train_num.isna().any()

satisfaction_level      False
average_montly_hours    False
time_spend_company      False
last_evaluation         False
number_project          False
dtype: bool

In [51]:
# Buat imputer untuk jaga-jaga apabila di data test ada yang kosong
from sklearn.impute import SimpleImputer

def imputerNum(data, imputer = None):
    if imputer == None:
        # Buat imputer
        imputer = SimpleImputer(missing_values = np.nan,
                                strategy = "median")
        imputer.fit(data)

    # Transform data dengan imputer
    data_imputed = imputer.transform(data)
    data_imputed = pd.DataFrame(data_imputed,
                                index = data.index,
                                columns = data.columns)
    
    return data_imputed, imputer


In [52]:
X_train_num_imputed, imputer_num = imputerNum(data = X_train_num)

In [53]:
X_train_num_imputed.head()

Unnamed: 0,satisfaction_level,average_montly_hours,time_spend_company,last_evaluation,number_project
9492,0.44,123.0,3.0,0.56,3.0
11555,0.98,154.0,6.0,0.44,4.0
11301,0.76,133.0,4.0,0.99,3.0
5578,0.47,156.0,2.0,0.55,2.0
6241,0.84,151.0,2.0,0.68,4.0


Handling Categorical Data

In [54]:
# Check missing value
X_train_cat.isna().any()

Work_accident            False
promotion_last_5years    False
division                 False
salary                   False
dtype: bool

In [55]:
# Buat imputer untuk jaga-jaga apabila di data test ada yang kosong
def imputerCat(data, imputer = None):
    if imputer == None:
        # Buat imputer
        imputer = SimpleImputer(missing_values = np.nan,
                                strategy = "constant",
                                fill_value = "UNKNOWN")
        imputer.fit(data)

    # Transform data dengan imputer
    data_imputed = imputer.transform(data)
    data_imputed = pd.DataFrame(data_imputed,
                                index = data.index,
                                columns = data.columns)
    
    return data_imputed, imputer


In [56]:
X_train_cat_imputed, imputer_cat = imputerCat(data = X_train_cat)

In [57]:
X_train_cat_imputed.head()

Unnamed: 0,Work_accident,promotion_last_5years,division,salary
9492,0,0,accounting,medium
11555,1,0,sales,medium
11301,0,0,sales,low
5578,0,0,management,medium
6241,0,0,technical,low


One Hot Encoding Data

In [58]:
from sklearn.preprocessing import OneHotEncoder

def encoderCat(data, encoder_col = None, encoder = None):
    if encoder == None:
        # Buat objek
        encoder = OneHotEncoder(handle_unknown = "ignore",
                                drop = "if_binary")
        encoder.fit(data)
        encoder_col = encoder.get_feature_names_out(data.columns)

    # Transform data
    data_encoded = encoder.transform(data).toarray()
    data_encoded = pd.DataFrame(data_encoded,
                                index = data.index,
                                columns = encoder_col)
    
    return data_encoded, encoder_col, encoder


In [59]:
X_train_cat_encoded, encoder_col, encoder_OHE = encoderCat(data = X_train_cat_imputed)

In [60]:
X_train_cat_imputed.head().T

Unnamed: 0,9492,11555,11301,5578,6241
Work_accident,0,1,0,0,0
promotion_last_5years,0,0,0,0,0
division,accounting,sales,sales,management,technical
salary,medium,medium,low,medium,low


In [61]:
X_train_cat_encoded.head().T

Unnamed: 0,9492,11555,11301,5578,6241
Work_accident_1,0.0,1.0,0.0,0.0,0.0
promotion_last_5years_1,0.0,0.0,0.0,0.0,0.0
division_IT,0.0,0.0,0.0,0.0,0.0
division_RandD,0.0,0.0,0.0,0.0,0.0
division_accounting,1.0,0.0,0.0,0.0,0.0
division_hr,0.0,0.0,0.0,0.0,0.0
division_management,0.0,0.0,0.0,1.0,0.0
division_marketing,0.0,0.0,0.0,0.0,0.0
division_product_mng,0.0,0.0,0.0,0.0,0.0
division_sales,0.0,1.0,1.0,0.0,0.0


In [62]:
encoder_col

array(['Work_accident_1', 'promotion_last_5years_1', 'division_IT',
       'division_RandD', 'division_accounting', 'division_hr',
       'division_management', 'division_marketing',
       'division_product_mng', 'division_sales', 'division_support',
       'division_technical', 'salary_high', 'salary_low', 'salary_medium'],
      dtype=object)

Concat Data

In [63]:
X_train_concat = pd.concat([X_train_num_imputed, X_train_cat_encoded],
                           axis = 1)
X_train_concat.head()

Unnamed: 0,satisfaction_level,average_montly_hours,time_spend_company,last_evaluation,number_project,Work_accident_1,promotion_last_5years_1,division_IT,division_RandD,division_accounting,division_hr,division_management,division_marketing,division_product_mng,division_sales,division_support,division_technical,salary_high,salary_low,salary_medium
9492,0.44,123.0,3.0,0.56,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
11555,0.98,154.0,6.0,0.44,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
11301,0.76,133.0,4.0,0.99,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
5578,0.47,156.0,2.0,0.55,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
6241,0.84,151.0,2.0,0.68,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0


Balancing Data: SMOTE

In [64]:
# Cek missing values
X_train_concat.isna().any()

satisfaction_level         False
average_montly_hours       False
time_spend_company         False
last_evaluation            False
number_project             False
Work_accident_1            False
promotion_last_5years_1    False
division_IT                False
division_RandD             False
division_accounting        False
division_hr                False
division_management        False
division_marketing         False
division_product_mng       False
division_sales             False
division_support           False
division_technical         False
salary_high                False
salary_low                 False
salary_medium              False
dtype: bool

In [65]:
from imblearn.over_sampling import SMOTE

In [66]:
y_train.value_counts()

0    7999
1    1593
Name: left, dtype: int64

In [67]:
# Buat objek
smt = SMOTE(sampling_strategy = "minority",
            random_state = 123)

# Fit objek
X_train_smote, y_train = smt.fit_resample(X_train_concat, y_train)

In [68]:
y_train.value_counts()

0    7999
1    7999
Name: left, dtype: int64

Scale Data

In [69]:
from sklearn.preprocessing import StandardScaler

In [70]:
def standardizeData(data, scaler = None):
    if scaler == None:
        # Buat & Fit scaler
        scaler = StandardScaler()
        scaler.fit(data)

    # Tranform data
    data_scaled = scaler.transform(data)
    data_scaled = pd.DataFrame(data_scaled,
                               index = data.index,
                               columns = data.columns)
    
    return data_scaled, scaler


In [71]:
X_train_clean, scaler = standardizeData(data = X_train_smote)
X_train_clean.head()

Unnamed: 0,satisfaction_level,average_montly_hours,time_spend_company,last_evaluation,number_project,Work_accident_1,promotion_last_5years_1,division_IT,division_RandD,division_accounting,division_hr,division_management,division_marketing,division_product_mng,division_sales,division_support,division_technical,salary_high,salary_low,salary_medium
0,-0.431466,-1.486414,-0.471164,-0.890754,-0.578282,-0.351352,-0.110301,-0.298421,-0.235832,4.617454,-0.24962,-0.185331,-0.256525,-0.25499,-0.658043,-0.441724,-0.502676,-0.244239,-1.078356,1.204143
1,1.603416,-0.914843,2.00945,-1.561036,0.111098,2.972531,-0.110301,-0.298421,-0.235832,-0.241401,-0.24962,-0.185331,-0.256525,-0.25499,1.621266,-0.441724,-0.502676,-0.244239,-1.078356,1.204143
2,0.77439,-1.302036,0.355707,1.51109,-0.578282,-0.351352,-0.110301,-0.298421,-0.235832,-0.241401,-0.24962,-0.185331,-0.256525,-0.25499,1.621266,-0.441724,-0.502676,-0.244239,0.954285,-0.853395
3,-0.318417,-0.877967,-1.298036,-0.946611,-1.267661,-0.351352,-0.110301,-0.298421,-0.235832,-0.241401,-0.24962,5.978482,-0.256525,-0.25499,-0.658043,-0.441724,-0.502676,-0.244239,-1.078356,1.204143
4,1.075854,-0.970156,-1.298036,-0.220472,0.111098,-0.351352,-0.110301,-0.298421,-0.235832,-0.241401,-0.24962,-0.185331,-0.256525,-0.25499,-0.658043,-0.441724,2.137594,-0.244239,0.954285,-0.853395


Transform Test Data

In [72]:
def transformTestData(data, num_col, cat_col, encoder_col,
                      imputer_num, imputer_cat, encoder_cat,
                      scaler):
    # 1. Split num-cat
    data_num, data_cat = splitNumCat(data = data,
                                     num_col = num_col,
                                     cat_col = cat_col)
    
    # 2. Handling num
    data_num_imputed, _ = imputerNum(data = data_num,
                                     imputer = imputer_num)
    
    # 3. Handling cat
    data_cat_imputed, _ = imputerCat(data = data_cat,
                                     imputer = imputer_cat)
    data_cat_encoded, _, _ = encoderCat(data = data_cat_imputed,
                                        encoder_col = encoder_col,
                                        encoder = encoder_cat)
    
    # 4. Concat data
    data_concat = pd.concat([data_num_imputed, data_cat_encoded],
                            axis = 1)
    
    # 5. Scale data
    data_clean, _ = standardizeData(data = data_concat,
                                    scaler = scaler)
    
    return data_clean


In [73]:
X_test_clean = transformTestData(data = X_test,
                                 num_col = numerical_col, 
                                 cat_col = categorical_col,
                                 encoder_col = encoder_col,
                                 imputer_num = imputer_num,
                                 imputer_cat = imputer_cat,
                                 encoder_cat = encoder_OHE,
                                 scaler = scaler)

X_test_clean.head()

Unnamed: 0,satisfaction_level,average_montly_hours,time_spend_company,last_evaluation,number_project,Work_accident_1,promotion_last_5years_1,division_IT,division_RandD,division_accounting,division_hr,division_management,division_marketing,division_product_mng,division_sales,division_support,division_technical,salary_high,salary_low,salary_medium
8578,1.000488,-0.232645,2.00945,1.287663,1.489857,-0.351352,-0.110301,-0.298421,-0.235832,4.617454,-0.24962,-0.185331,-0.256525,-0.25499,-0.658043,-0.441724,-0.502676,-0.244239,-1.078356,1.204143
5756,-1.411224,1.168627,0.355707,0.784951,0.800477,-0.351352,-0.110301,-0.298421,-0.235832,4.617454,-0.24962,-0.185331,-0.256525,-0.25499,-0.658043,-0.441724,-0.502676,-0.244239,-1.078356,1.204143
3994,0.284511,-0.804216,-1.298036,1.455233,0.111098,-0.351352,-0.110301,-0.298421,-0.235832,-0.241401,-0.24962,-0.185331,-0.256525,-0.25499,1.621266,-0.441724,-0.502676,-0.244239,-1.078356,1.204143
1784,-0.469149,-1.007032,-0.471164,-1.170038,-1.267661,2.972531,-0.110301,-0.298421,-0.235832,4.617454,-0.24962,-0.185331,-0.256525,-0.25499,-0.658043,-0.441724,-0.502676,-0.244239,-1.078356,1.204143
10508,-0.167685,0.762996,-0.471164,1.455233,0.111098,-0.351352,-0.110301,-0.298421,-0.235832,-0.241401,-0.24962,-0.185331,-0.256525,-0.25499,-0.658043,2.442705,-0.502676,-0.244239,-1.078356,1.204143


In [74]:
X_test_clean.columns

Index(['satisfaction_level', 'average_montly_hours', 'time_spend_company',
       'last_evaluation', 'number_project', 'Work_accident_1',
       'promotion_last_5years_1', 'division_IT', 'division_RandD',
       'division_accounting', 'division_hr', 'division_management',
       'division_marketing', 'division_product_mng', 'division_sales',
       'division_support', 'division_technical', 'salary_high', 'salary_low',
       'salary_medium'],
      dtype='object')

In [75]:
X_train_clean.columns

Index(['satisfaction_level', 'average_montly_hours', 'time_spend_company',
       'last_evaluation', 'number_project', 'Work_accident_1',
       'promotion_last_5years_1', 'division_IT', 'division_RandD',
       'division_accounting', 'division_hr', 'division_management',
       'division_marketing', 'division_product_mng', 'division_sales',
       'division_support', 'division_technical', 'salary_high', 'salary_low',
       'salary_medium'],
      dtype='object')

In [76]:
len(X_train_clean.columns)

20

Done

---
# Multi-Layer Perceptron (Neural Network)
- Mengadaptasi cara sel otak transfer & mengolah informasi.
- Satuan terkecilnya adalah Neuron (Perceptron)
<center>
<img src="https://blog.camelot-group.com/wp-content/uploads/2022/01/Perceptron_byCamelot.png">
<br>
<a href="https://blog.camelot-group.com/2022/01/neural-networks-perceptron/">source</a>
</center>
​
- 1 Perceptron dapat menerima beberapa input
- 1 Perceptron mengeluarkan 1 output
- Input-input akan dijumlahkan secara berbobot dahulu (`preActivation`)
- Kemudian akan diaktifasi menggunakan `activation function` seperti Sigmoid, ReLU, TanH, dan sebagainya.

---
## Feed Propagation
- Misal kita ingin selesaikan kasus klasifikasi
- Kita buat arsitektur Neural Network dengan 1 perceptron

In [77]:
def preActivation(X, w):
    """
    Fungsi untuk mendapatkan nilai pre activation (weighted-sum)
    :param X: <pandas DataFrame> data input
    :param w: <numpy array> weight dari masing-masing input
    :return a: <numpy array> weighted sum atau pre activation
    """
    a = np.dot(X, w[1:]) + w[0]

    return a

In [78]:
def activationFunction(a, types):
    """
    Fungsi untuk melakukan aktifasi nilai pre activation
    :param a: <numpy array> pre activation value
    :param types: <str> pilihan fungsi aktivasi
    :return y: <numpy array> output perceptron
    """
    if types == "linear":
        y = a
    elif types == "sigmoid":
        y = 1 / (1 + np.exp(-a))
    elif types == "relu":
        y = np.max(0, a)
    else:
        raise Exception("Activation Function tidak terdaftar")

    return y

Lakukan Forward Propagation

In [79]:
# Buat random weight
# Ada 20 weight & 1 bias
w = np.random.rand(21)
w

array([6.95805320e-02, 3.41788040e-01, 4.64823397e-02, 9.99255480e-01,
       9.15987093e-01, 8.00177371e-01, 5.51103954e-02, 7.70013686e-02,
       8.12559617e-01, 9.70860278e-01, 9.60571605e-01, 2.40200929e-01,
       6.94347867e-01, 4.61105694e-03, 1.71254041e-02, 4.79123742e-04,
       6.28930174e-02, 1.61061814e-01, 9.97834441e-01, 6.32227975e-01,
       9.88130863e-01])

pada data train

In [80]:
# Cari pre-activation function
a = preActivation(X = X_train_clean,
                  w = w)

In [81]:
# Cari output --> activate the pre-activation
# Karena klasifikasi, activation function menggunakan Sigmoid
y_pred_proba_train = activationFunction(a = a,
                                        types = "sigmoid")

In [82]:
# Buat prediksi
thresh = 0.5

y_pred_train = (y_pred_proba_train > thresh).astype("int")

pada data test

In [83]:
# Cari pre-activation function
a = preActivation(X = X_test_clean,
                  w = w)

In [84]:
# Cari output --> activate the pre-activation
# Karena klasifikasi, activation function menggunakan Sigmoid
y_pred_proba_test = activationFunction(a = a,
                                       types = "sigmoid")

In [85]:
# Buat prediksi
y_pred_test = (y_pred_proba_test > thresh).astype("int")

cek performa

In [86]:
# Cari performa
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve
from sklearn.metrics import auc

pada data train

In [87]:
# Tampilkan confusion matrix
confusion_matrix(y_true = y_train,
                 y_pred = y_pred_train)

array([[4454, 3545],
       [3438, 4561]], dtype=int64)

In [88]:
# Tampilkan score
accuracy_score(y_true = y_train,
               y_pred = y_pred_train)

0.5635079384923115

In [89]:
# Tampilkan score
print(classification_report(y_true = y_train,
                            y_pred = y_pred_train,
                            target_names = ["stay (0)", "left (1)"]))

              precision    recall  f1-score   support

    stay (0)       0.56      0.56      0.56      7999
    left (1)       0.56      0.57      0.57      7999

    accuracy                           0.56     15998
   macro avg       0.56      0.56      0.56     15998
weighted avg       0.56      0.56      0.56     15998



**Baseline**
- karena data sudah balance, accuracy baseline adalah 50%
- model ini lebih buruk dibanding baseline, jadi harus diimprove

**Focus**
- `weighted_avg recall` bernilai 0.49, artinya dari yang **benar-benar** left, hanya 49% yang berhasil diprediksi left.
- Berarti masih ada hampir 50% karyawan yang left tapi salah prediksi, dengan kata lain resikonya besar.
- Kita ingin kecilkan hal tersebut

In [None]:
# ROC Curve
fpr_lr, tpr_lr, threshold_lr = roc_curve(y_train,
                                         y_pred_proba_train)
roc_auc_lr = auc(fpr_lr, tpr_lr)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr_lr, tpr_lr, 'b', label = 'AUC = %0.2f' % roc_auc_lr)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

pada data test

In [None]:
# Tampilkan confusion matrix
confusion_matrix(y_true = y_test,
                 y_pred = y_pred_test)

In [None]:
# Tampilkan score
accuracy_score(y_true = y_test,
               y_pred = y_pred_test)

In [None]:
# Tampilkan score
print(classification_report(y_true = y_test,
                            y_pred = y_pred_test,
                            target_names = ["stay (0)", "left (1)"]))

In [None]:
# ROC Curve
fpr_lr, tpr_lr, threshold_lr = roc_curve(y_test,
                                         y_pred_proba_test)
roc_auc_lr = auc(fpr_lr, tpr_lr)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr_lr, tpr_lr, 'b', label = 'AUC = %0.2f' % roc_auc_lr)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

Masih jelek, ingin kita improve

---
# Gradient Descent
- Akurasi model random masih kecil, ingin kita tingkatkan
- Artinya, **weight** ingin kita optimasi
- Dapat dioptimasi menggunakan **Gradient Descent**
<center>
<img src="https://static.javatpoint.com/tutorial/machine-learning/images/gradient-descent-in-machine-learning1.png">
<br>
<a href="https://www.javatpoint.com/gradient-descent-in-machine-learning">source</a>
</center>

Algoritma:
- Inisialisasi weight, *bisa random*
- Cari gradient error terhadap weight
- Lakukan update weight dari informasi gradient tersebut

$$
w_{\text{new}} = w_{\text{old}} - \nabla_{w} E
$$

- Ulangi hingga stopping criteria tercapai (max. iteration atau weight tidak berubah --> $\nabla_{w} E = 0$)

Cari gradient error menggunakan chain rules
​
$$
\begin{align*}
\cfrac{\partial \text{error}}{\partial w_{0}} &= \cfrac{\partial \text{error}}{\partial \hat{y}} \cdot \cfrac{\partial \hat{y}}{\partial a} \cdot \cfrac{\partial a}{\partial w_{0}} \\ \\
\cfrac{\partial \text{error}}{\partial w_{1}} &= \cfrac{\partial \text{error}}{\partial \hat{y}} \cdot \cfrac{\partial \hat{y}}{\partial a} \cdot \cfrac{\partial a}{\partial w_{1}} \\ \\
\cfrac{\partial \text{error}}{\partial w_{2}} &= \cfrac{\partial \text{error}}{\partial \hat{y}} \cdot \cfrac{\partial \hat{y}}{\partial a} \cdot \cfrac{\partial a}{\partial w_{2}} \\ \\
\vdots \\ \\ 
\cfrac{\partial \text{error}}{\partial w_{20}} &= \cfrac{\partial \text{error}}{\partial \hat{y}} \cdot \cfrac{\partial \hat{y}}{\partial a} \cdot \cfrac{\partial a}{\partial w_{20}}
\end{align*}
$$
Maka didapat
​
$$
\begin{align*}
\cfrac{\partial \text{error}}{\partial \hat{y}} &= -2 (y - \hat{y}) \\ \\
\cfrac{\partial \hat{y}}{\partial a} &= \sigma(a)(1 - \sigma(a)) \\ \\
\cfrac{\partial a}{\partial w_{0}} &= 1 \\ \\
\cfrac{\partial a}{\partial w_{1}} &= x_{1} \\ \\
\cfrac{\partial a}{\partial w_{2}} &= x_{2} \\ \\
\vdots \\ \\
\cfrac{\partial a}{\partial w_{20}} &= x_{20}
\end{align*}
$$

In [None]:
def gradientDescent(X, y, 
                    learning_rate, max_iter, 
                    epsilon = 1e-6):
    """
    Fungsi untuk melakukan update weight
    """
    X = np.array(X)
    n = X.shape[0]          # banyak data
    p = X.shape[1] + 1      # banyak parameter

    # Inisialisasi weights
    w = np.ones(p)

    # Inisialisasi
    w_hist = []
    loss_hist = []
    iter = 0
    previous_loss = 0

    # Lakukan iterasi
    for iter in range(max_iter):
        # Buat prediksi
        a = preActivation(X = X, w = w)
        y_pred = activationFunction(a = a, types = "sigmoid")

        # Cari error
        current_loss = np.linalg.norm(y - y_pred)**2
        
        if np.abs(previous_loss - current_loss) < epsilon:
            break

        # Update
        previous_loss = current_loss
        loss_hist.append(current_loss)
        w_hist.append(w)
        
        # Cari gradient error
        derr_dy = -2 * (y - y_pred)
        dy_da = y_pred * (1 - y_pred)
        
        derr_dw = np.zeros(p)
        for j in range(p):
            # Cari da_dw_j
            if j == 0:
                da_dw = np.ones(n)
            else:
                da_dw = X[:, j-1]
            
            # Cari error
            derr_dw[j] = np.sum(derr_dy * dy_da * da_dw)


        # Update weight dari gradient error
        for j in range(p):
            w[j] = w[j] - learning_rate * derr_dw[j]

        if iter%100 == 0:
            print(f"iterasi {iter+1} - Loss: {current_loss:.4f}")

    # Print last iteration
    print(f"iterasi {iter+1} - Loss: {current_loss:.4f}")

    return loss_hist, w_hist

In [None]:
loss_hist, w_hist = gradientDescent(X = X_train_clean,
                                    y = y_train,
                                    learning_rate = 0.01,
                                    max_iter = 5000)

In [None]:
# Plot Error history
plt.plot([i for i in range(len(loss_hist))], loss_hist)
plt.xlabel("Iteration")
plt.ylabel("Loss")

plt.show()

In [None]:
# Cari best weights
w_best = w_hist[-1]
w_best

pada data train

In [None]:
# Cari pre-activation function
a = preActivation(X = X_train_clean,
                  w = w_best)

In [None]:
# Cari output --> activate the pre-activation
# Karena klasifikasi, activation function menggunakan Sigmoid
y_pred_proba_train = activationFunction(a = a,
                                        types = "sigmoid")

In [None]:
# Buat prediksi
thresh = 0.5

y_pred_train = (y_pred_proba_train > thresh).astype("int")

pada data test

In [None]:
# Cari pre-activation function
a = preActivation(X = X_test_clean,
                  w = w_best)

In [None]:
# Cari output --> activate the pre-activation
# Karena klasifikasi, activation function menggunakan Sigmoid
y_pred_proba_test = activationFunction(a = a,
                                       types = "sigmoid")

In [None]:
# Buat prediksi
y_pred_test = (y_pred_proba_test > thresh).astype("int")

cek performa

pada data train

In [None]:
# Tampilkan confusion matrix
confusion_matrix(y_true = y_train,
                 y_pred = y_pred_train)

In [None]:
# Tampilkan score
accuracy_score(y_true = y_train,
               y_pred = y_pred_train)

In [None]:
# Tampilkan score
print(classification_report(y_true = y_train,
                            y_pred = y_pred_train,
                            target_names = ["stay (0)", "left (1)"]))

- Ada perbaikan recall, tadinya 49% sekarang jadi 87%
- Membaik

In [None]:
# ROC Curve
fpr_lr, tpr_lr, threshold_lr = roc_curve(y_train,
                                         y_pred_proba_train)
roc_auc_lr = auc(fpr_lr, tpr_lr)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr_lr, tpr_lr, 'b', label = 'AUC = %0.2f' % roc_auc_lr)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

pada data test

In [None]:
# Tampilkan confusion matrix
confusion_matrix(y_true = y_test,
                 y_pred = y_pred_test)

In [None]:
# Tampilkan score
accuracy_score(y_true = y_test,
               y_pred = y_pred_test)

In [None]:
# Tampilkan score
print(classification_report(y_true = y_test,
                            y_pred = y_pred_test,
                            target_names = ["stay (0)", "left (1)"]))

In [None]:
# ROC Curve
fpr_lr, tpr_lr, threshold_lr = roc_curve(y_test,
                                         y_pred_proba_test)
roc_auc_lr = auc(fpr_lr, tpr_lr)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr_lr, tpr_lr, 'b', label = 'AUC = %0.2f' % roc_auc_lr)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

---
# Membuat Model Neural Network

In [91]:
# Import library
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

TypeError: Unable to convert function return value to a Python type! The signature was
	() -> handle

**Arsitektur Simpel**

In [None]:
# Buat arsitektur model dengan 1 perceptron
model = Sequential([
    Dense(1, input_shape = (X_train_clean.shape[1],), activation = "sigmoid")
])

# Buat optimizer, menggunakan ADAM (varian gradient descent)
opt = SGD(learning_rate = 0.01)

# Compile model
model.compile(loss = "binary_crossentropy",
              optimizer = opt,
              metrics = ["accuracy"])

In [None]:
# Train model
model_hist = model.fit(X_train_clean, y_train,
                       epochs = 10,
                       verbose = 1)

In [None]:
# Summary Model
model.summary()

In [None]:
# Plot Error
plt.plot(1 + np.arange(10), model_hist.history["loss"], marker="o")

plt.xlabel("epoch")
plt.ylabel("loss")
plt.show()

pada data train

In [None]:
# Prediksi
y_pred_proba_train = model.predict(X_train_clean)
y_pred_proba_train

In [None]:
# Buat prediksi
thresh = 0.5

y_pred_train = (y_pred_proba_train > thresh).astype("int")

pada data test

In [None]:
# Prediksi
y_pred_proba_test = model.predict(X_test_clean)
y_pred_proba_test

In [None]:
# Buat prediksi
y_pred_test = (y_pred_proba_test > thresh).astype("int")

cek performa

pada data train

In [None]:
# Tampilkan confusion matrix
confusion_matrix(y_true = y_train,
                 y_pred = y_pred_train)

In [None]:
# Tampilkan score
accuracy_score(y_true = y_train,
               y_pred = y_pred_train)

In [None]:
# Tampilkan score
print(classification_report(y_true = y_train,
                            y_pred = y_pred_train,
                            target_names = ["stay (0)", "left (1)"]))

In [None]:
# ROC Curve
fpr_lr, tpr_lr, threshold_lr = roc_curve(y_train,
                                         y_pred_proba_train)
roc_auc_lr = auc(fpr_lr, tpr_lr)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr_lr, tpr_lr, 'b', label = 'AUC = %0.2f' % roc_auc_lr)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

pada data test

In [None]:
# Tampilkan confusion matrix
confusion_matrix(y_true = y_test,
                 y_pred = y_pred_test)

In [None]:
# Tampilkan score
accuracy_score(y_true = y_test,
               y_pred = y_pred_test)

In [None]:
# Tampilkan score
print(classification_report(y_true = y_test,
                            y_pred = y_pred_test,
                            target_names = ["stay (0)", "left (1)"]))

In [None]:
# ROC Curve
fpr_lr, tpr_lr, threshold_lr = roc_curve(y_test,
                                         y_pred_proba_test)
roc_auc_lr = auc(fpr_lr, tpr_lr)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr_lr, tpr_lr, 'b', label = 'AUC = %0.2f' % roc_auc_lr)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

**Arsitektur Lebih Kompleks**

- 1 hidden layer, dengan 3 neuron

In [None]:
# Buat arsitektur model dengan 1 perceptron
model = Sequential([
    Dense(3, input_shape = (X_train_clean.shape[1],), activation = "sigmoid"),
    Dense(1, activation = "sigmoid")
])

# Buat optimizer, menggunakan ADAM (varian gradient descent)
opt = SGD(learning_rate = 0.01)

# Compile model
model.compile(loss = "binary_crossentropy",
              optimizer = opt,
              metrics = ["accuracy"])

In [None]:
# Train model
model_hist = model.fit(X_train_clean, y_train,
                       epochs = 10,
                       verbose = 1)

In [None]:
# Summary Model
model.summary()

In [None]:
# Plot Error
plt.plot(1 + np.arange(10), model_hist.history["loss"], marker="o")

plt.xlabel("epoch")
plt.ylabel("loss")
plt.show()

In [None]:
# Train model - Teruskan 100 iterasi lagi
model_hist = model.fit(X_train_clean, y_train,
                       epochs = 100,
                       verbose = 1)

In [None]:
# Plot Error
plt.plot(1 + np.arange(100), model_hist.history["loss"], marker="o")

plt.xlabel("epoch")
plt.ylabel("loss")
plt.show()

pada data train

In [None]:
# Prediksi
y_pred_proba_train = model.predict(X_train_clean)
y_pred_proba_train

In [None]:
# Buat prediksi
thresh = 0.5

y_pred_train = (y_pred_proba_train > thresh).astype("int")

pada data test

In [None]:
# Prediksi
y_pred_proba_test = model.predict(X_test_clean)
y_pred_proba_test

In [None]:
# Buat prediksi
y_pred_test = (y_pred_proba_test > thresh).astype("int")

cek performa

pada data train

In [None]:
# Tampilkan confusion matrix
confusion_matrix(y_true = y_train,
                 y_pred = y_pred_train)

In [None]:
# Tampilkan score
accuracy_score(y_true = y_train,
               y_pred = y_pred_train)

In [None]:
# Tampilkan score
print(classification_report(y_true = y_train,
                            y_pred = y_pred_train,
                            target_names = ["stay (0)", "left (1)"]))

In [None]:
# ROC Curve
fpr_lr, tpr_lr, threshold_lr = roc_curve(y_train,
                                         y_pred_proba_train)
roc_auc_lr = auc(fpr_lr, tpr_lr)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr_lr, tpr_lr, 'b', label = 'AUC = %0.2f' % roc_auc_lr)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

pada data test

In [None]:
# Tampilkan confusion matrix
confusion_matrix(y_true = y_test,
                 y_pred = y_pred_test)

In [None]:
# Tampilkan score
accuracy_score(y_true = y_test,
               y_pred = y_pred_test)

In [None]:
# Tampilkan score
print(classification_report(y_true = y_test,
                            y_pred = y_pred_test,
                            target_names = ["stay (0)", "left (1)"]))

- Recall bisa dinaikan hingga 88%

In [None]:
# ROC Curve
fpr_lr, tpr_lr, threshold_lr = roc_curve(y_test,
                                         y_pred_proba_test)
roc_auc_lr = auc(fpr_lr, tpr_lr)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr_lr, tpr_lr, 'b', label = 'AUC = %0.2f' % roc_auc_lr)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()