### Import library yang dibutuhkan

In [9]:
# Import library yang dibutuhkan
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

### Membaca dataset dan Menghapus kolom yang tidak dibutuhkan

In [3]:
# Membaca dataset
loandata = pd.read_csv('LoanData2.csv')

loandata.head()

Unnamed: 0.1,Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Loan_Status,Gender,Married,Dependents_0,Dependents_1,Dependents_2,Dependents_3+,Education,Self_Employed,Property_Area_Rural,Property_Area_Semiurban,Property_Area_Urban
0,0,5849,0.0,146.412162,360.0,1.0,1,1,0,1,0,0,0,1,0,0,0,1
1,4,6000,0.0,141.0,360.0,1.0,1,1,0,1,0,0,0,1,0,0,0,1
2,13,1853,2840.0,114.0,360.0,1.0,0,1,0,1,0,0,0,1,0,1,0,0
3,15,4950,0.0,125.0,360.0,1.0,1,1,0,1,0,0,0,1,0,0,0,1
4,19,2600,3500.0,115.0,360.0,1.0,1,1,1,1,0,0,0,1,0,0,0,1


In [7]:
#Menghapus kolom yang tidak dibutuhkan
loandata.drop(columns=['Unnamed: 0'], inplace=True)

In [8]:
loandata.head()

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Loan_Status,Gender,Married,Dependents_0,Dependents_1,Dependents_2,Dependents_3+,Education,Self_Employed,Property_Area_Rural,Property_Area_Semiurban,Property_Area_Urban
0,5849,0.0,146.412162,360.0,1.0,1,1,0,1,0,0,0,1,0,0,0,1
1,6000,0.0,141.0,360.0,1.0,1,1,0,1,0,0,0,1,0,0,0,1
2,1853,2840.0,114.0,360.0,1.0,0,1,0,1,0,0,0,1,0,1,0,0
3,4950,0.0,125.0,360.0,1.0,1,1,0,1,0,0,0,1,0,0,0,1
4,2600,3500.0,115.0,360.0,1.0,1,1,1,1,0,0,0,1,0,0,0,1


In [10]:
# Menghitung jumlah nilai pada kolom Loan_Status
loan_status_counts = loandata['Loan_Status'].value_counts()

print("Jumlah nilai pada kolom Loan_Status:")
print(loan_status_counts)

Jumlah nilai pada kolom Loan_Status:
Loan_Status
1    112
0     24
Name: count, dtype: int64


### Membagi fitur dan target

In [11]:
# Menyiapkan fitur (X) dan target (y)
X = loandata.drop(columns=['Loan_Status'])
y = loandata['Loan_Status']

### Melakukan normalisasi

In [12]:
# Normalization
scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(X)

### Melakukan teknik SMOTE

In [13]:
# SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_normalized, y)

### Menyiapkan pembagian data untuk model machine learning

In [14]:
# Membagi data training dan testing
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Hasil splitting
print("Training data shape:", X_train.shape)
print("Testing data shape:", X_test.shape)
print("Training labels shape:", y_train.shape)
print("Testing labels shape:", y_test.shape)

Training data shape: (179, 16)
Testing data shape: (45, 16)
Training labels shape: (179,)
Testing labels shape: (45,)


### Logistic Regression ML Model

In [15]:
# Inisialisasi model Logistic Regression
LRmodel = LogisticRegression(random_state=42, max_iter=1000)

# Melatih model pada data pelatihan
LRmodel.fit(X_train, y_train)

# Memprediksi pada data pelatihan
y_train_pred_LR = LRmodel.predict(X_train)

# Menghitung akurasi pada data pelatihan
LRtraining_acc = accuracy_score(y_train, y_train_pred_LR)

print(f'Akurasi pada data pelatihan: {LRtraining_acc:.4f}')

Akurasi pada data pelatihan: 0.6927


In [16]:
# Memprediksi pada data pengujian
y_test_pred_LR = LRmodel.predict(X_test)

# Menghitung akurasi pada data pengujian
LRtesting_acc = accuracy_score(y_test, y_test_pred_LR)
print(f'Akurasi pada data pengujian: {LRtesting_acc:.4f}')

# Membuat confusion matrix
LRconf_matrix = confusion_matrix(y_test, y_test_pred_LR)
print('Confusion Matrix:')
print(LRconf_matrix)

# Membuat classification report
LRclass_report = classification_report(y_test, y_test_pred_LR, target_names=['Not Eligible', 'Eligible'])
print('Classification Report:')
print(LRclass_report)

Akurasi pada data pengujian: 0.6667
Confusion Matrix:
[[18  7]
 [ 8 12]]
Classification Report:
              precision    recall  f1-score   support

Not Eligible       0.69      0.72      0.71        25
    Eligible       0.63      0.60      0.62        20

    accuracy                           0.67        45
   macro avg       0.66      0.66      0.66        45
weighted avg       0.67      0.67      0.67        45



### Decision Tree ML Model

In [17]:
# Inisialisasi model Decision Tree
DT_model = DecisionTreeClassifier(random_state=42)

# Melatih model pada data pelatihan
DT_model.fit(X_train, y_train)

# Memprediksi pada data pelatihan dengan model Decision Tree
y_train_pred_DT = DT_model.predict(X_train)

# Menghitung akurasi pada data pelatihan untuk Decision Tree
training_accuracy_DT = accuracy_score(y_train, y_train_pred_DT)
print(f'Akurasi pada data pelatihan (Decision Tree): {training_accuracy_DT:.4f}')

Akurasi pada data pelatihan (Decision Tree): 1.0000


In [18]:
# Memprediksi pada data pengujian dengan model Decision Tree
y_test_pred_DT = DT_model.predict(X_test)

# Menghitung akurasi pada data pengujian untuk Decision Tree
testing_accuracy_DT = accuracy_score(y_test, y_test_pred_DT)
print(f'Akurasi pada data pengujian (Decision Tree): {testing_accuracy_DT:.4f}')

# Membuat confusion matrix untuk Decision Tree
conf_matrix_DT = confusion_matrix(y_test, y_test_pred_DT)
print('Confusion Matrix (Decision Tree):')
print(conf_matrix_DT)

# Membuat classification report untuk Decision Tree
class_report_DT = classification_report(y_test, y_test_pred_DT, target_names=['Not Eligible', 'Eligible'])
print('Classification Report (Decision Tree):')
print(class_report_DT)

Akurasi pada data pengujian (Decision Tree): 0.8889
Confusion Matrix (Decision Tree):
[[22  3]
 [ 2 18]]
Classification Report (Decision Tree):
              precision    recall  f1-score   support

Not Eligible       0.92      0.88      0.90        25
    Eligible       0.86      0.90      0.88        20

    accuracy                           0.89        45
   macro avg       0.89      0.89      0.89        45
weighted avg       0.89      0.89      0.89        45



### Random Forest ML Model

In [25]:
# Inisialisasi model Random Forest
RF_model = RandomForestClassifier(n_estimators=100)

# Melatih model pada data pelatihan
RF_model.fit(X_train, y_train)

# Memprediksi pada data pelatihan dengan model Random Forest
y_train_pred_RF = RF_model.predict(X_train)

# Menghitung akurasi pada data pelatihan untuk Random Forest
training_accuracy_RF = accuracy_score(y_train, y_train_pred_RF)
print(f'Akurasi pada data pelatihan (Random Forest): {training_accuracy_RF:.4f}')

Akurasi pada data pelatihan (Random Forest): 1.0000


In [26]:
# Memprediksi pada data pengujian dengan model Random Forest
y_test_pred_RF = RF_model.predict(X_test)

# Menghitung akurasi pada data pengujian untuk Random Forest
testing_accuracy_RF = accuracy_score(y_test, y_test_pred_RF)
print(f'Akurasi pada data pengujian (Random Forest): {testing_accuracy_RF:.4f}')

# Membuat confusion matrix untuk Random Forest
conf_matrix_RF = confusion_matrix(y_test, y_test_pred_RF)
print('Confusion Matrix (Random Forest):')
print(conf_matrix_RF)

# Membuat classification report untuk Random Forest
class_report_RF = classification_report(y_test, y_test_pred_RF, target_names=['Not Eligible', 'Eligible'])
print('Classification Report (Random Forest):')
print(class_report_RF)

Akurasi pada data pengujian (Random Forest): 0.8000
Confusion Matrix (Random Forest):
[[19  6]
 [ 3 17]]
Classification Report (Random Forest):
              precision    recall  f1-score   support

Not Eligible       0.86      0.76      0.81        25
    Eligible       0.74      0.85      0.79        20

    accuracy                           0.80        45
   macro avg       0.80      0.80      0.80        45
weighted avg       0.81      0.80      0.80        45



### SVM ML Model

In [41]:
# Inisialisasi model SVM
SVM_model = SVC(random_state=42)

# Melatih model pada data pelatihan
SVM_model.fit(X_train, y_train)

# Memprediksi pada data pelatihan dengan model SVM
y_train_pred_SVM = SVM_model.predict(X_train)

# Menghitung akurasi pada data pelatihan untuk SVM
training_accuracy_SVM = accuracy_score(y_train, y_train_pred_SVM)
print(f'Akurasi pada data pelatihan (SVM): {training_accuracy_SVM:.4f}')

Akurasi pada data pelatihan (SVM): 0.7318


In [42]:
# Memprediksi pada data pengujian dengan model SVM
y_test_pred_SVM = SVM_model.predict(X_test)

# Menghitung akurasi pada data pengujian untuk SVM
testing_accuracy_SVM = accuracy_score(y_test, y_test_pred_SVM)
print(f'Akurasi pada data pengujian (SVM): {testing_accuracy_SVM:.4f}')

# Membuat confusion matrix untuk SVM
conf_matrix_SVM = confusion_matrix(y_test, y_test_pred_SVM)
print('Confusion Matrix (SVM):')
print(conf_matrix_SVM)

# Membuat classification report untuk SVM
class_report_SVM = classification_report(y_test, y_test_pred_SVM, target_names=['Not Eligible', 'Eligible'])
print('Classification Report (SVM):')
print(class_report_SVM)

Akurasi pada data pengujian (SVM): 0.6889
Confusion Matrix (SVM):
[[20  5]
 [ 9 11]]
Classification Report (SVM):
              precision    recall  f1-score   support

Not Eligible       0.69      0.80      0.74        25
    Eligible       0.69      0.55      0.61        20

    accuracy                           0.69        45
   macro avg       0.69      0.68      0.68        45
weighted avg       0.69      0.69      0.68        45



Dari berbagai model, yang mendapatkan akurasi terbaik adalah model Decision Tree dengan akurasi pengujian sebesar 89%.

### Menyimpan model terbaik

In [44]:
# Menyimpan model ke dalam file
from joblib import dump, load

LoanEligible_DT = 'LoanEligible_DT.joblib'
dump(DT_model, LoanEligible_DT)
print(f'Model berhasil disimpan ke {LoanEligible_DT}')

Model berhasil disimpan ke LoanEligible_DT.joblib


In [45]:
# save the best model
import pickle
pickle.dump(DT_model, open('LoanEligible_DT.pkl', 'wb'))