# Install Library

In [None]:
!pip install scikit-learn



In [2]:
import pandas as pd
import numpy as np
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

In [None]:
!pip install lazypredict

Collecting lazypredict
  Downloading lazypredict-0.2.12-py2.py3-none-any.whl (12 kB)
Installing collected packages: lazypredict
Successfully installed lazypredict-0.2.12


# Import Dataset

In [6]:
# Read the debtor dataset from the CSV file in the working directory; every
# modeling cell below builds its feature frame from `data`.
data = pd.read_csv(
    filepath_or_buffer='Dummy Data Debitur_Roviani Amelia_v06_20231008.csv',
)

# Modeling Kelas Debitur

## By Location

### Pipeline Model

In [None]:
# Train and evaluate a classifier that predicts `debtor_island` from the
# free-text `debtor_address` column (TF-IDF features + random forest).
df = pd.DataFrame(data)

# Feature column(s) used by this model
selected_features = ['debtor_address']

# Split the frame into the feature matrix and the target column
X = df[selected_features]
y = df['debtor_island']

# Given to ColumnTransformer as a plain string (not a list) so that
# TfidfVectorizer receives a 1-D sequence of documents instead of a 2-D frame.
address_feature = 'debtor_address'

address_transformer = Pipeline(steps=[
    ('tfidf', TfidfVectorizer()),
])

# Combine the transformers with a ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('address', address_transformer, address_feature)
    ])

# Chain the preprocessor and the classifier in a single pipeline.
# random_state is fixed so the fitted forest, the metrics printed below and the
# model saved afterwards are reproducible across runs.
pipeline_debtor_location = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier(random_state=42))])

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the pipeline on the training split
pipeline_debtor_location.fit(X_train, y_train)

# Predict on the held-out split
y_pred = pipeline_debtor_location.predict(X_test)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Side-by-side view of actual vs. predicted labels
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print("Data Nilai Prediksi dan Nilai Seharusnya:")
print(results)

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Per-class precision / recall / F1
class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)

Accuracy: 0.993
Data Nilai Prediksi dan Nilai Seharusnya:
          Actual   Predicted
1501    Sumatera    Sumatera
2586    Sumatera    Sumatera
2653        Jawa        Jawa
1055    Sulawesi    Sulawesi
705     Sumatera    Sumatera
...          ...         ...
4711    Sumatera    Sumatera
2313        Bali        Bali
3214        Jawa        Jawa
2732    Sulawesi    Sulawesi
1926  Kalimantan  Kalimantan

[1000 rows x 2 columns]
Confusion Matrix:
[[ 89   0   0   0   0   0]
 [  0 180   1   1   1   4]
 [  0   0 132   0   0   0]
 [  0   0   0 114   0   0]
 [  0   0   0   0 176   0]
 [  0   0   0   0   0 302]]
Classification Report:
              precision    recall  f1-score   support

        Bali       1.00      1.00      1.00        89
        Jawa       1.00      0.96      0.98       187
  Kalimantan       0.99      1.00      1.00       132
       Papua       0.99      1.00      1.00       114
    Sulawesi       0.99      1.00      1.00       176
    Sumatera       0.99      1.00      0

### Saving Model

In [None]:
# Persist the fitted location pipeline to disk, then read it back from the same file
location_model_file = 'debtor_class_by_location.joblib'
joblib.dump(pipeline_debtor_location, location_model_file)
loaded_model_location = joblib.load(location_model_file)

## By Age

### Pipeline Model

In [8]:
# Train and evaluate a classifier that predicts `debtor_age_class` from the
# debtor's age and education level.
# NOTE(review): `debtor_age_class` is presumably derived directly from
# `debtor_age`, which would explain the perfect score recorded below — confirm
# that this is intended and not target leakage.
df = pd.DataFrame(data)

# Feature columns used by this model
selected_features = ['debtor_age', 'debtor_education_level']

# Split the frame into the feature matrix and the target column
X = df[selected_features]
y = df['debtor_age_class']

numeric_features = ['debtor_age']

# Numeric columns: fill missing values with the mean, then standardize
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_features = ['debtor_education_level']

# Categorical columns: fill missing values with the mode, then one-hot encode.
# handle_unknown='ignore' encodes a category that was not seen during fit as an
# all-zero row instead of raising, so predict() keeps working on the test split
# and on new data passed to the saved model.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine the transformers with a ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Chain the preprocessor and the classifier in a single pipeline.
# random_state is fixed so the fitted forest and the printed metrics are
# reproducible across runs.
pipeline_debtor_age = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier(random_state=42))])

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the pipeline on the training split
pipeline_debtor_age.fit(X_train, y_train)

# Predict on the held-out split
y_pred = pipeline_debtor_age.predict(X_test)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Side-by-side view of actual vs. predicted labels
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print("Data Nilai Prediksi dan Nilai Seharusnya:")
print(results)

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Per-class precision / recall / F1
class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)

Accuracy: 1.0
Data Nilai Prediksi dan Nilai Seharusnya:
      Actual Predicted
1501  Dewasa    Dewasa
2586  Dewasa    Dewasa
2653  Dewasa    Dewasa
1055    Muda      Muda
705      Tua       Tua
...      ...       ...
4711     Tua       Tua
2313  Dewasa    Dewasa
3214     Tua       Tua
2732     Tua       Tua
1926  Dewasa    Dewasa

[1000 rows x 2 columns]
Confusion Matrix:
[[530   0   0]
 [  0 184   0]
 [  0   0 286]]
Classification Report:
              precision    recall  f1-score   support

      Dewasa       1.00      1.00      1.00       530
        Muda       1.00      1.00      1.00       184
         Tua       1.00      1.00      1.00       286

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000



### Saving Model

In [10]:
# Persist the fitted age pipeline to disk, then read it back from the same file
age_model_file = 'debtor_class_by_age.joblib'
joblib.dump(pipeline_debtor_age, age_model_file)
loaded_model_age = joblib.load(age_model_file)


## By SES

### Pipeline Model

In [None]:
# Train and evaluate a classifier that predicts `ses_class` from income,
# expense, dependents, marital status, education, asset ownership and loan size.
df = pd.DataFrame(data)

# Feature columns used by this model
selected_features = ['debtor_monthly_income', 'debtor_monthly_expense',
                     'debtor_number_of_dependents', 'debtor_marital_status',
                     'debtor_education_level', 'debtor_asset_ownership',
                     'loan_amount']

# Split the frame into the feature matrix and the target column
X = df[selected_features]
y = df['ses_class']

# Numeric columns: fill missing values with the mean, then standardize
numeric_features = ['debtor_monthly_income', 'debtor_monthly_expense',
                    'debtor_number_of_dependents', 'loan_amount']

numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_features = ['debtor_education_level', 'debtor_asset_ownership',
                        'debtor_marital_status']

# Categorical columns: fill missing values with the mode, then one-hot encode.
# handle_unknown='ignore' encodes a category that was not seen during fit as an
# all-zero row instead of raising, so predict() keeps working on the test split
# and on new data passed to the saved model.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine the transformers with a ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Chain the preprocessor and the classifier in a single pipeline.
# random_state is fixed so the fitted forest and the printed metrics are
# reproducible across runs.
pipeline_debtor_ses = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier(random_state=42))])

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the pipeline on the training split
pipeline_debtor_ses.fit(X_train, y_train)

# Predict on the held-out split
y_pred = pipeline_debtor_ses.predict(X_test)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Side-by-side view of actual vs. predicted labels
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print("Data Nilai Prediksi dan Nilai Seharusnya:")
print(results)

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Per-class precision / recall / F1
class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)

Accuracy: 0.997
Data Nilai Prediksi dan Nilai Seharusnya:
     Actual Predicted
1501      C         C
2586      E         E
2653      D         D
1055      D         D
705       D         D
...     ...       ...
4711      D         D
2313      B         B
3214      D         D
2732      A         A
1926      D         D

[1000 rows x 2 columns]
Confusion Matrix:
[[162   0   0   0   1]
 [  0 236   0   0   0]
 [  0   0 230   0   0]
 [  0   0   0 213   1]
 [  0   0   0   1 156]]
Classification Report:
              precision    recall  f1-score   support

           A       1.00      0.99      1.00       163
           B       1.00      1.00      1.00       236
           C       1.00      1.00      1.00       230
           D       1.00      1.00      1.00       214
           E       0.99      0.99      0.99       157

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000



### Saving Model

In [None]:
# Persist the fitted SES pipeline to disk, then read it back from the same file
ses_model_file = 'debtor_class_by_ses.joblib'
joblib.dump(pipeline_debtor_ses, ses_model_file)
loaded_model_ses = joblib.load(ses_model_file)

## By Field

### Pipeline Model

In [None]:
# Train and evaluate a classifier that predicts `field_class` from credit risk,
# aging, billing/payment attributes, loan attributes, personality and working time.
df = pd.DataFrame(data)

# Feature columns used by this model
selected_features = ['risk_credit', 'debtor_aging', 'billing_type',
                     'payment_pattern', 'remaining_loan', 'loan_type',
                     'debtor_personality', 'working_time']

# Split the frame into the feature matrix and the target column
X = df[selected_features]
y = df['field_class']

# Numeric columns: fill missing values with the mean, then standardize
numeric_features = ['remaining_loan']

numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_features = ['risk_credit', 'debtor_aging', 'billing_type',
                        'payment_pattern', 'loan_type', 'debtor_personality',
                        'working_time']

# Categorical columns: fill missing values with the mode, then one-hot encode.
# handle_unknown='ignore' encodes a category that was not seen during fit as an
# all-zero row instead of raising, so predict() keeps working on the test split
# and on new data passed to the saved model.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine the transformers with a ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Chain the preprocessor and the classifier in a single pipeline.
# The variable is called `pipeline_billing_type` although the target is
# `field_class` (billing_type is only an input feature); the name is kept
# because the model-saving cell that follows refers to it.
# random_state is fixed so the fitted forest and the printed metrics are
# reproducible across runs.
pipeline_billing_type = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier(random_state=42))])

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the pipeline on the training split
pipeline_billing_type.fit(X_train, y_train)

# Predict on the held-out split
y_pred = pipeline_billing_type.predict(X_test)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Side-by-side view of actual vs. predicted labels
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print("Data Nilai Prediksi dan Nilai Seharusnya:")
print(results)

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Per-class precision / recall / F1
class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)

Accuracy: 1.0
Data Nilai Prediksi dan Nilai Seharusnya:
                          Actual                   Predicted
1501                Melalui Meja                Melalui Meja
2586  Turun Langsung ke Lapangan  Turun Langsung ke Lapangan
2653                Melalui Meja                Melalui Meja
1055                Melalui Meja                Melalui Meja
705                 Melalui Meja                Melalui Meja
...                          ...                         ...
4711                Melalui Meja                Melalui Meja
2313                Melalui Meja                Melalui Meja
3214                Melalui Meja                Melalui Meja
2732  Turun Langsung ke Lapangan  Turun Langsung ke Lapangan
1926                Melalui Meja                Melalui Meja

[1000 rows x 2 columns]
Confusion Matrix:
[[616   0]
 [  0 384]]
Classification Report:
                            precision    recall  f1-score   support

              Melalui Meja       1.00      1.00      1

### Saving Model

In [None]:
# Persist the fitted field-class pipeline to disk, then read it back from the same file
field_model_file = 'debtor_class_by_collector_field.joblib'
joblib.dump(pipeline_billing_type, field_model_file)
loaded_model_field = joblib.load(field_model_file)

## By Behavior

### Pipeline Model

In [None]:
# Train and evaluate a classifier that predicts `behavior_class` from credit
# risk, aging, payment/billing attributes and the debtor's financial profile.
df = pd.DataFrame(data)

# Feature columns used by this model
selected_features = ['risk_credit', 'debtor_aging', 'payment_pattern',
                     'billing_type', 'remaining_loan', 'loan_amount',
                     'debtor_monthly_income', 'debtor_age',
                     'debtor_monthly_expense', 'debtor_number_of_dependents']

# Split the frame into the feature matrix and the target column
X = df[selected_features]
y = df['behavior_class']

# Numeric columns: fill missing values with the mean, then standardize
numeric_features = ['remaining_loan', 'debtor_monthly_income', 'loan_amount',
                    'debtor_age', 'debtor_monthly_expense',
                    'debtor_number_of_dependents']

numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_features = ['risk_credit', 'debtor_aging', 'payment_pattern',
                        'billing_type']

# Categorical columns: fill missing values with the mode, then one-hot encode.
# handle_unknown='ignore' encodes a category that was not seen during fit as an
# all-zero row instead of raising, so predict() keeps working on the test split
# and on new data passed to the saved model.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine the transformers with a ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Chain the preprocessor and the classifier in a single pipeline.
# random_state is fixed so the fitted forest and the printed metrics are
# reproducible across runs.
pipeline_debtor_behavior = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier(random_state=42))])

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the pipeline on the training split
pipeline_debtor_behavior.fit(X_train, y_train)

# Predict on the held-out split
y_pred = pipeline_debtor_behavior.predict(X_test)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Side-by-side view of actual vs. predicted labels
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print("Data Nilai Prediksi dan Nilai Seharusnya:")
print(results)

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Per-class precision / recall / F1
class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)

Accuracy: 1.0
Data Nilai Prediksi dan Nilai Seharusnya:
           Actual    Predicted
1501     Disiplin     Disiplin
2586  Gagal Bayar  Gagal Bayar
2653     Disiplin     Disiplin
1055     Disiplin     Disiplin
705         Kabur        Kabur
...           ...          ...
4711        Kabur        Kabur
2313     Disiplin     Disiplin
3214     Disiplin     Disiplin
2732  Gagal Bayar  Gagal Bayar
1926     Disiplin     Disiplin

[1000 rows x 2 columns]
Confusion Matrix:
[[259   0   0]
 [  0 242   0]
 [  0   0 499]]
Classification Report:
              precision    recall  f1-score   support

    Disiplin       1.00      1.00      1.00       259
 Gagal Bayar       1.00      1.00      1.00       242
       Kabur       1.00      1.00      1.00       499

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000



### Saving Model

In [None]:
# Persist the fitted behavior pipeline to disk, then read it back from the same file
behavior_model_file = 'debtor_class_by_debtor_behavior.joblib'
joblib.dump(pipeline_debtor_behavior, behavior_model_file)
loaded_model_behavior = joblib.load(behavior_model_file)

## By Character

### Pipeline Model

In [None]:
# Train and evaluate a classifier that predicts `debtor_personality` from
# education, occupation, communication channel, gender, age, marital status
# and monthly income.
df = pd.DataFrame(data)

# Feature columns used by this model
selected_features = ['debtor_education_level', 'debtor_occupation',
                     'debtor_communication_channel', 'debtor_gender',
                     'debtor_age', 'debtor_marital_status',
                     'debtor_monthly_income']

# Split the frame into the feature matrix and the target column
X = df[selected_features]
y = df['debtor_personality']

# Numeric columns: fill missing values with the mean, then standardize
numeric_features = ['debtor_age', 'debtor_monthly_income']

numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_features = ['debtor_education_level', 'debtor_occupation',
                        'debtor_communication_channel', 'debtor_gender',
                        'debtor_marital_status']

# Categorical columns: fill missing values with the mode, then one-hot encode.
# handle_unknown='ignore' encodes a category that was not seen during fit as an
# all-zero row instead of raising, so predict() keeps working on the test split
# and on new data passed to the saved model.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine the transformers with a ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Chain the preprocessor and the classifier in a single pipeline.
# random_state is fixed so the fitted forest and the printed metrics are
# reproducible across runs.
pipeline_debtor_character = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier(random_state=42))])

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the pipeline on the training split
pipeline_debtor_character.fit(X_train, y_train)

# Predict on the held-out split
y_pred = pipeline_debtor_character.predict(X_test)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Side-by-side view of actual vs. predicted labels
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print("Data Nilai Prediksi dan Nilai Seharusnya:")
print(results)

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Per-class precision / recall / F1
class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)

Accuracy: 1.0
Data Nilai Prediksi dan Nilai Seharusnya:
         Actual  Predicted
1501      Ramah      Ramah
2586  Bijaksana  Bijaksana
2653      Ramah      Ramah
1055      Ramah      Ramah
705       Tegas      Tegas
...         ...        ...
4711      Tegas      Tegas
2313      Ramah      Ramah
3214      Tegas      Tegas
2732      Tegas      Tegas
1926      Kasar      Kasar

[1000 rows x 2 columns]
Confusion Matrix:
[[420   0   0   0]
 [  0  37   0   0]
 [  0   0 279   0]
 [  0   0   0 264]]
Classification Report:
              precision    recall  f1-score   support

   Bijaksana       1.00      1.00      1.00       420
       Kasar       1.00      1.00      1.00        37
       Ramah       1.00      1.00      1.00       279
       Tegas       1.00      1.00      1.00       264

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000



### Saving Model

In [None]:
# Persist the fitted character pipeline to disk, then read it back from the same file
character_model_file = 'debtor_class_by_debtor_character.joblib'
joblib.dump(pipeline_debtor_character, character_model_file)
loaded_model_character = joblib.load(character_model_file)

## By Demography

### Pipeline Model

In [None]:
# Train and evaluate a classifier that predicts `demography_class` from the
# debtor's occupation, finances, age, education, assets, communication channel,
# loan amount, aging and billing type.
df = pd.DataFrame(data)

# Feature columns used by this model
selected_features = ['debtor_occupation', 'debtor_monthly_income', 'debtor_monthly_expense',
                     'debtor_number_of_dependents', 'debtor_age', 'debtor_education_level',
                     'debtor_asset_ownership', 'debtor_communication_channel',
                     'loan_amount', 'debtor_aging', 'billing_type']

# Split the frame into the feature matrix and the target column
X = df[selected_features]
y = df['demography_class']

# Numeric columns: fill missing values with the mean, then standardize
numeric_features = ['debtor_age', 'debtor_monthly_income', 'debtor_monthly_expense',
                    'debtor_number_of_dependents', 'loan_amount']

numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_features = ['debtor_occupation', 'debtor_education_level', 'debtor_asset_ownership',
                        'debtor_communication_channel', 'debtor_aging', 'billing_type']

# Categorical columns: fill missing values with the mode, then one-hot encode.
# handle_unknown='ignore' encodes a category that was not seen during fit as an
# all-zero row instead of raising, so predict() keeps working on the test split
# and on new data passed to the saved model.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine the transformers with a ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Chain the preprocessor and the classifier in a single pipeline.
# random_state is fixed so the fitted forest and the printed metrics are
# reproducible across runs.
pipeline_debtor_demography = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier(random_state=42))])

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the pipeline on the training split
pipeline_debtor_demography.fit(X_train, y_train)

# Predict on the held-out split
y_pred = pipeline_debtor_demography.predict(X_test)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Side-by-side view of actual vs. predicted labels
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print("Data Nilai Prediksi dan Nilai Seharusnya:")
print(results)

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Per-class precision / recall / F1
class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)

Accuracy: 0.999
Data Nilai Prediksi dan Nilai Seharusnya:
                        Actual                 Predicted
1501  Pemukiman Padat Penduduk  Pemukiman Padat Penduduk
2586                 Perumahan                 Perumahan
2653  Pemukiman Padat Penduduk  Pemukiman Padat Penduduk
1055  Pemukiman Padat Penduduk  Pemukiman Padat Penduduk
705   Pemukiman Padat Penduduk  Pemukiman Padat Penduduk
...                        ...                       ...
4711  Pemukiman Padat Penduduk  Pemukiman Padat Penduduk
2313                     Rusun                     Rusun
3214  Pemukiman Padat Penduduk  Pemukiman Padat Penduduk
2732                 Apartemen                 Apartemen
1926  Pemukiman Padat Penduduk  Pemukiman Padat Penduduk

[1000 rows x 2 columns]
Confusion Matrix:
[[118   0   1   0]
 [  0 372   0   0]
 [  0   0 174   0]
 [  0   0   0 335]]
Classification Report:
                          precision    recall  f1-score   support

               Apartemen       1.00      0.99   

### Saving Model

In [None]:
# Persist the fitted demography pipeline to disk, then read it back from the same file
demography_model_file = 'debtor_class_by_debtor_demography.joblib'
joblib.dump(pipeline_debtor_demography, demography_model_file)
loaded_model_demography = joblib.load(demography_model_file)