In [95]:
# Import library yang dibutuhkan
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [96]:

# Load dataset
file_name = "indonesia_cities_large_dataset.csv"  # Ganti sesuai nama file di Colab
data = pd.read_csv(file_name)


In [97]:
# Preprocessing dataset
data = data.drop(columns=["Nama Kota"])  # Hapus kolom 'Nama Kota' karena tidak relevan

In [98]:
# Encode kolom 'Status Kota' menjadi numerik
data["Status Kota"] = data["Status Kota"].apply(lambda x: 1 if x == "Metropolitan" else 0)


In [99]:
# One-Hot Encoding untuk kolom kategori lainnya (contoh: Provinsi)
data = pd.get_dummies(data, columns=["Provinsi"], drop_first=True)

In [100]:
# Tampilkan info dataset setelah preprocessing
print(data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 19 columns):
 #   Column                       Non-Null Count  Dtype
---  ------                       --------------  -----
 0   Populasi (ribu)              100 non-null    int64
 1   Status Kota                  100 non-null    int64
 2   Provinsi_Banten              100 non-null    bool 
 3   Provinsi_DI Yogyakarta       100 non-null    bool 
 4   Provinsi_DKI Jakarta         100 non-null    bool 
 5   Provinsi_Jawa Barat          100 non-null    bool 
 6   Provinsi_Jawa Tengah         100 non-null    bool 
 7   Provinsi_Jawa Timur          100 non-null    bool 
 8   Provinsi_Kalimantan Barat    100 non-null    bool 
 9   Provinsi_Kalimantan Selatan  100 non-null    bool 
 10  Provinsi_Kalimantan Timur    100 non-null    bool 
 11  Provinsi_Kepulauan Riau      100 non-null    bool 
 12  Provinsi_Papua               100 non-null    bool 
 13  Provinsi_Riau                100 non-null    bool 


In [101]:
# Memilih fitur dan label
X = data.drop(columns=["Status Kota"])  # Fitur
y = data["Status Kota"]  # Label

In [102]:
# Split dataset menjadi training dan testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [103]:

# Inisialisasi model Decision Tree
model = DecisionTreeClassifier(random_state=42)

In [104]:
# Latih model dengan data training
model.fit(X_train, y_train)


In [105]:

# Prediksi data testing
y_pred = model.predict(X_test)


In [106]:

# Evaluasi model
print("Akurasi Model:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Akurasi Model: 0.55

Confusion Matrix:
 [[11  2]
 [ 7  0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.85      0.71        13
           1       0.00      0.00      0.00         7

    accuracy                           0.55        20
   macro avg       0.31      0.42      0.35        20
weighted avg       0.40      0.55      0.46        20



In [107]:
# Menampilkan struktur Decision Tree
tree_rules = export_text(model, feature_names=list(data.drop(columns=["Status Kota"]).columns))
print("\nAturan Decision Tree:\n")
print(tree_rules)


Aturan Decision Tree:

|--- Populasi (ribu) <= 7524.00
|   |--- Populasi (ribu) <= 5803.00
|   |   |--- Populasi (ribu) <= 5547.50
|   |   |   |--- Provinsi_Sumatera Utara <= 0.50
|   |   |   |   |--- Populasi (ribu) <= 3863.50
|   |   |   |   |   |--- Populasi (ribu) <= 3797.50
|   |   |   |   |   |   |--- Populasi (ribu) <= 902.00
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- Populasi (ribu) >  902.00
|   |   |   |   |   |   |   |--- Populasi (ribu) <= 1094.50
|   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |--- Populasi (ribu) >  1094.50
|   |   |   |   |   |   |   |   |--- Provinsi_Jawa Timur <= 0.50
|   |   |   |   |   |   |   |   |   |--- Provinsi_Kalimantan Timur <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |   |--- Provinsi_Kalimantan Timur >  0.50
|   |   |   |   |   |   |   |   |   |   |--- Populasi (ribu) <= 3152.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 0
| 