In [11]:
# 1. Import the required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import seaborn as sns

In [12]:
# 2. Load the dataset from the CSV file (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer="data_wisata_cleaned_filtered.csv")

In [13]:
# 3. Show the first few rows, preceded by a header line.
#    A single print with a newline separator emits the same text as two prints.
print("Contoh data:", df.head(), sep="\n")

Contoh data:
   ID Tempat Wisata  Tahun  Wisnus  Wisman
0   1   Museum Bali   2020    4248    3082
1   2   Museum Bali   2021    1991      86
2   3   Museum Bali   2022   11535    6553
3   4   Museum Bali   2023   18230   13803
4   5   Museum Bali   2024   24796   13835


In [14]:
# 4. Create the classification label (target): 'Ramai' (busy) when Wisnus is
#    strictly greater than the threshold, otherwise 'Sepi' (quiet).
WISNUS_RAMAI_THRESHOLD = 10_000  # named so the cut-off is easy to find and tune

# Vectorized comparison instead of a per-row Python lambda. Missing values
# compare as False and therefore land in 'Sepi', matching the lambda version.
df['Kategori_Wisnus'] = (
    df['Wisnus']
    .gt(WISNUS_RAMAI_THRESHOLD)
    .map({True: 'Ramai', False: 'Sepi'})
)

In [15]:
# 5. Define the features (year and site name) and the target (the derived label)
X = df.loc[:, ['Tahun', 'Tempat Wisata']]
y = df.loc[:, 'Kategori_Wisnus']

In [16]:
# 6. Preprocessing: one-hot encode the categorical feature.
#    handle_unknown='ignore' keeps transform from failing on categories that
#    were not present when the encoder was fitted.
categorical_features = ['Tempat Wisata']
preprocessor = ColumnTransformer(
    [('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)],
    remainder='passthrough',  # so that the non-listed column (Tahun) is kept as-is
)

In [17]:
# 7. Build the classification pipeline: preprocessing, then a RandomForestClassifier
clf_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    # Fixed random_state so repeated runs build the same forest.
    ('classifier', RandomForestClassifier(random_state=42)),
])

In [18]:
# 8. Split into training and test sets: 20% held out, seeded for
#    reproducibility, and stratified on the label to keep class proportions.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42, test_size=0.2)

In [19]:
# 9. Train the model (fits the preprocessor and the classifier on the training split)
clf_pipeline.fit(X=X_train, y=y_train)

In [20]:
# 10. Predict labels for the test split
y_pred = clf_pipeline.predict(X=X_test)

In [21]:
# 11. Evaluate the model on the test split.
#     Each print emits a heading line followed by the metric, separated by a newline.
print("\n=== Confusion Matrix ===", confusion_matrix(y_test, y_pred), sep="\n")
print("\n=== Classification Report ===", classification_report(y_test, y_pred), sep="\n")


=== Confusion Matrix ===
[[ 24  32]
 [  2 152]]

=== Classification Report ===
              precision    recall  f1-score   support

       Ramai       0.92      0.43      0.59        56
        Sepi       0.83      0.99      0.90       154

    accuracy                           0.84       210
   macro avg       0.87      0.71      0.74       210
weighted avg       0.85      0.84      0.82       210



In [None]:
# 12. Visualize the confusion matrix as an annotated heatmap.
#     The label order is fixed explicitly so rows/columns match the tick labels.
class_labels = ['Ramai', 'Sepi']
conf_mat = confusion_matrix(y_test, y_pred, labels=class_labels)
ax = sns.heatmap(
    conf_mat,
    annot=True,   # write the count inside each cell
    fmt='d',      # integer formatting for the annotations
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
# Rows are the true labels, columns are the predicted labels.
ax.set(title="Confusion Matrix", xlabel="Predicted", ylabel="Actual")
plt.show()