# In [None]:
# Import the required libraries
import graphviz
import pandas as pd
from IPython.display import Image, display
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_graphviz

# Load the Iris dataset bundled with scikit-learn.
iris = load_iris()

# Build a DataFrame with one column per feature plus the class label
# in a trailing "target" column.
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(
    target=iris.target
)

# Preview the first rows of the DataFrame.
print(iris_df.head())

# Print the column/dtype/non-null summary of the DataFrame.
iris_df.info()

# Show the distinct values found in the "target" column.
unique_targets = iris_df["target"].unique()
print(unique_targets)

# Attribute (feature) columns and the target column.
feature_columns = [
    'sepal length (cm)',
    'sepal width (cm)',
    'petal length (cm)',
    'petal width (cm)',
]
X = iris_df[feature_columns]
y = iris_df['target']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create the Decision Tree model and fit it on the training split
# (fit() returns the estimator itself).
decision_tree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict the labels of the held-out test rows.
y_pred = decision_tree.predict(X_test)

# Measure and report the accuracy as a percentage.
accuracy = accuracy_score(y_test, y_pred)
print(f"Akurasi model Decision Tree: {accuracy * 100:.2f}%")

# Export the fitted tree as DOT source; with out_file=None the DOT text is
# returned as a string instead of being written to a file.
dot_data = export_graphviz(
    decision_tree,
    out_file=None,
    feature_names=iris.feature_names,
    class_names=iris.target_names,
    filled=True,
    rounded=True,
    special_characters=True,
)

# Build a Graphviz graph object from the DOT source.
graph = graphviz.Source(dot_data)

# Render the tree to a PDF file (uses the default output format).
graph.render("iris_decision_tree")

# Render a PNG as well and show it in the notebook. display() is called
# explicitly because a bare Image(...) expression is only shown when it is
# the last expression of a notebook cell.
png_path = graph.render(filename='iris_decision_tree', format='png')
display(Image(filename=png_path))

# Rows collected for the split-size comparison table.
results = []

# Each configuration is (label, extra keyword arguments for train_test_split):
# first without a seed, then with a fixed seed.
split_configs = (
    ("Tanpa Random State", {}),
    ("Dengan Random State", {"random_state": 42}),
)

# Repeat the split five times for both configurations and record the
# resulting training/test set sizes.
for trial in range(1, 6):
    for label, extra_kwargs in split_configs:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, **extra_kwargs
        )
        results.append([label, trial, len(X_train), len(X_test)])

# Turn the collected rows into a DataFrame.
column_names = ["Metode", "Percobaan", "Jumlah Data Latih", "Jumlah Data Uji"]
results_df = pd.DataFrame(results, columns=column_names)

# Print the table of split results.
print(results_df)

# Example with random_state set: every iteration uses the same seed, so the
# printed test labels are identical each time.
# The features/target are `X` and `y`; the previous names `X_data` and
# `y_data` were never defined and raised NameError.
print("random_state ditentukan")
for i in range(3):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    print(y_test)

# Example without random_state: no seed is passed, so the split (and the
# printed test labels) can differ between iterations.
print("random_state tidak ditentukan")
for i in range(3):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    print(y_test)
