1. Ulangilah tahapan klasifikasi menggunakan dataset Wine. Hitunglah akurasinya berdasarkan confusion matrix dan classification report dengan persentase data testing 20%, 30%, dan 40%.

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Load the Wine dataset: labels are kept as the raw target array, features are
# wrapped in a DataFrame whose columns carry the dataset's feature names.
wine_data = load_wine()
y = wine_data.target
X = pd.DataFrame(data=wine_data.data, columns=wine_data.feature_names)

# Train and evaluate the model for one test-set fraction.
def train_and_evaluate(test_size):
    """Fit a decision tree on one train/test split and evaluate it.

    Reads the module-level ``X`` (features) and ``y`` (labels).

    Args:
        test_size: Forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        A ``(conf_matrix, class_report, accuracy)`` tuple: the confusion
        matrix of the test-set predictions, the text classification report,
        and the accuracy computed from the confusion matrix.
    """
    # Both the split and the tree use random_state=42.
    split = train_test_split(X, y, test_size=test_size, random_state=42)
    features_train, features_test, labels_train, labels_test = split

    model = DecisionTreeClassifier(random_state=42)
    model.fit(features_train, labels_train)
    predictions = model.predict(features_test)

    matrix = confusion_matrix(labels_test, predictions)
    report = classification_report(labels_test, predictions)

    # Correct predictions sit on the diagonal; divide by the total sample count.
    accuracy = np.trace(matrix) / matrix.sum()

    return matrix, report, accuracy

# Report the metrics with 20%, 30%, and 40% of the data held out for testing.
test_sizes = [0.2, 0.3, 0.4]
for test_size in test_sizes:
    conf_matrix, class_report, accuracy = train_and_evaluate(test_size)
    print(f"Persentase Data Testing: {test_size * 100}%")
    # sep="\n" puts each heading on its own line, directly above its value.
    print("Confusion Matrix:", conf_matrix, sep="\n")
    print("Classification Report:", class_report, sep="\n")
    print(f"Akurasi: {accuracy:.4f}")  # accuracy with four decimal places
    print("=" * 50)


Persentase Data Testing: 20.0%
Confusion Matrix:
[[13  1  0]
 [ 0 14  0]
 [ 1  0  7]]
Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        14
           1       0.93      1.00      0.97        14
           2       1.00      0.88      0.93         8

    accuracy                           0.94        36
   macro avg       0.95      0.93      0.94        36
weighted avg       0.95      0.94      0.94        36

Akurasi: 0.9444
Persentase Data Testing: 30.0%
Confusion Matrix:
[[18  1  0]
 [ 0 21  0]
 [ 1  0 13]]
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.95      0.95        19
           1       0.95      1.00      0.98        21
           2       1.00      0.93      0.96        14

    accuracy                           0.96        54
   macro avg       0.97      0.96      0.96        54
weighted avg       0.96      0.96      0.96        54

Akur

2. Ulangilah tahapan klasifikasi menggunakan dataset Stars. Hitunglah akurasinya berdasarkan confusion matrix dan classification report dengan persentase data testing 20%, 30%, dan 40%.

In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Read the Stars data from disk.
data_stars = pd.read_csv('Stars.csv')

# Every column except the last is a feature; the last column is the target.
# NOTE(review): assumes the class label is the final column of Stars.csv and
# that no feature column duplicates it — confirm against the file.
X, y = data_stars.iloc[:, :-1], data_stars.iloc[:, -1]

# Train and evaluate the model for one test-set fraction.
def train_and_evaluate(test_size):
    """Fit a decision tree on one split of the Stars data and evaluate it.

    Reads the module-level ``X`` (features) and ``y`` (labels).

    Args:
        test_size: Forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        A ``(conf_matrix, class_report, accuracy)`` tuple: the confusion
        matrix of the test-set predictions, the text classification report,
        and the accuracy computed from the confusion matrix.
    """
    # Split the dataset into training and test data.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=42
    )

    # Initialise and train the decision tree classifier.
    clf = DecisionTreeClassifier(random_state=42)
    clf.fit(X_train, y_train)

    # Predict the classes of the test data.
    y_pred = clf.predict(X_test)

    # Build the confusion matrix and the classification report.
    conf_matrix = confusion_matrix(y_test, y_pred)
    class_report = classification_report(y_test, y_pred)

    # Derive accuracy from the confusion matrix itself (correct predictions on
    # the diagonal divided by all samples) instead of calling clf.score, which
    # would run a second prediction pass over X_test for the same number.
    accuracy = conf_matrix.trace() / conf_matrix.sum()

    return conf_matrix, class_report, accuracy

# Report the metrics with 20%, 30%, and 40% of the data held out for testing.
test_sizes = [0.2, 0.3, 0.4]
for test_size in test_sizes:
    conf_matrix, class_report, accuracy = train_and_evaluate(test_size)
    # One print call; sep="\n" places every item on its own line.
    print(
        f"Persentase Data Testing: {test_size * 100}%",
        "Confusion Matrix:",
        conf_matrix,
        "Classification Report:",
        class_report,
        f"Akurasi: {accuracy:.4f}",  # accuracy with four decimal places
        "=" * 50,
        sep="\n",
    )


Persentase Data Testing: 20.0%
Confusion Matrix:
[[ 8  0  0  0  0  0]
 [ 0 11  0  0  0  0]
 [ 0  0  8  0  0  0]
 [ 0  0  0  7  0  0]
 [ 0  0  0  0  8  0]
 [ 0  0  0  0  0  6]]
Classification Report:
               precision    recall  f1-score   support

  Brown Dwarf       1.00      1.00      1.00         8
   Hypergiant       1.00      1.00      1.00        11
Main Sequence       1.00      1.00      1.00         8
    Red Dwarf       1.00      1.00      1.00         7
   Supergiant       1.00      1.00      1.00         8
  White Dwarf       1.00      1.00      1.00         6

     accuracy                           1.00        48
    macro avg       1.00      1.00      1.00        48
 weighted avg       1.00      1.00      1.00        48

Akurasi: 1.0000
Persentase Data Testing: 30.0%
Confusion Matrix:
[[14  0  0  0  0  0]
 [ 0 15  0  0  0  0]
 [ 0  0 10  0  0  0]
 [ 0  0  0 12  0  0]
 [ 0  0  0  0 11  0]
 [ 0  0  0  0  0 10]]
Classification Report:
               precision    recal

3. Ulangilah tahapan klasifikasi menggunakan dataset Breast Cancer. Hitunglah akurasinya berdasarkan confusion matrix dan classification report dengan persentase data testing 20%, 30%, dan 40%.

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Load the Breast Cancer dataset: labels are kept as the raw target array,
# features are wrapped in a DataFrame labelled with the dataset's feature names.
breast_cancer_data = load_breast_cancer()
y = breast_cancer_data.target
X = pd.DataFrame(
    data=breast_cancer_data.data,
    columns=breast_cancer_data.feature_names,
)

# Train and evaluate the model for one test-set fraction.
def train_and_evaluate(test_size):
    """Fit a decision tree on one train/test split and evaluate it.

    Reads the module-level ``X`` (features) and ``y`` (labels).

    Args:
        test_size: Forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        A ``(conf_matrix, class_report, accuracy)`` tuple: the confusion
        matrix of the test-set predictions, the text classification report,
        and the accuracy computed from the confusion matrix.
    """
    train_features, test_features, train_labels, test_labels = train_test_split(
        X, y, test_size=test_size, random_state=42
    )

    tree = DecisionTreeClassifier(random_state=42)
    tree.fit(train_features, train_labels)
    predicted = tree.predict(test_features)

    cm = confusion_matrix(test_labels, predicted)
    report_text = classification_report(test_labels, predicted)

    # Accuracy = correctly classified samples (diagonal) / all samples.
    n_correct = np.diag(cm).sum()
    n_total = cm.sum()
    return cm, report_text, n_correct / n_total

# Report the metrics with 20%, 30%, and 40% of the data held out for testing.
test_sizes = [0.2, 0.3, 0.4]
for test_size in test_sizes:
    conf_matrix, class_report, accuracy = train_and_evaluate(test_size)
    print(f"Persentase Data Testing: {test_size * 100}%")
    # sep="\n" puts each heading on its own line, directly above its value.
    print("Confusion Matrix:", conf_matrix, sep="\n")
    print("Classification Report:", class_report, sep="\n")
    # This cell reports accuracy as a percentage with two decimal places.
    print(f"Akurasi: {accuracy * 100:.2f}%")
    print("=" * 50)


Persentase Data Testing: 20.0%
Confusion Matrix:
[[40  3]
 [ 3 68]]
Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        43
           1       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

Akurasi: 94.74%
Persentase Data Testing: 30.0%
Confusion Matrix:
[[ 60   3]
 [  7 101]]
Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.95      0.92        63
           1       0.97      0.94      0.95       108

    accuracy                           0.94       171
   macro avg       0.93      0.94      0.94       171
weighted avg       0.94      0.94      0.94       171

Akurasi: 94.15%
Persentase Data Testing: 40.0%
Confusion Matrix:
[[ 75   5]
 [  9 139]]
Classification Report:
              precision    recall