# Tugas Kecil 1
## Eksplorasi library Decision Tree Learning pada Jupyter Notebook

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Modul six untuk Id3Estimator
import six
import sys
sys.modules['sklearn.externals.six'] = six

# Modul-modul pengolahan data
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import make_pipeline

# Modul model algoritma machine learning
from sklearn.tree import DecisionTreeClassifier, export_text, export_graphviz
from id3 import Id3Estimator
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

# Modul pengecekan akurasi
from sklearn.metrics import accuracy_score, f1_score

In [None]:
# Load the scikit-learn breast cancer dataset and wrap its feature matrix in a
# DataFrame whose columns are the dataset's feature names, with the class
# labels appended as a trailing 'target' column.
breast_cancer = load_breast_cancer()
df_breast_cancer = pd.DataFrame(
    data=breast_cancer.data,
    columns=breast_cancer.feature_names,
).assign(target=breast_cancer.target)

# Preview the first rows (rendered as the cell output).
df_breast_cancer.head()

In [None]:
# Read the Play Tennis table; the path is relative to the working directory.
df_play_tennis = pd.read_csv('PlayTennis.csv')

# One LabelEncoder per column, each kept under its own name.
leO, leT, leH, leW, lePT = (LabelEncoder() for _ in range(5))
column_encoders = {
    'Outlook': leO,
    'Temperature': leT,
    'Humidity': leH,
    'Wind': leW,
    'Play Tennis': lePT,
}

# Replace every categorical column with its integer-encoded version.
for column, encoder in column_encoders.items():
    df_play_tennis[column] = encoder.fit_transform(df_play_tennis[column])

# Preview the encoded table (rendered as the cell output).
df_play_tennis.head()

In [None]:
# Separate the breast cancer frame into the label column and the feature
# matrix (every column named in the dataset's feature_names).
y_breast_cancer = df_breast_cancer['target']
X_breast_cancer = df_breast_cancer.loc[:, breast_cancer.feature_names]

In [None]:
# Hold out 20% of the breast cancer rows for testing; the fixed seed makes the
# split reproducible.
bc_split = train_test_split(
    X_breast_cancer,
    y_breast_cancer,
    test_size=0.2,
    random_state=42,
)
X_bc_train, X_bc_test, y_bc_train, y_bc_test = bc_split

In [None]:
# Feature columns used as model input; 'Play Tennis' is the label.
pt_feature_columns = ['Outlook', 'Temperature', 'Humidity', 'Wind']
X_play_tennis = df_play_tennis[pt_feature_columns]
y_play_tennis = df_play_tennis['Play Tennis']

In [None]:
# Hold out 20% of the Play Tennis rows for testing; the fixed seed makes the
# split reproducible.
pt_split = train_test_split(
    X_play_tennis,
    y_play_tennis,
    test_size=0.2,
    random_state=42,
)
X_pt_train, X_pt_test, y_pt_train, y_pt_test = pt_split

### Decision Tree Classifier

#### 1. Dataset Breast Cancer

In [None]:
# Train a decision tree on the breast cancer training split (seeded so the
# fitted tree is reproducible).
decision_tree = DecisionTreeClassifier(random_state=42).fit(X_bc_train, y_bc_train)

# Render the learned rules as text, labelling each split with the dataset's
# feature names, and print the report.
bc_feature_names = breast_cancer.feature_names.tolist()
r = export_text(decision_tree, feature_names=bc_feature_names)
print(r)

In [None]:
# Tree visualisation (disabled): the lines below would export the fitted
# decision tree to DOT format and wrap it in a graphviz Source object.
#import graphviz

#dot_data = export_graphviz(decision_tree, out_file=None)
#graph = graphviz.Source(dot_data)


In [None]:
# Predict labels for the held-out breast cancer rows with the decision tree.
y_bc_pred = decision_tree.predict(X_bc_test)

# Score the predictions against the true test labels and report each metric.
accuracy = accuracy_score(y_bc_test, y_bc_pred)
print("Evaluasi hasil prediksi menggunakan metric Accuracy: {}".format(accuracy))

f1 = f1_score(y_bc_test, y_bc_pred)
print("Evaluasi hasil prediksi menggunakan metric F1: {}".format(f1))

#### 2. Dataset Play Tennis

In [None]:
# Train a decision tree on the Play Tennis training split (seeded so the
# fitted tree is reproducible).
tennis_decision_tree = DecisionTreeClassifier(random_state=42).fit(X_pt_train, y_pt_train)

# Render the learned rules as text with the four feature names and print them.
res = export_text(
    tennis_decision_tree,
    feature_names=['Outlook', 'Temperature', 'Humidity', 'Wind'],
)
print(res)

In [None]:
# Predict labels for the held-out Play Tennis rows with the decision tree.
y_pt_pred = tennis_decision_tree.predict(X_pt_test)

# Score the predictions against the true test labels and report each metric.
accuracy = accuracy_score(y_pt_test, y_pt_pred)
print("Evaluasi hasil prediksi menggunakan metric Accuracy: {}".format(accuracy))

f1 = f1_score(y_pt_test, y_pt_pred)
print("Evaluasi hasil prediksi menggunakan metric F1: {}".format(f1))

### Id3Estimator

In [None]:
# Placeholder cell for the Id3Estimator section; it performs no work.
pass

#### 1. Dataset Breast Cancer

#### 2. Dataset Play Tennis

In [None]:
# Import the id3 package's tree exporter under an alias.  A plain
# `from id3 import export_text` would rebind the notebook-global name
# `export_text` and shadow sklearn.tree.export_text, breaking the Decision
# Tree cells if they are re-run after this one.
from id3 import export_text as id3_export_text

# Fit an ID3 tree on the Play Tennis training split.
tennis_id3_estimator = Id3Estimator()
tennis_id3_estimator.fit(X_pt_train, y_pt_train)

# The id3 exporter takes the fitted tree structure (`tree_`), not the
# estimator itself; print the resulting text rendering.
res = id3_export_text(tennis_id3_estimator.tree_, feature_names=['Outlook', 'Temperature', 'Humidity', 'Wind'])
print(res)

In [None]:
# Predict labels for the held-out Play Tennis rows with the ID3 estimator.
y_pt_pred = tennis_id3_estimator.predict(X_pt_test)

# Score the predictions against the true test labels and report each metric.
accuracy = accuracy_score(y_pt_test, y_pt_pred)
print("Evaluasi hasil prediksi menggunakan metric Accuracy: {}".format(accuracy))

f1 = f1_score(y_pt_test, y_pt_pred)
print("Evaluasi hasil prediksi menggunakan metric F1: {}".format(f1))

### K Means

In [None]:
# Placeholder cell for the K Means section; it performs no work.
pass

#### 1. Dataset Breast Cancer

#### 2. Dataset Play Tennis

In [None]:
# Cluster the Play Tennis training features (labels are not used: KMeans is
# unsupervised).  The Play Tennis target is scored as a binary label elsewhere
# in this notebook, so request two clusters explicitly; the KMeans default of
# eight clusters cannot be compared against a two-class target.
# NOTE: cluster ids are arbitrary, so they still have to be mapped to the
# class labels before accuracy/F1 can be computed on the predictions.
tennis_k_means = KMeans(n_clusters=2, random_state=42)
tennis_k_means.fit(X_pt_train)

# Text export kept disabled: export_text renders decision trees, not KMeans
# models.
# res = export_text(tennis_k_means, feature_names=['Outlook', 'Temperature', 'Humidity', 'Wind'])
# print(res)

In [None]:
# Disabled evaluation of the KMeans model.  KMeans.predict returns cluster
# indices rather than class labels, so they would need to be mapped to the
# Play Tennis classes before the metrics below are meaningful.

# # Predict the results from the model
# y_pt_pred = tennis_k_means.predict(X_pt_test)

# # Evaluate the predictions
# # Using the Accuracy metric
# print("Evaluasi hasil prediksi menggunakan metric Accuracy: {}".format(accuracy_score(y_pt_test, y_pt_pred)))

# # Using the F1 metric
# print("Evaluasi hasil prediksi menggunakan metric F1: {}".format(f1_score(y_pt_test, y_pt_pred)))

### LogisticRegression

In [None]:
# Placeholder cell for the LogisticRegression section; it performs no work.
pass

#### 1. Dataset Breast Cancer

In [None]:
# Fit logistic regression on the breast cancer training split, with the
# iteration cap set explicitly to 10000.
bc_log_regression = LogisticRegression(max_iter=10000).fit(X_bc_train, y_bc_train)

# Text export left disabled.
# res = export_text(bc_log_regression, feature_names=['Outlook', 'Temperature', 'Humidity', 'Wind'])
# print(res)

# Echo the fitted estimator as the cell output.
bc_log_regression

In [None]:
# Predict labels for the held-out breast cancer rows with logistic regression.
y_bc_pred = bc_log_regression.predict(X_bc_test)

# Score the predictions against the true test labels and report each metric.
accuracy = accuracy_score(y_bc_test, y_bc_pred)
print("Evaluasi hasil prediksi menggunakan metric Accuracy: {}".format(accuracy))

f1 = f1_score(y_bc_test, y_bc_pred)
print("Evaluasi hasil prediksi menggunakan metric F1: {}".format(f1))

#### 2. Dataset Play Tennis

### Neural Network

In [None]:
# Placeholder cell for the Neural Network section; it performs no work.
pass

#### 1. Dataset Breast Cancer

In [None]:
# Train a multi-layer perceptron on the breast cancer training split (seeded
# for reproducibility).
bc_neural_network = MLPClassifier(random_state=42).fit(X_bc_train, y_bc_train)

# Echo the fitted estimator as the cell output.
bc_neural_network

In [None]:
# Predict labels for the held-out breast cancer rows with the neural network.
y_bc_pred = bc_neural_network.predict(X_bc_test)

# Score the predictions against the true test labels and report each metric.
accuracy = accuracy_score(y_bc_test, y_bc_pred)
print("Evaluasi hasil prediksi menggunakan metric Accuracy: {}".format(accuracy))

f1 = f1_score(y_bc_test, y_bc_pred)
print("Evaluasi hasil prediksi menggunakan metric F1: {}".format(f1))

#### 2. Dataset Play Tennis

### SVM

In [None]:
# Placeholder cell for the SVM section; it performs no work.
pass

#### 1. Dataset Breast Cancer

In [None]:
# Chain feature standardisation with an SVC (gamma='auto') and fit the whole
# pipeline on the breast cancer training split.
bc_clf = make_pipeline(
    StandardScaler(),
    SVC(gamma='auto'),
).fit(X_bc_train, y_bc_train)

# Echo the fitted pipeline as the cell output.
bc_clf

In [None]:
# Predict labels for the held-out breast cancer rows with the SVM pipeline.
y_bc_pred = bc_clf.predict(X_bc_test)

# Score the predictions against the true test labels and report each metric.
accuracy = accuracy_score(y_bc_test, y_bc_pred)
print("Evaluasi hasil prediksi menggunakan metric Accuracy: {}".format(accuracy))

f1 = f1_score(y_bc_test, y_bc_pred)
print("Evaluasi hasil prediksi menggunakan metric F1: {}".format(f1))

#### 2. Dataset Play Tennis

Pembagian Tugas
- Raihan Astrada (1,2,3 dataset breast cancer & 4,5,6 dataset play tennis)
- Daffa Ananda (4,5,6 dataset breast cancer & 1,2,3 dataset play tennis)