# Decision Tree Learning

### Members
- [Febriawan Ghally Ar Rahman (1359111)](https://github.com/ghallyy)
- [Mgs. Tabrani (13519122)](https://github.com/mgstabrani)

### Content
1. [DecisionTreeClassifier](http://scikit-learn.org/stable/modules/tree.html)
2. [Id3Estimator](https://github.com/svaante/decision-tree-id3)
3. [K Means](https://scikit-learn.org/0.19/modules/generated/sklearn.cluster.KMeans.html)
4. [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html)
5. [Neural_network](https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html)
6. [SVM](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC)

## Load Datasets

In [332]:
import pandas as pd
from sklearn import datasets

# Load the breast cancer dataset once and take the feature matrix and the
# target vector from the returned Bunch, instead of loading the dataset a
# second time with return_X_y=True (which yields the same data/target pair).
breast_cancer = datasets.load_breast_cancer()
X_breast_cancer, y_breast_cancer = breast_cancer.data, breast_cancer.target

# Load the play tennis dataset from CSV and discard its 'day' column
# (presumably just a row identifier -- confirm against the CSV).
df_play_tennis = pd.read_csv('data/play_tennis.csv')
df_play_tennis = df_play_tennis.drop(columns=['day'])

## Encode Categorical Data

In [333]:
# Replace the values of every column in the play tennis dataframe with
# integer codes produced by a label encoder (re-fitted on each column in turn)
from sklearn import preprocessing
label_encoder = preprocessing.LabelEncoder()
df_play_tennis = df_play_tennis.apply(label_encoder.fit_transform)

# Separate the play tennis dataframe into features and target:
# every column except the last is a feature, the last column is the target.
dataset_play_tennis = df_play_tennis.to_numpy()

# Slice the 2-D array directly instead of indexing row by row; list() keeps
# the original container types (a list of row arrays / a list of scalars).
X_play_tennis = list(dataset_play_tennis[:, :-1])
y_play_tennis = list(dataset_play_tennis[:, -1])

## Split Datasets

In [334]:
# Split each dataset into 80% training data and 20% testing data
from sklearn.model_selection import train_test_split

# Features and labels are passed to a single train_test_split call so that
# both are partitioned with the same shuffle; this no longer relies on two
# independent calls happening to share the same random_state.

# Split breast cancer dataset
(
    X_training_breast_cancer,
    X_testing_breast_cancer,
    y_training_breast_cancer,
    y_testing_breast_cancer,
) = train_test_split(
    X_breast_cancer, y_breast_cancer, test_size=0.2, random_state=25
)

# Split play tennis dataset
(
    X_training_play_tennis,
    X_testing_play_tennis,
    y_training_play_tennis,
    y_testing_play_tennis,
) = train_test_split(
    X_play_tennis, y_play_tennis, test_size=0.2, random_state=25
)

## Learning with Logistic Regression Algorithm

In [335]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

### Breast cancer

In [336]:
# Build the logistic regression model, then fit it on the breast cancer training split
clf_breast_cancer = LogisticRegression(random_state=0, max_iter=10000)
clf_breast_cancer.fit(X_training_breast_cancer, y_training_breast_cancer)

# Predictions for the testing split (consumed by the metrics cell that follows)
y_predict = clf_breast_cancer.predict(X_testing_breast_cancer)

# Score on the testing split; as the last expression it becomes the cell output
clf_breast_cancer.score(X_testing_breast_cancer, y_testing_breast_cancer)

0.9298245614035088

#### Metrics Evaluation

In [337]:
# Accuracy of the latest predictions against the breast cancer test labels
accuracy_score = metrics.accuracy_score(y_true=y_testing_breast_cancer, y_pred=y_predict)
print('Accuracy score =', accuracy_score)

# F1 score of the same predictions
f1_score = metrics.f1_score(y_true=y_testing_breast_cancer, y_pred=y_predict)
print('F1 score =', f1_score)

Accuracy score = 0.9298245614035088
F1 score = 0.9466666666666667


### Play tennis

In [338]:
# Build a default logistic regression model, then fit it on the play tennis training split
clf_play_tennis = LogisticRegression()
clf_play_tennis.fit(X_training_play_tennis, y_training_play_tennis)

# Predictions for the testing split (consumed by the metrics cell that follows)
y_predict = clf_play_tennis.predict(X_testing_play_tennis)

# Score on the testing split; as the last expression it becomes the cell output
clf_play_tennis.score(X_testing_play_tennis, y_testing_play_tennis)


1.0

#### Metrics Evaluation

In [339]:
# Accuracy of the latest predictions against the play tennis test labels
accuracy_score = metrics.accuracy_score(y_true=y_testing_play_tennis, y_pred=y_predict)
print('Accuracy score =', accuracy_score)

# F1 score of the same predictions
f1_score = metrics.f1_score(y_true=y_testing_play_tennis, y_pred=y_predict)
print('F1 score =', f1_score)

Accuracy score = 1.0
F1 score = 1.0


## Learning with Neural Network Algorithm

In [340]:
from sklearn.neural_network import MLPClassifier

### Breast cancer

In [341]:
# Build the MLP classifier, then fit it on the breast cancer training split
clf_breast_cancer = MLPClassifier(random_state=1, max_iter=300)
clf_breast_cancer.fit(X_training_breast_cancer, y_training_breast_cancer)

# Predictions for the testing split (consumed by the metrics cell that follows)
y_predict = clf_breast_cancer.predict(X_testing_breast_cancer)

# Score on the testing split; as the last expression it becomes the cell output
clf_breast_cancer.score(X_testing_breast_cancer, y_testing_breast_cancer)

0.956140350877193

#### Metrics Evaluation

In [342]:
# Accuracy of the latest predictions against the breast cancer test labels
accuracy_score = metrics.accuracy_score(y_true=y_testing_breast_cancer, y_pred=y_predict)
print('Accuracy score =', accuracy_score)

# F1 score of the same predictions
f1_score = metrics.f1_score(y_true=y_testing_breast_cancer, y_pred=y_predict)
print('F1 score =', f1_score)

Accuracy score = 0.956140350877193
F1 score = 0.9673202614379085


### Play tennis

In [343]:
# Build the MLP classifier, then fit it on the play tennis training split
clf_play_tennis = MLPClassifier(random_state=1, max_iter=1000)
clf_play_tennis.fit(X_training_play_tennis, y_training_play_tennis)

# Predictions for the testing split (consumed by the metrics cell that follows)
y_predict = clf_play_tennis.predict(X_testing_play_tennis)

# Score on the testing split; as the last expression it becomes the cell output
clf_play_tennis.score(X_testing_play_tennis, y_testing_play_tennis)

1.0

#### Metrics Evaluation

In [344]:
# Accuracy of the latest predictions against the play tennis test labels
accuracy_score = metrics.accuracy_score(y_true=y_testing_play_tennis, y_pred=y_predict)
print('Accuracy score =', accuracy_score)

# F1 score of the same predictions
f1_score = metrics.f1_score(y_true=y_testing_play_tennis, y_pred=y_predict)
print('F1 score =', f1_score)

Accuracy score = 1.0
F1 score = 1.0


## Learning with SVM Algorithm

In [345]:
from sklearn.svm import SVC

### Breast cancer

In [346]:
# Fit a default SVC on the breast cancer training split.
# The model is stored in clf_breast_cancer, matching the other breast cancer
# cells; the original cell reused the play tennis variable name by mistake.
clf_breast_cancer = SVC().fit(X_training_breast_cancer, y_training_breast_cancer)

# Predictions for the testing split (consumed by the metrics cell that follows)
y_predict = clf_breast_cancer.predict(X_testing_breast_cancer)

# Score on the testing split; as the last expression it becomes the cell output
clf_breast_cancer.score(X_testing_breast_cancer, y_testing_breast_cancer)

0.9122807017543859

#### Metrics Evaluation

In [347]:
# Accuracy of the latest predictions against the breast cancer test labels
accuracy_score = metrics.accuracy_score(y_true=y_testing_breast_cancer, y_pred=y_predict)
print('Accuracy score =', accuracy_score)

# F1 score of the same predictions
f1_score = metrics.f1_score(y_true=y_testing_breast_cancer, y_pred=y_predict)
print('F1 score =', f1_score)

Accuracy score = 0.9122807017543859
F1 score = 0.9367088607594936


### Play tennis

In [348]:
# Build a default SVC, then fit it on the play tennis training split
clf_play_tennis = SVC()
clf_play_tennis.fit(X_training_play_tennis, y_training_play_tennis)

# Predictions for the testing split (consumed by the metrics cell that follows)
y_predict = clf_play_tennis.predict(X_testing_play_tennis)

# Score on the testing split; as the last expression it becomes the cell output
clf_play_tennis.score(X_testing_play_tennis, y_testing_play_tennis)

0.6666666666666666

#### Metrics Evaluation

In [349]:
# Accuracy of the latest predictions against the play tennis test labels
accuracy_score = metrics.accuracy_score(y_true=y_testing_play_tennis, y_pred=y_predict)
print('Accuracy score =', accuracy_score)

# F1 score of the same predictions
f1_score = metrics.f1_score(y_true=y_testing_play_tennis, y_pred=y_predict)
print('F1 score =', f1_score)

Accuracy score = 0.6666666666666666
F1 score = 0.6666666666666666
