## Decision Tree using scikit-learn

In [26]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
# `export_text` is exposed by the public `sklearn.tree` namespace; the private
# `sklearn.tree.export` module was deprecated in 0.22 and removed in 0.24.
from sklearn.tree import export_text

In [7]:
# Load the Iris data, discard the "Id" column, and expose the "species"
# column under the generic name "label".
df = (
    pd.read_csv('Iris.csv')
    .drop("Id", axis=1)
    .rename(columns={"species": "label"})
)

In [28]:
# Train test split
def train_test_split(df, test_size=0.8, random_state=None):
    """Randomly partition ``df`` into a training frame and a test frame.

    NOTE: despite its name, ``test_size`` is the fraction of rows that goes
    into the *training* frame (it is forwarded as ``frac`` to the sampler);
    the remaining rows become the test frame. The parameter name is kept so
    existing positional and keyword callers continue to work.

    Rows are assigned by position rather than by index label, so every row
    ends up in exactly one of the two frames even when ``df`` carries
    duplicate index labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split; it is not modified.
    test_size : float, default 0.8
        Fraction of rows placed in the returned training frame.
    random_state : optional
        Seed (or random state object) forwarded to pandas' ``sample`` for a
        reproducible split.

    Returns
    -------
    (train_df, test_df) : tuple of pandas.DataFrame
        The two disjoint partitions, each sorted by index.
    """
    n_rows = len(df)

    # Sample row *positions* (0 .. n_rows-1) instead of rows, so that the
    # complement below cannot be confused by repeated index labels.
    sampled_positions = pd.Series(np.arange(n_rows)).sample(
        frac=test_size, random_state=random_state
    )

    in_train = np.zeros(n_rows, dtype=bool)
    in_train[sampled_positions.values] = True

    train_df = df[in_train]
    test_df = df[~in_train]
    return train_df.sort_index(), test_df.sort_index()

In [29]:
# NOTE: `test_size` is the fraction sampled into train_df (80% here);
# the remaining rows form test_df. Seed fixed for a repeatable split.
train_df, test_df = train_test_split(df, test_size=0.8, random_state=100)

In [8]:
# Preview the first five rows of the full (un-split) frame.
df.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,label
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [10]:
# Depth-limited tree with a fixed seed so repeated runs use the same settings.
decision_tree = DecisionTreeClassifier(
    max_depth=3,
    random_state=0,
)

In [16]:
# Train on the 80% partition.
decision_tree = decision_tree.fit(
    X=train_df.iloc[:, :-1],  # every column except the last -> features
    y=train_df.iloc[:, -1],   # last column ("label") -> target class
)

In [22]:
# Render the fitted tree as indented text, labelling splits with the
# feature column names (all columns of `df` except the trailing label).
formated_tree = export_text(
    decision_tree,
    feature_names=df.columns[:-1].tolist(),
)

In [24]:
# print() is intentional: a bare expression would show the string's repr,
# with escaped "\n" instead of real line breaks.
print(formated_tree)

|--- petal_width <= 0.80
|   |--- class: Iris-setosa
|--- petal_width >  0.80
|   |--- petal_width <= 1.75
|   |   |--- petal_length <= 4.95
|   |   |   |--- class: Iris-versicolor
|   |   |--- petal_length >  4.95
|   |   |   |--- class: Iris-virginica
|   |--- petal_width >  1.75
|   |   |--- petal_length <= 4.85
|   |   |   |--- class: Iris-virginica
|   |   |--- petal_length >  4.85
|   |   |   |--- class: Iris-virginica



### Evaluate

In [31]:
# Score the fitted tree on the held-out rows (features = all but the last
# column, target = last column) and express the result as a percentage.
100 * decision_tree.score(
    X=test_df.iloc[:, :-1],
    y=test_df.iloc[:, -1],
)

100.0