In [2]:
import numpy as np
import pandas as pd
from decisiontree import *

In [3]:
# Read the tab-separated tennis table. header=None means no line of the file
# is treated as a header row, so the five column names are supplied here.
# NOTE(review): the input is 'tenni.txt' while a later cell writes
# 'tennis.csv' — confirm the input file name is not a typo.
df = pd.read_csv(
    'tenni.txt',
    sep="\t",
    header=None,
    names=['Weather', 'Temp', 'Humidity', 'Wind', 'e'],
)

In [4]:
# Write the frame to 'tennis.csv' as comma-separated text. to_csv writes the
# row index by default, so the file gets an extra unnamed leading column.
df.to_csv('tennis.csv')

In [5]:
df

Unnamed: 0,Weather,Temp,Humidity,Wind,e
1,Sunny,Hot,High,Weak,No
2,Sunny,Hot,High,Strong,No
3,Overcast,Hot,High,Weak,Yes
4,Rain,Mild,High,Weak,Yes
5,Rain,Cool,Normal,Weak,Yes
6,Rain,Cool,Normal,Strong,No
7,Overcast,Cool,Normal,Strong,Yes
8,Sunny,Mild,High,Weak,No
9,Sunny,Cool,Normal,Weak,Yes
10,Rain,Mild,Normal,Weak,Yes


In [6]:
dt = DecisionTree(max_depth=3)

In [7]:
# Fit the tree on the tennis frame. 'e' is the last of the column names
# assigned when the file was read (its values are 'Yes'/'No').
# NOTE(review): presumably fit's second argument names the label column —
# confirm against the decisiontree module.
dt.fit(df, 'e')

In [8]:
# dt.items_level_order()

In [9]:
# Print every row returned by show_tree(), one per output line.
tree_rows = dt.show_tree()
for level_row in tree_rows:
    print(level_row)

[Weather]
[[('Sunny', Humidity, 'Weather'), ('Overcast', {'Yes': 1.0}, 'Weather'), ('Rain', Wind, 'Weather')]]
[Humidity, {'Yes': 1.0}, Wind]
[[('High', {'No': 1.0}, 'Humidity'), ('Normal', {'Yes': 1.0}, 'Humidity')], [('Weak', {'Yes': 1.0}, 'Wind'), ('Strong', {'No': 1.0}, 'Wind')]]
[{'No': 1.0}, {'Yes': 1.0}, {'Yes': 1.0}, {'No': 1.0}]


In [10]:
# Pass the show_tree() rows through re_order_show_index_for_dot and print
# each resulting entry on its own line.
dot_entries = dt.re_order_show_index_for_dot(dt.show_tree())
for entry in dot_entries:
    print(entry)

['N', 0, Weather]
['N', 1, Humidity]
['L', 0, 1, 'Sunny']
['N', 2, {'Yes': 1.0}]
['L', 0, 2, 'Overcast']
['N', 3, Wind]
['L', 0, 3, 'Rain']
['N', 4, {'No': 1.0}]
['L', 1, 4, 'High']
['N', 5, {'Yes': 1.0}]
['L', 1, 5, 'Normal']
['N', 6, {'Yes': 1.0}]
['L', 3, 6, 'Weak']
['N', 7, {'No': 1.0}]
['L', 3, 7, 'Strong']


In [11]:
dt.create_dot_png('my_tree')

In [12]:
dt.size

8

In [13]:
dt.height()

2

In [14]:
# Read the whitespace-separated lenses table. header=None means no line of
# the file is treated as a header row, so column names are supplied here.
# The separator is a raw string: "\s" in a normal string literal is an
# invalid escape sequence and triggers a warning on current Python versions.
# The leading 'index' column is only needed to parse the file, so it is
# dropped straight away.
df = pd.read_csv(
    'lenses.csv',
    delimiter=r"\s+",
    header=None,
    names=['index', 'age', 'prescription', 'astigmatic', 'tear production rate', 'target'],
).drop(columns='index')


In [15]:
df

Unnamed: 0,age,prescription,astigmatic,tear production rate,target
0,1,1,1,1,3
1,1,1,1,2,2
2,1,1,2,1,3
3,1,1,2,2,1
4,1,2,1,1,3
5,1,2,1,2,2
6,1,2,2,1,3
7,1,2,2,2,1
8,2,1,1,1,3
9,2,1,1,2,2


In [16]:
# Create a fresh tree with max_depth=3 and fit it on the current frame,
# passing 'target' as fit's second argument.
# NOTE(review): presumably the second argument names the label column —
# confirm against the decisiontree module.
dt = DecisionTree(max_depth=3)
dt.fit(df, 'target')

In [17]:
dt.items_level_order()

[[tear production rate,
  [(1, {3: 1.0}, 'tear production rate'),
   (2, astigmatic, 'tear production rate')]],
 [{3: 1.0}, []],
 [astigmatic, [(1, age, 'astigmatic'), (2, prescription, 'astigmatic')]],
 [age, [(1, {2: 1.0}, 'age'), (2, {2: 1.0}, 'age'), (3, prescription, 'age')]],
 [prescription, [(1, {1: 1.0}, 'prescription'), (2, age, 'prescription')]],
 [{2: 1.0}, []],
 [{2: 1.0}, []],
 [prescription,
  [(1, {3: 1.0}, 'prescription'), (2, {2: 1.0}, 'prescription')]],
 [{1: 1.0}, []],
 [age, [(1, {1: 1.0}, 'age'), (2, {3: 1.0}, 'age'), (3, {3: 1.0}, 'age')]],
 [{3: 1.0}, []],
 [{2: 1.0}, []],
 [{1: 1.0}, []],
 [{3: 1.0}, []],
 [{3: 1.0}, []]]

In [18]:
# Print every row returned by show_tree(), one per output line.
tree_rows = dt.show_tree()
for level_row in tree_rows:
    print(level_row)

[tear production rate]
[[(1, {3: 1.0}, 'tear production rate'), (2, astigmatic, 'tear production rate')]]
[{3: 1.0}, astigmatic]
[[(1, age, 'astigmatic'), (2, prescription, 'astigmatic')]]
[age, prescription]
[[(1, {2: 1.0}, 'age'), (2, {2: 1.0}, 'age'), (3, prescription, 'age')], [(1, {1: 1.0}, 'prescription'), (2, age, 'prescription')]]
[{2: 1.0}, {2: 1.0}, prescription, {1: 1.0}, age]
[[(1, {3: 1.0}, 'prescription'), (2, {2: 1.0}, 'prescription')], [(1, {1: 1.0}, 'age'), (2, {3: 1.0}, 'age'), (3, {3: 1.0}, 'age')]]
[{3: 1.0}, {2: 1.0}, {1: 1.0}, {3: 1.0}, {3: 1.0}]


In [19]:
# Ask the root node for its height, then build that many independent empty
# lists (one bucket per unit of height).
h = dt.root.height()
items = [list() for _ in range(h)]

In [20]:
items

[[], [], [], []]

In [21]:
# Small hand-written 3x3 table of strings, wrapped in a NumPy array and then
# in a DataFrame; pandas assigns the default integer row and column labels.
df = pd.DataFrame(
    data=np.array(
        [
            ['ab', 'ab', 'ac'],
            ['bc', 'ab', 'bc'],
            ['No', 'Yes', 'Yes'],
        ]
    )
)

In [22]:
df

Unnamed: 0,0,1,2
0,ab,ab,ac
1,bc,ab,bc
2,No,Yes,Yes


In [23]:
# Swap rows and columns so that each original row becomes a column.
# Caution: this rebinds df to its own transpose, so running the cell a
# second time flips the frame back.
df = df.T

In [24]:
# Refit the existing `dt` object on the transposed toy frame. The second
# argument is the integer column label 2, the column holding 'No'/'Yes'.
# NOTE(review): `dt` is not re-created in this cell, so it relies on the
# instance left over from an earlier cell — confirm that fit() fully
# replaces any previously fitted state.
dt.fit(df, 2)

In [25]:
for row in dt.show_tree():
    print(row)

[0]
[[('ab', 1, 0), ('ac', {'Yes': 1.0}, 0)]]
[1, {'Yes': 1.0}]
[[('bc', {'No': 1.0}, 1), ('ab', {'Yes': 1.0}, 1)]]
[{'No': 1.0}, {'Yes': 1.0}]


In [26]:
dt.create_dot_png('simple_test_tree')

In [27]:
# Import scikit-learn's bundled datasets module.
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so dependencies are visible in one place.
from sklearn import datasets

# Load the iris dataset object (provides .data, .feature_names and .target,
# which the following cells read).
iris = datasets.load_iris()

In [28]:
df = pd.DataFrame(iris.data)

In [29]:
list(df.columns)[0:-1]

[0, 1, 2]

In [30]:
df.columns = iris.feature_names

In [31]:
df['Target'] = iris.target

In [32]:
# Round every value in the frame down to a whole number (kept as floats).
# This applies to all columns, 'Target' included, which is why the printed
# tree shows labels such as 0.0, 1.0 and 2.0.
# NOTE(review): presumably done to turn the continuous measurements into a
# few discrete values per feature — confirm that is the intent.
df = np.floor(df)

In [33]:
# Create a new tree and fit it on the floored iris frame, passing 'Target'
# as fit's second argument.
# NOTE(review): 5 is passed positionally here, while other cells use the
# keyword max_depth=3. Presumably this is also max_depth — confirm against
# the DecisionTree signature and prefer the keyword for consistency.
dt = DecisionTree(5)
dt.fit(df, 'Target')

In [34]:
dt.size

21

In [35]:
dt.height()

4

In [36]:
# Print every row returned by show_tree(), one per output line.
tree_rows = dt.show_tree()
for level_row in tree_rows:
    print(level_row)

[petal length (cm)]
[[(1.0, {0.0: 1.0}, 'petal length (cm)'), (4.0, sepal length (cm), 'petal length (cm)'), (3.0, {1.0: 1.0}, 'petal length (cm)'), (5.0, petal width (cm), 'petal length (cm)'), (6.0, {2.0: 1.0}, 'petal length (cm)')]]
[{0.0: 1.0}, sepal length (cm), {1.0: 1.0}, petal width (cm), {2.0: 1.0}]
[[(7.0, {1.0: 1.0}, 'sepal length (cm)'), (6.0, sepal width (cm), 'sepal length (cm)'), (5.0, petal width (cm), 'sepal length (cm)'), (4.0, {2.0: 1.0}, 'sepal length (cm)')], [(1.0, sepal length (cm), 'petal width (cm)'), (2.0, {2.0: 1.0}, 'petal width (cm)')]]
[{1.0: 1.0}, sepal width (cm), petal width (cm), {2.0: 1.0}, sepal length (cm), {2.0: 1.0}]
[[(3.0, {1.0: 0.8, 2.0: 0.2}, 'sepal width (cm)'), (2.0, {1.0: 0.8666666666666667, 2.0: 0.13333333333333333}, 'sepal width (cm)')], [(1.0, {1.0: 1.0}, 'petal width (cm)'), (2.0, {2.0: 1.0}, 'petal width (cm)')], [(6.0, sepal width (cm), 'sepal length (cm)'), (5.0, {2.0: 1.0}, 'sepal length (cm)'), (7.0, {2.0: 1.0}, 'sepal length (cm)'

In [37]:
dt.create_dot_png('flower_tree')