In [59]:
import numpy as np
import pandas as pd
from decisiontree import *

In [60]:
df = pd.read_csv('tenni.txt', delimiter="\t", header=None, names=['a', 'b', 'c', 'd', 'e'])

In [61]:
df

Unnamed: 0,a,b,c,d,e
1,Sunny,Hot,High,Weak,No
2,Sunny,Hot,High,Strong,No
3,Overcast,Hot,High,Weak,Yes
4,Rain,Mild,High,Weak,Yes
5,Rain,Cool,Normal,Weak,Yes
6,Rain,Cool,Normal,Strong,No
7,Overcast,Cool,Normal,Strong,Yes
8,Sunny,Mild,High,Weak,No
9,Sunny,Cool,Normal,Weak,Yes
10,Rain,Mild,Normal,Weak,Yes


In [62]:
dt = DecisionTree(max_depth=3)

In [63]:
dt.probability(df, 'a')

{'Sunny': 0.35714285714285715,
 'Overcast': 0.2857142857142857,
 'Rain': 0.35714285714285715}

In [64]:
dt.conditional_prob(df, 'a', 'e', 'Overcast')

{'Yes': 1.0}

In [65]:
dt.info_gain(df, 'a', 'e')

0.24674981977443933

In [66]:
dt.max_info_gain(df, 'e', [['a', 'Rain'],['d', 'Weak']])

(0.0, 'b')

In [67]:
df

Unnamed: 0,a,b,c,d,e
1,Sunny,Hot,High,Weak,No
2,Sunny,Hot,High,Strong,No
3,Overcast,Hot,High,Weak,Yes
4,Rain,Mild,High,Weak,Yes
5,Rain,Cool,Normal,Weak,Yes
6,Rain,Cool,Normal,Strong,No
7,Overcast,Cool,Normal,Strong,Yes
8,Sunny,Mild,High,Weak,No
9,Sunny,Cool,Normal,Weak,Yes
10,Rain,Mild,Normal,Weak,Yes


In [68]:
dt.fit(df, 'e')

In [69]:
dt.items_level_order()

[[Node(a),
  [('Sunny', Node(c)), ('Overcast', Node({'Yes': 1.0})), ('Rain', Node(d))]],
 [Node(c), [('High', Node({'No': 1.0})), ('Normal', Node({'Yes': 1.0}))]],
 [Node({'Yes': 1.0}), []],
 [Node(d), [('Weak', Node({'Yes': 1.0})), ('Strong', Node({'No': 1.0}))]],
 [Node({'No': 1.0}), []],
 [Node({'Yes': 1.0}), []],
 [Node({'Yes': 1.0}), []],
 [Node({'No': 1.0}), []]]

In [70]:
for row in dt.show_tree():
    print(row)

[Node(a)]
[[('Sunny', Node(c)), ('Overcast', Node({'Yes': 1.0})), ('Rain', Node(d))]]
[Node(c), Node({'Yes': 1.0}), Node(d)]
[[('High', Node({'No': 1.0})), ('Normal', Node({'Yes': 1.0}))], [], [('Weak', Node({'Yes': 1.0})), ('Strong', Node({'No': 1.0}))]]
[Node({'No': 1.0}), Node({'Yes': 1.0}), Node({'Yes': 1.0}), Node({'No': 1.0})]


In [71]:
dt.size

8

In [72]:
dt.height()

2

In [73]:
df = pd.read_csv('lenses.csv',  delimiter="\s+", header=None, names=['index','age', 'prescription', 'astigmatic','tear production rate', 'target'])
df = df.drop(columns='index')


In [74]:
df

Unnamed: 0,age,prescription,astigmatic,tear production rate,target
0,1,1,1,1,3
1,1,1,1,2,2
2,1,1,2,1,3
3,1,1,2,2,1
4,1,2,1,1,3
5,1,2,1,2,2
6,1,2,2,1,3
7,1,2,2,2,1
8,2,1,1,1,3
9,2,1,1,2,2


In [75]:
dt = DecisionTree(max_depth=3)
dt.fit(df, 'target')

In [76]:
dt.items_level_order()

[[Node(tear production rate), [(1, Node({3: 1.0})), (2, Node(astigmatic))]],
 [Node({3: 1.0}), []],
 [Node(astigmatic), [(1, Node(age)), (2, Node(prescription))]],
 [Node(age),
  [(1, Node({2: 1.0})), (2, Node({2: 1.0})), (3, Node(prescription))]],
 [Node(prescription), [(1, Node({1: 1.0})), (2, Node(age))]],
 [Node({2: 1.0}), []],
 [Node({2: 1.0}), []],
 [Node(prescription), [(1, Node({3: 1.0})), (2, Node({2: 1.0}))]],
 [Node({1: 1.0}), []],
 [Node(age), [(1, Node({1: 1.0})), (2, Node({3: 1.0})), (3, Node({3: 1.0}))]],
 [Node({3: 1.0}), []],
 [Node({2: 1.0}), []],
 [Node({1: 1.0}), []],
 [Node({3: 1.0}), []],
 [Node({3: 1.0}), []]]

In [77]:
for row in dt.show_tree():
    print(row)

[Node(tear production rate)]
[[(1, Node({3: 1.0})), (2, Node(astigmatic))]]
[Node({3: 1.0}), Node(astigmatic)]
[[], [(1, Node(age)), (2, Node(prescription))]]
[Node(age), Node(prescription)]
[[(1, Node({2: 1.0})), (2, Node({2: 1.0})), (3, Node(prescription))], [(1, Node({1: 1.0})), (2, Node(age))]]
[Node({2: 1.0}), Node({2: 1.0}), Node(prescription), Node({1: 1.0}), Node(age)]
[[], [], [(1, Node({3: 1.0})), (2, Node({2: 1.0}))], [], [(1, Node({1: 1.0})), (2, Node({3: 1.0})), (3, Node({3: 1.0}))]]
[Node({3: 1.0}), Node({2: 1.0}), Node({1: 1.0}), Node({3: 1.0}), Node({3: 1.0})]


In [78]:
h = dt.root.height()
items = [[] for i in range(h)]

In [79]:
items

[[], [], [], []]

In [80]:
df = pd.DataFrame(np.array([['ab', 'ab', 'ac'],['bc', 'ab', 'bc'],['No', 'Yes', 'Yes']]))

In [81]:
df

Unnamed: 0,0,1,2
0,ab,ab,ac
1,bc,ab,bc
2,No,Yes,Yes


In [82]:
df = df.transpose()

In [83]:
df.

SyntaxError: invalid syntax (<ipython-input-83-38e9e33c87a8>, line 1)

In [84]:
dt.fit(df, 2)

In [85]:
for row in dt.show_tree():
    print(row)

[Node(0)]
[[('ab', Node(1)), ('ac', Node({'Yes': 1.0}))]]
[Node(1), Node({'Yes': 1.0})]
[[('bc', Node({'No': 1.0})), ('ab', Node({'Yes': 1.0}))], []]
[Node({'No': 1.0}), Node({'Yes': 1.0})]


In [123]:
#Import scikit-learn dataset library
from sklearn import datasets

#Load dataset
iris = datasets.load_iris()

In [124]:
df = pd.DataFrame(iris.data)

In [125]:
df.columns = iris.feature_names

In [126]:
df['Target'] = iris.target

In [132]:
df = np.floor(df)

In [133]:
dt = DecisionTree(5)
dt.fit(df, 'Target')

In [134]:
dt.size

21

In [135]:
dt.height()

4

In [136]:
for row in dt.show_tree():
    print(row)

[Node(petal length (cm))]
[[(1.0, Node({0.0: 1.0})), (4.0, Node(sepal length (cm))), (3.0, Node({1.0: 1.0})), (5.0, Node(petal width (cm))), (6.0, Node({2.0: 1.0}))]]
[Node({0.0: 1.0}), Node(sepal length (cm)), Node({1.0: 1.0}), Node(petal width (cm)), Node({2.0: 1.0})]
[[], [(7.0, Node({1.0: 1.0})), (6.0, Node(sepal width (cm))), (5.0, Node(petal width (cm))), (4.0, Node({2.0: 1.0}))], [], [(1.0, Node(sepal length (cm))), (2.0, Node({2.0: 1.0}))], []]
[Node({1.0: 1.0}), Node(sepal width (cm)), Node(petal width (cm)), Node({2.0: 1.0}), Node(sepal length (cm)), Node({2.0: 1.0})]
[[], [(3.0, Node({1.0: 0.8, 2.0: 0.2})), (2.0, Node({1.0: 0.8666666666666667, 2.0: 0.13333333333333333}))], [(1.0, Node({1.0: 1.0})), (2.0, Node({2.0: 1.0}))], [], [(6.0, Node(sepal width (cm))), (5.0, Node({2.0: 1.0})), (7.0, Node({2.0: 1.0}))], []]
[Node({1.0: 0.8, 2.0: 0.2}), Node({1.0: 0.8666666666666667, 2.0: 0.13333333333333333}), Node({1.0: 1.0}), Node({2.0: 1.0}), Node(sepal width (cm)), Node({2.0: 1.0})