### Importing the Libraries

In [1]:
import numpy as np
import pandas as pd

### Model Definition

In [2]:
class DecisionTree:
    """Binary decision-tree classifier grown by minimising weighted Gini impurity.

    Every internal node tests ``feature <= threshold`` on one column of the
    feature frame; thresholds are midpoints between consecutive sorted unique
    values of that column, so features must support ``+``, ``/`` and ``<=``.
    """

    class Node:
        """One tree node: a leaf holding a label, or an internal threshold test."""

        def __init__(self, leaf=False, prop=None, left=None, right=None, val=None, gini=None):
            self.leaf = leaf    # True when the node is terminal
            self.prop = prop    # column name tested by an internal node
            self.left = left    # subtree for rows where feature <= val
            self.right = right  # subtree for rows where feature > val
            self.val = val      # predicted label (leaf) or split threshold (internal)
            self.gini = gini    # Gini impurity of the labels that reached this node

    def __init__(self):
        self.root = None

    def fit(self, X, Y):
        """Grow the tree from feature frame ``X`` and label series ``Y``.

        ``X`` and ``Y`` must share the same index, because rows are selected
        from both with the same boolean mask.

        Raises:
            ValueError: if ``Y`` is empty.
        """
        self.root = self.buildrec(X, Y)

    def gini(self, Y):
        """Return the Gini impurity ``1 - sum(p_k ** 2)`` of label series ``Y``.

        An empty series yields ``1.0`` because no class proportion is subtracted.
        """
        n = len(Y)
        val = 1.0
        for c in Y.value_counts().values:
            tmp = c / n
            val -= tmp ** 2

        return val

    def buildrec(self, X, Y):
        """Recursively build and return the subtree for the rows in ``X`` / ``Y``.

        Returns a leaf when the labels are pure, or when no threshold on any
        feature separates the rows (e.g. identical feature rows carrying
        different labels); in the latter case the leaf predicts the most
        frequent label.

        Raises:
            ValueError: if ``Y`` is empty.
        """
        if len(Y) == 0:
            raise ValueError('cannot build a decision tree from an empty training set')

        uniques = Y.unique()
        if len(uniques) == 1:
            return self.Node(leaf=True, val=uniques[0], gini=0.0)

        least_feature = None
        least_split = None
        least_gini = None

        for feature in X.columns:
            column = X[feature]
            vals = np.sort(column.unique())

            for i in range(len(vals) - 1):
                # Candidate threshold: midpoint between neighbouring unique values.
                splitval = (vals[i+1] + vals[i]) / 2

                ind = column <= splitval
                YT_, YF_ = Y[ind], Y[~ind]

                # A split that leaves one side empty (NaN threshold, or a midpoint
                # that rounds onto a neighbour) would recurse on the same rows forever.
                if len(YT_) == 0 or len(YF_) == 0:
                    continue

                gini = (len(YT_) / len(Y)) * self.gini(YT_) + (len(YF_) / len(Y)) * self.gini(YF_)

                # Strict '<' keeps the first candidate on ties.
                if least_feature is None or gini < least_gini:
                    least_feature = feature
                    least_split = splitval
                    least_gini = gini

        if least_feature is None:
            # No usable split: fall back to a majority-vote leaf
            # (``mode`` returns the smallest label when counts tie).
            return self.Node(leaf=True, val=Y.mode().iloc[0], gini=self.gini(Y))

        ind = X[least_feature] <= least_split
        not_ind = ~ind

        left = self.buildrec(X[ind], Y[ind])
        right = self.buildrec(X[not_ind], Y[not_ind])

        return self.Node(leaf=False, prop=least_feature, left=left, right=right, val=least_split, gini=self.gini(Y))

    def predict(self, X):
        """Return a Series of predicted labels, one per row of ``X``, indexed like ``X``.

        Each row is routed from the root: left when ``row[prop] <= val``, right
        otherwise, until a leaf is reached.
        """
        predictions = []

        for _, x in X.iterrows():
            curr = self.root

            while curr.leaf is False:
                curr = curr.left if x[curr.prop] <= curr.val else curr.right

            predictions.append(curr.val)

        if not predictions:
            # Explicit dtype avoids the ambiguous-dtype empty Series.
            return pd.Series([], index=X.index, dtype=object)

        # Built in one shot so rows sharing an index label are not overwritten.
        return pd.Series(predictions, index=X.index)

    def printtree(self):
        """Print an indented text rendering of the fitted tree."""
        self.printrec(self.root, 0)

    def printrec(self, root, level):
        """Print the subtree rooted at ``root``, indented by ``level`` steps of three spaces."""
        print('   ' * level, end='')
        if root.leaf is True:
            print(root.val)
        else:
            print('%s <= %.2f (Gini: %.2f):' % (root.prop, root.val, root.gini))
            print('   ' * (level+1) + 'True' + ':')
            self.printrec(root.left, level+2)
            print('   ' * (level+1) + 'False' + ':')
            self.printrec(root.right, level+2)

### Dataset Loading

In [3]:
# Read the training data from 'dataset.csv' (path is relative to the working directory).
df = pd.read_csv('dataset.csv')

# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,Outlook,Temp.,Humidity,Wind,Decision
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes


### Preparing the Dataset for Training

In [4]:
# Work on a copy so the raw frame `df` stays untouched by the encoding step.
df_copy = df.copy()

In [5]:
# Encode every text column as integer codes (in order of first appearance).
# `mappers[col]` maps label -> code; `rev_mappers[col]` maps code -> label.
# Codes are always derived from the raw frame `df`, never from the already
# encoded `df_copy`, so re-running this cell gives the same result.
mappers = {}
rev_mappers = {}
for col in df.columns:
    # Accept both classic object columns and pandas' dedicated string dtypes.
    if pd.api.types.is_object_dtype(df[col]) or pd.api.types.is_string_dtype(df[col]):
        mapper = {unq: i for i, unq in enumerate(df[col].unique())}
        rev_mapper = {i: unq for unq, i in mapper.items()}
        df_copy[col] = df[col].map(mapper)
        mappers[col] = mapper
        rev_mappers[col] = rev_mapper

df_copy.head()

Unnamed: 0,Outlook,Temp.,Humidity,Wind,Decision
0,0,0,0,0,0
1,0,0,0,1,0
2,1,0,0,0,1
3,2,1,0,0,1
4,2,2,1,0,1


In [6]:
# Labels are the encoded 'Decision' column; features are every other column.
Y_train = df_copy['Decision']
X_train = df_copy.drop(columns='Decision')

### Train the Model and Display the Tree

In [7]:
# Create an unfitted tree, then grow it on the encoded training data.
model = DecisionTree()

model.fit(X_train, Y_train)

In [8]:
# Print the fitted tree; thresholds refer to the integer category codes.
model.printtree()

Humidity <= 0.50 (Gini: 0.46):
   True:
      Outlook <= 0.50 (Gini: 0.49):
         True:
            0
         False:
            Outlook <= 1.50 (Gini: 0.38):
               True:
                  1
               False:
                  Wind <= 0.50 (Gini: 0.50):
                     True:
                        1
                     False:
                        0
   False:
      Outlook <= 1.50 (Gini: 0.24):
         True:
            1
         False:
            Wind <= 0.50 (Gini: 0.44):
               True:
                  1
               False:
                  0


### Define the Testing Dataset

In [9]:
# Two hand-written test samples, one per row, using the raw category labels.
X_test = pd.DataFrame(
    [
        ['Sunny', 'Mild', 'High', 'Weak'],
        ['Overcast', 'Cool', 'Normal', 'Strong'],
    ],
    columns=['Outlook', 'Temp.', 'Humidity', 'Wind'],
)

X_test

Unnamed: 0,Outlook,Temp.,Humidity,Wind
0,Sunny,Mild,High,Weak
1,Overcast,Cool,Normal,Strong


In [10]:
# Copy the test frame so the label-valued `X_test` is kept for display later.
X_test_num = X_test.copy()

In [11]:
# Encode the test features with the mappings learned from the training data.
# Values are read from the raw `X_test`, so re-running this cell is safe.
for col in X_test.columns:
    encoded = X_test[col].map(mappers[col])
    if encoded.isna().any():
        # An unmapped value would become NaN and silently take the 'False' branch.
        unseen = sorted(set(X_test.loc[encoded.isna(), col].astype(str)))
        raise ValueError('column %r has values not seen in training: %s' % (col, unseen))
    X_test_num[col] = encoded

In [12]:
# Display the integer-encoded test features.
X_test_num

Unnamed: 0,Outlook,Temp.,Humidity,Wind
0,0,1,0,0
1,1,2,1,1


### Predict the Outcomes

In [13]:
# Predict an encoded 'Decision' value for each encoded test row.
Y_pred = model.predict(X_test_num)

In [14]:
# Decode the predicted codes back to their original labels via `rev_mappers`
# and show them as a 'Decision' column beside the raw test features.
print('Predictions by the Decision Tree Classifier:')
X_test.assign(Decision=Y_pred.map(rev_mappers['Decision']))

Predictions by the Decision Tree Classifier:


Unnamed: 0,Outlook,Temp.,Humidity,Wind,Decision
0,Sunny,Mild,High,Weak,No
1,Overcast,Cool,Normal,Strong,Yes
