# Decision Tree classifier
## Created By :Nitesh Sukhwani

In [33]:
'''Importing Required Libraries'''

import numpy as np
import pandas as pd
import math

In [34]:
'''Loading the dataset'''
# Read the weather data set from 'weather.csv' (path is relative to the working directory).
data = pd.read_csv('weather.csv')

In [35]:
'''Checking the Data'''
# Remove, in place, every row that contains a missing value.
data.dropna(inplace=True)
# Bare expression at the end of the cell: displays the first rows of the frame.
data.head()

Unnamed: 0,outlook,temp,humidity,windy,play
1,sunny,hot,high,False,no
3,sunny,hot,high,True,no
5,overcast,hot,high,False,yes
7,rainy,mild,high,False,yes
9,rainy,cool,normal,False,yes


In [36]:
'''Calculate the entropy of a data set
data = data for which the entropy needs to be calculated
target = target variable on which the entropy is based
entropy = entropy of the data'''

def Entropy(data,target='play'):
    """Return the Shannon entropy (in bits) of the class labels in ``data[target]``.

    Parameters:
        data: pandas DataFrame holding the labelled rows.
        target: name of the label column (defaults to ``'play'``).

    Returns:
        The sum of ``-p * log2(p)`` over the class fractions ``p``.
        ``0`` is returned when there are no labelled rows.

    Missing labels (NaN/None) are ignored: the fractions are taken over the
    rows that actually carry a label.
    """
    # Count every class once instead of re-counting inside the loop.
    counts = data[target].value_counts()
    # Categorical columns report unused categories with a count of 0;
    # they contribute nothing and log2(0) is undefined, so drop them.
    counts = counts[counts > 0]
    total = counts.sum()
    entropy = 0
    for count in counts:
        fraction = count / total
        entropy -= fraction * math.log2(fraction)
    return entropy

In [37]:
'''Calculate the Gini impurity of a data set
data = data for which the Gini impurity needs to be calculated
target = target variable on which the Gini impurity is based
returns the Gini impurity of the data'''

def Gini(data,target='play'):
    """Return the Gini impurity of the class labels in ``data[target]``.

    Parameters:
        data: pandas DataFrame holding the labelled rows.
        target: name of the label column (defaults to ``'play'``).

    Returns:
        ``1 - sum(p ** 2)`` over the class fractions ``p``. With no labelled
        rows the sum is empty and ``1`` is returned.

    Missing labels (NaN/None) are ignored: the fractions are taken over the
    rows that actually carry a label.
    """
    # Count every class once instead of re-counting inside the loop.
    counts = data[target].value_counts()
    total = counts.sum()
    squared_sum = 0
    for count in counts:
        # A zero count (unused category) simply adds 0 here.
        if count:
            squared_sum += (count / total) ** 2
    return 1 - squared_sum

In [38]:
'''Calculate the entropy for every attribute and find the best splitting attribute
    data : data for which the best splitting attribute needs to be found
    col : columns of the dataset
    target : target variable
    mname : best splitting attribute
'''

def Entropy_all(data,col,target='play'):
    """Pick the attribute whose split yields the largest information gain.

    Parameters:
        data: pandas DataFrame containing the attribute columns and the label.
        col: iterable of attribute column names to consider.
        target: name of the label column (defaults to ``'play'``).

    Returns:
        The name of the attribute with the highest information gain (the
        first one wins on ties), or ``None`` when no attribute in ``col``
        has at least two distinct values and therefore nothing can be split.
    """
    # An attribute with a single distinct value cannot partition the rows.
    splits = {cname: data[cname].unique() for cname in col}
    candidates = [cname for cname in col if len(splits[cname]) > 1]
    if not candidates:
        return None

    tot_entropy = Entropy(data, target)
    n_rows = len(data)
    mname = None
    mval = float('-inf')
    for cname in candidates:
        # Weighted average entropy of the partitions induced by this attribute.
        entropy = 0
        for value in splits[cname]:
            tmp = data[data[cname] == value]
            # Pass `target` through so a non-default label column is honoured.
            entropy += Entropy(tmp, target) * len(tmp) / n_rows
        info_gain = tot_entropy - entropy
        if info_gain > mval:
            mname = cname
            mval = info_gain
    return mname

In [39]:
'''Calculate the Gini index for every attribute and find the best splitting attribute
    data : data for which the best splitting attribute needs to be found
    col : columns of the dataset
    target : target variable
    mname : best splitting attribute
'''

def Gini_all(data,col,target='play'):
    """Pick the attribute whose split yields the largest Gini impurity reduction.

    Parameters:
        data: pandas DataFrame containing the attribute columns and the label.
        col: iterable of attribute column names to consider.
        target: name of the label column (defaults to ``'play'``).

    Returns:
        The name of the attribute with the largest impurity reduction (the
        first one wins on ties), or ``None`` when no attribute in ``col``
        has at least two distinct values and therefore nothing can be split.
    """
    # An attribute with a single distinct value cannot partition the rows.
    splits = {cname: data[cname].unique() for cname in col}
    candidates = [cname for cname in col if len(splits[cname]) > 1]
    if not candidates:
        return None

    tot_gini = Gini(data, target)
    n_rows = len(data)
    mname = None
    mval = float('-inf')
    for cname in candidates:
        # Weighted average impurity of the partitions induced by this attribute.
        gini = 0
        for value in splits[cname]:
            tmp = data[data[cname] == value]
            # Pass `target` through so a non-default label column is honoured.
            gini += Gini(tmp, target) * len(tmp) / n_rows
        gini_gain = tot_gini - gini
        if gini_gain > mval:
            mname = cname
            mval = gini_gain
    return mname

In [40]:
'''Find the subset of the data after splitting
    data = data to be split
    mname = name of the attribute on which the split is made
    value = value of the splitting attribute'''

def sub_data(data,mname,value):
    """Return the rows of ``data`` whose ``mname`` column equals ``value``.

    The selected rows come back as a new frame with a fresh 0..n-1 index;
    the frame passed in is not modified.
    """
    matches = data[mname] == value
    subset = data.loc[matches]
    return subset.reset_index(drop=True)

In [41]:
'''Build a decision tree classifier from the data
    data = data to train on
    target = target variable
    tree = output tree in the form of a dictionary
    method = which method to use for splitting: Gini index or entropy'''


def decision_tree(data,target,tree=None,method='Entropy'):
    """Build a decision tree for ``target`` as nested dictionaries.

    Parameters:
        data: pandas DataFrame with the attribute columns and the label column.
        target: name of the label column.
        tree: optional dictionary to add the root split to; a new one is
            created when omitted.
        method: ``'gini'`` (any letter case) selects splits with ``Gini_all``;
            any other value selects them with ``Entropy_all``.

    Returns:
        ``{attribute: {value: subtree_or_label, ...}}``. When the data cannot
        be split (a single class, or no attribute with two or more distinct
        values) the result is a bare label instead of a dictionary: the only
        class, or the most frequent one.

    Raises:
        ValueError: if ``data`` has no rows.
    """
    if len(data) == 0:
        raise ValueError('cannot build a decision tree from empty data')

    col = list(data.columns)
    col.remove(target)

    # Base case: every row has the same label, so there is nothing to learn.
    labels = np.unique(data[target])
    if len(labels) == 1:
        return labels[0]

    majority = data[target].value_counts().idxmax()
    # Base case: no attribute can partition the rows (e.g. identical rows with
    # conflicting labels); splitting again would never terminate.
    if not any(len(data[cname].unique()) > 1 for cname in col):
        return majority

    use_gini = isinstance(method, str) and method.lower() == 'gini'
    if use_gini:
        root = Gini_all(data, col, target)
    else:
        root = Entropy_all(data, col, target)
    if root is None:
        # The selector found nothing to split on; fall back to a leaf.
        return majority

    if tree is None:
        tree = {}
    # setdefault also covers a caller-supplied dictionary without this key.
    branches = tree.setdefault(root, {})
    for val in np.unique(data[root]):
        SubData = sub_data(data, root, val)
        sub_labels = np.unique(SubData[target])
        if len(sub_labels) == 1:
            branches[val] = sub_labels[0]
        else:
            # Forward `method` so the whole tree uses the same split criterion.
            branches[val] = decision_tree(SubData, target, method=method)
    return tree

In [42]:
'''Give the prediction for a single data instance
d = instance of the data
tree = the tree built by the decision_tree function
output : returns the predicted value as a string'''

def single_pred(d,tree):
    """Walk ``tree`` for one instance and return the predicted label.

    Parameters:
        d: mapping-like instance (e.g. a DataFrame row) indexed by attribute name.
        tree: nested dictionary ``{attribute: {value: subtree_or_label}}``,
            or a bare label.

    Returns:
        The label reached at the leaf, converted with ``str``; ``None`` if an
        empty dictionary is encountered.

    Raises:
        KeyError: if ``d`` lacks an attribute used by the tree, or holds a
            value for which the tree has no branch.
    """
    node = tree
    while isinstance(node, dict):
        if not node:
            return None
        # Each internal node is keyed by the attribute it splits on; use the
        # key as stored so non-string attribute names keep working.
        attribute = next(iter(node))
        node = node[attribute][d[attribute]]
    return str(node)

'''Give predictions for the entire data
data = data for which predictions need to be found
tree = tree built using decision_tree
output : returns the list of predictions
'''

def predict(data,tree):
    """Return a list with one ``single_pred`` result per row of ``data``.

    Rows are visited by position, first to last, and every row is evaluated
    against the same ``tree``.
    """
    return [single_pred(data.iloc[position], tree) for position in range(len(data))]

In [43]:
# Build the tree on the loaded data with 'play' as the label, requesting Gini-based splits.
tree = decision_tree(data,'play',method='gini')

In [44]:
# Predict on the same rows the tree was built from.
x = predict(data,tree)
# Bare expression at the end of the cell: displays the list of predictions.
x

['no',
 'no',
 'yes',
 'yes',
 'yes',
 'no',
 'yes',
 'no',
 'yes',
 'yes',
 'yes',
 'yes',
 'yes',
 'no']