# Program to Implement Decision Tree

## Import Libraries

In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

## Importing Dataset

In [11]:
def importData():
    """Download the UCI balance-scale dataset and print a short summary.

    The file is read without a header row, so the resulting columns are
    labelled by integer position.

    Returns:
        The loaded data as a pandas DataFrame.
    """
    source_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/balance-scale/balance-scale.data'
    frame = pd.read_csv(source_url, sep=',', header=None)

    # Sanity summary: number of rows, (rows, columns), and the first rows.
    print("Dataset Length: ", len(frame))
    print("Dataset Shape: ", frame.shape)

    print("Dataset: ", frame.head())

    return frame

## Function to split the dataset

In [13]:
def splitdataset(balance_data):
    """Separate the target from the features and make a 70/30 train/test split.

    Args:
        balance_data: DataFrame whose column 0 holds the class label and whose
            columns 1 through 4 hold the features.

    Returns:
        Tuple ``(X, Y, X_train, X_test, Y_train, Y_test)`` where ``X``/``Y``
        are the full feature and label arrays and the rest are the split.
    """
    raw = balance_data.values
    labels = raw[:, 0]
    features = raw[:, 1:5]

    # Fixed random_state keeps the 30% hold-out identical between runs.
    partitions = train_test_split(features, labels, test_size=0.3, random_state=100)
    X_train, X_test, Y_train, Y_test = partitions

    return features, labels, X_train, X_test, Y_train, Y_test

## Function to perform training with the Gini index

In [15]:
def train_using_gini(X_train, Y_train):
    """Fit a decision tree that chooses splits by Gini impurity.

    Args:
        X_train: Training feature matrix.
        Y_train: Training class labels.

    Returns:
        The fitted DecisionTreeClassifier.
    """
    # Shallow tree with a minimum leaf size; seeded for reproducibility.
    tree_params = {
        "criterion": "gini",
        "random_state": 100,
        "max_depth": 3,
        "min_samples_leaf": 5,
    }
    model = DecisionTreeClassifier(**tree_params)
    model.fit(X_train, Y_train)
    return model

## Function to perform training with entropy

In [16]:
def train_using_entropy(X_train, Y_train):
    """Fit a decision tree that chooses splits by entropy.

    Args:
        X_train: Training feature matrix.
        Y_train: Training class labels.

    Returns:
        The fitted DecisionTreeClassifier.
    """
    # Same depth, leaf-size and seed settings as the Gini variant; only the
    # split criterion differs.
    model = DecisionTreeClassifier(
        criterion="entropy", random_state=100, max_depth=3, min_samples_leaf=5
    )
    model.fit(X_train, Y_train)
    return model

## Function to make Prediction

In [7]:
def prediction(X_test, clf_object):
    """Predict labels for ``X_test`` with a fitted classifier and print them.

    Args:
        X_test: Feature rows to classify.
        clf_object: Any object exposing ``predict(X)``.

    Returns:
        Whatever ``clf_object.predict(X_test)`` returns.
    """
    predicted = clf_object.predict(X_test)
    # Heading and values go on separate lines.
    print("Predicted Values: ", predicted, sep="\n")
    return predicted

## Function for Calculating Accuracy

In [8]:
def cal_accuracy(Y_test, y_pred):
    """Print the confusion matrix, percentage accuracy and per-class report.

    Args:
        Y_test: True class labels of the test set.
        y_pred: Labels predicted for the same samples.

    Returns:
        None. The metrics are written to standard output.
    """
    print("Confusion Matrix: ", confusion_matrix(Y_test, y_pred))
    # accuracy_score returns a fraction; scale it to a percentage.
    print("Accuracy: ", accuracy_score(Y_test, y_pred)*100)
    # A class that is never predicted has undefined precision. Passing
    # zero_division=0 reports it as 0 (the same value as the default) but
    # without emitting an UndefinedMetricWarning on every call.
    print("Report: ", classification_report(Y_test, y_pred, zero_division=0))

## Driver Code

In [18]:
def main():
    """Load the data, train both trees, and report test-set results for each."""
    # Build phase: fetch data, split it, and fit one tree per criterion.
    dataset = importData()
    _, _, X_train, X_test, Y_train, Y_test = splitdataset(dataset)
    gini_tree = train_using_gini(X_train, Y_train)
    entropy_tree = train_using_entropy(X_train, Y_train)

    # Operational phase: predict on the hold-out set and print the metrics,
    # Gini first, then entropy.
    runs = (
        ("Results Using Gini Index:", gini_tree),
        ("Results Using Entropy Index:", entropy_tree),
    )
    for banner, model in runs:
        print(banner)
        predicted = prediction(X_test, model)
        cal_accuracy(Y_test, predicted)

## Executing Main Function

In [17]:
# Run the full pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Dataset Length:  625
Dataset Shape:  (625, 5)
Dataset:     0  1  2  3  4
0  B  1  1  1  1
1  R  1  1  1  2
2  R  1  1  1  3
3  R  1  1  1  4
4  R  1  1  1  5
Results Using Gini Index:
Predicted Values: 
['R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'L'
 'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L'
 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'R' 'L' 'R'
 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'R'
 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R' 'L' 'R' 'L'
 'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'R'
 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L'
 'L' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R'
 'L' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R'
 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R'
 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'R']
Confusion Matrix:  [[ 0  6  7]
 [

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
