# Decision Tree

In [9]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
from sklearn.model_selection import train_test_split


In [10]:
def importdata():
    balance_data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/balance-scale/balance-scale.data',sep= ',', header = None)
      
    # Printing the dataswet shape
    print ("Dataset Length: ", len(balance_data))
    print ("Dataset Shape: ", balance_data.shape)
      
    # Printing the dataset obseravtions
    print ("top 5 data in Dataset:\n ",balance_data.head(5))
    return balance_data

  

In [11]:
# Function to split the dataset
def splitdataset(balance_data):
  
    # Separating the target variable
    X = balance_data.values[:, 1:5]
    Y = balance_data.values[:, 0]
  
    # Splitting the dataset into train and test
    X_train, X_test, y_train, y_test = train_test_split( 
    X, Y, random_state = 10,test_size=0.15)
      
    return X, Y, X_train, X_test, y_train, y_test
      

In [12]:
# Function to perform training with giniIndex.
def train_using_gini(X_train, X_test, y_train):
  
    # Creating the classifier object
    clf_gini = DecisionTreeClassifier(criterion = "gini",
            random_state = 10, max_depth=3, min_samples_leaf=5)
  
    # Performing training
    clf_gini.fit(X_train, y_train)
    return clf_gini

In [13]:
# Function to perform training with entropy.
def train_using_entropy(X_train, X_test, y_train):
  
    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(
            criterion = "entropy", random_state = 10,
            max_depth = 3, min_samples_leaf = 5)
  
    # Performing training
    clf_entropy.fit(X_train, y_train)
    return clf_entropy
  

In [14]:
# Function to make predictions
def prediction(X_test, clf_object):
  
    # Predicton on test with giniIndex
    y_pred = clf_object.predict(X_test)
    print("Predicted values:")
    print(y_pred)
    return y_pred
 

In [15]:
# Function to calculate accuracy
def cal_accuracy(y_test, y_pred): # here ytest - actual output,ypred - predicted output
      
    print("Confusion Matrix:\n\n----------------\n ",
        confusion_matrix(y_test, y_pred))
      
    print ("Accuracy score: \n-----------------\n",
    accuracy_score(y_test,y_pred)*100)
      
    print("f1 score Report : \n-------------------\n",
    classification_report(y_test, y_pred))

In [16]:
# Driver code
def driver():
      
    # Building Phase
    data = importdata()
    
    X, Y, X_train, X_test, y_train, y_test = splitdataset(data)
    
    clf_gini = train_using_gini(X_train, X_test, y_train)
    
    clf_entropy = train_using_entropy(X_train, X_test, y_train)
      
    # Operational Phase
    print("Results Using Gini Index:")
      
    # Prediction using gini
    y_pred_gini = prediction(X_test, clf_gini)
    cal_accuracy(y_test, y_pred_gini)
      
    print("Results Using Entropy:")
    # Prediction using entropy
    y_pred_entropy = prediction(X_test, clf_entropy)
    cal_accuracy(y_test, y_pred_entropy)
      
      
# Calling main function
if __name__=="__main__":
    driver()

Dataset Length:  625
Dataset Shape:  (625, 5)
top 5 data in Dataset:
     0  1  2  3  4
0  B  1  1  1  1
1  R  1  1  1  2
2  R  1  1  1  3
3  R  1  1  1  4
4  R  1  1  1  5
Results Using Gini Index:
Predicted values:
['L' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'L' 'L' 'L' 'R' 'L' 'R' 'L' 'L' 'R' 'R'
 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'L' 'R' 'L' 'L' 'R' 'R' 'L' 'R' 'R' 'R'
 'R' 'R' 'R' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'R' 'R'
 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'L' 'L' 'R' 'R'
 'L' 'L' 'R' 'L' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'L'
 'R' 'L' 'R' 'R']
Confusion Matrix:

----------------
  [[ 0  1  2]
 [ 0 36 13]
 [ 0  9 33]]
Accuracy score: 
-----------------
 73.40425531914893
f1 score Report : 
-------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


               precision    recall  f1-score   support

           B       0.00      0.00      0.00         3
           L       0.78      0.73      0.76        49
           R       0.69      0.79      0.73        42

    accuracy                           0.73        94
   macro avg       0.49      0.51      0.50        94
weighted avg       0.72      0.73      0.72        94

Results Using Entropy:
Predicted values:
['L' 'L' 'R' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'R' 'L'
 'R' 'L' 'L' 'L' 'L' 'L' 'L' 'R' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R'
 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'R' 'R'
 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'R' 'L' 'L' 'R' 'L'
 'L' 'L' 'L' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L'
 'R' 'L' 'L' 'R']
Confusion Matrix:

----------------
  [[ 0  2  1]
 [ 0 34 15]
 [ 0  8 34]]
Accuracy score: 
-----------------
 72.3404255319149


  _warn_prf(average, modifier, msg_start, len(result))


f1 score Report : 
-------------------
               precision    recall  f1-score   support

           B       0.00      0.00      0.00         3
           L       0.77      0.69      0.73        49
           R       0.68      0.81      0.74        42

    accuracy                           0.72        94
   macro avg       0.48      0.50      0.49        94
weighted avg       0.71      0.72      0.71        94



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
